Chat & Streaming¶
Basic Chat¶
Send a message and get a response:
import asyncio
import os

from liter_llm import LlmClient


async def main() -> None:
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    response = await client.chat(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "Hello!"}],
    )
    print(response.choices[0].message.content)


asyncio.run(main())
use liter_llm::{
ChatCompletionRequest, ClientConfigBuilder, DefaultClient, LlmClient,
Message, UserContent, UserMessage,
};
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
.build();
let client = DefaultClient::new(config, Some("openai/gpt-4o"))?;
let request = ChatCompletionRequest {
model: "openai/gpt-4o".into(),
messages: vec![Message::User(UserMessage {
content: UserContent::Text("Hello!".into()),
name: None,
})],
..Default::default()
};
let response = client.chat(request).await?;
if let Some(choice) = response.choices.first() {
println!("{}", choice.message.content.as_deref().unwrap_or(""));
}
Ok(())
}
package main
import (
"context"
"fmt"
"os"
llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)
func main() {
client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))
resp, err := client.Chat(context.Background(), &llm.ChatCompletionRequest{
Model: "openai/gpt-4o",
Messages: []llm.Message{
llm.NewTextMessage(llm.RoleUser, "Hello!"),
},
})
if err != nil {
panic(err)
}
if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != nil {
fmt.Println(*resp.Choices[0].Message.Content)
}
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.List;
public class Main {
public static void main(String[] args) throws Exception {
try (var client = LlmClient.builder()
.apiKey(System.getenv("OPENAI_API_KEY"))
.build()) {
var response = client.chat(new ChatCompletionRequest(
"openai/gpt-4o",
List.of(new UserMessage("Hello!"))
));
System.out.println(response.choices().getFirst().message().content());
}
}
}
using LiterLlm;
await using var client = new LlmClient(
apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!);
var response = await client.ChatAsync(new ChatCompletionRequest(
Model: "openai/gpt-4o",
Messages: [new UserMessage("Hello!")]
));
Console.WriteLine(response.Choices[0].Message.Content);
# frozen_string_literal: true
require "liter_llm"
require "json"
client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})
response = JSON.parse(client.chat(JSON.generate(
model: "openai/gpt-4o",
messages: [{ role: "user", content: "Hello!" }]
)))
puts response.dig("choices", 0, "message", "content")
<?php
declare(strict_types=1);
use LiterLlm\LlmClient;
$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');
$response = json_decode($client->chat(json_encode([
'model' => 'openai/gpt-4o',
'messages' => [
['role' => 'user', 'content' => 'Hello!'],
],
])), true);
echo $response['choices'][0]['message']['content'] . PHP_EOL;
Provider Routing¶
liter-llm uses a provider/model prefix convention. The prefix determines which API endpoint, auth header, and parameter mappings to use:
openai/gpt-4o -> OpenAI
anthropic/claude-sonnet-4-20250514 -> Anthropic
groq/llama3-70b -> Groq
google/gemini-2.0-flash -> Google AI
mistral/mistral-large -> Mistral
bedrock/anthropic.claude-v2 -> AWS Bedrock
Switch providers by changing the model string -- no other code changes needed.
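For example, the basic chat call above retargets to Anthropic by swapping the model string. A minimal sketch, assuming the client is configured with a key that is valid for the target provider:

# Same call shape as above -- only the model string changes.
response = await client.chat(
    model="anthropic/claude-sonnet-4-20250514",
    messages=[{"role": "user", "content": "Hello!"}],
)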
Message Roles¶
| Role | Purpose |
|---|---|
| system | Sets the assistant's behavior. Sent once at the start. |
| user | User input -- questions, instructions, data. |
| assistant | Previous assistant responses for multi-turn context. |
| tool | Results from tool calls. |
| developer | Developer-level instructions (some providers). |
Multi-Turn Conversations¶
Append the assistant's response and the next user message, then call chat again:
import asyncio
import os

from liter_llm import LlmClient


async def main() -> None:
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"},
    ]
    response = await client.chat(model="openai/gpt-4o", messages=messages)
    content = response.choices[0].message.content
    print(f"Assistant: {content}")

    # Continue the conversation
    messages.append({"role": "assistant", "content": content})
    messages.append({"role": "user", "content": "What about Germany?"})
    response = await client.chat(model="openai/gpt-4o", messages=messages)
    print(f"Assistant: {response.choices[0].message.content}")

    # Token usage
    if response.usage:
        print(f"Tokens: {response.usage.prompt_tokens} in, {response.usage.completion_tokens} out")


asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";
const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const messages: Array<{ role: string; content: string }> = [
{ role: "system", content: "You are a helpful assistant." },
{ role: "user", content: "What is the capital of France?" },
];
let response = await client.chat({ model: "openai/gpt-4o", messages });
console.log(`Assistant: ${response.choices[0].message.content}`);
// Continue the conversation
messages.push({ role: "assistant", content: response.choices[0].message.content! });
messages.push({ role: "user", content: "What about Germany?" });
response = await client.chat({ model: "openai/gpt-4o", messages });
console.log(`Assistant: ${response.choices[0].message.content}`);
// Token usage
console.log(`Tokens: ${response.usage?.promptTokens} in, ${response.usage?.completionTokens} out`);
use liter_llm::{
ChatCompletionRequest, ClientConfigBuilder, DefaultClient, LlmClient,
Message, UserContent, UserMessage, AssistantMessage, SystemMessage,
};
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
.build();
let client = DefaultClient::new(config, Some("openai/gpt-4o"))?;
let mut messages = vec![
Message::System(SystemMessage {
content: "You are a helpful assistant.".into(),
name: None,
}),
Message::User(UserMessage {
content: UserContent::Text("What is the capital of France?".into()),
name: None,
}),
];
let request = ChatCompletionRequest {
model: "openai/gpt-4o".into(),
messages: messages.clone(),
..Default::default()
};
let response = client.chat(request).await?;
let content = response.choices[0]
.message
.content
.clone()
.unwrap_or_default();
println!("Assistant: {content}");
// Continue the conversation
messages.push(Message::Assistant(AssistantMessage {
content: Some(content),
..Default::default()
}));
messages.push(Message::User(UserMessage {
content: UserContent::Text("What about Germany?".into()),
name: None,
}));
let request = ChatCompletionRequest {
model: "openai/gpt-4o".into(),
messages,
..Default::default()
};
let response = client.chat(request).await?;
if let Some(choice) = response.choices.first() {
println!("Assistant: {}", choice.message.content.as_deref().unwrap_or(""));
}
// Token usage
if let Some(usage) = &response.usage {
println!("Tokens: {} in, {} out", usage.prompt_tokens, usage.completion_tokens);
}
Ok(())
}
package main
import (
"context"
"fmt"
"os"
llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)
func main() {
client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))
messages := []llm.Message{
llm.NewTextMessage(llm.RoleSystem, "You are a helpful assistant."),
llm.NewTextMessage(llm.RoleUser, "What is the capital of France?"),
}
resp, err := client.Chat(context.Background(), &llm.ChatCompletionRequest{
Model: "openai/gpt-4o",
Messages: messages,
})
if err != nil {
panic(err)
}
content := ""
if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != nil {
content = *resp.Choices[0].Message.Content
}
fmt.Printf("Assistant: %s\n", content)
// Continue the conversation
messages = append(messages,
llm.NewTextMessage(llm.RoleAssistant, content),
llm.NewTextMessage(llm.RoleUser, "What about Germany?"),
)
resp, err = client.Chat(context.Background(), &llm.ChatCompletionRequest{
Model: "openai/gpt-4o",
Messages: messages,
})
if err != nil {
panic(err)
}
if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != nil {
fmt.Printf("Assistant: %s\n", *resp.Choices[0].Message.Content)
}
// Token usage
if resp.Usage != nil {
fmt.Printf("Tokens: %d in, %d out\n", resp.Usage.PromptTokens, resp.Usage.CompletionTokens)
}
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.ArrayList;
import java.util.List;
public class Main {
public static void main(String[] args) throws Exception {
try (var client = LlmClient.builder()
.apiKey(System.getenv("OPENAI_API_KEY"))
.build()) {
var messages = new ArrayList<>(List.of(
new SystemMessage("You are a helpful assistant."),
new UserMessage("What is the capital of France?")
));
var response = client.chat(new ChatCompletionRequest(
"openai/gpt-4o", messages
));
var content = response.choices().getFirst().message().content();
System.out.println("Assistant: " + content);
// Continue the conversation
messages.add(new AssistantMessage(content));
messages.add(new UserMessage("What about Germany?"));
response = client.chat(new ChatCompletionRequest(
"openai/gpt-4o", messages
));
System.out.println("Assistant: " + response.choices().getFirst().message().content());
// Token usage
var usage = response.usage();
if (usage != null) {
System.out.printf("Tokens: %d in, %d out%n",
usage.promptTokens(), usage.completionTokens());
}
}
}
}
using LiterLlm;
await using var client = new LlmClient(
apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!);
var messages = new List<IMessage>
{
new SystemMessage("You are a helpful assistant."),
new UserMessage("What is the capital of France?"),
};
var response = await client.ChatAsync(new ChatCompletionRequest(
Model: "openai/gpt-4o", Messages: messages));
var content = response.Choices[0].Message.Content;
Console.WriteLine($"Assistant: {content}");
// Continue the conversation
messages.Add(new AssistantMessage(content!));
messages.Add(new UserMessage("What about Germany?"));
response = await client.ChatAsync(new ChatCompletionRequest(
Model: "openai/gpt-4o", Messages: messages));
Console.WriteLine($"Assistant: {response.Choices[0].Message.Content}");
// Token usage
if (response.Usage is not null)
{
Console.WriteLine($"Tokens: {response.Usage.PromptTokens} in, {response.Usage.CompletionTokens} out");
}
# frozen_string_literal: true
require "liter_llm"
require "json"
client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})
messages = [
{ role: "system", content: "You are a helpful assistant." },
{ role: "user", content: "What is the capital of France?" }
]
response = JSON.parse(client.chat(JSON.generate(
model: "openai/gpt-4o",
messages: messages
)))
content = response.dig("choices", 0, "message", "content")
puts "Assistant: #{content}"
# Continue the conversation
messages << { role: "assistant", content: content }
messages << { role: "user", content: "What about Germany?" }
response = JSON.parse(client.chat(JSON.generate(
model: "openai/gpt-4o",
messages: messages
)))
puts "Assistant: #{response.dig("choices", 0, "message", "content")}"
# Token usage
usage = response["usage"]
if usage
puts "Tokens: #{usage["prompt_tokens"]} in, #{usage["completion_tokens"]} out"
end
<?php
declare(strict_types=1);
use LiterLlm\LlmClient;
$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');
$messages = [
['role' => 'system', 'content' => 'You are a helpful assistant.'],
['role' => 'user', 'content' => 'What is the capital of France?'],
];
$response = json_decode($client->chat(json_encode([
'model' => 'openai/gpt-4o',
'messages' => $messages,
])), true);
$content = $response['choices'][0]['message']['content'];
echo "Assistant: {$content}" . PHP_EOL;
// Continue the conversation
$messages[] = ['role' => 'assistant', 'content' => $content];
$messages[] = ['role' => 'user', 'content' => 'What about Germany?'];
$response = json_decode($client->chat(json_encode([
'model' => 'openai/gpt-4o',
'messages' => $messages,
])), true);
echo "Assistant: {$response['choices'][0]['message']['content']}" . PHP_EOL;
// Token usage
if (isset($response['usage'])) {
echo "Tokens: {$response['usage']['prompt_tokens']} in, {$response['usage']['completion_tokens']} out" . PHP_EOL;
}
messages = [
%{role: "system", content: "You are a helpful assistant."},
%{role: "user", content: "What is the capital of France?"}
]
{:ok, response} =
LiterLlm.chat(
%{model: "openai/gpt-4o", messages: messages},
api_key: System.fetch_env!("OPENAI_API_KEY")
)
content = hd(response["choices"])["message"]["content"]
IO.puts("Assistant: #{content}")
# Continue the conversation
messages =
messages ++
[
%{role: "assistant", content: content},
%{role: "user", content: "What about Germany?"}
]
{:ok, response} =
LiterLlm.chat(
%{model: "openai/gpt-4o", messages: messages},
api_key: System.fetch_env!("OPENAI_API_KEY")
)
IO.puts("Assistant: #{hd(response["choices"])["message"]["content"]}")
# Token usage
usage = response["usage"]
if usage do
IO.puts("Tokens: #{usage["prompt_tokens"]} in, #{usage["completion_tokens"]} out")
end
import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";
await init();
const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const messages: Array<{ role: string; content: string }> = [
{ role: "system", content: "You are a helpful assistant." },
{ role: "user", content: "What is the capital of France?" },
];
let response = await client.chat({ model: "openai/gpt-4o", messages });
console.log(`Assistant: ${response.choices[0].message.content}`);
// Continue the conversation
messages.push({ role: "assistant", content: response.choices[0].message.content! });
messages.push({ role: "user", content: "What about Germany?" });
response = await client.chat({ model: "openai/gpt-4o", messages });
console.log(`Assistant: ${response.choices[0].message.content}`);
// Token usage
console.log(`Tokens: ${response.usage?.promptTokens} in, ${response.usage?.completionTokens} out`);
Streaming¶
Stream tokens as they arrive instead of waiting for the full response:
import asyncio
import os

from liter_llm import LlmClient


async def main() -> None:
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    async for chunk in await client.chat_stream(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "Tell me a story"}],
    ):
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
    print()


asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";
const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const chunks = await client.chatStream({
model: "openai/gpt-4o",
messages: [{ role: "user", content: "Tell me a story" }],
});
for (const chunk of chunks) {
process.stdout.write(chunk.choices[0]?.delta?.content ?? "");
}
console.log();
use futures::StreamExt;
use liter_llm::{
ChatCompletionRequest, ClientConfigBuilder, DefaultClient, LlmClient,
Message, UserContent, UserMessage,
};
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
.build();
let client = DefaultClient::new(config, Some("openai/gpt-4o"))?;
let request = ChatCompletionRequest {
model: "openai/gpt-4o".into(),
messages: vec![Message::User(UserMessage {
content: UserContent::Text("Tell me a story".into()),
name: None,
})],
..Default::default()
};
let mut stream = client.chat_stream(request).await?;
while let Some(chunk) = stream.next().await {
let chunk = chunk?;
if let Some(choice) = chunk.choices.first() {
if let Some(content) = &choice.delta.content {
print!("{content}");
}
}
}
println!();
Ok(())
}
package main
import (
"context"
"fmt"
"os"
llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)
func main() {
client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))
err := client.ChatStream(
context.Background(),
&llm.ChatCompletionRequest{
Model: "openai/gpt-4o",
Messages: []llm.Message{
llm.NewTextMessage(llm.RoleUser, "Tell me a story"),
},
},
func(chunk *llm.ChatCompletionChunk) error {
if len(chunk.Choices) > 0 && chunk.Choices[0].Delta.Content != nil {
fmt.Print(*chunk.Choices[0].Delta.Content)
}
return nil
},
)
if err != nil {
panic(err)
}
fmt.Println()
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.List;
public class Main {
public static void main(String[] args) throws Exception {
try (var client = LlmClient.builder()
.apiKey(System.getenv("OPENAI_API_KEY"))
.build()) {
client.chatStream(new ChatCompletionRequest(
    "openai/gpt-4o",
    List.of(new UserMessage("Tell me a story"))
), chunk -> {
    var delta = chunk.choices().getFirst().delta().content();
    if (delta != null) {
        System.out.print(delta);
    }
});
System.out.println();
}
}
}
using LiterLlm;
await using var client = new LlmClient(
apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!);
var request = new ChatCompletionRequest(
    Model: "openai/gpt-4o",
    Messages: [new UserMessage("Tell me a story")]
);
await foreach (var chunk in client.ChatStreamAsync(request))
{
    var delta = chunk.Choices?[0]?.Delta?.Content;
    if (delta is not null)
    {
        Console.Write(delta);
    }
}
Console.WriteLine();
<?php
declare(strict_types=1);
use LiterLlm\LlmClient;
$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');
$chunksJson = $client->chatStream(json_encode([
'model' => 'openai/gpt-4o',
'messages' => [
['role' => 'user', 'content' => 'Tell me a story'],
],
]));
$chunks = json_decode($chunksJson, true);
foreach ($chunks as $chunk) {
echo $chunk['choices'][0]['delta']['content'] ?? '';
}
echo PHP_EOL;
Each chunk contains choices[].delta.content with incremental text. The final chunk sets finish_reason, typically "stop" (other values such as "length" or "tool_calls" are possible).
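A sketch of watching for that terminal chunk in Python, assuming the streamed choice exposes finish_reason alongside delta:

async for chunk in await client.chat_stream(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Tell me a story"}],
):
    if not chunk.choices:
        continue
    choice = chunk.choices[0]
    if choice.delta.content:
        print(choice.delta.content, end="", flush=True)
    if choice.finish_reason:  # set only on the terminal chunk
        print(f"\n[finish_reason: {choice.finish_reason}]")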
Collecting the Full Response¶
Accumulate deltas to get both real-time output and the complete text:
import asyncio
import os

from liter_llm import LlmClient


async def main() -> None:
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    full_text = ""
    async for chunk in await client.chat_stream(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "Explain quantum computing briefly"}],
    ):
        delta = chunk.choices[0].delta.content if chunk.choices else None
        if delta:
            full_text += delta
            print(delta, end="", flush=True)
    print()
    print(f"\nFull response length: {len(full_text)} characters")


asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";
const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const chunks = await client.chatStream({
model: "openai/gpt-4o",
messages: [{ role: "user", content: "Explain quantum computing briefly" }],
});
let fullText = "";
for (const chunk of chunks) {
const delta = chunk.choices?.[0]?.delta?.content;
if (delta) {
fullText += delta;
process.stdout.write(delta);
}
}
console.log();
console.log(`\nFull response length: ${fullText.length} characters`);
use futures::StreamExt;
use liter_llm::{
ChatCompletionRequest, ClientConfigBuilder, DefaultClient, LlmClient,
Message, UserContent, UserMessage,
};
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
.build();
let client = DefaultClient::new(config, Some("openai/gpt-4o"))?;
let request = ChatCompletionRequest {
model: "openai/gpt-4o".into(),
messages: vec![Message::User(UserMessage {
content: UserContent::Text("Explain quantum computing briefly".into()),
name: None,
})],
..Default::default()
};
let mut stream = client.chat_stream(request).await?;
let mut full_text = String::new();
while let Some(chunk) = stream.next().await {
let chunk = chunk?;
if let Some(choice) = chunk.choices.first() {
if let Some(content) = &choice.delta.content {
full_text.push_str(content);
print!("{content}");
}
}
}
println!();
println!("\nFull response length: {} characters", full_text.len());
Ok(())
}
package main
import (
"context"
"fmt"
"os"
"strings"
llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)
func main() {
client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))
var sb strings.Builder
err := client.ChatStream(context.Background(), &llm.ChatCompletionRequest{
Model: "openai/gpt-4o",
Messages: []llm.Message{
llm.NewTextMessage(llm.RoleUser, "Explain quantum computing briefly"),
},
}, func(chunk *llm.ChatCompletionChunk) error {
if len(chunk.Choices) > 0 && chunk.Choices[0].Delta.Content != nil {
delta := *chunk.Choices[0].Delta.Content
sb.WriteString(delta)
fmt.Print(delta)
}
return nil
})
if err != nil {
panic(err)
}
fmt.Println()
fmt.Printf("\nFull response length: %d characters\n", sb.Len())
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.List;
public class Main {
public static void main(String[] args) throws Exception {
try (var client = LlmClient.builder()
.apiKey(System.getenv("OPENAI_API_KEY"))
.build()) {
var sb = new StringBuilder();
client.chatStream(new ChatCompletionRequest(
"openai/gpt-4o",
List.of(new UserMessage("Explain quantum computing briefly"))
), chunk -> {
var delta = chunk.choices().getFirst().delta().content();
if (delta != null) {
sb.append(delta);
System.out.print(delta);
}
});
System.out.println();
System.out.printf("%nFull response length: %d characters%n", sb.length());
}
}
}
using System.Text;
using LiterLlm;
await using var client = new LlmClient(
apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!);
var request = new ChatCompletionRequest(
Model: "openai/gpt-4o",
Messages: [new UserMessage("Explain quantum computing briefly")]
);
var sb = new StringBuilder();
await foreach (var chunk in client.ChatStreamAsync(request))
{
var delta = chunk.Choices?[0]?.Delta?.Content;
if (delta is not null)
{
sb.Append(delta);
Console.Write(delta);
}
}
Console.WriteLine();
Console.WriteLine($"\nFull response length: {sb.Length} characters");
# frozen_string_literal: true
require "liter_llm"
require "json"
client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})
chunks = JSON.parse(client.chat_stream(JSON.generate(
model: "openai/gpt-4o",
messages: [{ role: "user", content: "Explain quantum computing briefly" }]
)))
full_text = ""
chunks.each do |chunk|
delta = chunk.dig("choices", 0, "delta", "content")
if delta
full_text += delta
print delta
end
end
puts
puts "\nFull response length: #{full_text.length} characters"
<?php
declare(strict_types=1);
use LiterLlm\LlmClient;
$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');
$chunks = json_decode($client->chatStream(json_encode([
'model' => 'openai/gpt-4o',
'messages' => [
['role' => 'user', 'content' => 'Explain quantum computing briefly'],
],
])), true);
$fullText = '';
foreach ($chunks as $chunk) {
$delta = $chunk['choices'][0]['delta']['content'] ?? null;
if ($delta !== null) {
$fullText .= $delta;
echo $delta;
}
}
echo PHP_EOL;
echo "\nFull response length: " . strlen($fullText) . " characters" . PHP_EOL;
{:ok, chunks} =
LiterLlm.chat_stream(
%{
model: "openai/gpt-4o",
messages: [%{role: "user", content: "Explain quantum computing briefly"}]
},
api_key: System.fetch_env!("OPENAI_API_KEY")
)
full_text =
  Enum.reduce(chunks, "", fn chunk, acc ->
    delta =
      case chunk["choices"] do
        [choice | _] -> choice["delta"]["content"]
        _ -> nil
      end

    if delta do
      IO.write(delta)
      acc <> delta
    else
      acc
    end
  end)
IO.puts("")
IO.puts("\nFull response length: #{String.length(full_text)} characters")
import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";
await init();
const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const chunks = await client.chatStream({
model: "openai/gpt-4o",
messages: [{ role: "user", content: "Explain quantum computing briefly" }],
});
let fullText = "";
for (const chunk of chunks) {
const delta = chunk.choices?.[0]?.delta?.content;
if (delta) {
fullText += delta;
process.stdout.write(delta);
}
}
console.log();
console.log(`\nFull response length: ${fullText.length} characters`);
Tool Calling¶
Define tools as JSON schema functions. The model can request tool calls, which you execute and return results for:
import asyncio
import os

from liter_llm import LlmClient


async def main() -> None:
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {"type": "string", "description": "City name"},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    response = await client.chat(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "What is the weather in Berlin?"}],
        tools=tools,
    )
    choice = response.choices[0]
    if choice.message.tool_calls:
        for call in choice.message.tool_calls:
            print(f"Tool: {call.function.name}, Args: {call.function.arguments}")


asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";
const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const tools = [
{
type: "function" as const,
function: {
name: "get_weather",
description: "Get the current weather for a location",
parameters: {
type: "object",
properties: {
location: { type: "string", description: "City name" },
},
required: ["location"],
},
},
},
];
const response = await client.chat({
model: "openai/gpt-4o",
messages: [{ role: "user", content: "What is the weather in Berlin?" }],
tools,
});
for (const call of response.choices[0]?.message?.toolCalls ?? []) {
console.log(`Tool: ${call.function.name}, Args: ${call.function.arguments}`);
}
use liter_llm::{
ChatCompletionRequest, ClientConfigBuilder, DefaultClient, FunctionDefinition,
LlmClient, Message, Tool, UserContent, UserMessage,
};
use serde_json::json;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
.build();
let client = DefaultClient::new(config, Some("openai/gpt-4o"))?;
let tools = vec![Tool {
r#type: "function".into(),
function: FunctionDefinition {
name: "get_weather".into(),
description: Some("Get the current weather for a location".into()),
parameters: Some(json!({
"type": "object",
"properties": {
"location": { "type": "string", "description": "City name" }
},
"required": ["location"]
})),
},
}];
let request = ChatCompletionRequest {
model: "openai/gpt-4o".into(),
messages: vec![Message::User(UserMessage {
content: UserContent::Text("What is the weather in Berlin?".into()),
name: None,
})],
tools: Some(tools),
..Default::default()
};
let response = client.chat(request).await?;
if let Some(tool_calls) = &response.choices[0].message.tool_calls {
for call in tool_calls {
println!("Tool: {}, Args: {}", call.function.name, call.function.arguments);
}
}
Ok(())
}
package main
import (
"context"
"encoding/json"
"fmt"
"os"
llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)
func main() {
client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))
tools := []llm.Tool{
{
Type: "function",
Function: llm.FunctionDefinition{
Name: "get_weather",
Description: "Get the current weather for a location",
Parameters: json.RawMessage(`{
"type": "object",
"properties": {
"location": {"type": "string", "description": "City name"}
},
"required": ["location"]
}`),
},
},
}
resp, err := client.Chat(context.Background(), &llm.ChatCompletionRequest{
Model: "openai/gpt-4o",
Messages: []llm.Message{
llm.NewTextMessage(llm.RoleUser, "What is the weather in Berlin?"),
},
Tools: tools,
})
if err != nil {
panic(err)
}
for _, call := range resp.Choices[0].Message.ToolCalls {
fmt.Printf("Tool: %s, Args: %s\n", call.Function.Name, call.Function.Arguments)
}
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.List;
import java.util.Map;
public class Main {
public static void main(String[] args) throws Exception {
try (var client = LlmClient.builder()
.apiKey(System.getenv("OPENAI_API_KEY"))
.build()) {
var tools = List.of(new Tool(
"function",
new FunctionDefinition(
"get_weather",
"Get the current weather for a location",
Map.of(
"type", "object",
"properties", Map.of(
"location", Map.of("type", "string", "description", "City name")
),
"required", List.of("location")
)
)
));
var response = client.chat(new ChatCompletionRequest(
"openai/gpt-4o",
List.of(new UserMessage("What is the weather in Berlin?")),
tools
));
for (var call : response.choices().getFirst().message().toolCalls()) {
System.out.printf("Tool: %s, Args: %s%n",
call.function().name(), call.function().arguments());
}
}
}
}
using LiterLlm;
await using var client = new LlmClient(
apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!);
var tools = new[]
{
new Tool(
Type: "function",
Function: new FunctionDefinition(
Name: "get_weather",
Description: "Get the current weather for a location",
Parameters: new
{
type = "object",
properties = new
{
location = new { type = "string", description = "City name" }
},
required = new[] { "location" }
}
)
)
};
var response = await client.ChatAsync(new ChatCompletionRequest(
Model: "openai/gpt-4o",
Messages: [new UserMessage("What is the weather in Berlin?")],
Tools: tools
));
foreach (var call in response.Choices[0].Message.ToolCalls ?? [])
{
Console.WriteLine($"Tool: {call.Function.Name}, Args: {call.Function.Arguments}");
}
# frozen_string_literal: true
require "liter_llm"
require "json"
client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})
tools = [
{
type: "function",
function: {
name: "get_weather",
description: "Get the current weather for a location",
parameters: {
type: "object",
properties: {
location: { type: "string", description: "City name" }
},
required: ["location"]
}
}
}
]
response = JSON.parse(client.chat(JSON.generate(
model: "openai/gpt-4o",
messages: [{ role: "user", content: "What is the weather in Berlin?" }],
tools: tools
)))
response.dig("choices", 0, "message", "tool_calls")&.each do |call|
puts "Tool: #{call.dig("function", "name")}, Args: #{call.dig("function", "arguments")}"
end
<?php
declare(strict_types=1);
use LiterLlm\LlmClient;
$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');
$tools = [
[
'type' => 'function',
'function' => [
'name' => 'get_weather',
'description' => 'Get the current weather for a location',
'parameters' => [
'type' => 'object',
'properties' => [
'location' => ['type' => 'string', 'description' => 'City name'],
],
'required' => ['location'],
],
],
],
];
$response = json_decode($client->chat(json_encode([
'model' => 'openai/gpt-4o',
'messages' => [
['role' => 'user', 'content' => 'What is the weather in Berlin?'],
],
'tools' => $tools,
])), true);
foreach ($response['choices'][0]['message']['tool_calls'] ?? [] as $call) {
echo "Tool: {$call['function']['name']}, Args: {$call['function']['arguments']}" . PHP_EOL;
}
tools = [
%{
type: "function",
function: %{
name: "get_weather",
description: "Get the current weather for a location",
parameters: %{
type: "object",
properties: %{
location: %{type: "string", description: "City name"}
},
required: ["location"]
}
}
}
]
{:ok, response} =
LiterLlm.chat(
%{
model: "openai/gpt-4o",
messages: [%{role: "user", content: "What is the weather in Berlin?"}],
tools: tools
},
api_key: System.fetch_env!("OPENAI_API_KEY")
)
for call <- hd(response["choices"])["message"]["tool_calls"] || [] do
IO.puts("Tool: #{call["function"]["name"]}, Args: #{call["function"]["arguments"]}")
end
import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";
await init();
const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const tools = [
{
type: "function" as const,
function: {
name: "get_weather",
description: "Get the current weather for a location",
parameters: {
type: "object",
properties: {
location: { type: "string", description: "City name" },
},
required: ["location"],
},
},
},
];
const response = await client.chat({
model: "openai/gpt-4o",
messages: [{ role: "user", content: "What is the weather in Berlin?" }],
tools,
});
for (const call of response.choices[0]?.message?.toolCalls ?? []) {
console.log(`Tool: ${call.function.name}, Args: ${call.function.arguments}`);
}
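The examples above stop once the model requests a call. Completing the loop means running the function yourself, sending the result back in a tool message, and calling chat again. A hedged Python sketch continuing the Python example above (get_weather is a hypothetical local function, and the message dicts follow the OpenAI wire shapes, which the typed bindings may express differently):

import json

messages = [{"role": "user", "content": "What is the weather in Berlin?"}]
# ... first chat() call as above, yielding `response` with tool_calls ...
call = response.choices[0].message.tool_calls[0]
args = json.loads(call.function.arguments)
result = get_weather(**args)  # your implementation

# Echo the assistant's tool call, then attach the result as a tool message.
messages.append({
    "role": "assistant",
    "content": None,
    "tool_calls": [{
        "id": call.id,
        "type": "function",
        "function": {"name": call.function.name, "arguments": call.function.arguments},
    }],
})
messages.append({"role": "tool", "tool_call_id": call.id, "content": json.dumps(result)})

# The model now answers in natural language using the tool result.
response = await client.chat(model="openai/gpt-4o", messages=messages, tools=tools)
print(response.choices[0].message.content)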
Chat Parameters¶
All chat parameters work with both chat and chat_stream:
| Parameter | Type | Description |
|---|---|---|
| model | string | Provider/model identifier (e.g. "openai/gpt-4o") |
| messages | array | Conversation messages |
| temperature | float | Sampling temperature (0.0-2.0) |
| max_tokens | int | Maximum tokens to generate |
| top_p | float | Nucleus sampling threshold |
| n | int | Number of completions to generate |
| stop | string/array | Stop sequences |
| tools | array | Tool/function definitions |
| tool_choice | string/object | Tool selection strategy |
| response_format | object | Force JSON output ({"type": "json_object"}) |
| seed | int | Deterministic sampling seed |
| presence_penalty | float | Penalize new topics (-2.0 to 2.0) |
| frequency_penalty | float | Penalize repetition (-2.0 to 2.0) |
| reasoning_effort | string | Reasoning budget for o-series and extended-thinking models |
| extra_body | object | Provider-specific fields passed through verbatim |
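A sketch combining several of these on one call, using the Python keyword style from the earlier examples (values are illustrative):

response = await client.chat(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Name three primary colors."}],
    temperature=0.2,   # low randomness
    max_tokens=100,    # cap the completion length
    stop=["\n\n"],     # stop at the first blank line
    seed=42,           # best-effort determinism
)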
Reasoning Effort¶
OpenAI o-series models and Anthropic extended-thinking models accept a reasoning_effort parameter that controls how much compute the model spends on internal reasoning before producing the final response.
For OpenAI o-series models the accepted values are "low", "medium", and "high". Anthropic extended thinking uses a budget_tokens integer instead; the binding maps reasoning_effort onto budget_tokens when converting the request for the target provider.
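A Python sketch (the model name is illustrative):

response = await client.chat(
    model="openai/o3-mini",
    messages=[{"role": "user", "content": "Outline a proof that sqrt(2) is irrational."}],
    reasoning_effort="high",  # spend more compute on internal reasoning
)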
Structured Outputs (JSON Schema)¶
Pass a JSON Schema to response_format to constrain the model output to a specific structure. Use "type": "json_schema" instead of "type": "json_object" for schema-validated output.
schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
    },
    "required": ["name", "age"],
    "additionalProperties": False,
}
response = await client.chat(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Extract: Alice is 30 years old."}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "person",
            "strict": True,
            "schema": schema,
        },
    },
)
const response = await client.chat({
model: "openai/gpt-4o",
messages: [{ role: "user", content: "Extract: Alice is 30 years old." }],
responseFormat: {
type: "json_schema",
jsonSchema: {
name: "person",
strict: true,
schema: {
type: "object",
properties: {
name: { type: "string" },
age: { type: "integer" },
},
required: ["name", "age"],
additionalProperties: false,
},
},
},
});
use serde_json::json;
let req = ChatCompletionRequest {
model: "openai/gpt-4o".into(),
messages: vec![/* ... */],
response_format: Some(json!({
"type": "json_schema",
"json_schema": {
"name": "person",
"strict": true,
"schema": {
"type": "object",
"properties": {
"name": { "type": "string" },
"age": { "type": "integer" }
},
"required": ["name", "age"],
"additionalProperties": false
}
}
})),
..Default::default()
};
Structured output availability depends on provider support. OpenAI gpt-4o and later support json_schema. Providers that do not support it fall back to json_object or return EndpointNotSupported.
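When the request succeeds, the message content is a JSON string that conforms to the schema, so it parses directly:

import json

person = json.loads(response.choices[0].message.content)
print(person["name"], person["age"])  # Alice 30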
extra_body¶
Pass provider-specific parameters that liter-llm does not model natively via extra_body. Fields in extra_body are merged into the top-level request JSON before it is sent to the provider.
extra_body fields take lower precedence than named fields. If a named field and an extra_body key conflict, the named field wins.
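A Python sketch passing one provider-specific flag (safe_prompt is a Mistral parameter, used here purely as an illustration):

response = await client.chat(
    model="mistral/mistral-large",
    messages=[{"role": "user", "content": "Hello!"}],
    extra_body={"safe_prompt": True},  # merged into the request JSON as-is
)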
Audio Content Parts¶
Send audio inline in a user message using the input_audio content part type. The audio must be base64-encoded.
import base64

with open("audio.wav", "rb") as f:
    audio_b64 = base64.b64encode(f.read()).decode()

response = await client.chat(
    model="openai/gpt-4o-audio-preview",
    messages=[{
        "role": "user",
        "content": [
            {
                "type": "input_audio",
                "input_audio": {
                    "data": audio_b64,
                    "format": "wav",
                },
            },
            {"type": "text", "text": "Transcribe and summarize this audio."},
        ],
    }],
)
import { readFileSync } from "fs";
const audioB64 = readFileSync("audio.wav").toString("base64");
const response = await client.chat({
model: "openai/gpt-4o-audio-preview",
messages: [{
role: "user",
content: [
{
type: "input_audio",
inputAudio: { data: audioB64, format: "wav" },
},
{ type: "text", text: "Transcribe and summarize this audio." },
],
}],
});
use base64::{Engine, engine::general_purpose::STANDARD};
use liter_llm::types::{ContentPart, InputAudio};
let audio_bytes = std::fs::read("audio.wav")?;
let audio_b64 = STANDARD.encode(&audio_bytes);
let content = vec![
ContentPart::InputAudio {
input_audio: InputAudio {
data: audio_b64,
format: "wav".into(),
},
},
ContentPart::Text { text: "Transcribe and summarize this audio.".into() },
];
Supported formats depend on the provider. OpenAI gpt-4o-audio-preview accepts wav and mp3.
AWS EventStream Streaming¶
When routing to Bedrock providers, responses arrive in AWS EventStream framing rather than SSE. liter-llm handles the framing transparently. chat_stream works the same way regardless of provider.
// EventStream framing is transparent to the caller.
let mut stream = client.chat_stream(ChatCompletionRequest {
    model: "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0".into(),
    messages: vec![/* ... */],
    ..Default::default()
}).await?;
// Consume exactly like any other stream.
while let Some(chunk) = stream.next().await {
    let chunk = chunk?;
    if let Some(choice) = chunk.choices.first() {
        if let Some(content) = choice.delta.content.as_deref() {
            print!("{content}");
        }
    }
}
Tower streaming buffer
When Bedrock streaming is routed through the Tower middleware stack (LlmService), the entire stream is buffered in memory before chunks are yielded. This is a Tower Service trait constraint. For unbuffered Bedrock streaming, call LlmClient::chat_stream() directly, bypassing the Tower stack. See Architecture for details.