Chat & Streaming¶
Basic Chat¶
Send a message and get a response:
import asyncio
import os
from liter_llm import create_client
from liter_llm._internal_bindings import ChatCompletionRequest
async def main() -> None:
client = create_client(api_key=os.environ["OPENAI_API_KEY"])
request = ChatCompletionRequest.from_json(
'{"model":"openai/gpt-4o","messages":[{"role":"user","content":"Hello!"}]}'
)
response = await client.chat(request)
print(response.choices[0].message.content)
asyncio.run(main())
use liter_llm::{
ChatCompletionRequest, ClientConfigBuilder, DefaultClient, LlmClient,
Message, UserContent, UserMessage,
};
/// Sends a single user message to `openai/gpt-4o` and prints the reply.
///
/// The API key is read from the `OPENAI_API_KEY` environment variable; a
/// missing variable, a client construction failure, or a failed request is
/// propagated to the caller as an error.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let api_key = std::env::var("OPENAI_API_KEY")?;
    let client = DefaultClient::new(
        ClientConfigBuilder::new(api_key).build(),
        Some("openai/gpt-4o"),
    )?;

    let prompt = Message::User(UserMessage {
        content: UserContent::Text("Hello!".into()),
        name: None,
    });
    let request = ChatCompletionRequest {
        model: "openai/gpt-4o".into(),
        messages: vec![prompt],
        ..Default::default()
    };

    let response = client.chat(request).await?;

    // Nothing to print when the provider returned no choices.
    let Some(choice) = response.choices.first() else {
        return Ok(());
    };
    println!("{}", choice.message.content.as_deref().unwrap_or(""));
    Ok(())
}
package main
import (
"encoding/json"
"fmt"
"os"
llm "github.com/xberg-io/liter-llm/packages/go"
)
func main() {
client, err := llm.CreateClient(os.Getenv("OPENAI_API_KEY"), nil, nil, nil, nil)
if err != nil {
panic(err)
}
var req llm.ChatCompletionRequest
if err := json.Unmarshal([]byte(`{
"model": "openai/gpt-4o-mini",
"messages": [{"role": "user", "content": "Hello!"}]
}`), &req); err != nil {
panic(err)
}
resp, err := client.Chat(req)
if err != nil {
panic(err)
}
if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != nil {
fmt.Println(*resp.Choices[0].Message.Content)
}
}
import io.xberg.literllm.*;
import java.util.List;
public class Main {
public static void main(String[] args) throws Exception {
try (var client = LiterLlm.createClient(System.getenv("OPENAI_API_KEY"))) {
var request = ChatCompletionRequest.builder()
.withModel("openai/gpt-4o")
.withMessages(List.of(
new Message.User(new UserMessage(UserContent.of("Hello!"), null))
))
.build();
var response = client.chat(request);
System.out.println(response.choices().getFirst().message().content());
}
}
}
using LiterLlm;
using var client = LiterLlmLib.CreateClient(
apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!,
baseUrl: null, timeoutSecs: null, maxRetries: null, modelHint: null);
var response = await client.ChatAsync(new ChatCompletionRequest
{
Model = "openai/gpt-4o",
Messages = [new Message.User(new UserMessage { Content = UserContent.Of("Hello!") })]
});
Console.WriteLine(response.Choices[0].Message.Content);
# frozen_string_literal: true
require 'liter_llm'
client = LiterLlm.create_client(ENV.fetch('OPENAI_API_KEY'))
result = client.chat_async(
LiterLlm::ChatCompletionRequest.new(
model: 'openai/gpt-4o-mini',
messages: [{ 'role' => 'user', 'content' => 'Hello!' }]
)
)
puts result.choices[0].message.content
<?php
declare(strict_types=1);
use Liter\Llm\LiterLlm;
use Liter\Llm\ChatCompletionRequest;
$client = LiterLlm::createClient(getenv('OPENAI_API_KEY') ?: '');
$request = ChatCompletionRequest::from_json(json_encode([
'model' => 'openai/gpt-4o-mini',
'messages' => [['role' => 'user', 'content' => 'Hello!']],
]));
$result = $client->chat($request);
echo $result->choices[0]->message->content . PHP_EOL;
import init, { createClient, WasmChatCompletionRequest } from "@xberg-io/liter-llm-wasm";
await init();
const client = createClient(process.env.OPENAI_API_KEY!);
const request = WasmChatCompletionRequest.default();
request.model = "openai/gpt-4o";
request.messages = [{ role: "user", content: "Hello!" }];
const response = await client.chat(request);
console.log(response.choices[0].message.content);
Provider Routing¶
Liter-llm uses a provider/model prefix convention. The prefix determines which API endpoint, auth header, and parameter mappings to use:
openai/gpt-4o -> OpenAI
anthropic/claude-sonnet-4-20250514 -> Anthropic
groq/llama3-70b -> Groq
google/gemini-2.0-flash -> Google AI
mistral/mistral-large -> Mistral
bedrock/anthropic.claude-v2 -> AWS Bedrock
Switch providers by changing the model string -- no other code changes needed.
Message Roles¶
| Role | Purpose |
|---|---|
| `system` | Sets the assistant's behavior. Sent once at the start. |
| `user` | User input -- questions, instructions, data. |
| `assistant` | Previous assistant responses for multi-turn context. |
| `tool` | Results from tool calls. |
| `developer` | Developer-level instructions (some providers). |
Multi-Turn Conversations¶
Append the assistant's response and the next user message, then call chat again:
import asyncio
import json
import os
from liter_llm import create_client
from liter_llm._internal_bindings import ChatCompletionRequest
async def main() -> None:
client = create_client(api_key=os.environ["OPENAI_API_KEY"])
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is the capital of France?"},
]
first = await client.chat(
ChatCompletionRequest.from_json(json.dumps({"model": "openai/gpt-4o", "messages": messages}))
)
reply = first.choices[0].message.content
print(f"Assistant: {reply}")
messages.append({"role": "assistant", "content": reply})
messages.append({"role": "user", "content": "What about Germany?"})
second = await client.chat(
ChatCompletionRequest.from_json(json.dumps({"model": "openai/gpt-4o", "messages": messages}))
)
print(f"Assistant: {second.choices[0].message.content}")
if second.usage:
print(f"Tokens: {second.usage.prompt_tokens} in, {second.usage.completion_tokens} out")
asyncio.run(main())
import { createClient } from "@xberg-io/liter-llm";
const client = createClient(process.env.OPENAI_API_KEY!);
const messages: Array<{ role: string; content: string }> = [
{ role: "system", content: "You are a helpful assistant." },
{ role: "user", content: "What is the capital of France?" },
];
let response = await client.chat({ model: "openai/gpt-4o", messages });
console.log(`Assistant: ${response.choices[0].message.content}`);
messages.push({ role: "assistant", content: response.choices[0].message.content! });
messages.push({ role: "user", content: "What about Germany?" });
response = await client.chat({ model: "openai/gpt-4o", messages });
console.log(`Assistant: ${response.choices[0].message.content}`);
console.log(`Tokens: ${response.usage?.promptTokens} in, ${response.usage?.completionTokens} out`);
use liter_llm::{
ChatCompletionRequest, ClientConfigBuilder, DefaultClient, LlmClient,
Message, UserContent, UserMessage, AssistantMessage, SystemMessage,
};
/// Runs a two-turn conversation against `openai/gpt-4o`.
///
/// The first request carries a system prompt and a user question. The
/// assistant's reply and a follow-up question are then appended to the same
/// history and sent again, and the token usage of the second response is
/// printed when the provider reports it.
///
/// The API key is read from the `OPENAI_API_KEY` environment variable; any
/// failure is propagated to the caller as an error.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("openai/gpt-4o"))?;

    let mut messages = vec![
        Message::System(SystemMessage {
            content: "You are a helpful assistant.".into(),
            name: None,
        }),
        Message::User(UserMessage {
            content: UserContent::Text("What is the capital of France?".into()),
            name: None,
        }),
    ];

    // The history is cloned here because it is extended and sent again below.
    let request = ChatCompletionRequest {
        model: "openai/gpt-4o".into(),
        messages: messages.clone(),
        ..Default::default()
    };
    let response = client.chat(request).await?;

    // `first()` rather than `[0]`: a response without choices yields an empty
    // reply instead of an out-of-bounds panic.
    let content = response
        .choices
        .first()
        .and_then(|choice| choice.message.content.clone())
        .unwrap_or_default();
    println!("Assistant: {content}");

    // Continue the conversation
    messages.push(Message::Assistant(AssistantMessage {
        content: Some(content),
        ..Default::default()
    }));
    messages.push(Message::User(UserMessage {
        content: UserContent::Text("What about Germany?".into()),
        name: None,
    }));

    let request = ChatCompletionRequest {
        model: "openai/gpt-4o".into(),
        messages,
        ..Default::default()
    };
    let response = client.chat(request).await?;
    if let Some(choice) = response.choices.first() {
        println!("Assistant: {}", choice.message.content.as_deref().unwrap_or(""));
    }

    // Token usage
    if let Some(usage) = &response.usage {
        println!("Tokens: {} in, {} out", usage.prompt_tokens, usage.completion_tokens);
    }
    Ok(())
}
package main
import (
"encoding/json"
"fmt"
"os"
llm "github.com/xberg-io/liter-llm/packages/go"
)
// main runs a two-turn conversation: it asks a question, appends the
// assistant's answer plus a follow-up question to the history, asks again,
// and prints the token usage of the second response when it is reported.
func main() {
	client, err := llm.CreateClient(os.Getenv("OPENAI_API_KEY"), nil, nil, nil, nil)
	if err != nil {
		panic(err)
	}

	messages := []map[string]string{
		{"role": "system", "content": "You are a helpful assistant."},
		{"role": "user", "content": "What is the capital of France?"},
	}

	// buildRequest converts the conversation into a typed request by
	// round-tripping it through JSON. Encoding and decoding errors are
	// surfaced instead of being discarded, and every call starts from a
	// fresh request value so nothing from an earlier turn can linger.
	buildRequest := func(history []map[string]string) llm.ChatCompletionRequest {
		raw, err := json.Marshal(map[string]any{
			"model":    "openai/gpt-4o-mini",
			"messages": history,
		})
		if err != nil {
			panic(err)
		}
		var req llm.ChatCompletionRequest
		if err := json.Unmarshal(raw, &req); err != nil {
			panic(err)
		}
		return req
	}

	resp, err := client.Chat(buildRequest(messages))
	if err != nil {
		panic(err)
	}
	answer := ""
	if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != nil {
		answer = *resp.Choices[0].Message.Content
	}
	fmt.Printf("Assistant: %s\n", answer)

	// Continue the conversation with the assistant's reply and a follow-up.
	messages = append(messages,
		map[string]string{"role": "assistant", "content": answer},
		map[string]string{"role": "user", "content": "What about Germany?"},
	)

	resp, err = client.Chat(buildRequest(messages))
	if err != nil {
		panic(err)
	}
	if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != nil {
		fmt.Printf("Assistant: %s\n", *resp.Choices[0].Message.Content)
	}
	if resp.Usage != nil {
		fmt.Printf("Tokens: %d in, %d out\n", resp.Usage.PromptTokens, resp.Usage.CompletionTokens)
	}
}
import io.xberg.literllm.*;
import java.util.ArrayList;
import java.util.List;
public class Main {
public static void main(String[] args) throws Exception {
try (var client = LiterLlm.createClient(System.getenv("OPENAI_API_KEY"))) {
var messages = new ArrayList<Message>(List.of(
new Message.System(new SystemMessage("You are a helpful assistant.", null)),
new Message.User(new UserMessage(UserContent.of("What is the capital of France?"), null))
));
var response = client.chat(ChatCompletionRequest.builder()
.withModel("openai/gpt-4o").withMessages(messages).build());
var content = response.choices().getFirst().message().content();
System.out.println("Assistant: " + content);
messages.add(new Message.Assistant(new AssistantMessage(content, null, null, null, null)));
messages.add(new Message.User(new UserMessage(UserContent.of("What about Germany?"), null)));
response = client.chat(ChatCompletionRequest.builder()
.withModel("openai/gpt-4o").withMessages(messages).build());
System.out.println("Assistant: " + response.choices().getFirst().message().content());
var usage = response.usage();
if (usage != null) {
System.out.printf("Tokens: %d in, %d out%n",
usage.promptTokens(), usage.completionTokens());
}
}
}
}
using LiterLlm;
using var client = LiterLlmLib.CreateClient(
apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!,
baseUrl: null, timeoutSecs: null, maxRetries: null, modelHint: null);
var messages = new List<Message>
{
new Message.System(new SystemMessage { Content = "You are a helpful assistant." }),
new Message.User(new UserMessage { Content = UserContent.Of("What is the capital of France?") }),
};
var response = await client.ChatAsync(new ChatCompletionRequest { Model = "openai/gpt-4o", Messages = messages });
var content = response.Choices[0].Message.Content;
Console.WriteLine($"Assistant: {content}");
messages.Add(new Message.Assistant(new AssistantMessage { Content = content }));
messages.Add(new Message.User(new UserMessage { Content = UserContent.Of("What about Germany?") }));
response = await client.ChatAsync(new ChatCompletionRequest { Model = "openai/gpt-4o", Messages = messages });
Console.WriteLine($"Assistant: {response.Choices[0].Message.Content}");
if (response.Usage is not null)
{
Console.WriteLine($"Tokens: {response.Usage.PromptTokens} in, {response.Usage.CompletionTokens} out");
}
# frozen_string_literal: true
require 'liter_llm'
client = LiterLlm.create_client(ENV.fetch('OPENAI_API_KEY'))
messages = [
{ 'role' => 'system', 'content' => 'You are a helpful assistant.' },
{ 'role' => 'user', 'content' => 'What is the capital of France?' }
]
result = client.chat_async(
LiterLlm::ChatCompletionRequest.new(model: 'openai/gpt-4o-mini', messages: messages)
)
answer = result.choices[0].message.content
puts "Assistant: #{answer}"
messages << { 'role' => 'assistant', 'content' => answer }
messages << { 'role' => 'user', 'content' => 'What about Germany?' }
result = client.chat_async(
LiterLlm::ChatCompletionRequest.new(model: 'openai/gpt-4o-mini', messages: messages)
)
puts "Assistant: #{result.choices[0].message.content}"
usage = result.usage
puts "Tokens: #{usage.prompt_tokens} in, #{usage.completion_tokens} out" if usage
<?php
declare(strict_types=1);
use Liter\Llm\LiterLlm;
use Liter\Llm\ChatCompletionRequest;
$client = LiterLlm::createClient(getenv('OPENAI_API_KEY') ?: '');
$messages = [
['role' => 'system', 'content' => 'You are a helpful assistant.'],
['role' => 'user', 'content' => 'What is the capital of France?'],
];
$result = $client->chat(ChatCompletionRequest::from_json(json_encode([
'model' => 'openai/gpt-4o-mini',
'messages' => $messages,
])));
$answer = $result->choices[0]->message->content;
echo "Assistant: {$answer}" . PHP_EOL;
$messages[] = ['role' => 'assistant', 'content' => $answer];
$messages[] = ['role' => 'user', 'content' => 'What about Germany?'];
$result = $client->chat(ChatCompletionRequest::from_json(json_encode([
'model' => 'openai/gpt-4o-mini',
'messages' => $messages,
])));
echo "Assistant: {$result->choices[0]->message->content}" . PHP_EOL;
if ($result->usage !== null) {
echo "Tokens: {$result->usage->promptTokens} in, {$result->usage->completionTokens} out" . PHP_EOL;
}
{:ok, client} = LiterLlm.create_client(System.get_env("OPENAI_API_KEY"))
messages = [
%{role: "system", content: "You are a helpful assistant."},
%{role: "user", content: "What is the capital of France?"}
]
{:ok, result} =
LiterLlm.defaultclient_chat_async(
client,
Jason.encode!(%{model: "openai/gpt-4o-mini", messages: messages})
)
answer = Enum.at(result.choices, 0).message.content
IO.puts("Assistant: #{answer}")
messages =
messages ++
[
%{role: "assistant", content: answer},
%{role: "user", content: "What about Germany?"}
]
{:ok, result} =
LiterLlm.defaultclient_chat_async(
client,
Jason.encode!(%{model: "openai/gpt-4o-mini", messages: messages})
)
IO.puts("Assistant: #{Enum.at(result.choices, 0).message.content}")
if result.usage do
IO.puts("Tokens: #{result.usage.prompt_tokens} in, #{result.usage.completion_tokens} out")
end
import init, { createClient, WasmChatCompletionRequest } from "@xberg-io/liter-llm-wasm";
await init();
const client = createClient(process.env.OPENAI_API_KEY!);
const messages: Array<{ role: string; content: string }> = [
{ role: "system", content: "You are a helpful assistant." },
{ role: "user", content: "What is the capital of France?" },
];
const first = WasmChatCompletionRequest.default();
first.model = "openai/gpt-4o";
first.messages = messages;
let response = await client.chat(first);
console.log(`Assistant: ${response.choices[0].message.content}`);
messages.push({ role: "assistant", content: response.choices[0].message.content! });
messages.push({ role: "user", content: "What about Germany?" });
const second = WasmChatCompletionRequest.default();
second.model = "openai/gpt-4o";
second.messages = messages;
response = await client.chat(second);
console.log(`Assistant: ${response.choices[0].message.content}`);
console.log(`Tokens: ${response.usage?.promptTokens} in, ${response.usage?.completionTokens} out`);
Streaming¶
Stream tokens as they arrive instead of waiting for the full response:
import asyncio
import os
from liter_llm import create_client
from liter_llm._internal_bindings import ChatCompletionRequest
async def main() -> None:
client = create_client(api_key=os.environ["OPENAI_API_KEY"])
request = ChatCompletionRequest.from_json(
'{"model":"openai/gpt-4o","messages":[{"role":"user","content":"Tell me a story"}],"stream":true}'
)
async for chunk in client.chat_stream(request):
if chunk.choices and chunk.choices[0].delta.content:
print(chunk.choices[0].delta.content, end="", flush=True)
print()
asyncio.run(main())
import { createClient } from "@xberg-io/liter-llm";
const client = createClient(process.env.OPENAI_API_KEY!);
const chunks = await client.chatStream({
model: "openai/gpt-4o",
messages: [{ role: "user", content: "Tell me a story" }],
});
for await (const chunk of chunks) {
process.stdout.write(chunk.choices?.[0]?.delta?.content ?? "");
}
console.log();
use futures::StreamExt;
use liter_llm::{
ChatCompletionRequest, ClientConfigBuilder, DefaultClient, LlmClient,
Message, UserContent, UserMessage,
};
/// Streams a completion from `openai/gpt-4o` and prints each text delta as
/// soon as it arrives.
///
/// The API key is read from the `OPENAI_API_KEY` environment variable. Errors
/// from client construction, the request, any individual chunk, or flushing
/// stdout are propagated to the caller.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("openai/gpt-4o"))?;

    let request = ChatCompletionRequest {
        model: "openai/gpt-4o".into(),
        messages: vec![Message::User(UserMessage {
            content: UserContent::Text("Tell me a story".into()),
            name: None,
        })],
        ..Default::default()
    };

    let mut stream = client.chat_stream(request).await?;
    while let Some(chunk) = stream.next().await {
        let chunk = chunk?;
        // Chunks without choices or without text are skipped.
        let delta = chunk
            .choices
            .first()
            .and_then(|choice| choice.delta.content.as_deref());
        if let Some(content) = delta {
            print!("{content}");
            // stdout is line-buffered, so without an explicit flush the text
            // would only show up once a newline is written rather than token
            // by token.
            std::io::Write::flush(&mut std::io::stdout())?;
        }
    }
    println!();
    Ok(())
}
package main
import (
"encoding/json"
"fmt"
"os"
llm "github.com/xberg-io/liter-llm/packages/go"
)
func main() {
client, err := llm.CreateClient(os.Getenv("OPENAI_API_KEY"), nil, nil, nil, nil)
if err != nil {
panic(err)
}
var req llm.ChatCompletionRequest
if err := json.Unmarshal([]byte(`{
"model": "openai/gpt-4o-mini",
"messages": [{"role": "user", "content": "Count from 1 to 5."}],
"stream": true
}`), &req); err != nil {
panic(err)
}
stream, err := client.ChatStream(req)
if err != nil {
panic(err)
}
for chunk := range stream {
if len(chunk.Choices) > 0 && chunk.Choices[0].Delta.Content != nil {
fmt.Print(*chunk.Choices[0].Delta.Content)
}
}
fmt.Println()
}
import io.xberg.literllm.*;
import java.util.List;
/**
 * Streams a chat completion and prints each text delta as it arrives.
 *
 * <p>The API key is read from the {@code OPENAI_API_KEY} environment variable.
 */
public class Main {
    public static void main(String[] args) throws Exception {
        try (var client = LiterLlm.createClient(System.getenv("OPENAI_API_KEY"))) {
            var request = ChatCompletionRequest.builder()
                .withModel("openai/gpt-4o-mini")
                .withMessages(List.of(
                    new Message.User(new UserMessage(UserContent.of("Hello"), null))
                ))
                .build();

            var stream = client.chatStream(request);
            var iterator = stream.iterator();
            while (iterator.hasNext()) {
                var chunk = iterator.next();
                var choices = chunk.choices();
                // getFirst() throws NoSuchElementException on an empty list,
                // so chunks that carry no choices are skipped.
                if (choices.isEmpty()) {
                    continue;
                }
                var delta = choices.getFirst().delta().content();
                if (delta != null) {
                    System.out.print(delta);
                }
            }
            System.out.println();
        }
    }
}
using LiterLlm;
using var client = LiterLlmLib.CreateClient(
apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!,
baseUrl: null, timeoutSecs: null, maxRetries: null, modelHint: null);
var request = new ChatCompletionRequest
{
Model = "openai/gpt-4o-mini",
Messages = [new Message.User(new UserMessage { Content = UserContent.Of("Hello") })]
};
await foreach (var chunk in client.ChatStreamAsync(request))
{
var delta = chunk.Choices.Count > 0 ? chunk.Choices[0].Delta.Content : null;
if (delta is not null) Console.Write(delta);
}
Console.WriteLine();
# frozen_string_literal: true
require 'liter_llm'
client = LiterLlm.create_client(ENV.fetch('OPENAI_API_KEY'))
client.chat_stream(
LiterLlm::ChatCompletionRequest.new(
model: 'openai/gpt-4o-mini',
messages: [{ 'role' => 'user', 'content' => 'Count from 1 to 5.' }],
stream: true
)
) do |chunk|
delta = chunk.choices && chunk.choices[0] && chunk.choices[0].delta
print delta.content if delta && delta.content
end
puts
<?php
declare(strict_types=1);
use Liter\Llm\LiterLlm;
use Liter\Llm\ChatCompletionRequest;
$client = LiterLlm::createClient(getenv('OPENAI_API_KEY') ?: '');
$request = ChatCompletionRequest::from_json(json_encode([
'model' => 'openai/gpt-4o-mini',
'messages' => [['role' => 'user', 'content' => 'Count from 1 to 5.']],
]));
foreach ($client->chatStream($request) as $chunkJson) {
$chunk = json_decode($chunkJson, false, flags: JSON_THROW_ON_ERROR);
echo $chunk->choices[0]->delta->content ?? '';
}
echo PHP_EOL;
{:ok, client} = LiterLlm.create_client(System.get_env("OPENAI_API_KEY"))
request =
Jason.encode!(%{
model: "openai/gpt-4o-mini",
messages: [%{role: "user", content: "Count from 1 to 5."}],
stream: true
})
{:ok, stream} = LiterLlm.defaultclient_chat_stream(client, request)
Enum.each(stream, fn chunk ->
content = get_in(chunk, [:choices, Access.at(0), :delta, :content])
if content, do: IO.write(content)
end)
IO.puts("")
import init, { createClient, WasmChatCompletionRequest } from "@xberg-io/liter-llm-wasm";
await init();
const client = createClient(process.env.OPENAI_API_KEY!);
const request = WasmChatCompletionRequest.default();
request.model = "openai/gpt-4o";
request.messages = [{ role: "user", content: "Tell me a story" }];
request.stream = true;
const stream = await client.chatStream(request);
while (true) {
const chunk = await stream.next();
if (chunk === null) {
break;
}
process.stdout.write(chunk.choices?.[0]?.delta?.content ?? "");
}
console.log();
Each chunk contains choices[].delta.content with incremental text. The final chunk includes finish_reason: "stop".
Collecting the Full Response¶
Accumulate deltas to get both real-time output and the complete text:
import asyncio
import os
from liter_llm import create_client
from liter_llm._internal_bindings import ChatCompletionRequest
async def main() -> None:
client = create_client(api_key=os.environ["OPENAI_API_KEY"])
request = ChatCompletionRequest.from_json(
'{"model":"openai/gpt-4o","messages":[{"role":"user","content":"Explain quantum computing briefly"}],"stream":true}'
)
full_text = ""
async for chunk in client.chat_stream(request):
delta = chunk.choices[0].delta.content if chunk.choices else None
if delta:
full_text += delta
print(delta, end="", flush=True)
print()
print(f"Full response length: {len(full_text)} characters")
asyncio.run(main())
import { createClient } from "@xberg-io/liter-llm";
const client = createClient(process.env.OPENAI_API_KEY!);
const chunks = await client.chatStream({
model: "openai/gpt-4o",
messages: [{ role: "user", content: "Explain quantum computing briefly" }],
});
let fullText = "";
for await (const chunk of chunks) {
const delta = chunk.choices?.[0]?.delta?.content;
if (delta) {
fullText += delta;
process.stdout.write(delta);
}
}
console.log();
console.log(`Full response length: ${fullText.length} characters`);
use futures::StreamExt;
use liter_llm::{
ChatCompletionRequest, ClientConfigBuilder, DefaultClient, LlmClient,
Message, UserContent, UserMessage,
};
/// Streams a completion from `openai/gpt-4o`, echoing each text delta as it
/// arrives while also accumulating the complete response text.
///
/// After the stream ends, the length of the accumulated text is printed in
/// characters. The API key is read from the `OPENAI_API_KEY` environment
/// variable; any failure is propagated to the caller.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("openai/gpt-4o"))?;

    let request = ChatCompletionRequest {
        model: "openai/gpt-4o".into(),
        messages: vec![Message::User(UserMessage {
            content: UserContent::Text("Explain quantum computing briefly".into()),
            name: None,
        })],
        ..Default::default()
    };

    let mut stream = client.chat_stream(request).await?;
    let mut full_text = String::new();
    while let Some(chunk) = stream.next().await {
        let chunk = chunk?;
        // Chunks without choices or without text are skipped.
        let delta = chunk
            .choices
            .first()
            .and_then(|choice| choice.delta.content.as_deref());
        if let Some(content) = delta {
            full_text.push_str(content);
            print!("{content}");
            // stdout is line-buffered, so flush explicitly to show each delta
            // immediately instead of waiting for the next newline.
            std::io::Write::flush(&mut std::io::stdout())?;
        }
    }
    println!();
    // `String::len` is a byte count; count `char`s so the number matches the
    // "characters" label for non-ASCII output as well.
    println!("\nFull response length: {} characters", full_text.chars().count());
    Ok(())
}
package main
import (
"encoding/json"
"fmt"
"os"
"strings"
llm "github.com/xberg-io/liter-llm/packages/go"
)
func main() {
client, err := llm.CreateClient(os.Getenv("OPENAI_API_KEY"), nil, nil, nil, nil)
if err != nil {
panic(err)
}
var req llm.ChatCompletionRequest
if err := json.Unmarshal([]byte(`{
"model": "openai/gpt-4o-mini",
"messages": [{"role": "user", "content": "Explain quantum computing briefly"}],
"stream": true
}`), &req); err != nil {
panic(err)
}
stream, err := client.ChatStream(req)
if err != nil {
panic(err)
}
var sb strings.Builder
for chunk := range stream {
if len(chunk.Choices) > 0 && chunk.Choices[0].Delta.Content != nil {
delta := *chunk.Choices[0].Delta.Content
sb.WriteString(delta)
fmt.Print(delta)
}
}
fmt.Println()
fmt.Printf("Full response length: %d characters\n", sb.Len())
}
import io.xberg.literllm.*;
import java.util.List;
/**
 * Streams a chat completion, echoing each text delta as it arrives while
 * accumulating the full response, then prints the accumulated length.
 *
 * <p>The API key is read from the {@code OPENAI_API_KEY} environment variable.
 */
public class Main {
    public static void main(String[] args) throws Exception {
        try (var client = LiterLlm.createClient(System.getenv("OPENAI_API_KEY"))) {
            var request = ChatCompletionRequest.builder()
                .withModel("openai/gpt-4o")
                .withMessages(List.of(
                    new Message.User(new UserMessage(UserContent.of("Explain quantum computing briefly"), null))
                ))
                .build();

            var sb = new StringBuilder();
            var stream = client.chatStream(request);
            var iterator = stream.iterator();
            while (iterator.hasNext()) {
                var chunk = iterator.next();
                var choices = chunk.choices();
                // getFirst() throws NoSuchElementException on an empty list,
                // so chunks that carry no choices are skipped.
                if (choices.isEmpty()) {
                    continue;
                }
                var delta = choices.getFirst().delta().content();
                if (delta != null) {
                    sb.append(delta);
                    System.out.print(delta);
                }
            }
            System.out.println();
            System.out.printf("%nFull response length: %d characters%n", sb.length());
        }
    }
}
using System.Text;
using LiterLlm;
using var client = LiterLlmLib.CreateClient(
apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!,
baseUrl: null, timeoutSecs: null, maxRetries: null, modelHint: null);
var request = new ChatCompletionRequest
{
Model = "openai/gpt-4o",
Messages = [new Message.User(new UserMessage { Content = UserContent.Of("Explain quantum computing briefly") })]
};
var sb = new StringBuilder();
await foreach (var chunk in client.ChatStreamAsync(request))
{
var delta = chunk.Choices.Count > 0 ? chunk.Choices[0].Delta.Content : null;
if (delta is not null)
{
sb.Append(delta);
Console.Write(delta);
}
}
Console.WriteLine();
Console.WriteLine($"\nFull response length: {sb.Length} characters");
# frozen_string_literal: true
require 'liter_llm'
client = LiterLlm.create_client(ENV.fetch('OPENAI_API_KEY'))
full_text = +''
client.chat_stream(
LiterLlm::ChatCompletionRequest.new(
model: 'openai/gpt-4o-mini',
messages: [{ 'role' => 'user', 'content' => 'Explain quantum computing briefly' }],
stream: true
)
) do |chunk|
delta = chunk.choices && chunk.choices[0] && chunk.choices[0].delta
if delta && delta.content
full_text << delta.content
print delta.content
end
end
puts
puts "Full response length: #{full_text.length} characters"
<?php
declare(strict_types=1);
use Liter\Llm\LiterLlm;
use Liter\Llm\ChatCompletionRequest;
$client = LiterLlm::createClient(getenv('OPENAI_API_KEY') ?: '');
$request = ChatCompletionRequest::from_json(json_encode([
'model' => 'openai/gpt-4o-mini',
'messages' => [['role' => 'user', 'content' => 'Explain quantum computing briefly']],
]));
$fullText = '';
foreach ($client->chatStream($request) as $chunkJson) {
$chunk = json_decode($chunkJson, false, flags: JSON_THROW_ON_ERROR);
$delta = $chunk->choices[0]->delta->content ?? null;
if ($delta !== null) {
$fullText .= $delta;
echo $delta;
}
}
echo PHP_EOL;
echo 'Full response length: ' . strlen($fullText) . ' characters' . PHP_EOL;
{:ok, client} = LiterLlm.create_client(System.get_env("OPENAI_API_KEY"))
request =
Jason.encode!(%{
model: "openai/gpt-4o-mini",
messages: [%{role: "user", content: "Explain quantum computing briefly"}],
stream: true
})
{:ok, stream} = LiterLlm.defaultclient_chat_stream(client, request)
full_text =
Enum.reduce(stream, "", fn chunk, acc ->
delta = get_in(chunk, [:choices, Access.at(0), :delta, :content])
if delta, do: (IO.write(delta); acc <> delta), else: acc
end)
IO.puts("")
IO.puts("Full response length: #{String.length(full_text)} characters")
import init, { createClient, WasmChatCompletionRequest } from "@xberg-io/liter-llm-wasm";
await init();
const client = createClient(process.env.OPENAI_API_KEY!);
const request = WasmChatCompletionRequest.default();
request.model = "openai/gpt-4o";
request.messages = [{ role: "user", content: "Explain quantum computing briefly" }];
request.stream = true;
const stream = await client.chatStream(request);
let fullText = "";
while (true) {
const chunk = await stream.next();
if (chunk === null) {
break;
}
const delta = chunk.choices?.[0]?.delta?.content;
if (delta) {
fullText += delta;
process.stdout.write(delta);
}
}
console.log();
console.log(`Full response length: ${fullText.length} characters`);
Tool Calling¶
Define tools as functions described with JSON Schema. The model can respond with tool calls; you execute them and send the results back:
import asyncio
import json
import os
from liter_llm import create_client
from liter_llm._internal_bindings import ChatCompletionRequest
REQUEST = {
"model": "openai/gpt-4o",
"messages": [{"role": "user", "content": "What is the weather in Berlin?"}],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather for a location",
"parameters": {
"type": "object",
"properties": {"location": {"type": "string"}},
"required": ["location"],
},
},
}
],
"tool_choice": "auto",
}
async def main() -> None:
client = create_client(api_key=os.environ["OPENAI_API_KEY"])
request = ChatCompletionRequest.from_json(json.dumps(REQUEST))
response = await client.chat(request)
for call in response.choices[0].message.tool_calls or []:
print(f"Tool: {call.function.name}, Args: {call.function.arguments}")
asyncio.run(main())
import { createClient, ToolType } from "@xberg-io/liter-llm";
const client = createClient(process.env.OPENAI_API_KEY!);
const response = await client.chat({
model: "openai/gpt-4o",
messages: [{ role: "user", content: "What is the weather in Berlin?" }],
tools: [
{
toolType: ToolType.Function,
function: {
name: "get_weather",
description: "Get the current weather for a location",
parameters: {
type: "object",
properties: { location: { type: "string" } },
required: ["location"],
},
},
},
],
});
for (const call of response.choices[0]?.message?.toolCalls ?? []) {
console.log(`Tool: ${call.function.name}, Args: ${call.function.arguments}`);
}
use liter_llm::{
ChatCompletionRequest, ChatCompletionTool, ClientConfigBuilder, DefaultClient,
FunctionDefinition, LlmClient, Message, ToolType, UserContent, UserMessage,
};
use serde_json::json;
/// Offers a `get_weather` function tool to `openai/gpt-4o` and prints every
/// tool call the model requests in its first choice.
///
/// The API key is read from the `OPENAI_API_KEY` environment variable; any
/// failure is propagated to the caller.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("openai/gpt-4o"))?;

    // The tool's parameters are described as a JSON Schema object.
    let tools = vec![ChatCompletionTool {
        tool_type: ToolType::Function,
        function: FunctionDefinition {
            name: "get_weather".into(),
            description: Some("Get the current weather for a location".into()),
            parameters: Some(json!({
                "type": "object",
                "properties": {
                    "location": { "type": "string", "description": "City name" }
                },
                "required": ["location"]
            })),
            strict: None,
        },
    }];

    let request = ChatCompletionRequest {
        model: "openai/gpt-4o".into(),
        messages: vec![Message::User(UserMessage {
            content: UserContent::Text("What is the weather in Berlin?".into()),
            name: None,
        })],
        tools: Some(tools),
        ..Default::default()
    };

    let response = client.chat(request).await?;

    // `first()` rather than `[0]`: a response without choices prints nothing
    // instead of panicking on an out-of-bounds index.
    let tool_calls = response
        .choices
        .first()
        .and_then(|choice| choice.message.tool_calls.as_ref());
    if let Some(tool_calls) = tool_calls {
        for call in tool_calls {
            println!("Tool: {}, Args: {}", call.function.name, call.function.arguments);
        }
    }
    Ok(())
}
package main
import (
"encoding/json"
"fmt"
"os"
llm "github.com/xberg-io/liter-llm/packages/go"
)
// main offers a get_weather function tool to the model and prints every tool
// call requested in the first choice of the response.
func main() {
	client, err := llm.CreateClient(os.Getenv("OPENAI_API_KEY"), nil, nil, nil, nil)
	if err != nil {
		panic(err)
	}

	// The request, including the tool's JSON Schema, is written as raw JSON
	// and decoded into the typed request struct.
	body := `{
		"model": "openai/gpt-4o-mini",
		"messages": [{"role": "user", "content": "What is the weather in Berlin?"}],
		"tool_choice": "auto",
		"tools": [{
			"type": "function",
			"function": {
				"name": "get_weather",
				"description": "Get the current weather for a location",
				"parameters": {
					"type": "object",
					"properties": {"location": {"type": "string", "description": "City name"}},
					"required": ["location"]
				}
			}
		}]
	}`
	var req llm.ChatCompletionRequest
	if err := json.Unmarshal([]byte(body), &req); err != nil {
		panic(err)
	}

	resp, err := client.Chat(req)
	if err != nil {
		panic(err)
	}

	// Indexing Choices[0] on an empty slice would panic, so bail out early
	// when the provider returned no choices.
	if len(resp.Choices) == 0 {
		return
	}
	for _, call := range resp.Choices[0].Message.ToolCalls {
		fmt.Printf("Tool: %s, Args: %s\n", call.Function.Name, call.Function.Arguments)
	}
}
import io.xberg.literllm.*;
import java.util.List;
import java.util.Map;
import java.util.Optional;
/** Tool-calling example: offers the model a {@code get_weather} function and prints its tool calls. */
public class Main {
    /**
     * Builds a chat request with one function tool, sends it, and prints every
     * tool call found on the first returned choice.
     *
     * @param args unused
     * @throws Exception propagated from client creation, the chat call, or closing the client
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the client on every exit path.
        try (var client = LiterLlm.createClient(System.getenv("OPENAI_API_KEY"))) {
            // The tool parameters are a JSON Schema object with one required "location" string.
            var tools = List.of(new ChatCompletionTool(
                ToolType.Function,
                new FunctionDefinition(
                    "get_weather",
                    "Get the current weather for a location",
                    Map.of(
                        "type", "object",
                        "properties", Map.of(
                            "location", Map.of("type", "string", "description", "City name")
                        ),
                        "required", List.of("location")
                    ),
                    null
                )
            ));
            var request = ChatCompletionRequest.builder()
                .withModel("openai/gpt-4o")
                .withMessages(List.of(
                    new Message.User(new UserMessage(UserContent.of("What is the weather in Berlin?"), null))
                ))
                .withTools(Optional.of(tools))
                .build();
            var response = client.chat(request);
            var choices = response.choices();
            // getFirst() throws NoSuchElementException on an empty list, so check first.
            if (choices.isEmpty()) {
                return;
            }
            var toolCalls = choices.getFirst().message().toolCalls();
            if (toolCalls != null) {
                for (var call : toolCalls) {
                    System.out.printf("Tool: %s, Args: %s%n",
                        call.function().name(), call.function().arguments());
                }
            }
        }
    }
}
using LiterLlm;
// Client authenticated with OPENAI_API_KEY; every optional setting is left null.
using var client = LiterLlmLib.CreateClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!,
    baseUrl: null, timeoutSecs: null, maxRetries: null, modelHint: null);

// JSON Schema for the tool arguments: an object with one required "location" string.
var weatherParameters = new
{
    type = "object",
    properties = new
    {
        location = new { type = "string", description = "City name" }
    },
    required = new[] { "location" }
};

var weatherTool = new ChatCompletionTool
{
    ToolType = ToolType.Function,
    Function = new FunctionDefinition
    {
        Name = "get_weather",
        Description = "Get the current weather for a location",
        Parameters = weatherParameters
    }
};
var tools = new List<ChatCompletionTool> { weatherTool };

var request = new ChatCompletionRequest
{
    Model = "openai/gpt-4o",
    Messages = [new Message.User(new UserMessage { Content = UserContent.Of("What is the weather in Berlin?") })],
    Tools = tools
};
var response = await client.ChatAsync(request);

// Print each tool call on the first choice; a null ToolCalls is treated as empty.
foreach (var toolCall in response.Choices[0].Message.ToolCalls ?? [])
{
    Console.WriteLine($"Tool: {toolCall.Function.Name}, Args: {toolCall.Function.Arguments}");
}
# frozen_string_literal: true
require 'liter_llm'
# ENV.fetch raises KeyError when OPENAI_API_KEY is not set.
client = LiterLlm.create_client(ENV.fetch('OPENAI_API_KEY'))

# One function tool; its parameters are a JSON Schema object with a single
# required "location" string.
tools = [
  {
    'type' => 'function',
    'function' => {
      'name' => 'get_weather',
      'description' => 'Get the current weather for a location',
      'parameters' => {
        'type' => 'object',
        'properties' => { 'location' => { 'type' => 'string', 'description' => 'City name' } },
        'required' => ['location']
      }
    }
  }
]

result = client.chat_async(
  LiterLlm::ChatCompletionRequest.new(
    model: 'openai/gpt-4o-mini',
    messages: [{ 'role' => 'user', 'content' => 'What is the weather in Berlin?' }],
    tools: tools,
    tool_choice: 'auto'
  )
)

# Safe navigation avoids a NoMethodError on nil when no choice came back.
first_choice = result.choices[0]
(first_choice&.message&.tool_calls || []).each do |call|
  puts "Tool: #{call.function.name}, Args: #{call.function.arguments}"
end
<?php
declare(strict_types=1);
use Liter\Llm\LiterLlm;
use Liter\Llm\ChatCompletionRequest;
// Client authenticated with OPENAI_API_KEY; an unset variable becomes an empty key.
$client = LiterLlm::createClient(getenv('OPENAI_API_KEY') ?: '');

// JSON Schema for the tool arguments: an object with one required "location" string.
$weatherParameters = [
    'type' => 'object',
    'properties' => ['location' => ['type' => 'string', 'description' => 'City name']],
    'required' => ['location'],
];

$tools = [
    [
        'type' => 'function',
        'function' => [
            'name' => 'get_weather',
            'description' => 'Get the current weather for a location',
            'parameters' => $weatherParameters,
        ],
    ],
];

// The request is assembled as an array and handed over as JSON.
$payload = [
    'model' => 'openai/gpt-4o-mini',
    'messages' => [['role' => 'user', 'content' => 'What is the weather in Berlin?']],
    'tools' => $tools,
    'tool_choice' => 'auto',
];
$request = ChatCompletionRequest::from_json(json_encode($payload));

$result = $client->chat($request);

// `??` covers the whole access chain, so a missing choice or field yields [].
$toolCalls = $result->choices[0]->message->toolCalls ?? [];
foreach ($toolCalls as $call) {
    echo "Tool: {$call->function->name}, Args: {$call->function->arguments}" . PHP_EOL;
}
# Create a client from the OPENAI_API_KEY environment variable; the match
# raises if client creation does not return {:ok, client}.
{:ok, client} = LiterLlm.create_client(System.get_env("OPENAI_API_KEY"))

# One function tool; its parameters are a JSON Schema object with a single
# required "location" string.
tools = [
  %{
    type: "function",
    function: %{
      name: "get_weather",
      description: "Get the current weather for a location",
      parameters: %{
        type: "object",
        properties: %{location: %{type: "string", description: "City name"}},
        required: ["location"]
      }
    }
  }
]

# The request is passed to the client as a JSON string.
request =
  Jason.encode!(%{
    model: "openai/gpt-4o-mini",
    messages: [%{role: "user", content: "What is the weather in Berlin?"}],
    tools: tools,
    tool_choice: "auto"
  })

{:ok, result} = LiterLlm.defaultclient_chat_async(client, request)

# Enum.at/2 returns nil for an empty list, so handle that before reading
# .message from the first choice.
tool_calls =
  case Enum.at(result.choices, 0) do
    nil -> []
    choice -> choice.message.tool_calls || []
  end

for call <- tool_calls do
  IO.puts("Tool: #{call.function.name}, Args: #{call.function.arguments}")
end
import init, {
createClient,
WasmChatCompletionRequest,
WasmChatCompletionTool,
WasmFunctionDefinition,
} from "@xberg-io/liter-llm-wasm";
// The WASM module must be initialised before any binding is used.
await init();
const client = createClient(process.env.OPENAI_API_KEY!);

// Build the tool from default-constructed binding objects, then fill in fields.
const tool = WasmChatCompletionTool.default();
tool.toolType = "function";

const fn = WasmFunctionDefinition.default();
fn.name = "get_weather";
fn.description = "Get the current weather for a location";
// JSON Schema for the tool arguments: an object with one required "location" string.
fn.parameters = {
  type: "object",
  properties: { location: { type: "string", description: "City name" } },
  required: ["location"],
};
tool.function = fn;

const request = WasmChatCompletionRequest.default();
request.model = "openai/gpt-4o";
request.messages = [{ role: "user", content: "What is the weather in Berlin?" }];
request.toolChoice = "auto";
request.tools = [tool];

const response = await client.chat(request);

// Optional chaining plus `?? []` covers a missing first choice or missing tool calls.
for (const call of response.choices[0]?.message?.toolCalls ?? []) {
  console.log(`Tool: ${call.function.name}, Args: ${call.function.arguments}`);
}
Chat Parameters¶
All chat parameters work with both chat and chat_stream:
| Parameter | Type | Description |
|---|---|---|
| `model` | string | Provider/model identifier (e.g. "openai/gpt-4o") |
| `messages` | array | Conversation messages |
| `temperature` | float | Sampling temperature (0.0-2.0) |
| `max_tokens` | int | Maximum tokens to generate |
| `top_p` | float | Nucleus sampling threshold |
| `n` | int | Number of completions to generate |
| `stop` | string/array | Stop sequences |
| `tools` | array | Tool/function definitions |
| `tool_choice` | string/object | Tool selection strategy |
| `response_format` | object | Force JSON output ({"type": "json_object"}) |
| `seed` | int | Deterministic sampling seed |
| `presence_penalty` | float | Penalize new topics (-2.0 to 2.0) |
| `frequency_penalty` | float | Penalize repetition (-2.0 to 2.0) |
| `reasoning_effort` | string | Reasoning budget for o-series and extended-thinking models. |
| `extra_body` | object | Provider-specific fields passed through verbatim. |
Reasoning Effort¶
OpenAI o-series models and Anthropic extended-thinking models accept a reasoning_effort parameter that controls how much compute the model spends on internal reasoning before producing the final response.
Accepted values for OpenAI o-series: "low", "medium", "high". Anthropic extended thinking uses a budget_tokens integer instead, which maps to reasoning_effort when the binding converts the field.
Structured Outputs (JSON Schema)¶
Pass a JSON Schema to response_format to constrain the model output to a specific structure. Use "type": "json_schema" instead of "type": "json_object" for schema-validated output.
# JSON Schema the model output must satisfy: an object with exactly the
# required "name" (string) and "age" (integer) properties.
schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
    },
    "required": ["name", "age"],
    "additionalProperties": False,
}

# "json_schema" (rather than "json_object") requests schema-validated output.
response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "person",
        "strict": True,
        "schema": schema,
    },
}

request = {
    "model": "openai/gpt-4o",
    "messages": [{"role": "user", "content": "Extract: Alice is 30 years old."}],
    "response_format": response_format,
}
response = client.chat(request)
// JSON Schema the model output must satisfy: an object with exactly the
// required "name" (string) and "age" (integer) properties.
const personSchema = {
  type: "object",
  properties: {
    name: { type: "string" },
    age: { type: "integer" },
  },
  required: ["name", "age"],
  additionalProperties: false,
};

// "json_schema" (rather than "json_object") requests schema-validated output.
const request = {
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "Extract: Alice is 30 years old." }],
  responseFormat: {
    type: "json_schema",
    jsonSchema: {
      name: "person",
      strict: true,
      schema: personSchema,
    },
  },
};

const response = await client.chat(request);
use serde_json::json;
// "json_schema" (rather than "json_object") requests schema-validated output.
// The schema is an object with exactly the required "name" and "age" fields.
let response_format = json!({
    "type": "json_schema",
    "json_schema": {
        "name": "person",
        "strict": true,
        "schema": {
            "type": "object",
            "properties": {
                "name": { "type": "string" },
                "age": { "type": "integer" }
            },
            "required": ["name", "age"],
            "additionalProperties": false
        }
    }
});

let req = ChatCompletionRequest {
    model: "openai/gpt-4o".into(),
    messages: vec![/* ... */],
    response_format: Some(response_format),
    ..Default::default()
};
Structured output availability depends on provider support. OpenAI gpt-4o and later support json_schema. Providers that do not support it fall back to json_object or return EndpointNotSupported.
Extra Body¶
Pass provider-specific parameters that liter-llm does not model natively via extra_body. Fields in extra_body are merged into the top-level request JSON before it is sent to the provider.
extra_body fields take lower precedence than named fields. If a named field and an extra_body key conflict, the named field wins.
Audio Content Parts¶
Send audio inline in a user message using the input_audio content part type. The audio must be base64-encoded.
import base64
# Read the WAV file as raw bytes, then base64-encode it into a text string.
with open("audio.wav", "rb") as audio_file:
    raw_audio = audio_file.read()
audio_b64 = base64.b64encode(raw_audio).decode()

# The user message carries two content parts: the inline audio, then the text
# instruction that refers to it.
audio_part = {
    "type": "input_audio",
    "input_audio": {
        "data": audio_b64,
        "format": "wav",
    },
}
prompt_part = {"type": "text", "text": "Transcribe and summarize this audio."}

response = client.chat({
    "model": "openai/gpt-4o-audio-preview",
    "messages": [{
        "role": "user",
        "content": [audio_part, prompt_part],
    }],
})
import { readFileSync } from "fs";
// Read the WAV file and base64-encode its bytes.
const audioBytes = readFileSync("audio.wav");
const audioB64 = audioBytes.toString("base64");

// The user message carries two content parts: the inline audio, then the text
// instruction that refers to it.
const audioPart = {
  type: "input_audio",
  inputAudio: { data: audioB64, format: "wav" },
};
const promptPart = { type: "text", text: "Transcribe and summarize this audio." };

const response = await client.chat({
  model: "openai/gpt-4o-audio-preview",
  messages: [{
    role: "user",
    content: [audioPart, promptPart],
  }],
});
use base64::{Engine, engine::general_purpose::STANDARD};
use liter_llm::types::{AudioContent, ContentPart};
// Read the WAV file; a read failure is propagated with `?`.
let audio_bytes = std::fs::read("audio.wav")?;

// Inline audio part carrying the base64-encoded bytes.
let audio_part = ContentPart::InputAudio {
    input_audio: AudioContent {
        data: STANDARD.encode(&audio_bytes),
        format: "wav".into(),
    },
};
// Text instruction that refers to the audio above.
let prompt_part = ContentPart::Text {
    text: "Transcribe and summarize this audio.".into(),
};

let content = vec![audio_part, prompt_part];
Supported formats depend on the provider. OpenAI gpt-4o-audio-preview accepts wav, mp3, ogg, flac, m4a.
AWS EventStream Streaming¶
When routing to Bedrock providers, responses arrive in AWS EventStream framing rather than SSE. The framing is handled transparently by liter-llm, so chat_stream works the same way regardless of provider.
// EventStream framing is transparent to the caller.
let stream = client.chat_stream(ChatCompletionRequest {
    model: "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0".into(),
    messages: vec![/* ... */],
    ..Default::default()
}).await?;

// Consume exactly like any other stream.
pin_mut!(stream);
while let Some(chunk) = stream.next().await {
    // Each item is a `Result`; stop at the first stream error.
    let chunk = chunk?;
    // `first()` skips a chunk that carries no choices, where `choices[0]`
    // would panic.
    let content = chunk
        .choices
        .first()
        .and_then(|choice| choice.delta.content.as_deref());
    if let Some(content) = content {
        print!("{content}");
    }
}
Tower streaming buffer
When Bedrock streaming is routed through the Tower middleware stack (LlmService), the entire stream is buffered in memory before chunks are yielded. This is a Tower Service trait constraint. For unbuffered Bedrock streaming, call LlmClient::chat_stream() directly, bypassing the Tower stack. See Architecture for details.