Chat Completions¶
The chat method sends a list of messages to an LLM and returns a single response. This is the primary API for most use cases.
Basic Chat¶
import asyncio
import os
from liter_llm import LlmClient


async def main() -> None:
    """Send one user message and print the assistant's reply."""
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    result = await client.chat(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "Hello!"}],
    )
    print(result.choices[0].message.content)


asyncio.run(main())
package main

import (
	"context"
	"fmt"
	"os"

	llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

// main sends one user message and prints the assistant's reply.
func main() {
	client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))

	response, err := client.Chat(context.Background(), &llm.ChatCompletionRequest{
		Model: "openai/gpt-4o",
		Messages: []llm.Message{
			llm.NewTextMessage(llm.RoleUser, "Hello!"),
		},
	})
	if err != nil {
		panic(err)
	}

	// Content is a pointer; guard against an empty choice list or nil text.
	if len(response.Choices) > 0 && response.Choices[0].Message.Content != nil {
		fmt.Println(*response.Choices[0].Message.Content)
	}
}
# frozen_string_literal: true

require "liter_llm"
require "json"

# The Ruby binding exchanges JSON strings: encode the request, decode the reply.
client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})

request = JSON.generate(
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "Hello!" }]
)
response = JSON.parse(client.chat(request))

puts response.dig("choices", 0, "message", "content")
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.List;

/** Sends one user message and prints the assistant's reply. */
public class Main {
    public static void main(String[] args) throws Exception {
        // try-with-resources releases the client's underlying resources on exit.
        try (var client = LlmClient.builder()
                .apiKey(System.getenv("OPENAI_API_KEY"))
                .build()) {
            var request = new ChatCompletionRequest(
                "openai/gpt-4o",
                List.of(new UserMessage("Hello!"))
            );
            var response = client.chat(request);
            System.out.println(response.choices().getFirst().message().content());
        }
    }
}
using LiterLlm;

// await using disposes the client's async resources when the script ends.
await using var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!);

var request = new ChatCompletionRequest(
    Model: "openai/gpt-4o",
    Messages: [new UserMessage("Hello!")]
);
var response = await client.ChatAsync(request);

Console.WriteLine(response.Choices[0].Message.Content);
<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

// The PHP binding exchanges JSON strings; decode the reply into an array.
$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');

$request = json_encode([
    'model' => 'openai/gpt-4o',
    'messages' => [
        ['role' => 'user', 'content' => 'Hello!'],
    ],
]);
$response = json_decode($client->chat($request), true);

echo $response['choices'][0]['message']['content'] . PHP_EOL;
Message Roles¶
Messages use the OpenAI-compatible role system:
| Role | Purpose |
|---|---|
| `system` | Sets the assistant's behavior and persona. Sent once at the start. |
| `user` | User input — questions, instructions, data to process. |
| `assistant` | Previous assistant responses. Include these for multi-turn context. |
| `tool` | Results from tool calls. Sent after the assistant requests a tool invocation. |
| `developer` | Developer-level instructions (supported by some providers). |
Multi-Turn Conversations¶
To continue a conversation, append the assistant's response and the next user message to the messages list, then call chat again.
import asyncio
import os
from liter_llm import LlmClient


async def main() -> None:
    """Run a two-turn conversation, carrying context between calls.

    Each ``chat`` call is stateless: earlier turns must be re-sent in
    ``messages`` for the model to see them.
    """
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"},
    ]
    response = await client.chat(model="openai/gpt-4o", messages=messages)

    # content may be None (e.g. a turn with no text); normalize to "" so the
    # assistant message appended below is always valid.
    content = response.choices[0].message.content or ""
    print(f"Assistant: {content}")

    # Continue the conversation: append the assistant turn, then the follow-up.
    messages.append({"role": "assistant", "content": content})
    messages.append({"role": "user", "content": "What about Germany?"})
    response = await client.chat(model="openai/gpt-4o", messages=messages)
    print(f"Assistant: {response.choices[0].message.content}")

    # Token usage — may be absent depending on the provider.
    if response.usage:
        print(f"Tokens: {response.usage.prompt_tokens} in, {response.usage.completion_tokens} out")


asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });

// Each chat call is stateless: earlier turns must be re-sent in `messages`.
const messages: Array<{ role: string; content: string }> = [
  { role: "system", content: "You are a helpful assistant." },
  { role: "user", content: "What is the capital of France?" },
];

let response = await client.chat({ model: "openai/gpt-4o", messages });
// content can be null/undefined (e.g. a turn with no text); fall back to ""
// instead of asserting non-null so the appended assistant turn stays valid.
const firstReply = response.choices[0].message.content ?? "";
console.log(`Assistant: ${firstReply}`);

// Continue the conversation
messages.push({ role: "assistant", content: firstReply });
messages.push({ role: "user", content: "What about Germany?" });
response = await client.chat({ model: "openai/gpt-4o", messages });
console.log(`Assistant: ${response.choices[0].message.content}`);

// Token usage (optional in the response)
console.log(`Tokens: ${response.usage?.promptTokens} in, ${response.usage?.completionTokens} out`);
package main

import (
	"context"
	"fmt"
	"os"

	llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

// main runs a two-turn conversation. Each Chat call is stateless, so
// earlier turns are re-sent in the Messages slice.
func main() {
	client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))

	messages := []llm.Message{
		llm.NewTextMessage(llm.RoleSystem, "You are a helpful assistant."),
		llm.NewTextMessage(llm.RoleUser, "What is the capital of France?"),
	}

	first, err := client.Chat(context.Background(), &llm.ChatCompletionRequest{
		Model:    "openai/gpt-4o",
		Messages: messages,
	})
	if err != nil {
		panic(err)
	}

	// Content is a pointer; treat a missing choice or nil text as "".
	answer := ""
	if len(first.Choices) > 0 && first.Choices[0].Message.Content != nil {
		answer = *first.Choices[0].Message.Content
	}
	fmt.Printf("Assistant: %s\n", answer)

	// Continue the conversation
	messages = append(messages,
		llm.NewTextMessage(llm.RoleAssistant, answer),
		llm.NewTextMessage(llm.RoleUser, "What about Germany?"),
	)
	second, err := client.Chat(context.Background(), &llm.ChatCompletionRequest{
		Model:    "openai/gpt-4o",
		Messages: messages,
	})
	if err != nil {
		panic(err)
	}
	if len(second.Choices) > 0 && second.Choices[0].Message.Content != nil {
		fmt.Printf("Assistant: %s\n", *second.Choices[0].Message.Content)
	}

	// Token usage — Usage may be nil for some providers.
	if second.Usage != nil {
		fmt.Printf("Tokens: %d in, %d out\n", second.Usage.PromptTokens, second.Usage.CompletionTokens)
	}
}
Sampling Parameters¶
Control response generation with these parameters:
| Parameter | Type | Default | Description |
|---|---|---|---|
| `temperature` | float | 1.0 | Randomness. 0 = deterministic, 2 = very random. |
| `top_p` | float | 1.0 | Nucleus sampling. 0.1 = only top 10% probability mass. |
| `max_tokens` | int | model default | Maximum tokens in the response. |
| `n` | int | 1 | Number of completions to generate. |
| `stop` | string/list | none | Stop sequences. Generation stops when any is encountered. |
| `presence_penalty` | float | 0 | Penalize tokens that have appeared. Range: -2.0 to 2.0. |
| `frequency_penalty` | float | 0 | Penalize tokens by frequency. Range: -2.0 to 2.0. |
| `seed` | int | none | For deterministic outputs (provider support varies). |
| `reasoning_effort` | string | none | Hint for reasoning models (e.g. "low", "medium", "high"). |
# Snippet: assumes an LlmClient instance named `client` is already in scope
# inside an async context.
response = await client.chat(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Write a haiku about Rust"}],
    temperature=0.7,  # moderate randomness (0 = deterministic, 2 = very random)
    max_tokens=100,   # cap the length of the generated reply
    top_p=0.9,        # nucleus sampling: keep the top 90% probability mass
)
print(response.choices[0].message.content)
Parameter support varies by provider
Not all providers support all parameters. Unsupported parameters are silently ignored by most providers. Check your provider's documentation for specifics.
Token Usage¶
Every ChatCompletionResponse includes a usage field with token counts:
# Snippet: assumes an LlmClient instance named `client` is already in scope
# inside an async context.
response = await client.chat(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}],
)
# usage may be absent depending on the provider, hence the truthiness guard.
if response.usage:
    print(f"Prompt tokens: {response.usage.prompt_tokens}")
    print(f"Completion tokens: {response.usage.completion_tokens}")
    print(f"Total tokens: {response.usage.total_tokens}")
Cost Estimation¶
In Rust, the response includes an estimated_cost() method that calculates the approximate USD cost based on embedded pricing data for the provider and model:
Cost tracking at scale
For production cost tracking, use the CostTrackingLayer Tower middleware, which emits cost data as OpenTelemetry span attributes.
Response Format¶
Use response_format to request structured output:
# Snippet: assumes an LlmClient instance named `client` is already in scope
# inside an async context.
# json_object mode asks the model to emit valid JSON in message.content.
response = await client.chat(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "List 3 colors as JSON"}],
    response_format={"type": "json_object"},
)
Tool Calling¶
Pass tools to let the model invoke functions. See the tool calling example:
import asyncio
import os
from liter_llm import LlmClient

# A single function tool the model may choose to invoke.
WEATHER_TOOL = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "City name"},
            },
            "required": ["location"],
        },
    },
}


async def main() -> None:
    """Offer the model a weather tool and print any tool calls it requests."""
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    response = await client.chat(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "What is the weather in Berlin?"}],
        tools=[WEATHER_TOOL],
    )
    message = response.choices[0].message
    # The model may answer with tool calls instead of text.
    if message.tool_calls:
        for call in message.tool_calls:
            print(f"Tool: {call.function.name}, Args: {call.function.arguments}")


asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });

// A single function tool the model may choose to invoke.
const weatherTool = {
  type: "function" as const,
  function: {
    name: "get_weather",
    description: "Get the current weather for a location",
    parameters: {
      type: "object",
      properties: {
        location: { type: "string", description: "City name" },
      },
      required: ["location"],
    },
  },
};

const response = await client.chat({
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "What is the weather in Berlin?" }],
  tools: [weatherTool],
});

// The model may answer with tool calls instead of text.
const calls = response.choices[0]?.message?.toolCalls ?? [];
for (const call of calls) {
  console.log(`Tool: ${call.function.name}, Args: ${call.function.arguments}`);
}