Skip to content

Chat Completions

The chat method sends a list of messages to an LLM and returns a single response. This is the primary API for most use cases.

Basic Chat

import asyncio
import os

from liter_llm import LlmClient


async def main() -> None:
    """Send a single user message and print the assistant's reply."""
    api_key = os.environ["OPENAI_API_KEY"]
    client = LlmClient(api_key=api_key)
    messages = [{"role": "user", "content": "Hello!"}]
    response = await client.chat(model="openai/gpt-4o", messages=messages)
    # The first choice carries the assistant's reply text.
    print(response.choices[0].message.content)


asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";

// Build the client from the environment-provided API key.
const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });

// Single-turn conversation: one user message in, one completion out.
const messages = [{ role: "user", content: "Hello!" }];
const response = await client.chat({ model: "openai/gpt-4o", messages });

// Print the assistant's reply from the first choice.
console.log(response.choices[0].message.content);
// Basic single-turn chat: send one user message and print the reply.
package main

import (
 "context"
 "fmt"
 "os"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

func main() {
 // The API key is read from the environment at client construction time.
 client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))
 resp, err := client.Chat(context.Background(), &llm.ChatCompletionRequest{
  Model: "openai/gpt-4o",
  Messages: []llm.Message{
   llm.NewTextMessage(llm.RoleUser, "Hello!"),
  },
 })
 if err != nil {
  panic(err)
 }
 // Content is a *string; guard against an empty choice list or nil content.
 if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != nil {
  fmt.Println(*resp.Choices[0].Message.Content)
 }
}
# frozen_string_literal: true

# Basic single-turn chat via the Ruby binding.
require "liter_llm"
require "json"

# The Ruby client takes the API key positionally plus an options hash.
client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})

# This binding exchanges raw JSON strings, so encode the request and
# decode the response explicitly.
response = JSON.parse(client.chat(JSON.generate(
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "Hello!" }]
)))

# dig walks choices[0].message.content safely (nil if any level is missing).
puts response.dig("choices", 0, "message", "content")
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.List;

/** Basic single-turn chat: send one user message and print the reply. */
public class Main {
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the client (and its transport) on exit.
        try (var client = LlmClient.builder()
                .apiKey(System.getenv("OPENAI_API_KEY"))
                .build()) {
            var response = client.chat(new ChatCompletionRequest(
                "openai/gpt-4o",
                List.of(new UserMessage("Hello!"))
            ));
            // getFirst() (Java 21+) returns the first completion choice.
            System.out.println(response.choices().getFirst().message().content());
        }
    }
}
using LiterLlm;

// Basic single-turn chat: send one user message and print the reply.
// "await using" disposes the client asynchronously when the script ends.
await using var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!);

var response = await client.ChatAsync(new ChatCompletionRequest(
    Model: "openai/gpt-4o",
    Messages: [new UserMessage("Hello!")]
));
Console.WriteLine(response.Choices[0].Message.Content);
# Basic single-turn chat: send one user message and print the reply.
# Matching on {:ok, response} raises a MatchError if the call fails.
{:ok, response} =
  LiterLlm.chat(
    %{
      model: "openai/gpt-4o",
      messages: [%{role: "user", content: "Hello!"}]
    },
    api_key: System.fetch_env!("OPENAI_API_KEY")
  )

# hd/1 takes the first choice; the response is a string-keyed map.
IO.puts(hd(response["choices"])["message"]["content"])
<?php

declare(strict_types=1);

// Basic single-turn chat: send one user message and print the reply.

use LiterLlm\LlmClient;

// getenv() returns false when the variable is unset; fall back to '' so the
// constructor still receives a string.
$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');

// The PHP client exchanges raw JSON strings; decode with true to get arrays.
$response = json_decode($client->chat(json_encode([
    'model' => 'openai/gpt-4o',
    'messages' => [
        ['role' => 'user', 'content' => 'Hello!'],
    ],
])), true);

echo $response['choices'][0]['message']['content'] . PHP_EOL;

Message Roles

Messages use the OpenAI-compatible role system:

Role Purpose
system Sets the assistant's behavior and persona. Sent once at the start.
user User input -- questions, instructions, data to process.
assistant Previous assistant responses. Include these for multi-turn context.
tool Results from tool calls. Sent after the assistant requests a tool invocation.
developer Developer-level instructions (supported by some providers).

Multi-Turn Conversations

To continue a conversation, append the assistant's response and the next user message to the messages list, then call chat again.

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    """Run a two-turn conversation and report token usage."""
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"},
    ]

    response = await client.chat(model="openai/gpt-4o", messages=messages)
    # content may be None (the field is optional, e.g. for tool-call replies);
    # normalize to "" so the assistant turn echoed into history stays a string.
    content = response.choices[0].message.content or ""
    print(f"Assistant: {content}")

    # Continue the conversation: the assistant's prior answer must be in the
    # history so the model can resolve "What about Germany?" in context.
    messages.append({"role": "assistant", "content": content})
    messages.append({"role": "user", "content": "What about Germany?"})

    response = await client.chat(model="openai/gpt-4o", messages=messages)
    print(f"Assistant: {response.choices[0].message.content}")

    # Token usage (the usage field can be absent depending on the provider).
    if response.usage:
        print(f"Tokens: {response.usage.prompt_tokens} in, {response.usage.completion_tokens} out")

asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
// Seed the history with a system persona plus the first user question.
const messages: Array<{ role: string; content: string }> = [
  { role: "system", content: "You are a helpful assistant." },
  { role: "user", content: "What is the capital of France?" },
];

let response = await client.chat({ model: "openai/gpt-4o", messages });
console.log(`Assistant: ${response.choices[0].message.content}`);

// Continue the conversation: echo the assistant's reply back into history so
// the model can resolve "What about Germany?" in context.
// NOTE(review): content is non-null-asserted here — it can be null for
// tool-call replies; confirm before reusing in production code.
messages.push({ role: "assistant", content: response.choices[0].message.content! });
messages.push({ role: "user", content: "What about Germany?" });

response = await client.chat({ model: "openai/gpt-4o", messages });
console.log(`Assistant: ${response.choices[0].message.content}`);

// Token usage (optional chaining: usage may be undefined for some providers)
console.log(`Tokens: ${response.usage?.promptTokens} in, ${response.usage?.completionTokens} out`);
// Multi-turn chat: run two rounds of conversation and report token usage.
package main

import (
 "context"
 "fmt"
 "os"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

func main() {
 client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))
 // Seed the history with a system persona plus the first user question.
 messages := []llm.Message{
  llm.NewTextMessage(llm.RoleSystem, "You are a helpful assistant."),
  llm.NewTextMessage(llm.RoleUser, "What is the capital of France?"),
 }

 resp, err := client.Chat(context.Background(), &llm.ChatCompletionRequest{
  Model:    "openai/gpt-4o",
  Messages: messages,
 })
 if err != nil {
  panic(err)
 }
 // Content is a *string; default to "" when missing so the assistant turn
 // appended below is always a valid text message.
 content := ""
 if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != nil {
  content = *resp.Choices[0].Message.Content
 }
 fmt.Printf("Assistant: %s\n", content)

 // Continue the conversation: the assistant's prior answer must be in the
 // history so the model can resolve "What about Germany?" in context.
 messages = append(messages,
  llm.NewTextMessage(llm.RoleAssistant, content),
  llm.NewTextMessage(llm.RoleUser, "What about Germany?"),
 )

 resp, err = client.Chat(context.Background(), &llm.ChatCompletionRequest{
  Model:    "openai/gpt-4o",
  Messages: messages,
 })
 if err != nil {
  panic(err)
 }
 if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != nil {
  fmt.Printf("Assistant: %s\n", *resp.Choices[0].Message.Content)
 }

 // Token usage (Usage is a pointer and can be nil for some providers)
 if resp.Usage != nil {
  fmt.Printf("Tokens: %d in, %d out\n", resp.Usage.PromptTokens, resp.Usage.CompletionTokens)
 }
}

Sampling Parameters

Control response generation with these parameters:

Parameter Type Default Description
temperature float 1.0 Randomness. 0 = deterministic, 2 = very random.
top_p float 1.0 Nucleus sampling. 0.1 = only top 10% probability mass.
max_tokens int model default Maximum tokens in the response.
n int 1 Number of completions to generate.
stop string/list none Stop sequences. Generation stops when any is encountered.
presence_penalty float 0 Penalize tokens that have appeared. Range: -2.0 to 2.0.
frequency_penalty float 0 Penalize tokens by frequency. Range: -2.0 to 2.0.
seed int none For deterministic outputs (provider support varies).
reasoning_effort string none Hint for reasoning models (e.g. "low", "medium", "high").
# Tune sampling: lower temperature/top_p narrow the output distribution,
# max_tokens caps the completion length. (Fragment: `client` defined earlier.)
response = await client.chat(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Write a haiku about Rust"}],
    temperature=0.7,  # moderate randomness (default 1.0)
    max_tokens=100,  # hard cap on completion tokens
    top_p=0.9,  # nucleus sampling: keep top 90% probability mass
)
print(response.choices[0].message.content)

Parameter support varies by provider

Not all providers support all parameters. Unsupported parameters are silently ignored by most providers. Check your provider's documentation for specifics.

Token Usage

Every ChatCompletionResponse includes a usage field with token counts:

# Every ChatCompletionResponse includes a usage field with token counts;
# guard with `if` since it can be falsy/absent for some providers.
response = await client.chat(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}],
)
if response.usage:
    print(f"Prompt tokens:     {response.usage.prompt_tokens}")
    print(f"Completion tokens: {response.usage.completion_tokens}")
    print(f"Total tokens:      {response.usage.total_tokens}")

Cost Estimation

In Rust, the response includes an estimated_cost() method that calculates the approximate USD cost based on embedded pricing data for the provider and model:

// estimated_cost() returns None when no embedded pricing data exists
// for this provider/model pair.
if let Some(cost) = response.estimated_cost() {
    println!("Estimated cost: ${cost:.6}");
}

Cost tracking at scale

For production cost tracking, use the CostTrackingLayer Tower middleware, which emits cost data as OpenTelemetry span attributes.

Response Format

Use response_format to request structured output:

# Request structured JSON output.
# NOTE: some providers (e.g. OpenAI) require the prompt itself to mention
# JSON when json_object mode is enabled.
response = await client.chat(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "List 3 colors as JSON"}],
    response_format={"type": "json_object"},
)

Tool Calling

Pass tools to let the model invoke functions. See the tool calling example:

import asyncio
import os
from liter_llm import LlmClient


async def main() -> None:
    """Offer the model a weather tool and print any tool calls it requests."""
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])

    # OpenAI-compatible function-tool schema: a name, a description, and a
    # JSON-Schema definition of the accepted parameters.
    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City name"},
                },
                "required": ["location"],
            },
        },
    }

    response = await client.chat(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "What is the weather in Berlin?"}],
        tools=[weather_tool],
    )

    # The model may answer directly instead of calling the tool, so
    # tool_calls can be empty or None.
    message = response.choices[0].message
    for tool_call in message.tool_calls or []:
        print(f"Tool: {tool_call.function.name}, Args: {tool_call.function.arguments}")


asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });

// OpenAI-compatible function-tool schema: a name, a description, and a
// JSON-Schema definition of the accepted parameters.
const tools = [
  {
    type: "function" as const,
    function: {
      name: "get_weather",
      description: "Get the current weather for a location",
      parameters: {
        type: "object",
        properties: {
          location: { type: "string", description: "City name" },
        },
        required: ["location"],
      },
    },
  },
];

const response = await client.chat({
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "What is the weather in Berlin?" }],
  tools,
});

// The model may answer directly instead of calling the tool; the optional
// chaining plus `?? []` skips the loop when toolCalls is absent.
for (const call of response.choices[0]?.message?.toolCalls ?? []) {
  console.log(`Tool: ${call.function.name}, Args: ${call.function.arguments}`);
}