Chat & Streaming

Basic Chat

Send a message and get a response:

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    response = await client.chat(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "Hello!"}],
    )
    print(response.choices[0].message.content)

asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const response = await client.chat({
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "Hello!" }],
});
console.log(response.choices[0].message.content);
use liter_llm::{
    ChatCompletionRequest, ClientConfigBuilder, DefaultClient, LlmClient,
    Message, UserContent, UserMessage,
};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("openai/gpt-4o"))?;

    let request = ChatCompletionRequest {
        model: "openai/gpt-4o".into(),
        messages: vec![Message::User(UserMessage {
            content: UserContent::Text("Hello!".into()),
            name: None,
        })],
        ..Default::default()
    };

    let response = client.chat(request).await?;
    if let Some(choice) = response.choices.first() {
        println!("{}", choice.message.content.as_deref().unwrap_or(""));
    }
    Ok(())
}
package main

import (
 "context"
 "fmt"
 "os"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

func main() {
 client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))
 resp, err := client.Chat(context.Background(), &llm.ChatCompletionRequest{
  Model: "openai/gpt-4o",
  Messages: []llm.Message{
   llm.NewTextMessage(llm.RoleUser, "Hello!"),
  },
 })
 if err != nil {
  panic(err)
 }
 if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != nil {
  fmt.Println(*resp.Choices[0].Message.Content)
 }
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.List;

public class Main {
    public static void main(String[] args) throws Exception {
        try (var client = LlmClient.builder()
                .apiKey(System.getenv("OPENAI_API_KEY"))
                .build()) {
            var response = client.chat(new ChatCompletionRequest(
                "openai/gpt-4o",
                List.of(new UserMessage("Hello!"))
            ));
            System.out.println(response.choices().getFirst().message().content());
        }
    }
}
using LiterLlm;

await using var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!);

var response = await client.ChatAsync(new ChatCompletionRequest(
    Model: "openai/gpt-4o",
    Messages: [new UserMessage("Hello!")]
));
Console.WriteLine(response.Choices[0].Message.Content);
# frozen_string_literal: true

require "liter_llm"
require "json"

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})

response = JSON.parse(client.chat(JSON.generate(
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "Hello!" }]
)))

puts response.dig("choices", 0, "message", "content")
<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');

$response = json_decode($client->chat(json_encode([
    'model' => 'openai/gpt-4o',
    'messages' => [
        ['role' => 'user', 'content' => 'Hello!'],
    ],
])), true);

echo $response['choices'][0]['message']['content'] . PHP_EOL;
{:ok, response} =
  LiterLlm.chat(
    %{
      model: "openai/gpt-4o",
      messages: [%{role: "user", content: "Hello!"}]
    },
    api_key: System.fetch_env!("OPENAI_API_KEY")
  )

IO.puts(hd(response["choices"])["message"]["content"])
import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

await init();

const client = new LlmClient({ apiKey: "sk-..." });
const response = await client.chat({
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "Hello!" }],
});

console.log(response.choices[0].message.content);

Provider Routing

liter-llm uses a provider/model prefix convention. The prefix determines which API endpoint, auth header, and parameter mappings to use:

openai/gpt-4o                        -> OpenAI
anthropic/claude-sonnet-4-20250514   -> Anthropic
groq/llama3-70b                      -> Groq
google/gemini-2.0-flash              -> Google AI
mistral/mistral-large                -> Mistral
bedrock/anthropic.claude-v2          -> AWS Bedrock

Switch providers by changing the model string -- no other code changes needed.
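
For example, the same call can be routed to a different provider just by swapping the prefix. This is a minimal sketch; it assumes the client holds credentials for the target provider, and the Anthropic model name is illustrative:

response = await client.chat(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}],
)

response = await client.chat(
    model="anthropic/claude-sonnet-4-20250514",  # same code, different provider
    messages=[{"role": "user", "content": "Hello!"}],
)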

Message Roles

Role       Purpose
system     Sets the assistant's behavior. Sent once at the start.
user       User input -- questions, instructions, data.
assistant  Previous assistant responses for multi-turn context.
tool       Results from tool calls.
developer  Developer-level instructions (some providers).
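
As a sketch, a message list that exercises most of these roles might look like the following. The tool entry assumes the OpenAI-style tool_call_id convention; field names may differ by provider:

messages = [
    {"role": "system", "content": "You are a terse assistant."},
    {"role": "user", "content": "What's the weather in Berlin?"},
    # A previous assistant turn that requested a tool call.
    {"role": "assistant", "content": None, "tool_calls": [
        {"id": "call_1", "type": "function",
         "function": {"name": "get_weather", "arguments": '{"location": "Berlin"}'}},
    ]},
    # The tool result, linked back to the call by its id.
    {"role": "tool", "tool_call_id": "call_1", "content": '{"temp_c": 18}'},
]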

Multi-Turn Conversations

Append the assistant's response and the next user message, then call chat again:

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"},
    ]

    response = await client.chat(model="openai/gpt-4o", messages=messages)
    content = response.choices[0].message.content
    print(f"Assistant: {content}")

    # Continue the conversation
    messages.append({"role": "assistant", "content": content})
    messages.append({"role": "user", "content": "What about Germany?"})

    response = await client.chat(model="openai/gpt-4o", messages=messages)
    print(f"Assistant: {response.choices[0].message.content}")

    # Token usage
    if response.usage:
        print(f"Tokens: {response.usage.prompt_tokens} in, {response.usage.completion_tokens} out")

asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const messages: Array<{ role: string; content: string }> = [
  { role: "system", content: "You are a helpful assistant." },
  { role: "user", content: "What is the capital of France?" },
];

let response = await client.chat({ model: "openai/gpt-4o", messages });
console.log(`Assistant: ${response.choices[0].message.content}`);

// Continue the conversation
messages.push({ role: "assistant", content: response.choices[0].message.content! });
messages.push({ role: "user", content: "What about Germany?" });

response = await client.chat({ model: "openai/gpt-4o", messages });
console.log(`Assistant: ${response.choices[0].message.content}`);

// Token usage
console.log(`Tokens: ${response.usage?.promptTokens} in, ${response.usage?.completionTokens} out`);
use liter_llm::{
    ChatCompletionRequest, ClientConfigBuilder, DefaultClient, LlmClient,
    Message, UserContent, UserMessage, AssistantMessage, SystemMessage,
};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("openai/gpt-4o"))?;

    let mut messages = vec![
        Message::System(SystemMessage {
            content: "You are a helpful assistant.".into(),
            name: None,
        }),
        Message::User(UserMessage {
            content: UserContent::Text("What is the capital of France?".into()),
            name: None,
        }),
    ];

    let request = ChatCompletionRequest {
        model: "openai/gpt-4o".into(),
        messages: messages.clone(),
        ..Default::default()
    };
    let response = client.chat(request).await?;
    let content = response.choices[0]
        .message
        .content
        .clone()
        .unwrap_or_default();
    println!("Assistant: {content}");

    // Continue the conversation
    messages.push(Message::Assistant(AssistantMessage {
        content: Some(content),
        ..Default::default()
    }));
    messages.push(Message::User(UserMessage {
        content: UserContent::Text("What about Germany?".into()),
        name: None,
    }));

    let request = ChatCompletionRequest {
        model: "openai/gpt-4o".into(),
        messages,
        ..Default::default()
    };
    let response = client.chat(request).await?;
    if let Some(choice) = response.choices.first() {
        println!("Assistant: {}", choice.message.content.as_deref().unwrap_or(""));
    }

    // Token usage
    if let Some(usage) = &response.usage {
        println!("Tokens: {} in, {} out", usage.prompt_tokens, usage.completion_tokens);
    }
    Ok(())
}
package main

import (
 "context"
 "fmt"
 "os"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

func main() {
 client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))
 messages := []llm.Message{
  llm.NewTextMessage(llm.RoleSystem, "You are a helpful assistant."),
  llm.NewTextMessage(llm.RoleUser, "What is the capital of France?"),
 }

 resp, err := client.Chat(context.Background(), &llm.ChatCompletionRequest{
  Model:    "openai/gpt-4o",
  Messages: messages,
 })
 if err != nil {
  panic(err)
 }
 content := ""
 if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != nil {
  content = *resp.Choices[0].Message.Content
 }
 fmt.Printf("Assistant: %s\n", content)

 // Continue the conversation
 messages = append(messages,
  llm.NewTextMessage(llm.RoleAssistant, content),
  llm.NewTextMessage(llm.RoleUser, "What about Germany?"),
 )

 resp, err = client.Chat(context.Background(), &llm.ChatCompletionRequest{
  Model:    "openai/gpt-4o",
  Messages: messages,
 })
 if err != nil {
  panic(err)
 }
 if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != nil {
  fmt.Printf("Assistant: %s\n", *resp.Choices[0].Message.Content)
 }

 // Token usage
 if resp.Usage != nil {
  fmt.Printf("Tokens: %d in, %d out\n", resp.Usage.PromptTokens, resp.Usage.CompletionTokens)
 }
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.ArrayList;
import java.util.List;

public class Main {
    public static void main(String[] args) throws Exception {
        try (var client = LlmClient.builder()
                .apiKey(System.getenv("OPENAI_API_KEY"))
                .build()) {
            var messages = new ArrayList<>(List.of(
                new SystemMessage("You are a helpful assistant."),
                new UserMessage("What is the capital of France?")
            ));

            var response = client.chat(new ChatCompletionRequest(
                "openai/gpt-4o", messages
            ));
            var content = response.choices().getFirst().message().content();
            System.out.println("Assistant: " + content);

            // Continue the conversation
            messages.add(new AssistantMessage(content));
            messages.add(new UserMessage("What about Germany?"));

            response = client.chat(new ChatCompletionRequest(
                "openai/gpt-4o", messages
            ));
            System.out.println("Assistant: " + response.choices().getFirst().message().content());

            // Token usage
            var usage = response.usage();
            if (usage != null) {
                System.out.printf("Tokens: %d in, %d out%n",
                    usage.promptTokens(), usage.completionTokens());
            }
        }
    }
}
using LiterLlm;

await using var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!);

var messages = new List<IMessage>
{
    new SystemMessage("You are a helpful assistant."),
    new UserMessage("What is the capital of France?"),
};

var response = await client.ChatAsync(new ChatCompletionRequest(
    Model: "openai/gpt-4o", Messages: messages));
var content = response.Choices[0].Message.Content;
Console.WriteLine($"Assistant: {content}");

// Continue the conversation
messages.Add(new AssistantMessage(content!));
messages.Add(new UserMessage("What about Germany?"));

response = await client.ChatAsync(new ChatCompletionRequest(
    Model: "openai/gpt-4o", Messages: messages));
Console.WriteLine($"Assistant: {response.Choices[0].Message.Content}");

// Token usage
if (response.Usage is not null)
{
    Console.WriteLine($"Tokens: {response.Usage.PromptTokens} in, {response.Usage.CompletionTokens} out");
}
# frozen_string_literal: true

require "liter_llm"
require "json"

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})

messages = [
  { role: "system", content: "You are a helpful assistant." },
  { role: "user", content: "What is the capital of France?" }
]

response = JSON.parse(client.chat(JSON.generate(
  model: "openai/gpt-4o",
  messages: messages
)))
content = response.dig("choices", 0, "message", "content")
puts "Assistant: #{content}"

# Continue the conversation
messages << { role: "assistant", content: content }
messages << { role: "user", content: "What about Germany?" }

response = JSON.parse(client.chat(JSON.generate(
  model: "openai/gpt-4o",
  messages: messages
)))
puts "Assistant: #{response.dig("choices", 0, "message", "content")}"

# Token usage
usage = response["usage"]
if usage
  puts "Tokens: #{usage["prompt_tokens"]} in, #{usage["completion_tokens"]} out"
end
<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');

$messages = [
    ['role' => 'system', 'content' => 'You are a helpful assistant.'],
    ['role' => 'user', 'content' => 'What is the capital of France?'],
];

$response = json_decode($client->chat(json_encode([
    'model' => 'openai/gpt-4o',
    'messages' => $messages,
])), true);
$content = $response['choices'][0]['message']['content'];
echo "Assistant: {$content}" . PHP_EOL;

// Continue the conversation
$messages[] = ['role' => 'assistant', 'content' => $content];
$messages[] = ['role' => 'user', 'content' => 'What about Germany?'];

$response = json_decode($client->chat(json_encode([
    'model' => 'openai/gpt-4o',
    'messages' => $messages,
])), true);
echo "Assistant: {$response['choices'][0]['message']['content']}" . PHP_EOL;

// Token usage
if (isset($response['usage'])) {
    echo "Tokens: {$response['usage']['prompt_tokens']} in, {$response['usage']['completion_tokens']} out" . PHP_EOL;
}
messages = [
  %{role: "system", content: "You are a helpful assistant."},
  %{role: "user", content: "What is the capital of France?"}
]

{:ok, response} =
  LiterLlm.chat(
    %{model: "openai/gpt-4o", messages: messages},
    api_key: System.fetch_env!("OPENAI_API_KEY")
  )

content = hd(response["choices"])["message"]["content"]
IO.puts("Assistant: #{content}")

# Continue the conversation
messages =
  messages ++
    [
      %{role: "assistant", content: content},
      %{role: "user", content: "What about Germany?"}
    ]

{:ok, response} =
  LiterLlm.chat(
    %{model: "openai/gpt-4o", messages: messages},
    api_key: System.fetch_env!("OPENAI_API_KEY")
  )

IO.puts("Assistant: #{hd(response["choices"])["message"]["content"]}")

# Token usage
usage = response["usage"]
if usage do
  IO.puts("Tokens: #{usage["prompt_tokens"]} in, #{usage["completion_tokens"]} out")
end
import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

await init();

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const messages: Array<{ role: string; content: string }> = [
  { role: "system", content: "You are a helpful assistant." },
  { role: "user", content: "What is the capital of France?" },
];

let response = await client.chat({ model: "openai/gpt-4o", messages });
console.log(`Assistant: ${response.choices[0].message.content}`);

// Continue the conversation
messages.push({ role: "assistant", content: response.choices[0].message.content! });
messages.push({ role: "user", content: "What about Germany?" });

response = await client.chat({ model: "openai/gpt-4o", messages });
console.log(`Assistant: ${response.choices[0].message.content}`);

// Token usage
console.log(`Tokens: ${response.usage?.promptTokens} in, ${response.usage?.completionTokens} out`);

Streaming

Stream tokens as they arrive instead of waiting for the full response:

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    async for chunk in await client.chat_stream(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "Tell me a story"}],
    ):
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
    print()

asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const chunks = await client.chatStream({
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "Tell me a story" }],
});

for (const chunk of chunks) {
  process.stdout.write(chunk.choices[0]?.delta?.content ?? "");
}
console.log();
use futures::StreamExt;
use liter_llm::{
    ChatCompletionRequest, ClientConfigBuilder, DefaultClient, LlmClient,
    Message, UserContent, UserMessage,
};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("openai/gpt-4o"))?;

    let request = ChatCompletionRequest {
        model: "openai/gpt-4o".into(),
        messages: vec![Message::User(UserMessage {
            content: UserContent::Text("Tell me a story".into()),
            name: None,
        })],
        ..Default::default()
    };

    let mut stream = client.chat_stream(request).await?;
    while let Some(chunk) = stream.next().await {
        let chunk = chunk?;
        if let Some(choice) = chunk.choices.first() {
            if let Some(content) = &choice.delta.content {
                print!("{content}");
            }
        }
    }
    println!();
    Ok(())
}
package main

import (
 "context"
 "fmt"
 "os"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

func main() {
 client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))
 err := client.ChatStream(
  context.Background(),
  &llm.ChatCompletionRequest{
   Model: "openai/gpt-4o",
   Messages: []llm.Message{
    llm.NewTextMessage(llm.RoleUser, "Tell me a story"),
   },
  },
  func(chunk *llm.ChatCompletionChunk) error {
   if len(chunk.Choices) > 0 && chunk.Choices[0].Delta.Content != nil {
    fmt.Print(*chunk.Choices[0].Delta.Content)
   }
   return nil
  },
 )
 if err != nil {
  panic(err)
 }
 fmt.Println()
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.List;

public class Main {
    public static void main(String[] args) throws Exception {
        try (var client = LlmClient.builder()
                .apiKey(System.getenv("OPENAI_API_KEY"))
                .build()) {
            client.chatStream(new ChatCompletionRequest(
                "openai/gpt-4o-mini",
                List.of(new UserMessage("Hello"))
            ), chunk -> System.out.println(chunk));
        }
    }
}
using LiterLlm;

await using var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!);

var request = new ChatCompletionRequest(
    Model: "openai/gpt-4o-mini",
    Messages: [new UserMessage("Hello")]
);

await foreach (var chunk in client.ChatStreamAsync(request))
{
    Console.WriteLine(chunk);
}
# frozen_string_literal: true

require "liter_llm"
require "json"

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})

chunks = JSON.parse(client.chat_stream(JSON.generate(
  model: "openai/gpt-4o-mini",
  messages: [{ role: "user", content: "Hello" }]
)))

chunks.each { |chunk| puts chunk }
<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');

$chunksJson = $client->chatStream(json_encode([
    'model' => 'openai/gpt-4o',
    'messages' => [
        ['role' => 'user', 'content' => 'Tell me a story'],
    ],
]));

$chunks = json_decode($chunksJson, true);
foreach ($chunks as $chunk) {
    echo $chunk['choices'][0]['delta']['content'] ?? '';
}
echo PHP_EOL;
{:ok, chunks} =
  LiterLlm.chat_stream(
    %{
      model: "openai/gpt-4o-mini",
      messages: [%{role: "user", content: "Hello"}]
    },
    api_key: System.fetch_env!("OPENAI_API_KEY")
  )

for chunk <- chunks, do: IO.inspect(chunk)
import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

await init();

const client = new LlmClient({ apiKey: "sk-..." });
const stream = await client.chatStream({
  model: "openai/gpt-4o-mini",
  messages: [{ role: "user", content: "Hello" }],
});
// stream is a ReadableStream

Each chunk contains choices[].delta.content with incremental text. The final chunk carries a finish_reason (typically "stop").
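
A minimal sketch of watching for the finish reason while streaming with the Python client; it assumes the streamed choice exposes a finish_reason attribute, as in the OpenAI chunk format:

async for chunk in await client.chat_stream(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Tell me a story"}],
):
    if chunk.choices:
        choice = chunk.choices[0]
        if choice.delta.content:
            print(choice.delta.content, end="", flush=True)
        # The last chunk carries the finish reason instead of more text.
        if choice.finish_reason:
            print(f"\n[finish_reason={choice.finish_reason}]")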

Collecting the Full Response

Accumulate deltas to get both real-time output and the complete text:

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    full_text = ""
    async for chunk in await client.chat_stream(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "Explain quantum computing briefly"}],
    ):
        delta = chunk.choices[0].delta.content if chunk.choices else None
        if delta:
            full_text += delta
            print(delta, end="", flush=True)
    print()
    print(f"\nFull response length: {len(full_text)} characters")

asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const chunks = await client.chatStream({
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "Explain quantum computing briefly" }],
});

let fullText = "";
for (const chunk of chunks) {
  const delta = chunk.choices?.[0]?.delta?.content;
  if (delta) {
    fullText += delta;
    process.stdout.write(delta);
  }
}
console.log();
console.log(`\nFull response length: ${fullText.length} characters`);
use futures::StreamExt;
use liter_llm::{
    ChatCompletionRequest, ClientConfigBuilder, DefaultClient, LlmClient,
    Message, UserContent, UserMessage,
};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("openai/gpt-4o"))?;

    let request = ChatCompletionRequest {
        model: "openai/gpt-4o".into(),
        messages: vec![Message::User(UserMessage {
            content: UserContent::Text("Explain quantum computing briefly".into()),
            name: None,
        })],
        ..Default::default()
    };

    let mut stream = client.chat_stream(request).await?;
    let mut full_text = String::new();
    while let Some(chunk) = stream.next().await {
        let chunk = chunk?;
        if let Some(choice) = chunk.choices.first() {
            if let Some(content) = &choice.delta.content {
                full_text.push_str(content);
                print!("{content}");
            }
        }
    }
    println!();
    println!("\nFull response length: {} characters", full_text.len());
    Ok(())
}
package main

import (
 "context"
 "fmt"
 "os"
 "strings"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

func main() {
 client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))
 var sb strings.Builder
 err := client.ChatStream(context.Background(), &llm.ChatCompletionRequest{
  Model: "openai/gpt-4o",
  Messages: []llm.Message{
   llm.NewTextMessage(llm.RoleUser, "Explain quantum computing briefly"),
  },
 }, func(chunk *llm.ChatCompletionChunk) error {
  if len(chunk.Choices) > 0 && chunk.Choices[0].Delta.Content != nil {
   delta := *chunk.Choices[0].Delta.Content
   sb.WriteString(delta)
   fmt.Print(delta)
  }
  return nil
 })
 if err != nil {
  panic(err)
 }
 fmt.Println()
 fmt.Printf("\nFull response length: %d characters\n", sb.Len())
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.List;

public class Main {
    public static void main(String[] args) throws Exception {
        try (var client = LlmClient.builder()
                .apiKey(System.getenv("OPENAI_API_KEY"))
                .build()) {
            var sb = new StringBuilder();
            client.chatStream(new ChatCompletionRequest(
                "openai/gpt-4o",
                List.of(new UserMessage("Explain quantum computing briefly"))
            ), chunk -> {
                var delta = chunk.choices().getFirst().delta().content();
                if (delta != null) {
                    sb.append(delta);
                    System.out.print(delta);
                }
            });
            System.out.println();
            System.out.printf("%nFull response length: %d characters%n", sb.length());
        }
    }
}
using System.Text;
using LiterLlm;

await using var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!);

var request = new ChatCompletionRequest(
    Model: "openai/gpt-4o",
    Messages: [new UserMessage("Explain quantum computing briefly")]
);

var sb = new StringBuilder();
await foreach (var chunk in client.ChatStreamAsync(request))
{
    var delta = chunk.Choices?[0]?.Delta?.Content;
    if (delta is not null)
    {
        sb.Append(delta);
        Console.Write(delta);
    }
}
Console.WriteLine();
Console.WriteLine($"\nFull response length: {sb.Length} characters");
# frozen_string_literal: true

require "liter_llm"
require "json"

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})

chunks = JSON.parse(client.chat_stream(JSON.generate(
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "Explain quantum computing briefly" }]
)))

full_text = ""
chunks.each do |chunk|
  delta = chunk.dig("choices", 0, "delta", "content")
  if delta
    full_text += delta
    print delta
  end
end
puts
puts "\nFull response length: #{full_text.length} characters"
<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');

$chunks = json_decode($client->chatStream(json_encode([
    'model' => 'openai/gpt-4o',
    'messages' => [
        ['role' => 'user', 'content' => 'Explain quantum computing briefly'],
    ],
])), true);

$fullText = '';
foreach ($chunks as $chunk) {
    $delta = $chunk['choices'][0]['delta']['content'] ?? null;
    if ($delta !== null) {
        $fullText .= $delta;
        echo $delta;
    }
}
echo PHP_EOL;
echo "\nFull response length: " . strlen($fullText) . " characters" . PHP_EOL;
{:ok, chunks} =
  LiterLlm.chat_stream(
    %{
      model: "openai/gpt-4o",
      messages: [%{role: "user", content: "Explain quantum computing briefly"}]
    },
    api_key: System.fetch_env!("OPENAI_API_KEY")
  )

full_text =
  Enum.reduce(chunks, "", fn chunk, acc ->
    delta = hd(chunk["choices"])["delta"]["content"]

    if delta do
      IO.write(delta)
      acc <> delta
    else
      acc
    end
  end)

IO.puts("")
IO.puts("\nFull response length: #{String.length(full_text)} characters")
import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

await init();

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const chunks = await client.chatStream({
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "Explain quantum computing briefly" }],
});

let fullText = "";
for (const chunk of chunks) {
  const delta = chunk.choices?.[0]?.delta?.content;
  if (delta) {
    fullText += delta;
    process.stdout.write(delta);
  }
}
console.log();
console.log(`\nFull response length: ${fullText.length} characters`);

Tool Calling

Define tools as JSON Schema function definitions. The model can respond with tool calls, which you execute and return as tool messages; a full round-trip sketch follows the per-language examples below:

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])

    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {"type": "string", "description": "City name"},
                    },
                    "required": ["location"],
                },
            },
        }
    ]

    response = await client.chat(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "What is the weather in Berlin?"}],
        tools=tools,
    )

    choice = response.choices[0]
    if choice.message.tool_calls:
        for call in choice.message.tool_calls:
            print(f"Tool: {call.function.name}, Args: {call.function.arguments}")

asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });

const tools = [
  {
    type: "function" as const,
    function: {
      name: "get_weather",
      description: "Get the current weather for a location",
      parameters: {
        type: "object",
        properties: {
          location: { type: "string", description: "City name" },
        },
        required: ["location"],
      },
    },
  },
];

const response = await client.chat({
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "What is the weather in Berlin?" }],
  tools,
});

for (const call of response.choices[0]?.message?.toolCalls ?? []) {
  console.log(`Tool: ${call.function.name}, Args: ${call.function.arguments}`);
}
use liter_llm::{
    ChatCompletionRequest, ClientConfigBuilder, DefaultClient, FunctionDefinition,
    LlmClient, Message, Tool, UserContent, UserMessage,
};
use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("openai/gpt-4o"))?;

    let tools = vec![Tool {
        r#type: "function".into(),
        function: FunctionDefinition {
            name: "get_weather".into(),
            description: Some("Get the current weather for a location".into()),
            parameters: Some(json!({
                "type": "object",
                "properties": {
                    "location": { "type": "string", "description": "City name" }
                },
                "required": ["location"]
            })),
        },
    }];

    let request = ChatCompletionRequest {
        model: "openai/gpt-4o".into(),
        messages: vec![Message::User(UserMessage {
            content: UserContent::Text("What is the weather in Berlin?".into()),
            name: None,
        })],
        tools: Some(tools),
        ..Default::default()
    };

    let response = client.chat(request).await?;
    if let Some(tool_calls) = &response.choices[0].message.tool_calls {
        for call in tool_calls {
            println!("Tool: {}, Args: {}", call.function.name, call.function.arguments);
        }
    }
    Ok(())
}
package main

import (
 "context"
 "encoding/json"
 "fmt"
 "os"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

func main() {
 client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))

 tools := []llm.Tool{
  {
   Type: "function",
   Function: llm.FunctionDefinition{
    Name:        "get_weather",
    Description: "Get the current weather for a location",
    Parameters: json.RawMessage(`{
     "type": "object",
     "properties": {
      "location": {"type": "string", "description": "City name"}
     },
     "required": ["location"]
    }`),
   },
  },
 }

 resp, err := client.Chat(context.Background(), &llm.ChatCompletionRequest{
  Model: "openai/gpt-4o",
  Messages: []llm.Message{
   llm.NewTextMessage(llm.RoleUser, "What is the weather in Berlin?"),
  },
  Tools: tools,
 })
 if err != nil {
  panic(err)
 }

 for _, call := range resp.Choices[0].Message.ToolCalls {
  fmt.Printf("Tool: %s, Args: %s\n", call.Function.Name, call.Function.Arguments)
 }
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.List;
import java.util.Map;

public class Main {
    public static void main(String[] args) throws Exception {
        try (var client = LlmClient.builder()
                .apiKey(System.getenv("OPENAI_API_KEY"))
                .build()) {
            var tools = List.of(new Tool(
                "function",
                new FunctionDefinition(
                    "get_weather",
                    "Get the current weather for a location",
                    Map.of(
                        "type", "object",
                        "properties", Map.of(
                            "location", Map.of("type", "string", "description", "City name")
                        ),
                        "required", List.of("location")
                    )
                )
            ));

            var response = client.chat(new ChatCompletionRequest(
                "openai/gpt-4o",
                List.of(new UserMessage("What is the weather in Berlin?")),
                tools
            ));

            for (var call : response.choices().getFirst().message().toolCalls()) {
                System.out.printf("Tool: %s, Args: %s%n",
                    call.function().name(), call.function().arguments());
            }
        }
    }
}
using LiterLlm;

await using var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!);

var tools = new[]
{
    new Tool(
        Type: "function",
        Function: new FunctionDefinition(
            Name: "get_weather",
            Description: "Get the current weather for a location",
            Parameters: new
            {
                type = "object",
                properties = new
                {
                    location = new { type = "string", description = "City name" }
                },
                required = new[] { "location" }
            }
        )
    )
};

var response = await client.ChatAsync(new ChatCompletionRequest(
    Model: "openai/gpt-4o",
    Messages: [new UserMessage("What is the weather in Berlin?")],
    Tools: tools
));

foreach (var call in response.Choices[0].Message.ToolCalls ?? [])
{
    Console.WriteLine($"Tool: {call.Function.Name}, Args: {call.Function.Arguments}");
}
# frozen_string_literal: true

require "liter_llm"
require "json"

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})

tools = [
  {
    type: "function",
    function: {
      name: "get_weather",
      description: "Get the current weather for a location",
      parameters: {
        type: "object",
        properties: {
          location: { type: "string", description: "City name" }
        },
        required: ["location"]
      }
    }
  }
]

response = JSON.parse(client.chat(JSON.generate(
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "What is the weather in Berlin?" }],
  tools: tools
)))

response.dig("choices", 0, "message", "tool_calls")&.each do |call|
  puts "Tool: #{call.dig("function", "name")}, Args: #{call.dig("function", "arguments")}"
end
<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');

$tools = [
    [
        'type' => 'function',
        'function' => [
            'name' => 'get_weather',
            'description' => 'Get the current weather for a location',
            'parameters' => [
                'type' => 'object',
                'properties' => [
                    'location' => ['type' => 'string', 'description' => 'City name'],
                ],
                'required' => ['location'],
            ],
        ],
    ],
];

$response = json_decode($client->chat(json_encode([
    'model' => 'openai/gpt-4o',
    'messages' => [
        ['role' => 'user', 'content' => 'What is the weather in Berlin?'],
    ],
    'tools' => $tools,
])), true);

foreach ($response['choices'][0]['message']['tool_calls'] ?? [] as $call) {
    echo "Tool: {$call['function']['name']}, Args: {$call['function']['arguments']}" . PHP_EOL;
}
tools = [
  %{
    type: "function",
    function: %{
      name: "get_weather",
      description: "Get the current weather for a location",
      parameters: %{
        type: "object",
        properties: %{
          location: %{type: "string", description: "City name"}
        },
        required: ["location"]
      }
    }
  }
]

{:ok, response} =
  LiterLlm.chat(
    %{
      model: "openai/gpt-4o",
      messages: [%{role: "user", content: "What is the weather in Berlin?"}],
      tools: tools
    },
    api_key: System.fetch_env!("OPENAI_API_KEY")
  )

for call <- hd(response["choices"])["message"]["tool_calls"] || [] do
  IO.puts("Tool: #{call["function"]["name"]}, Args: #{call["function"]["arguments"]}")
end
import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

await init();

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });

const tools = [
  {
    type: "function" as const,
    function: {
      name: "get_weather",
      description: "Get the current weather for a location",
      parameters: {
        type: "object",
        properties: {
          location: { type: "string", description: "City name" },
        },
        required: ["location"],
      },
    },
  },
];

const response = await client.chat({
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "What is the weather in Berlin?" }],
  tools,
});

for (const call of response.choices[0]?.message?.toolCalls ?? []) {
  console.log(`Tool: ${call.function.name}, Args: ${call.function.arguments}`);
}
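
The examples above stop at printing the requested call. A hedged Python sketch of the full round trip, continuing this section's Python example, might look like the following. The message shapes follow the OpenAI convention, the tool-call id attribute is assumed, and get_weather stands in for your own function:

import json

# `tools` is the list defined above and `response` is the first chat() result.
messages = [{"role": "user", "content": "What is the weather in Berlin?"}]
choice = response.choices[0]

if choice.message.tool_calls:
    # Echo the assistant turn, including its tool calls, back into the history.
    messages.append({
        "role": "assistant",
        "content": choice.message.content,
        "tool_calls": [
            {
                "id": call.id,
                "type": "function",
                "function": {"name": call.function.name, "arguments": call.function.arguments},
            }
            for call in choice.message.tool_calls
        ],
    })

    # Execute each requested tool and return its result as a tool message.
    for call in choice.message.tool_calls:
        args = json.loads(call.function.arguments)
        result = get_weather(**args)  # your own implementation
        messages.append({
            "role": "tool",
            "tool_call_id": call.id,
            "content": json.dumps(result),
        })

    # Let the model produce the final answer with the tool results in context.
    final = await client.chat(model="openai/gpt-4o", messages=messages, tools=tools)
    print(final.choices[0].message.content)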

Chat Parameters

All chat parameters work with both chat and chat_stream (a combined example follows the table):

Parameter          Type            Description
model              string          Provider/model identifier (e.g. "openai/gpt-4o")
messages           array           Conversation messages
temperature        float           Sampling temperature (0.0-2.0)
max_tokens         int             Maximum tokens to generate
top_p              float           Nucleus sampling threshold
n                  int             Number of completions to generate
stop               string/array    Stop sequences
tools              array           Tool/function definitions
tool_choice        string/object   Tool selection strategy
response_format    object          Force JSON output ({"type": "json_object"})
seed               int             Deterministic sampling seed
presence_penalty   float           Penalize new topics (-2.0 to 2.0)
frequency_penalty  float           Penalize repetition (-2.0 to 2.0)
reasoning_effort   string          Reasoning budget for o-series and extended-thinking models
extra_body         object          Provider-specific fields passed through verbatim
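
A hedged sketch combining several of these parameters in one Python call; the values are illustrative and parameter support varies by provider:

response = await client.chat(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "List three blue things as JSON."}],
    temperature=0.2,
    max_tokens=200,
    top_p=0.9,
    stop=["\n\n"],
    seed=42,
    frequency_penalty=0.3,
    response_format={"type": "json_object"},
)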

Reasoning Effort

OpenAI o-series models and Anthropic extended-thinking models accept a reasoning_effort parameter that controls how much compute the model spends on internal reasoning before producing the final response.

response = await client.chat(
    model="openai/o3-mini",
    messages=[{"role": "user", "content": "Prove the Pythagorean theorem."}],
    reasoning_effort="high",
)
const response = await client.chat({
  model: "openai/o3-mini",
  messages: [{ role: "user", content: "Prove the Pythagorean theorem." }],
  reasoningEffort: "high",
});
let req = ChatCompletionRequest {
    model: "openai/o3-mini".into(),
    messages: vec![/* ... */],
    reasoning_effort: Some("high".into()),
    ..Default::default()
};
resp, err := client.Chat(ctx, &llm.ChatCompletionRequest{
    Model:           "openai/o3-mini",
    Messages:        messages,
    ReasoningEffort: "high",
})

Accepted values for OpenAI o-series models are "low", "medium", and "high". Anthropic extended thinking uses a budget_tokens integer instead; the binding converts reasoning_effort to an equivalent budget_tokens value when it maps the request for Anthropic.
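
As a hedged sketch, the same field can be sent to an Anthropic extended-thinking model and left to the binding to translate into a thinking budget; the model name and the exact translation are assumptions, so consult the provider mapping if exact budgets matter:

# Assumption: liter-llm translates reasoning_effort into an Anthropic
# thinking budget when the model prefix is "anthropic/".
response = await client.chat(
    model="anthropic/claude-sonnet-4-20250514",
    messages=[{"role": "user", "content": "Prove the Pythagorean theorem."}],
    reasoning_effort="high",
)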

Structured Outputs (JSON Schema)

Pass a JSON Schema to response_format to constrain the model output to a specific structure. Use "type": "json_schema" instead of "type": "json_object" for schema-validated output.

schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age":  {"type": "integer"},
    },
    "required": ["name", "age"],
    "additionalProperties": False,
}

response = await client.chat(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Extract: Alice is 30 years old."}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "person",
            "strict": True,
            "schema": schema,
        },
    },
)
const response = await client.chat({
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "Extract: Alice is 30 years old." }],
  responseFormat: {
    type: "json_schema",
    jsonSchema: {
      name: "person",
      strict: true,
      schema: {
        type: "object",
        properties: {
          name: { type: "string" },
          age:  { type: "integer" },
        },
        required: ["name", "age"],
        additionalProperties: false,
      },
    },
  },
});
use serde_json::json;

let req = ChatCompletionRequest {
    model: "openai/gpt-4o".into(),
    messages: vec![/* ... */],
    response_format: Some(json!({
        "type": "json_schema",
        "json_schema": {
            "name": "person",
            "strict": true,
            "schema": {
                "type": "object",
                "properties": {
                    "name": { "type": "string" },
                    "age":  { "type": "integer" }
                },
                "required": ["name", "age"],
                "additionalProperties": false
            }
        }
    })),
    ..Default::default()
};

Structured output availability depends on provider support. OpenAI gpt-4o and later support json_schema. Providers that do not support it fall back to json_object or return EndpointNotSupported.
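
The constrained output still arrives as ordinary message content, so parse it yourself; a minimal Python sketch:

import json

person = json.loads(response.choices[0].message.content)
print(person["name"], person["age"])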

extra_body

Pass provider-specific parameters that liter-llm does not model natively via extra_body. Fields in extra_body are merged into the top-level request JSON before it is sent to the provider.

response = await client.chat(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
    extra_body={
        "store": True,  # OpenAI conversation store
        "metadata": {"user": "alice"},
    },
)
const response = await client.chat({
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "Hello" }],
  extraBody: {
    store: true,
    metadata: { user: "alice" },
  },
});
use serde_json::json;

let req = ChatCompletionRequest {
    model: "openai/gpt-4o".into(),
    messages: vec![/* ... */],
    extra_body: Some(json!({ "store": true, "metadata": { "user": "alice" } })),
    ..Default::default()
};

extra_body keys take lower precedence than named request fields: if the two conflict, the named field wins.
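
For instance, in this hedged Python sketch the named temperature argument is what reaches the provider, not the value passed through extra_body:

response = await client.chat(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
    temperature=0.2,                  # named field: wins
    extra_body={"temperature": 1.5},  # conflicting key: ignored
)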

Audio Content Parts

Send audio inline in a user message using the input_audio content part type. The audio must be base64-encoded.

import base64

with open("audio.wav", "rb") as f:
    audio_b64 = base64.b64encode(f.read()).decode()

response = await client.chat(
    model="openai/gpt-4o-audio-preview",
    messages=[{
        "role": "user",
        "content": [
            {
                "type": "input_audio",
                "input_audio": {
                    "data": audio_b64,
                    "format": "wav",
                },
            },
            {"type": "text", "text": "Transcribe and summarize this audio."},
        ],
    }],
)
import { readFileSync } from "fs";

const audioB64 = readFileSync("audio.wav").toString("base64");

const response = await client.chat({
  model: "openai/gpt-4o-audio-preview",
  messages: [{
    role: "user",
    content: [
      {
        type: "input_audio",
        inputAudio: { data: audioB64, format: "wav" },
      },
      { type: "text", text: "Transcribe and summarize this audio." },
    ],
  }],
});
use base64::{Engine, engine::general_purpose::STANDARD};
use liter_llm::types::{ContentPart, InputAudio};

let audio_bytes = std::fs::read("audio.wav")?;
let audio_b64 = STANDARD.encode(&audio_bytes);

let content = vec![
    ContentPart::InputAudio {
        input_audio: InputAudio {
            data: audio_b64,
            format: "wav".into(),
        },
    },
    ContentPart::Text { text: "Transcribe and summarize this audio.".into() },
];

Supported formats depend on the provider. OpenAI gpt-4o-audio-preview accepts wav, mp3, ogg, flac, m4a.

AWS EventStream Streaming

When routing to Bedrock providers, responses arrive in AWS EventStream framing rather than SSE. liter-llm handles the framing transparently. chat_stream works the same way regardless of provider.

// EventStream framing is transparent to the caller.
let stream = client.chat_stream(ChatCompletionRequest {
    model: "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0".into(),
    messages: vec![/* ... */],
    ..Default::default()
}).await?;

// Consume exactly like any other stream.
pin_mut!(stream);
while let Some(chunk) = stream.next().await {
    let chunk = chunk?;
    if let Some(content) = chunk.choices[0].delta.content.as_deref() {
        print!("{content}");
    }
}
# EventStream framing is transparent to the caller.
async for chunk in await client.chat_stream(
    model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0",
    messages=[{"role": "user", "content": "Hello"}],
):
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)

Tower streaming buffer

When Bedrock streaming is routed through the Tower middleware stack (LlmService), the entire stream is buffered in memory before chunks are yielded. This is a Tower Service trait constraint. For unbuffered Bedrock streaming, call LlmClient::chat_stream() directly, bypassing the Tower stack. See Architecture for details.