Configuration¶
Configuration File¶
Create a liter-llm.toml file in your project directory. liter-llm auto-discovers it by searching the current directory and parent directories.
api_key = "sk-..."
base_url = "https://api.openai.com/v1"
model_hint = "openai"
timeout_secs = 120
max_retries = 5
cooldown_secs = 30
health_check_secs = 60
cost_tracking = true
tracing = true

[cache]
max_entries = 512
ttl_seconds = 600

[budget]
global_limit = 50.0
enforcement = "hard"

[budget.model_limits]
"openai/gpt-4o" = 25.0

[rate_limit]
rpm = 60
tpm = 100000

[[providers]]
name = "my-provider"
base_url = "https://my-llm.example.com/v1"
model_prefixes = ["my-provider/"]
Load it in code:
use liter_llm::{FileConfig, ManagedClient};
// Auto-discover
if let Some(config) = FileConfig::discover()? {
let client = ManagedClient::new(config.into_builder().build(), None)?;
}
// Or explicit path
let config = FileConfig::from_toml_file("liter-llm.toml")?;
let client = ManagedClient::new(config.into_builder().build(), None)?;
Client Construction¶
import asyncio
from liter_llm import LlmClient


async def main() -> None:
    client = LlmClient(
        api_key="sk-...",  # or set OPENAI_API_KEY env var
        base_url=None,  # override provider base URL
        model_hint="openai",  # pre-resolve provider at construction
        max_retries=3,  # retry on transient failures
        timeout=60,  # request timeout in seconds
    )
    response = await client.chat(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "Hello!"}],
    )
    print(response.choices[0].message.content)


asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";
const client = new LlmClient({
apiKey: "sk-...", // or set OPENAI_API_KEY env var
baseUrl: undefined, // override provider base URL
modelHint: "openai", // pre-resolve provider at construction
maxRetries: 3, // retry on transient failures
timeoutSecs: 60, // request timeout in seconds
});
const response = await client.chat({
model: "openai/gpt-4o",
messages: [{ role: "user", content: "Hello!" }],
});
console.log(response.choices[0].message.content);
use liter_llm::{
ChatCompletionRequest, ClientConfigBuilder, DefaultClient, LlmClient,
Message, UserContent, UserMessage,
};
use std::time::Duration;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let config = ClientConfigBuilder::new("sk-...".to_string()) // or std::env::var("OPENAI_API_KEY")?
.base_url("https://api.openai.com/v1") // override provider base URL
.max_retries(3) // retry on transient failures
.timeout(Duration::from_secs(60)) // request timeout
.build();
let client = DefaultClient::new(config, Some("openai/gpt-4o"))?; // pre-resolve provider
let request = ChatCompletionRequest {
model: "openai/gpt-4o".into(),
messages: vec![Message::User(UserMessage {
content: UserContent::Text("Hello!".into()),
name: None,
})],
..Default::default()
};
let response = client.chat(request).await?;
if let Some(choice) = response.choices.first() {
println!("{}", choice.message.content.as_deref().unwrap_or(""));
}
Ok(())
}
package main
import (
"context"
"fmt"
"time"
llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)
func main() {
client := llm.NewClient(
llm.WithAPIKey("sk-..."), // or set OPENAI_API_KEY env var
llm.WithBaseURL("https://api.openai.com/v1"), // override provider base URL
llm.WithTimeout(60*time.Second), // request timeout
)
resp, err := client.Chat(context.Background(), &llm.ChatCompletionRequest{
Model: "openai/gpt-4o",
Messages: []llm.Message{
llm.NewTextMessage(llm.RoleUser, "Hello!"),
},
})
if err != nil {
panic(err)
}
if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != nil {
fmt.Println(*resp.Choices[0].Message.Content)
}
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.List;
public class Main {
public static void main(String[] args) throws Exception {
try (var client = LlmClient.builder()
.apiKey("sk-...") // or System.getenv("OPENAI_API_KEY")
.baseUrl("https://api.openai.com/v1") // override provider base URL
.modelHint("openai") // pre-resolve provider at construction
.maxRetries(3) // retry on transient failures
.timeoutSecs(60) // request timeout in seconds
.build()) {
var response = client.chat(new ChatCompletionRequest(
"openai/gpt-4o",
List.of(new UserMessage("Hello!"))
));
System.out.println(response.choices().getFirst().message().content());
}
}
}
using LiterLlm;
await using var client = new LlmClient(
apiKey: "sk-...", // or Environment.GetEnvironmentVariable("OPENAI_API_KEY")!
baseUrl: "https://api.openai.com/v1", // override provider base URL
modelHint: "openai", // pre-resolve provider at construction
maxRetries: 3, // retry on transient failures
timeoutSecs: 60 // request timeout in seconds
);
var response = await client.ChatAsync(new ChatCompletionRequest(
Model: "openai/gpt-4o",
Messages: [new UserMessage("Hello!")]
));
Console.WriteLine(response.Choices[0].Message.Content);
# frozen_string_literal: true
require "liter_llm"
require "json"
client = LiterLlm::LlmClient.new(
"sk-...", # or ENV.fetch("OPENAI_API_KEY")
{
"base_url" => nil, # override provider base URL
"model_hint" => "openai", # pre-resolve provider at construction
"max_retries" => 3, # retry on transient failures
"timeout" => 60 # request timeout in seconds
}
)
response = JSON.parse(client.chat(JSON.generate(
model: "openai/gpt-4o",
messages: [{ role: "user", content: "Hello!" }]
)))
puts response.dig("choices", 0, "message", "content")
<?php
declare(strict_types=1);
use LiterLlm\LlmClient;
$client = new LlmClient(
apiKey: 'sk-...', // or getenv('OPENAI_API_KEY')
baseUrl: 'https://api.openai.com/v1', // override provider base URL
modelHint: 'openai', // pre-resolve provider at construction
maxRetries: 3, // retry on transient failures
timeoutSecs: 60 // request timeout in seconds
);
$response = json_decode($client->chat(json_encode([
'model' => 'openai/gpt-4o',
'messages' => [
['role' => 'user', 'content' => 'Hello!'],
],
])), true);
echo $response['choices'][0]['message']['content'] . PHP_EOL;
{:ok, response} =
LiterLlm.chat(
%{
model: "openai/gpt-4o",
messages: [%{role: "user", content: "Hello!"}]
},
api_key: "sk-...", # or System.fetch_env!("OPENAI_API_KEY")
base_url: "https://api.openai.com/v1", # override provider base URL
model_hint: "openai", # pre-resolve provider at construction
max_retries: 3, # retry on transient failures
timeout: 60 # request timeout in seconds
)
IO.puts(hd(response["choices"])["message"]["content"])
import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";
await init();
const client = new LlmClient({
apiKey: "sk-...", // or from environment
baseUrl: undefined, // override provider base URL
modelHint: "openai", // pre-resolve provider at construction
maxRetries: 3, // retry on transient failures
timeoutSecs: 60, // request timeout in seconds
});
const response = await client.chat({
model: "openai/gpt-4o",
messages: [{ role: "user", content: "Hello!" }],
});
console.log(response.choices[0].message.content);
Options¶
| Option | Type | Default | Description |
|---|---|---|---|
| `api_key` | string | required | Provider API key. Wrapped in `SecretString` internally. |
| `base_url` | string | from registry | Override the provider's base URL. |
| `model_hint` | string | none | Pre-resolve a provider at construction (e.g. `"openai"`). |
| `timeout` | duration | 60s | Request timeout. |
| `max_retries` | int | 3 | Retries on 429/5xx responses with exponential backoff. |
API Key Management¶
Supply your API key through the standard environment variable for your provider:
| Provider | Environment Variable |
|---|---|
| OpenAI | OPENAI_API_KEY |
| Anthropic | ANTHROPIC_API_KEY |
| Google (Gemini) | GEMINI_API_KEY |
| Groq | GROQ_API_KEY |
| Mistral | MISTRAL_API_KEY |
| Cohere | CO_API_KEY |
| AWS Bedrock | AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY |
API keys passed to the constructor are wrapped in secrecy::SecretString. They are never logged, serialized, or included in error messages.
Model Hints¶
The `model_hint` parameter pre-resolves a provider at construction time. All requests then use that provider directly, skipping the provider-prefix lookup:
# All requests use OpenAI -- no "openai/" prefix needed
client = LlmClient(api_key="sk-...", model_hint="openai")
response = await client.chat(model="gpt-4o", messages=[...])
Custom Base URLs¶
Override base_url to point at a local inference server or proxy:
# Ollama running locally
client = LlmClient(api_key="unused", base_url="http://localhost:11434/v1")
# Corporate proxy
client = LlmClient(api_key="sk-...", base_url="https://llm-proxy.internal.company.com/v1")
Cache¶
Enable response caching to avoid re-sending identical requests:
| Option | Type | Default | Description |
|---|---|---|---|
| `max_entries` | int | 256 | Maximum cached responses |
| `ttl_seconds` | int | 300 | Time-to-live in seconds |
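These options map onto the `[cache]` table shown in the configuration file above, for example:

```toml
[cache]
max_entries = 512   # keep up to 512 cached responses (default: 256)
ttl_seconds = 600   # expire entries after 10 minutes (default: 300)
```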
Budget¶
Track and enforce spending limits:
| Option | Type | Description |
|---|---|---|
| `global_limit` | float | Maximum total spend in USD |
| `model_limits` | map | Per-model spend limits |
| `enforcement` | string | `"hard"` (reject over-budget) or `"soft"` (warn only) |
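In the configuration file these settings live under `[budget]`, with per-model caps in the nested `[budget.model_limits]` table:

```toml
[budget]
global_limit = 50.0    # stop once $50 has been spent in total
enforcement = "hard"   # reject requests that would exceed a limit; "soft" only warns

[budget.model_limits]
"openai/gpt-4o" = 25.0 # cap gpt-4o spend at $25
```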
Hooks¶
Register lifecycle hooks for request/response/error events:
from liter_llm import LlmClient


class LoggingHook:
    def on_request(self, request):
        print(f"Sending request to {request['model']}")

    def on_response(self, request, response):
        print(f"Got response: {response.usage.total_tokens} tokens")

    def on_error(self, request, error):
        print(f"Error: {error}")


client = LlmClient(api_key="sk-...")
client.add_hook(LoggingHook())
import { LlmClient } from "@kreuzberg/liter-llm";
const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
client.addHook({
onRequest(req) { console.log(`Sending: ${req.model}`); },
onResponse(req, res) { console.log(`Tokens: ${res.usage?.totalTokens}`); },
onError(req, err) { console.error(`Error: ${err}`); },
});
use liter_llm::{ChatCompletionRequest, ChatCompletionResponse, LiterLlmError, LlmHook};
struct LoggingHook;
impl LlmHook for LoggingHook {
fn on_request(&self, req: &ChatCompletionRequest) -> Result<()> {
println!("Sending: {}", req.model);
Ok(())
}
fn on_response(&self, _req: &ChatCompletionRequest, resp: &ChatCompletionResponse) {
if let Some(u) = &resp.usage { println!("Tokens: {}", u.total_tokens); }
}
fn on_error(&self, _req: &ChatCompletionRequest, err: &LiterLlmError) {
eprintln!("Error: {err}");
}
}
type loggingHook struct{}
func (h *loggingHook) OnRequest(req *llm.ChatCompletionRequest) error {
fmt.Printf("Sending: %s\n", req.Model)
return nil
}
func (h *loggingHook) OnResponse(req *llm.ChatCompletionRequest, resp *llm.ChatCompletionResponse) {
if resp.Usage != nil { fmt.Printf("Tokens: %d\n", resp.Usage.TotalTokens) }
}
func (h *loggingHook) OnError(req *llm.ChatCompletionRequest, err error) {
fmt.Printf("Error: %v\n", err)
}
client := llm.NewClient(
llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")),
llm.WithHook(&loggingHook{}),
)
client.addHook(new LlmHook() {
@Override public void onRequest(ChatCompletionRequest req) {
System.out.println("Sending: " + req.model());
}
@Override public void onResponse(ChatCompletionRequest req, ChatCompletionResponse resp) {
System.out.println("Tokens: " + resp.usage().totalTokens());
}
@Override public void onError(ChatCompletionRequest req, LlmException err) {
System.err.println("Error: " + err.getMessage());
}
});
client.AddHook(new LoggingHook());
class LoggingHook : ILlmHook
{
public Task OnRequestAsync(ChatCompletionRequest req) {
Console.WriteLine($"Sending: {req.Model}");
return Task.CompletedTask;
}
public Task OnResponseAsync(ChatCompletionRequest req, ChatCompletionResponse resp) {
Console.WriteLine($"Tokens: {resp.Usage?.TotalTokens}");
return Task.CompletedTask;
}
public Task OnErrorAsync(ChatCompletionRequest req, Exception err) {
Console.Error.WriteLine($"Error: {err.Message}");
return Task.CompletedTask;
}
}
$client->addHook(new class {
public function onRequest(string $requestJson): void {
$req = json_decode($requestJson, true);
echo "Sending: {$req['model']}" . PHP_EOL;
}
public function onResponse(string $requestJson, string $responseJson): void {
echo "Response received" . PHP_EOL;
}
public function onError(string $requestJson, string $errorMessage): void {
echo "Error: {$errorMessage}" . PHP_EOL;
}
});
defmodule LoggingHook do
@behaviour LiterLlm.Hook
def on_request(request), do: IO.puts("Sending: #{request["model"]}")
def on_response(_request, _response), do: IO.puts("Response received")
def on_error(_request, error), do: IO.puts("Error: #{inspect(error)}")
end
client = LiterLlm.Client.new(api_key: "sk-...") |> LiterLlm.Client.add_hook(LoggingHook)
import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";
await init();
const client = new LlmClient({ apiKey: "sk-..." });
client.addHook({
onRequest(req) { console.log(`Sending: ${req.model}`); },
onResponse(req, res) { console.log(`Tokens: ${res.usage?.totalTokens}`); },
onError(req, err) { console.error(`Error: ${err}`); },
});
Custom Providers¶
Register custom providers for self-hosted or otherwise unsupported LLM endpoints, as shown in the configuration snippet below:
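In the configuration file each custom provider is a `[[providers]]` entry, as in the example above; `model_prefixes` determines which model names are routed to it:

```toml
[[providers]]
name = "my-provider"
base_url = "https://my-llm.example.com/v1"
model_prefixes = ["my-provider/"]   # models like "my-provider/llama3" use this endpoint
```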
Cache Backends¶
Configure cache backends backed by Apache OpenDAL (Redis, S3, filesystem, and 40+ other services):
| Option | Type | Description |
|---|---|---|
| `backend` | string | Backend type: `"redis"`, `"s3"`, `"fs"`, `"gcs"`, `"memory"`, etc. |
| `backend_config` | map | Backend-specific config (connection strings, bucket names, paths) |
| `ttl_seconds` | int | Time-to-live in seconds for cache entries |
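A sketch of a Redis-backed cache. The placement of `backend_config` as a nested table and the `endpoint` connection key are assumptions here; check the OpenDAL service documentation for the concrete option names:

```toml
[cache]
backend = "redis"
ttl_seconds = 600

# Hypothetical layout for backend-specific options
[cache.backend_config]
endpoint = "tcp://127.0.0.1:6379"
```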
Cooldown¶
Enable a cooldown (circuit breaker) period after transient errors:
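In the configuration file this is the `cooldown_secs` setting shown above:

```toml
cooldown_secs = 30   # back off from a provider for 30 seconds after a transient error
```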
Rate Limiting¶
Configure per-model rate limits (requests per minute and tokens per minute):
| Option | Type | Description |
|---|---|---|
| `rpm` | int | Maximum requests per minute |
| `tpm` | int | Maximum tokens per minute |
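These correspond to the `[rate_limit]` table from the configuration file above:

```toml
[rate_limit]
rpm = 60       # at most 60 requests per minute
tpm = 100000   # at most 100,000 tokens per minute
```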
Health Checks¶
Enable background health checks to proactively monitor provider availability:
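This corresponds to the `health_check_secs` setting from the configuration file above:

```toml
health_check_secs = 60   # probe provider availability every 60 seconds
```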
Cost Tracking¶
Enable per-request cost tracking to monitor spend in real time:
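Enabled via the `cost_tracking` flag from the configuration file above:

```toml
cost_tracking = true   # record the estimated cost of each request
```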
Tracing¶
Note
The tracing reference has moved to Observability. That page covers span attributes, OTEL exporter setup, cost tracking, and Tower layer composition.