
Configuration

Configuration File

Create a liter-llm.toml file in your project directory. liter-llm auto-discovers it by searching the current directory and parent directories.

api_key = "sk-..."
base_url = "https://api.openai.com/v1"
model_hint = "openai"
timeout_secs = 120
max_retries = 5
cooldown_secs = 30
health_check_secs = 60
cost_tracking = true
tracing = true

[cache]
max_entries = 512
ttl_seconds = 600

[budget]
global_limit = 50.0
enforcement = "hard"

[budget.model_limits]
"openai/gpt-4o" = 25.0

[rate_limit]
rpm = 60
tpm = 100000

[[providers]]
name = "my-provider"
base_url = "https://my-llm.example.com/v1"
model_prefixes = ["my-provider/"]

Load it in code:

Python:

# Auto-discover liter-llm.toml
from liter_llm import LlmClient
client = LlmClient.from_config()  # discovers liter-llm.toml
# Or explicit path
client = LlmClient.from_config("path/to/config.toml")

TypeScript:

import { LlmClient } from "@kreuzberg/liter-llm";

// Auto-discover liter-llm.toml
const client = await LlmClient.fromConfig();
// Or explicit path
const client2 = await LlmClient.fromConfig("path/to/config.toml");

Rust:

use liter_llm::{FileConfig, ManagedClient};

// Auto-discover
if let Some(config) = FileConfig::discover()? {
    let client = ManagedClient::new(config.into_builder().build(), None)?;
}
// Or explicit path
let config = FileConfig::from_toml_file("liter-llm.toml")?;
let client = ManagedClient::new(config.into_builder().build(), None)?;

Go:

// Auto-discover liter-llm.toml
client, err := llm.NewClientFromConfig()
// Or explicit path
client, err = llm.NewClientFromConfigFile("path/to/config.toml")

Java:

// Auto-discover liter-llm.toml
var client = LlmClient.fromConfig();
// Or explicit path
var client2 = LlmClient.fromConfig("path/to/config.toml");

C#:

// Auto-discover liter-llm.toml
var client = LlmClient.FromConfig();
// Or explicit path
var client2 = LlmClient.FromConfig("path/to/config.toml");

Ruby:

# Auto-discover liter-llm.toml
client = LiterLlm::LlmClient.from_config
# Or explicit path
client = LiterLlm::LlmClient.from_config("path/to/config.toml")

PHP:

// Auto-discover liter-llm.toml
$client = LlmClient::fromConfig();
// Or explicit path
$client = LlmClient::fromConfig('path/to/config.toml');

Elixir:

# Auto-discover liter-llm.toml
client = LiterLlm.Client.from_config()
# Or explicit path
client = LiterLlm.Client.from_config("path/to/config.toml")

WASM:

import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";
await init();

// From TOML string (WASM cannot access the filesystem)
const toml = `api_key = "sk-..."`;
const client = LlmClient.fromConfigStr(toml);

Client Construction

Python:

import asyncio
from liter_llm import LlmClient

async def main() -> None:
    client = LlmClient(
        api_key="sk-...",          # or set OPENAI_API_KEY env var
        base_url=None,             # override provider base URL
        model_hint="openai",       # pre-resolve provider at construction
        max_retries=3,             # retry on transient failures
        timeout=60,                # request timeout in seconds
    )
    response = await client.chat(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "Hello!"}],
    )
    print(response.choices[0].message.content)

asyncio.run(main())

TypeScript:

import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({
  apiKey: "sk-...", // or set OPENAI_API_KEY env var
  baseUrl: undefined, // override provider base URL
  modelHint: "openai", // pre-resolve provider at construction
  maxRetries: 3, // retry on transient failures
  timeoutSecs: 60, // request timeout in seconds
});

const response = await client.chat({
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "Hello!" }],
});
console.log(response.choices[0].message.content);

Rust:

use liter_llm::{
    ChatCompletionRequest, ClientConfigBuilder, DefaultClient, LlmClient,
    Message, UserContent, UserMessage,
};
use std::time::Duration;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new("sk-...".to_string()) // or std::env::var("OPENAI_API_KEY")?
        .base_url("https://api.openai.com/v1")      // override provider base URL
        .max_retries(3)                               // retry on transient failures
        .timeout(Duration::from_secs(60))             // request timeout
        .build();
    let client = DefaultClient::new(config, Some("openai/gpt-4o"))?; // pre-resolve provider

    let request = ChatCompletionRequest {
        model: "openai/gpt-4o".into(),
        messages: vec![Message::User(UserMessage {
            content: UserContent::Text("Hello!".into()),
            name: None,
        })],
        ..Default::default()
    };

    let response = client.chat(request).await?;
    if let Some(choice) = response.choices.first() {
        println!("{}", choice.message.content.as_deref().unwrap_or(""));
    }
    Ok(())
}

Go:

package main

import (
    "context"
    "fmt"
    "time"

    llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

func main() {
    client := llm.NewClient(
        llm.WithAPIKey("sk-..."),                     // or set OPENAI_API_KEY env var
        llm.WithBaseURL("https://api.openai.com/v1"), // override provider base URL
        llm.WithTimeout(60*time.Second),              // request timeout
    )
    resp, err := client.Chat(context.Background(), &llm.ChatCompletionRequest{
        Model: "openai/gpt-4o",
        Messages: []llm.Message{
            llm.NewTextMessage(llm.RoleUser, "Hello!"),
        },
    })
    if err != nil {
        panic(err)
    }
    if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != nil {
        fmt.Println(*resp.Choices[0].Message.Content)
    }
}

Java:

import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.List;

public class Main {
    public static void main(String[] args) throws Exception {
        try (var client = LlmClient.builder()
                .apiKey("sk-...")                           // or System.getenv("OPENAI_API_KEY")
                .baseUrl("https://api.openai.com/v1")       // override provider base URL
                .modelHint("openai")                        // pre-resolve provider at construction
                .maxRetries(3)                              // retry on transient failures
                .timeoutSecs(60)                            // request timeout in seconds
                .build()) {
            var response = client.chat(new ChatCompletionRequest(
                "openai/gpt-4o",
                List.of(new UserMessage("Hello!"))
            ));
            System.out.println(response.choices().getFirst().message().content());
        }
    }
}

C#:

using LiterLlm;

await using var client = new LlmClient(
    apiKey: "sk-...",                               // or Environment.GetEnvironmentVariable("OPENAI_API_KEY")!
    baseUrl: "https://api.openai.com/v1",           // override provider base URL
    modelHint: "openai",                            // pre-resolve provider at construction
    maxRetries: 3,                                  // retry on transient failures
    timeoutSecs: 60                                 // request timeout in seconds
);

var response = await client.ChatAsync(new ChatCompletionRequest(
    Model: "openai/gpt-4o",
    Messages: [new UserMessage("Hello!")]
));
Console.WriteLine(response.Choices[0].Message.Content);

Ruby:

# frozen_string_literal: true

require "liter_llm"
require "json"

client = LiterLlm::LlmClient.new(
  "sk-...",                     # or ENV.fetch("OPENAI_API_KEY")
  {
    "base_url" => nil,          # override provider base URL
    "model_hint" => "openai",   # pre-resolve provider at construction
    "max_retries" => 3,         # retry on transient failures
    "timeout" => 60             # request timeout in seconds
  }
)

response = JSON.parse(client.chat(JSON.generate(
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "Hello!" }]
)))
puts response.dig("choices", 0, "message", "content")

PHP:

<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

$client = new LlmClient(
    apiKey: 'sk-...',                                 // or getenv('OPENAI_API_KEY')
    baseUrl: 'https://api.openai.com/v1',             // override provider base URL
    modelHint: 'openai',                              // pre-resolve provider at construction
    maxRetries: 3,                                    // retry on transient failures
    timeoutSecs: 60                                   // request timeout in seconds
);

$response = json_decode($client->chat(json_encode([
    'model' => 'openai/gpt-4o',
    'messages' => [
        ['role' => 'user', 'content' => 'Hello!'],
    ],
])), true);

echo $response['choices'][0]['message']['content'] . PHP_EOL;

Elixir:

{:ok, response} =
  LiterLlm.chat(
    %{
      model: "openai/gpt-4o",
      messages: [%{role: "user", content: "Hello!"}]
    },
    api_key: "sk-...",                          # or System.fetch_env!("OPENAI_API_KEY")
    base_url: "https://api.openai.com/v1",      # override provider base URL
    model_hint: "openai",                       # pre-resolve provider at construction
    max_retries: 3,                             # retry on transient failures
    timeout: 60                                 # request timeout in seconds
  )

IO.puts(hd(response["choices"])["message"]["content"])

WASM:

import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

await init();

const client = new LlmClient({
  apiKey: "sk-...", // or from environment
  baseUrl: undefined, // override provider base URL
  modelHint: "openai", // pre-resolve provider at construction
  maxRetries: 3, // retry on transient failures
  timeoutSecs: 60, // request timeout in seconds
});

const response = await client.chat({
  model: "openai/gpt-4o",
  messages: [{ role: "user", content: "Hello!" }],
});
console.log(response.choices[0].message.content);

Options

Option Type Default Description
api_key string required Provider API key. Wrapped in SecretString internally.
base_url string from registry Override the provider's base URL.
model_hint string none Pre-resolve a provider at construction (e.g. "openai").
timeout duration 60s Request timeout.
max_retries int 3 Retries on 429/5xx responses with exponential backoff.

API Key Management

If no api_key is passed, liter-llm falls back to the standard environment variable for your provider:

Provider Environment Variable
OpenAI OPENAI_API_KEY
Anthropic ANTHROPIC_API_KEY
Google (Gemini) GEMINI_API_KEY
Groq GROQ_API_KEY
Mistral MISTRAL_API_KEY
Cohere CO_API_KEY
AWS Bedrock AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY

API keys passed to the constructor are wrapped in secrecy::SecretString. They are never logged, serialized, or included in error messages.
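
You can also read the variable explicitly and pass it through; a minimal Python sketch:

import os
from liter_llm import LlmClient

# Read the key explicitly and fail fast if it is missing,
# rather than relying on the implicit fallback.
api_key = os.environ["OPENAI_API_KEY"]  # raises KeyError if unset
client = LlmClient(api_key=api_key)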

Model Hints

The model_hint parameter pre-resolves a provider at construction time. All requests then route to that provider, so model names need no provider prefix:

# All requests use OpenAI -- no "openai/" prefix needed
client = LlmClient(api_key="sk-...", model_hint="openai")
response = await client.chat(model="gpt-4o", messages=[...])

Custom Base URLs

Override base_url to point at a local inference server or proxy:

# Ollama running locally
client = LlmClient(api_key="unused", base_url="http://localhost:11434/v1")

# Corporate proxy
client = LlmClient(api_key="sk-...", base_url="https://llm-proxy.internal.company.com/v1")

Cache

Enable response caching so identical requests are served from the cache instead of hitting the provider again:

Python:

from liter_llm import LlmClient

client = LlmClient(
    api_key="sk-...",
    cache={"max_entries": 256, "ttl_seconds": 300},
)

TypeScript:

import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({
  apiKey: process.env.OPENAI_API_KEY!,
  cache: { maxEntries: 256, ttlSeconds: 300 },
});

Rust:

use liter_llm::{ClientConfigBuilder, CacheConfig};

let config = ClientConfigBuilder::new("sk-...")
    .cache(CacheConfig { max_entries: 256, ttl_seconds: 300, ..Default::default() })
    .build();

Go:

client := llm.NewClient(
    llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")),
    llm.WithCache(llm.CacheConfig{MaxEntries: 256, TTLSeconds: 300}),
)

Java:

var client = LlmClient.builder()
        .apiKey(System.getenv("OPENAI_API_KEY"))
        .cacheConfig(new CacheConfig(256, 300))
        .build();

C#:

var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!,
    cacheConfig: new CacheConfig(MaxEntries: 256, TtlSeconds: 300));

Ruby:

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {
  cache: { max_entries: 256, ttl_seconds: 300 }
})

PHP:

$client = new LlmClient(
    apiKey: getenv('OPENAI_API_KEY') ?: '',
    cacheConfig: ['max_entries' => 256, 'ttl_seconds' => 300],
);

Elixir:

client = LiterLlm.Client.new(
  api_key: System.fetch_env!("OPENAI_API_KEY"),
  cache: [max_entries: 256, ttl_seconds: 300]
)

WASM:

import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";
await init();

const client = new LlmClient({
  apiKey: "sk-...",
  cache: { maxEntries: 256, ttlSeconds: 300 },
});

Option Type Default Description
max_entries int 256 Maximum cached responses
ttl_seconds int 300 Time-to-live in seconds

Budget

Track and enforce spending limits:

Python:

from liter_llm import LlmClient

client = LlmClient(
    api_key="sk-...",
    budget={"global_limit": 10.0, "model_limits": {"openai/gpt-4o": 5.0}, "enforcement": "hard"},
)
print(f"Budget used: ${client.budget_used:.2f}")

TypeScript:

import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({
  apiKey: process.env.OPENAI_API_KEY!,
  budget: { globalLimit: 10.0, modelLimits: { "openai/gpt-4o": 5.0 }, enforcement: "hard" },
});
console.log(`Budget used: $${client.budgetUsed.toFixed(2)}`);

Rust:

use liter_llm::{ClientConfigBuilder, BudgetConfig};

let config = ClientConfigBuilder::new("sk-...")
    .budget(BudgetConfig {
        global_limit: Some(10.0),
        model_limits: Default::default(),
        enforcement: "hard".into(),
    })
    .build();

Go:

client := llm.NewClient(
    llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")),
    llm.WithBudget(llm.BudgetConfig{
        GlobalLimit: 10.0,
        ModelLimits: map[string]float64{"openai/gpt-4o": 5.0},
        Enforcement: "hard",
    }),
)
fmt.Printf("Budget used: $%.2f\n", client.BudgetUsed())

Java:

var client = LlmClient.builder()
        .apiKey(System.getenv("OPENAI_API_KEY"))
        .budgetConfig(new BudgetConfig(10.0, Map.of("openai/gpt-4o", 5.0), "hard"))
        .build();
System.out.printf("Budget used: $%.2f%n", client.getBudgetUsed());

C#:

var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!,
    budgetConfig: new BudgetConfig(
        GlobalLimit: 10.0,
        ModelLimits: new() { ["openai/gpt-4o"] = 5.0 },
        Enforcement: "hard"));
Console.WriteLine($"Budget used: ${client.BudgetUsed:F2}");

Ruby:

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {
  budget: { global_limit: 10.0, model_limits: {}, enforcement: "hard" }
})
puts "Budget used: $#{client.budget_used}"

PHP:

$client = new LlmClient(
    apiKey: getenv('OPENAI_API_KEY') ?: '',
    budgetConfig: ['global_limit' => 10.0, 'enforcement' => 'hard'],
);
echo "Budget used: $" . $client->getBudgetUsed() . PHP_EOL;

Elixir:

client = LiterLlm.Client.new(
  api_key: System.fetch_env!("OPENAI_API_KEY"),
  budget: [global_limit: 10.0, enforcement: "hard"]
)
IO.puts("Budget used: $#{LiterLlm.Client.budget_used(client)}")

WASM:

import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";
await init();

const client = new LlmClient({
  apiKey: "sk-...",
  budget: { globalLimit: 10.0, enforcement: "hard" },
});
console.log(`Budget used: $${client.budgetUsed.toFixed(2)}`);

Option Type Description
global_limit float Maximum total spend in USD
model_limits map Per-model spend limits
enforcement string "hard" (reject over-budget) or "soft" (warn only)
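
With "hard" enforcement, a request that would push spend past a limit is rejected instead of sent. A minimal Python sketch; the exact exception class raised is an assumption, so a generic handler is shown:

from liter_llm import LlmClient

client = LlmClient(
    api_key="sk-...",
    budget={"global_limit": 0.01, "enforcement": "hard"},  # deliberately tiny limit
)

try:
    response = await client.chat(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "Hello!"}],
    )
except Exception as exc:  # the specific budget-error type is an assumption
    print(f"Rejected by budget enforcement: {exc}")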

Hooks

Register lifecycle hooks for request/response/error events:

Python:

from liter_llm import LlmClient

class LoggingHook:
    def on_request(self, request):
        print(f"Sending request to {request['model']}")

    def on_response(self, request, response):
        print(f"Got response: {response.usage.total_tokens} tokens")

    def on_error(self, request, error):
        print(f"Error: {error}")

client = LlmClient(api_key="sk-...")
client.add_hook(LoggingHook())

TypeScript:

import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
client.addHook({
  onRequest(req) { console.log(`Sending: ${req.model}`); },
  onResponse(req, res) { console.log(`Tokens: ${res.usage?.totalTokens}`); },
  onError(req, err) { console.error(`Error: ${err}`); },
});

Rust:

use liter_llm::LlmHook;

struct LoggingHook;
impl LlmHook for LoggingHook {
    fn on_request(&self, req: &ChatCompletionRequest) -> Result<()> {
        println!("Sending: {}", req.model);
        Ok(())
    }
    fn on_response(&self, _req: &ChatCompletionRequest, resp: &ChatCompletionResponse) {
        if let Some(u) = &resp.usage { println!("Tokens: {}", u.total_tokens); }
    }
    fn on_error(&self, _req: &ChatCompletionRequest, err: &LiterLlmError) {
        eprintln!("Error: {err}");
    }
}

Go:

type loggingHook struct{}

func (h *loggingHook) OnRequest(req *llm.ChatCompletionRequest) error {
    fmt.Printf("Sending: %s\n", req.Model)
    return nil
}
func (h *loggingHook) OnResponse(req *llm.ChatCompletionRequest, resp *llm.ChatCompletionResponse) {
    if resp.Usage != nil { fmt.Printf("Tokens: %d\n", resp.Usage.TotalTokens) }
}
func (h *loggingHook) OnError(req *llm.ChatCompletionRequest, err error) {
    fmt.Printf("Error: %v\n", err)
}

client := llm.NewClient(
    llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")),
    llm.WithHook(&loggingHook{}),
)

Java:

client.addHook(new LlmHook() {
    @Override public void onRequest(ChatCompletionRequest req) {
        System.out.println("Sending: " + req.model());
    }
    @Override public void onResponse(ChatCompletionRequest req, ChatCompletionResponse resp) {
        System.out.println("Tokens: " + resp.usage().totalTokens());
    }
    @Override public void onError(ChatCompletionRequest req, LlmException err) {
        System.err.println("Error: " + err.getMessage());
    }
});

C#:

client.AddHook(new LoggingHook());

class LoggingHook : ILlmHook
{
    public Task OnRequestAsync(ChatCompletionRequest req) {
        Console.WriteLine($"Sending: {req.Model}");
        return Task.CompletedTask;
    }
    public Task OnResponseAsync(ChatCompletionRequest req, ChatCompletionResponse resp) {
        Console.WriteLine($"Tokens: {resp.Usage?.TotalTokens}");
        return Task.CompletedTask;
    }
    public Task OnErrorAsync(ChatCompletionRequest req, Exception err) {
        Console.Error.WriteLine($"Error: {err.Message}");
        return Task.CompletedTask;
    }
}

Ruby:

hook = {
  on_request: ->(req) { puts "Sending: #{JSON.parse(req)['model']}" },
  on_response: ->(req, resp) { puts "Response received" },
  on_error: ->(req, err) { puts "Error: #{err}" }
}
client.add_hook(hook)

PHP:

$client->addHook(new class {
    public function onRequest(string $requestJson): void {
        $req = json_decode($requestJson, true);
        echo "Sending: {$req['model']}" . PHP_EOL;
    }
    public function onResponse(string $requestJson, string $responseJson): void {
        echo "Response received" . PHP_EOL;
    }
    public function onError(string $requestJson, string $errorMessage): void {
        echo "Error: {$errorMessage}" . PHP_EOL;
    }
});

Elixir:

defmodule LoggingHook do
  @behaviour LiterLlm.Hook

  def on_request(request), do: IO.puts("Sending: #{request["model"]}")
  def on_response(_request, _response), do: IO.puts("Response received")
  def on_error(_request, error), do: IO.puts("Error: #{inspect(error)}")
end

client = LiterLlm.Client.new(api_key: "sk-...") |> LiterLlm.Client.add_hook(LoggingHook)

WASM:

import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";
await init();

const client = new LlmClient({ apiKey: "sk-..." });
client.addHook({
  onRequest(req) { console.log(`Sending: ${req.model}`); },
  onResponse(req, res) { console.log(`Tokens: ${res.usage?.totalTokens}`); },
  onError(req, err) { console.error(`Error: ${err}`); },
});

Custom Providers

Register custom providers for self-hosted or unsupported LLM endpoints:

Python:

from liter_llm import LlmClient

client = LlmClient(api_key="sk-...")
client.register_provider({
    "name": "my-provider",
    "base_url": "https://my-llm.example.com/v1",
    "auth_header": "Authorization",
    "model_prefixes": ["my-provider/"],
})

TypeScript:

import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
client.registerProvider({
  name: "my-provider",
  baseUrl: "https://my-llm.example.com/v1",
  authHeader: "Authorization",
  modelPrefixes: ["my-provider/"],
});

Rust:

use liter_llm::{register_custom_provider, CustomProviderConfig};

register_custom_provider(CustomProviderConfig {
    name: "my-provider".into(),
    base_url: "https://my-llm.example.com/v1".into(),
    auth_header: "Authorization".into(),
    model_prefixes: vec!["my-provider/".into()],
})?;

Go:

client.RegisterProvider(llm.ProviderConfig{
    Name:          "my-provider",
    BaseURL:       "https://my-llm.example.com/v1",
    AuthHeader:    "Authorization",
    ModelPrefixes: []string{"my-provider/"},
})

Java:

client.registerProvider(new ProviderConfig(
    "my-provider",
    "https://my-llm.example.com/v1",
    "Authorization",
    List.of("my-provider/")));

C#:

client.RegisterProvider(new ProviderConfig(
    Name: "my-provider",
    BaseUrl: "https://my-llm.example.com/v1",
    AuthHeader: "Authorization",
    ModelPrefixes: ["my-provider/"]));

Ruby:

client.register_provider(JSON.generate(
  name: "my-provider",
  base_url: "https://my-llm.example.com/v1",
  auth_header: "Authorization",
  model_prefixes: ["my-provider/"]
))

PHP:

$client->registerProvider(json_encode([
    'name' => 'my-provider',
    'base_url' => 'https://my-llm.example.com/v1',
    'auth_header' => 'Authorization',
    'model_prefixes' => ['my-provider/'],
]));

Elixir:

LiterLlm.register_provider(%{
  name: "my-provider",
  base_url: "https://my-llm.example.com/v1",
  auth_header: "Authorization",
  model_prefixes: ["my-provider/"]
})

WASM:

import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";
await init();

const client = new LlmClient({ apiKey: "sk-..." });
client.registerProvider({
  name: "my-provider",
  baseUrl: "https://my-llm.example.com/v1",
  authHeader: "Authorization",
  modelPrefixes: ["my-provider/"],
});
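
Once registered, any model name starting with a registered prefix routes to the custom provider. A Python usage sketch; the model name after the prefix is a placeholder:

response = await client.chat(
    model="my-provider/my-model",  # "my-provider/" routes to the custom provider
    messages=[{"role": "user", "content": "Hello!"}],
)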

Cache Backends

Back the response cache with external storage through Apache OpenDAL (Redis, S3, filesystem, and 40+ other services):

Python:

from liter_llm import LlmClient

client = LlmClient(
    api_key="sk-...",
    cache={"backend": "redis", "backend_config": {"connection_string": "redis://localhost"}, "ttl_seconds": 3600},
)

TypeScript:

import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({
  apiKey: process.env.OPENAI_API_KEY!,
  cache: { backend: "redis", backendConfig: { connectionString: "redis://localhost" }, ttlSeconds: 3600 },
});

Rust:

use liter_llm::{ClientConfigBuilder, CacheConfig, CacheBackend};

let config = ClientConfigBuilder::new("sk-...")
    .cache(CacheConfig {
        backend: CacheBackend::Redis { connection_string: "redis://localhost".into() },
        ttl_seconds: 3600,
        ..Default::default()
    })
    .build();

Go:

client := llm.NewClient(
    llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")),
    llm.WithCache(llm.CacheConfig{
        Backend:       "redis",
        BackendConfig: map[string]string{"connection_string": "redis://localhost"},
        TTLSeconds:    3600,
    }),
)

Java:

var client = LlmClient.builder()
        .apiKey(System.getenv("OPENAI_API_KEY"))
        .cacheConfig(new CacheConfig("redis", Map.of("connection_string", "redis://localhost"), 3600))
        .build();

C#:

var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!,
    cacheConfig: new CacheConfig(
        Backend: "redis",
        BackendConfig: new() { ["connection_string"] = "redis://localhost" },
        TtlSeconds: 3600));

Ruby:

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"),
  cache: { backend: "redis", backend_config: { connection_string: "redis://localhost" }, ttl_seconds: 3600 }
)

PHP:

$client = new LlmClient(
    apiKey: getenv('OPENAI_API_KEY') ?: '',
    cacheConfig: [
        'backend' => 'redis',
        'backend_config' => ['connection_string' => 'redis://localhost'],
        'ttl_seconds' => 3600,
    ],
);

Elixir:

client = LiterLlm.Client.new(
  api_key: System.fetch_env!("OPENAI_API_KEY"),
  cache: [backend: "redis", backend_config: %{connection_string: "redis://localhost"}, ttl_seconds: 3600]
)

WASM:

import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";
await init();

const client = new LlmClient({
  apiKey: "sk-...",
  cache: { backend: "redis", backendConfig: { connectionString: "redis://localhost" }, ttlSeconds: 3600 },
});

Option Type Description
backend string Backend type: "redis", "s3", "fs", "gcs", "memory", etc.
backend_config map Backend-specific config (connection strings, bucket names, paths)
ttl_seconds int Time-to-live in seconds for cache entries
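
As another example, the filesystem backend can point the cache at a local directory. A hedged Python sketch: OpenDAL's fs service is configured with a root path, and the "root" key shown here is an assumption about how liter-llm forwards backend config:

from liter_llm import LlmClient

client = LlmClient(
    api_key="sk-...",
    cache={
        "backend": "fs",
        # "root" is OpenDAL's fs config key; that liter-llm forwards it
        # unchanged is an assumption.
        "backend_config": {"root": "/var/cache/liter-llm"},
        "ttl_seconds": 3600,
    },
)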

Cooldown

Enable a cooldown (circuit breaker) period after transient errors:

Python:

client = LlmClient(api_key="sk-...", cooldown_secs=30)

TypeScript:

const client = new LlmClient({ apiKey: "sk-...", cooldown: 30 });

Rust:

let config = ClientConfigBuilder::new("sk-...")
    .cooldown(Duration::from_secs(30))
    .build();

Go:

client := llm.NewClient(
    llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")),
    llm.WithCooldown(30 * time.Second),
)

Java:

var client = LlmClient.builder()
        .apiKey(System.getenv("OPENAI_API_KEY"))
        .cooldownSecs(30)
        .build();

C#:

var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!,
    cooldownSecs: 30);

Ruby:

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"),
  cooldown_secs: 30
)

PHP:

$client = new LlmClient(
    apiKey: getenv('OPENAI_API_KEY') ?: '',
    cooldownSecs: 30,
);

Elixir:

client = LiterLlm.Client.new(
  api_key: System.fetch_env!("OPENAI_API_KEY"),
  cooldown_secs: 30
)

WASM:

const client = new LlmClient({ apiKey: "sk-...", cooldown: 30 });

Rate Limiting

Configure per-model rate limits (requests per minute and tokens per minute):

Python:

client = LlmClient(api_key="sk-...", rate_limit={"rpm": 60, "tpm": 100000})

TypeScript:

const client = new LlmClient({ apiKey: "sk-...", rateLimit: { rpm: 60, tpm: 100000 } });

Rust:

let config = ClientConfigBuilder::new("sk-...")
    .rate_limit(RateLimitConfig { rpm: Some(60), tpm: Some(100_000) })
    .build();

Go:

client := llm.NewClient(
    llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")),
    llm.WithRateLimit(llm.RateLimitConfig{RPM: 60, TPM: 100000}),
)

Java:

var client = LlmClient.builder()
        .apiKey(System.getenv("OPENAI_API_KEY"))
        .rateLimitConfig(new RateLimitConfig(60, 100000))
        .build();

C#:

var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!,
    rateLimit: new RateLimitConfig(Rpm: 60, Tpm: 100000));

Ruby:

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"),
  rate_limit: { rpm: 60, tpm: 100000 }
)

PHP:

$client = new LlmClient(
    apiKey: getenv('OPENAI_API_KEY') ?: '',
    rateLimit: ['rpm' => 60, 'tpm' => 100000],
);

Elixir:

client = LiterLlm.Client.new(
  api_key: System.fetch_env!("OPENAI_API_KEY"),
  rate_limit: [rpm: 60, tpm: 100_000]
)

WASM:

const client = new LlmClient({ apiKey: "sk-...", rateLimit: { rpm: 60, tpm: 100000 } });
Option Type Description
rpm int Maximum requests per minute
tpm int Maximum tokens per minute

Health Checks

Enable periodic background health checks to detect provider availability problems early:

Python:

client = LlmClient(api_key="sk-...", health_check_secs=60)

TypeScript:

const client = new LlmClient({ apiKey: "sk-...", healthCheck: 60 });

Rust:

let config = ClientConfigBuilder::new("sk-...")
    .health_check(Duration::from_secs(60))
    .build();

Go:

client := llm.NewClient(
    llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")),
    llm.WithHealthCheck(60 * time.Second),
)

Java:

var client = LlmClient.builder()
        .apiKey(System.getenv("OPENAI_API_KEY"))
        .healthCheckSecs(60)
        .build();

C#:

var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!,
    healthCheckSecs: 60);

Ruby:

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"),
  health_check_secs: 60
)

PHP:

$client = new LlmClient(
    apiKey: getenv('OPENAI_API_KEY') ?: '',
    healthCheckSecs: 60,
);

Elixir:

client = LiterLlm.Client.new(
  api_key: System.fetch_env!("OPENAI_API_KEY"),
  health_check_secs: 60
)

WASM:

const client = new LlmClient({ apiKey: "sk-...", healthCheck: 60 });

Cost Tracking

Enable per-request cost tracking to monitor spend in real time:

Python:

client = LlmClient(api_key="sk-...", cost_tracking=True)

TypeScript:

const client = new LlmClient({ apiKey: "sk-...", costTracking: true });

Rust:

let config = ClientConfigBuilder::new("sk-...")
    .cost_tracking(true)
    .build();

Go:

client := llm.NewClient(
    llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")),
    llm.WithCostTracking(),
)

Java:

var client = LlmClient.builder()
        .apiKey(System.getenv("OPENAI_API_KEY"))
        .costTracking(true)
        .build();

C#:

var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!,
    costTracking: true);

Ruby:

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"),
  cost_tracking: true
)

PHP:

$client = new LlmClient(
    apiKey: getenv('OPENAI_API_KEY') ?: '',
    costTracking: true,
);

Elixir:

client = LiterLlm.Client.new(
  api_key: System.fetch_env!("OPENAI_API_KEY"),
  cost_tracking: true
)

WASM:

const client = new LlmClient({ apiKey: "sk-...", costTracking: true });

Tracing

Note

The tracing reference has moved to Observability. That page covers span attributes, OTEL exporter setup, cost tracking, and Tower layer composition.