Skip to content

Search & OCR

Search the web or documents across 12 providers (Brave, Tavily, Google PSE, etc.):

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    """Query the ``brave/web-search`` model and print each hit as ``title: url``."""
    api_key = os.environ["BRAVE_API_KEY"]
    searcher = LlmClient(api_key=api_key)
    search_args = {
        "model": "brave/web-search",
        "query": "What is Rust programming language?",
        "max_results": 5,
    }
    reply = await searcher.search(**search_args)
    for hit in reply.results:
        print(f"{hit.title}: {hit.url}")

asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";

// The API key is read from the BRAVE_API_KEY environment variable.
const apiKey = process.env.BRAVE_API_KEY!;
const client = new LlmClient({ apiKey });

// Request at most five results from the brave/web-search model.
const { results } = await client.search({
  model: "brave/web-search",
  query: "What is Rust programming language?",
  maxResults: 5,
});

// One "title: url" line per result.
for (const { title, url } of results) {
  console.log(`${title}: ${url}`);
}
use liter_llm::{ClientConfigBuilder, DefaultClient, LlmClient, SearchRequest};

/// Sends one search request to the `brave/web-search` model and prints each
/// result as `title: url`.
///
/// # Errors
///
/// Returns an error if `BRAVE_API_KEY` cannot be read, if the client cannot be
/// constructed, or if the search call fails.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let api_key = std::env::var("BRAVE_API_KEY")?;
    let client = DefaultClient::new(
        ClientConfigBuilder::new(api_key).build(),
        Some("brave/web-search"),
    )?;

    // Fields not listed here keep their `Default` values.
    let request = SearchRequest {
        model: "brave/web-search".into(),
        query: "What is Rust programming language?".into(),
        max_results: Some(5),
        ..Default::default()
    };
    let reply = client.search(request).await?;

    for hit in &reply.results {
        println!("{}: {}", hit.title, hit.url);
    }
    Ok(())
}
package main

import (
 "context"
 "fmt"
 "os"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

// main sends one search request to the brave/web-search model and prints
// each result as "title: url". It panics if the search call returns an error.
func main() {
 apiKey := os.Getenv("BRAVE_API_KEY")
 client := llm.NewClient(llm.WithAPIKey(apiKey))

 ctx := context.Background()
 request := &llm.SearchRequest{
  Model:      "brave/web-search",
  Query:      "What is Rust programming language?",
  MaxResults: 5,
 }

 reply, err := client.Search(ctx, request)
 if err != nil {
  panic(err)
 }

 for _, hit := range reply.Results {
  fmt.Printf("%s: %s\n", hit.Title, hit.URL)
 }
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;

/** Sends one search request to the {@code brave/web-search} model and prints each result. */
public class Main {
    /**
     * Builds a client from {@code BRAVE_API_KEY}, runs the search, and prints
     * one {@code title: url} line per result. The client is closed by the
     * try-with-resources statement.
     */
    public static void main(String[] args) throws Exception {
        var apiKey = System.getenv("BRAVE_API_KEY");
        try (var client = LlmClient.builder().apiKey(apiKey).build()) {
            var request = new SearchRequest(
                "brave/web-search",
                "What is Rust programming language?",
                5
            );
            var reply = client.search(request);
            for (var hit : reply.results()) {
                System.out.printf("%s: %s%n", hit.title(), hit.url());
            }
        }
    }
}
using LiterLlm;

// The API key is read from the BRAVE_API_KEY environment variable.
var apiKey = Environment.GetEnvironmentVariable("BRAVE_API_KEY")!;
await using var client = new LlmClient(apiKey: apiKey);

// Request at most five results from the brave/web-search model.
var request = new SearchRequest(
    Model: "brave/web-search",
    Query: "What is Rust programming language?",
    MaxResults: 5
);
var reply = await client.SearchAsync(request);

// One "title: url" line per result.
foreach (var hit in reply.Results)
{
    Console.WriteLine($"{hit.Title}: {hit.Url}");
}
# frozen_string_literal: true

require "liter_llm"
require "json"

# ENV.fetch raises KeyError when BRAVE_API_KEY is not set.
client = LiterLlm::LlmClient.new(ENV.fetch("BRAVE_API_KEY"), {})

# The request is sent as a JSON string and the reply is parsed back from JSON.
request = {
  model: "brave/web-search",
  query: "What is Rust programming language?",
  max_results: 5
}
raw_reply = client.search(JSON.generate(request))
response = JSON.parse(raw_reply)

response["results"].each { |hit| puts "#{hit["title"]}: #{hit["url"]}" }
<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

// Falls back to an empty key when BRAVE_API_KEY is unset (getenv() returns false).
$client = new LlmClient(apiKey: getenv('BRAVE_API_KEY') ?: '');

// The request is sent as a JSON string; the reply is decoded into an associative array.
$request = [
    'model' => 'brave/web-search',
    'query' => 'What is Rust programming language?',
    'max_results' => 5,
];
$rawReply = $client->search(json_encode($request));
$response = json_decode($rawReply, true);

foreach ($response['results'] as $hit) {
    echo $hit['title'] . ': ' . $hit['url'] . PHP_EOL;
}
# System.fetch_env!/1 raises when BRAVE_API_KEY is not set.
api_key = System.fetch_env!("BRAVE_API_KEY")

request = %{
  model: "brave/web-search",
  query: "What is Rust programming language?",
  max_results: 5
}

# The match fails (raising MatchError) on anything other than {:ok, _}.
{:ok, response} = LiterLlm.search(request, api_key: api_key)

Enum.each(response["results"], fn hit ->
  IO.puts("#{hit["title"]}: #{hit["url"]}")
end)
import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

// init() is awaited before the client is constructed.
await init();

const apiKey = process.env.BRAVE_API_KEY!;
const client = new LlmClient({ apiKey });

// Request at most five results from the brave/web-search model.
const { results } = await client.search({
  model: "brave/web-search",
  query: "What is Rust programming language?",
  maxResults: 5,
});

// One "title: url" line per result.
for (const { title, url } of results) {
  console.log(`${title}: ${url}`);
}

Search Parameters

| Parameter | Type | Description |
|---|---|---|
| `model` | string | Search provider (e.g. `"brave/web-search"`) |
| `query` | string | Search query |
| `max_results` | int | Maximum results to return |
| `search_domain_filter` | array | Restrict to specific domains |
| `country` | string | ISO country code for localized results |

Search Providers

| Provider | Prefix | Example Model |
|---|---|---|
| Brave | `brave/` | `web-search` |
| Tavily | `tavily/` | `tavily-search` |
| Google PSE | `google_pse/` | `google-search` |
| Serper | `serper/` | `serper-search` |
| SerpAPI | `serpapi/` | `serpapi-search` |
| Bing | `bing/` | `bing-search` |

See the Providers page for the complete capability matrix.

OCR

Extract text from documents via OCR across 4 providers (Mistral, Azure Document Intelligence, etc.):

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    """Run OCR on a document URL and print the first 100 characters of each page."""
    api_key = os.environ["MISTRAL_API_KEY"]
    ocr_client = LlmClient(api_key=api_key)
    source = {"type": "document_url", "url": "https://example.com/invoice.pdf"}
    reply = await ocr_client.ocr(
        model="mistral/mistral-ocr-latest",
        document=source,
    )
    for pg in reply.pages:
        preview = pg.markdown[:100]
        print(f"Page {pg.index}: {preview}...")

asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";

// The API key is read from the MISTRAL_API_KEY environment variable.
const apiKey = process.env.MISTRAL_API_KEY!;
const client = new LlmClient({ apiKey });

// The document is referenced by URL.
const { pages } = await client.ocr({
  model: "mistral/mistral-ocr-latest",
  document: { type: "document_url", url: "https://example.com/invoice.pdf" },
});

// Print the first 100 characters of each page's markdown.
for (const page of pages) {
  const preview = page.markdown.slice(0, 100);
  console.log(`Page ${page.index}: ${preview}...`);
}
use liter_llm::{ClientConfigBuilder, DefaultClient, LlmClient, OcrRequest, DocumentInput};

/// Runs OCR on a document URL with the `mistral/mistral-ocr-latest` model and
/// prints a preview of up to 100 characters of each page's markdown.
///
/// # Errors
///
/// Returns an error if `MISTRAL_API_KEY` cannot be read, if the client cannot
/// be constructed, or if the OCR call fails.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("MISTRAL_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("mistral/mistral-ocr-latest"))?;

    let response = client
        .ocr(OcrRequest {
            model: "mistral/mistral-ocr-latest".into(),
            document: DocumentInput::Url {
                url: "https://example.com/invoice.pdf".into(),
            },
            ..Default::default()
        })
        .await?;

    for page in &response.pages {
        // Truncate by characters rather than slicing `[..100]`: a byte-range
        // slice panics when the text is shorter than 100 bytes or when byte 100
        // falls inside a multi-byte UTF-8 character.
        let preview: String = page.markdown.chars().take(100).collect();
        println!("Page {}: {}...", page.index, preview);
    }
    Ok(())
}
package main

import (
 "context"
 "fmt"
 "os"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

// main runs OCR on a document URL with the mistral/mistral-ocr-latest model
// and prints a preview of each page. It panics if the OCR call returns an error.
func main() {
 apiKey := os.Getenv("MISTRAL_API_KEY")
 client := llm.NewClient(llm.WithAPIKey(apiKey))

 ctx := context.Background()
 request := &llm.OCRRequest{
  Model: "mistral/mistral-ocr-latest",
  Document: llm.DocumentInput{
   Type: "document_url",
   URL:  "https://example.com/invoice.pdf",
  },
 }

 reply, err := client.OCR(ctx, request)
 if err != nil {
  panic(err)
 }

 // The %.100s verb truncates the markdown to 100 characters.
 for _, pg := range reply.Pages {
  fmt.Printf("Page %d: %.100s...\n", pg.Index, pg.Markdown)
 }
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;

/** Runs OCR on a document URL with the {@code mistral/mistral-ocr-latest} model. */
public class Main {
    /**
     * Builds a client from {@code MISTRAL_API_KEY}, submits the document, and
     * prints a preview of each page. The client is closed by the
     * try-with-resources statement.
     */
    public static void main(String[] args) throws Exception {
        var apiKey = System.getenv("MISTRAL_API_KEY");
        try (var client = LlmClient.builder().apiKey(apiKey).build()) {
            var request = new OcrRequest(
                "mistral/mistral-ocr-latest",
                new DocumentInput("document_url", "https://example.com/invoice.pdf")
            );
            var reply = client.ocr(request);
            // The %.100s conversion truncates the markdown to 100 characters.
            for (var pg : reply.pages()) {
                System.out.printf("Page %d: %.100s...%n",
                    pg.index(), pg.markdown());
            }
        }
    }
}
using LiterLlm;

// The API key is read from the MISTRAL_API_KEY environment variable.
await using var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("MISTRAL_API_KEY")!);

// The document is referenced by URL.
var response = await client.OcrAsync(new OcrRequest(
    Model: "mistral/mistral-ocr-latest",
    Document: new DocumentInput(Type: "document_url", Url: "https://example.com/invoice.pdf")
));

foreach (var page in response.Pages)
{
    // Clamp the range end to the text length: `[..100]` throws
    // ArgumentOutOfRangeException when the markdown is shorter than 100 characters.
    var previewLength = Math.Min(100, page.Markdown.Length);
    Console.WriteLine($"Page {page.Index}: {page.Markdown[..previewLength]}...");
}
# frozen_string_literal: true

require "liter_llm"
require "json"

# ENV.fetch raises KeyError when MISTRAL_API_KEY is not set.
client = LiterLlm::LlmClient.new(ENV.fetch("MISTRAL_API_KEY"), {})

# The request is sent as a JSON string and the reply is parsed back from JSON.
request = {
  model: "mistral/mistral-ocr-latest",
  document: { type: "document_url", url: "https://example.com/invoice.pdf" }
}
raw_reply = client.ocr(JSON.generate(request))
response = JSON.parse(raw_reply)

# Print the first 100 characters of each page's markdown.
response["pages"].each do |pg|
  preview = pg["markdown"].slice(0, 100)
  puts "Page #{pg["index"]}: #{preview}..."
end
<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

// Falls back to an empty key when MISTRAL_API_KEY is unset (getenv() returns false).
$client = new LlmClient(apiKey: getenv('MISTRAL_API_KEY') ?: '');

// The request is sent as a JSON string; the reply is decoded into an associative array.
$request = [
    'model' => 'mistral/mistral-ocr-latest',
    'document' => [
        'type' => 'document_url',
        'url' => 'https://example.com/invoice.pdf',
    ],
];
$rawReply = $client->ocr(json_encode($request));
$response = json_decode($rawReply, true);

foreach ($response['pages'] as $pg) {
    // substr() counts bytes, so the preview is the first 100 bytes of the markdown.
    $preview = substr($pg['markdown'], 0, 100);
    echo "Page {$pg['index']}: " . $preview . "..." . PHP_EOL;
}
# System.fetch_env!/1 raises when MISTRAL_API_KEY is not set.
api_key = System.fetch_env!("MISTRAL_API_KEY")

request = %{
  model: "mistral/mistral-ocr-latest",
  document: %{type: "document_url", url: "https://example.com/invoice.pdf"}
}

# The match fails (raising MatchError) on anything other than {:ok, _}.
{:ok, response} = LiterLlm.ocr(request, api_key: api_key)

Enum.each(response["pages"], fn pg ->
  preview = String.slice(pg["markdown"], 0, 100)
  IO.puts("Page #{pg["index"]}: #{preview}...")
end)
import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

// init() is awaited before the client is constructed.
await init();

const apiKey = process.env.MISTRAL_API_KEY!;
const client = new LlmClient({ apiKey });

// The document is referenced by URL.
const { pages } = await client.ocr({
  model: "mistral/mistral-ocr-latest",
  document: { type: "document_url", url: "https://example.com/invoice.pdf" },
});

// Print the first 100 characters of each page's markdown.
for (const page of pages) {
  const preview = page.markdown.slice(0, 100);
  console.log(`Page ${page.index}: ${preview}...`);
}

OCR Parameters

| Parameter | Type | Description |
|---|---|---|
| `model` | string | OCR provider (e.g. `"mistral/mistral-ocr-latest"`) |
| `document` | object | Document input (URL or base64) |
| `pages` | array | Specific pages to process (1-indexed) |
| `include_image_base64` | bool | Include extracted images |

Document Input Formats

URL:

{ "type": "document_url", "url": "https://example.com/invoice.pdf" }

Base64:

{ "type": "base64", "data": "...", "media_type": "application/pdf" }