Search & OCR¶
Search¶
Search the web or documents across 12 providers (Brave, Tavily, Google PSE, etc.):
import asyncio
import os
from liter_llm import LlmClient
async def main() -> None:
    """Run one Brave web search and print every hit as ``title: url``."""
    # The provider key comes from the environment; a missing variable raises KeyError.
    brave_key = os.environ["BRAVE_API_KEY"]
    llm = LlmClient(api_key=brave_key)

    # Collect the request arguments up front so the call itself stays short.
    search_args = {
        "model": "brave/web-search",
        "query": "What is Rust programming language?",
        "max_results": 5,
    }
    found = await llm.search(**search_args)

    for result in found.results:
        print(f"{result.title}: {result.url}")


asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";
// Read the provider key from the environment, then build the client.
const apiKey = process.env.BRAVE_API_KEY!;
const client = new LlmClient({ apiKey });

// Describe the search separately from the call that sends it.
const request = {
  model: "brave/web-search",
  query: "What is Rust programming language?",
  maxResults: 5,
};
const response = await client.search(request);

// Print one "title: url" line per hit.
for (const result of response.results) {
  console.log(`${result.title}: ${result.url}`);
}
use liter_llm::{ClientConfigBuilder, DefaultClient, LlmClient, SearchRequest};
/// Runs a single Brave web search and prints each hit as `title: url`.
///
/// # Errors
///
/// Propagates a missing `BRAVE_API_KEY`, client construction failures and
/// search failures to the caller via `?`.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let api_key = std::env::var("BRAVE_API_KEY")?;
    let client = DefaultClient::new(
        ClientConfigBuilder::new(api_key).build(),
        Some("brave/web-search"),
    )?;

    // Only the fields we care about are set; the rest keep their defaults.
    let request = SearchRequest {
        model: "brave/web-search".into(),
        query: "What is Rust programming language?".into(),
        max_results: Some(5),
        ..Default::default()
    };
    let response = client.search(request).await?;

    for hit in &response.results {
        println!("{}: {}", hit.title, hit.url);
    }

    Ok(())
}
package main
import (
"context"
"fmt"
"os"
llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)
// main runs a single Brave web search and prints each hit as "title: url".
func main() {
	apiKey := os.Getenv("BRAVE_API_KEY")
	client := llm.NewClient(llm.WithAPIKey(apiKey))

	// Build the request separately from the call that sends it.
	req := &llm.SearchRequest{
		Model:      "brave/web-search",
		Query:      "What is Rust programming language?",
		MaxResults: 5,
	}

	resp, err := client.Search(context.Background(), req)
	if err != nil {
		panic(err)
	}

	for _, hit := range resp.Results {
		fmt.Printf("%s: %s\n", hit.Title, hit.URL)
	}
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
/** Runs a single Brave web search and prints each hit as "title: url". */
public class Main {
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the client when the block exits.
        try (var client = LlmClient.builder()
                .apiKey(System.getenv("BRAVE_API_KEY"))
                .build()) {
            // Arguments in order: model, query, maximum number of results.
            var request = new SearchRequest(
                    "brave/web-search",
                    "What is Rust programming language?",
                    5);
            var response = client.search(request);

            for (var hit : response.results()) {
                System.out.printf("%s: %s%n", hit.title(), hit.url());
            }
        }
    }
}
using LiterLlm;
// Read the provider key from the environment and build a client that is
// disposed asynchronously at the end of the script.
var apiKey = Environment.GetEnvironmentVariable("BRAVE_API_KEY")!;
await using var client = new LlmClient(apiKey: apiKey);

// Describe the search separately from the call that sends it.
var request = new SearchRequest(
    Model: "brave/web-search",
    Query: "What is Rust programming language?",
    MaxResults: 5
);
var response = await client.SearchAsync(request);

// Print one "title: url" line per hit.
foreach (var result in response.Results)
{
    Console.WriteLine($"{result.Title}: {result.Url}");
}
# frozen_string_literal: true
require "liter_llm"
require "json"
# ENV.fetch raises KeyError when the key is not set.
api_key = ENV.fetch("BRAVE_API_KEY")
client = LiterLlm::LlmClient.new(api_key, {})

# The client takes the request as a JSON string and returns a JSON string.
payload = {
  model: "brave/web-search",
  query: "What is Rust programming language?",
  max_results: 5
}
raw = client.search(JSON.generate(payload))
response = JSON.parse(raw)

# Print one "title: url" line per hit.
response["results"].each { |result| puts "#{result["title"]}: #{result["url"]}" }
<?php
declare(strict_types=1);
use LiterLlm\LlmClient;
// getenv() returns false when the variable is unset; fall back to an empty key.
$apiKey = getenv('BRAVE_API_KEY') ?: '';
$client = new LlmClient(apiKey: $apiKey);

// The client takes the request as a JSON string and returns a JSON string.
$payload = [
    'model' => 'brave/web-search',
    'query' => 'What is Rust programming language?',
    'max_results' => 5,
];
$raw = $client->search(json_encode($payload));
$response = json_decode($raw, true);

// Print one "title: url" line per hit.
foreach ($response['results'] as $result) {
    echo "{$result['title']}: {$result['url']}" . PHP_EOL;
}
import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";
// init() is awaited before the client is constructed.
await init();

const apiKey = process.env.BRAVE_API_KEY!;
const client = new LlmClient({ apiKey });

// Describe the search separately from the call that sends it.
const request = {
  model: "brave/web-search",
  query: "What is Rust programming language?",
  maxResults: 5,
};
const response = await client.search(request);

// Print one "title: url" line per hit.
for (const result of response.results) {
  console.log(`${result.title}: ${result.url}`);
}
Search Parameters¶
| Parameter | Type | Description |
|---|---|---|
| `model` | string | Search provider (e.g. `"brave/web-search"`) |
| `query` | string | Search query |
| `max_results` | int | Maximum results to return |
| `search_domain_filter` | array | Restrict to specific domains |
| `country` | string | ISO country code for localized results |
Search Providers¶
| Provider | Prefix | Example Model |
|---|---|---|
| Brave | `brave/` | `web-search` |
| Tavily | `tavily/` | `tavily-search` |
| Google PSE | `google_pse/` | `google-search` |
| Serper | `serper/` | `serper-search` |
| SerpAPI | `serpapi/` | `serpapi-search` |
| Bing | `bing/` | `bing-search` |
See the Providers page for the complete capability matrix.
OCR¶
Extract text from documents via OCR across 4 providers (Mistral, Azure Doc Intelligence, etc.):
import asyncio
import os
from liter_llm import LlmClient
async def main() -> None:
    """OCR a remote PDF with Mistral and print up to 100 characters of each page."""
    client = LlmClient(api_key=os.environ["MISTRAL_API_KEY"])

    # The document is referenced by URL rather than uploaded inline.
    invoice = {"type": "document_url", "url": "https://example.com/invoice.pdf"}
    response = await client.ocr(model="mistral/mistral-ocr-latest", document=invoice)

    for page in response.pages:
        print(f"Page {page.index}: {page.markdown[:100]}...")


asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";
// Read the provider key from the environment, then build the client.
const apiKey = process.env.MISTRAL_API_KEY!;
const client = new LlmClient({ apiKey });

// The document is referenced by URL rather than uploaded inline.
const invoice = { type: "document_url", url: "https://example.com/invoice.pdf" };
const response = await client.ocr({
  model: "mistral/mistral-ocr-latest",
  document: invoice,
});

// Print up to the first 100 characters of every page.
for (const page of response.pages) {
  console.log(`Page ${page.index}: ${page.markdown.slice(0, 100)}...`);
}
use liter_llm::{ClientConfigBuilder, DefaultClient, LlmClient, OcrRequest, DocumentInput};
/// OCRs a remote PDF with Mistral and prints a short preview of every page.
///
/// # Errors
///
/// Propagates a missing `MISTRAL_API_KEY`, client construction failures and
/// OCR failures to the caller via `?`.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("MISTRAL_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("mistral/mistral-ocr-latest"))?;

    let response = client
        .ocr(OcrRequest {
            model: "mistral/mistral-ocr-latest".into(),
            document: DocumentInput::Url {
                url: "https://example.com/invoice.pdf".into(),
            },
            ..Default::default()
        })
        .await?;

    for page in &response.pages {
        // Truncate by characters, not bytes: `&markdown[..100]` panics when the
        // page text is shorter than 100 bytes or when byte 100 falls inside a
        // multi-byte UTF-8 sequence.
        let preview: String = page.markdown.chars().take(100).collect();
        println!("Page {}: {}...", page.index, preview);
    }

    Ok(())
}
package main
import (
"context"
"fmt"
"os"
llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)
// main OCRs a remote PDF with Mistral and prints a preview of every page.
func main() {
	apiKey := os.Getenv("MISTRAL_API_KEY")
	client := llm.NewClient(llm.WithAPIKey(apiKey))

	// The document is referenced by URL rather than uploaded inline.
	invoice := llm.DocumentInput{
		Type: "document_url",
		URL:  "https://example.com/invoice.pdf",
	}
	req := &llm.OCRRequest{
		Model:    "mistral/mistral-ocr-latest",
		Document: invoice,
	}

	resp, err := client.OCR(context.Background(), req)
	if err != nil {
		panic(err)
	}

	// The %.100s precision caps each preview at 100 characters.
	for _, pg := range resp.Pages {
		fmt.Printf("Page %d: %.100s...\n", pg.Index, pg.Markdown)
	}
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
/** OCRs a remote PDF with Mistral and prints a preview of every page. */
public class Main {
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the client when the block exits.
        try (var client = LlmClient.builder()
                .apiKey(System.getenv("MISTRAL_API_KEY"))
                .build()) {
            // The document is referenced by URL rather than uploaded inline.
            var invoice = new DocumentInput("document_url", "https://example.com/invoice.pdf");
            var request = new OcrRequest("mistral/mistral-ocr-latest", invoice);
            var response = client.ocr(request);

            // The %.100s precision caps each preview at 100 characters.
            for (var pg : response.pages()) {
                System.out.printf("Page %d: %.100s...%n", pg.index(), pg.markdown());
            }
        }
    }
}
using LiterLlm;
// Build a client from the provider key; it is disposed asynchronously at the
// end of the script.
await using var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("MISTRAL_API_KEY")!);

// The document is referenced by URL rather than uploaded inline.
var response = await client.OcrAsync(new OcrRequest(
    Model: "mistral/mistral-ocr-latest",
    Document: new DocumentInput(Type: "document_url", Url: "https://example.com/invoice.pdf")
));

foreach (var page in response.Pages)
{
    // A bare `[..100]` range throws ArgumentOutOfRangeException when the page
    // text is shorter than 100 characters, so clamp the end to the length.
    var preview = page.Markdown[..Math.Min(100, page.Markdown.Length)];
    Console.WriteLine($"Page {page.Index}: {preview}...");
}
# frozen_string_literal: true
require "liter_llm"
require "json"
# ENV.fetch raises KeyError when the key is not set.
api_key = ENV.fetch("MISTRAL_API_KEY")
client = LiterLlm::LlmClient.new(api_key, {})

# The client takes the request as a JSON string and returns a JSON string.
payload = {
  model: "mistral/mistral-ocr-latest",
  document: { type: "document_url", url: "https://example.com/invoice.pdf" }
}
raw = client.ocr(JSON.generate(payload))
response = JSON.parse(raw)

# Print up to the first 100 characters of every page.
response["pages"].each { |page| puts "Page #{page["index"]}: #{page["markdown"][0, 100]}..." }
<?php
declare(strict_types=1);
use LiterLlm\LlmClient;
// getenv() returns false when the variable is unset; fall back to an empty key.
$apiKey = getenv('MISTRAL_API_KEY') ?: '';
$client = new LlmClient(apiKey: $apiKey);

// The client takes the request as a JSON string and returns a JSON string.
$payload = [
    'model' => 'mistral/mistral-ocr-latest',
    'document' => [
        'type' => 'document_url',
        'url' => 'https://example.com/invoice.pdf',
    ],
];
$raw = $client->ocr(json_encode($payload));
$response = json_decode($raw, true);

// Print a preview of every page; substr() counts bytes, not characters.
foreach ($response['pages'] as $page) {
    $preview = substr($page['markdown'], 0, 100);
    echo "Page {$page['index']}: " . $preview . "..." . PHP_EOL;
}
# Build the request first; the document is referenced by URL rather than uploaded inline.
request = %{
  model: "mistral/mistral-ocr-latest",
  document: %{type: "document_url", url: "https://example.com/invoice.pdf"}
}

# The match raises unless the call returns an {:ok, _} tuple, and
# System.fetch_env!/1 raises when the key is not set.
{:ok, response} = LiterLlm.ocr(request, api_key: System.fetch_env!("MISTRAL_API_KEY"))

# Print up to the first 100 characters of every page.
for page <- response["pages"] do
  IO.puts("Page #{page["index"]}: #{String.slice(page["markdown"], 0, 100)}...")
end
import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";
// init() is awaited before the client is constructed.
await init();

const apiKey = process.env.MISTRAL_API_KEY!;
const client = new LlmClient({ apiKey });

// The document is referenced by URL rather than uploaded inline.
const invoice = { type: "document_url", url: "https://example.com/invoice.pdf" };
const response = await client.ocr({
  model: "mistral/mistral-ocr-latest",
  document: invoice,
});

// Print up to the first 100 characters of every page.
for (const page of response.pages) {
  console.log(`Page ${page.index}: ${page.markdown.slice(0, 100)}...`);
}
OCR Parameters¶
| Parameter | Type | Description |
|---|---|---|
| `model` | string | OCR provider (e.g. `"mistral/mistral-ocr-latest"`) |
| `document` | object | Document input (URL or base64) |
| `pages` | array | Specific pages to process (1-indexed) |
| `include_image_base64` | bool | Include extracted images |
Document Input Formats¶
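URL (the form used in the examples above): `{"type": "document_url", "url": "https://example.com/invoice.pdf"}`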
Base64: