Search & OCR¶
Search¶
Search the web or documents across 12 providers (Brave, Tavily, Google PSE, etc.):
import asyncio
import os
from liter_llm import create_client
from liter_llm._internal_bindings import SearchRequest
async def main() -> None:
    """Run one Brave web search and print each result as ``title: url``."""
    # The API key comes from the environment; a missing variable raises KeyError.
    brave_key = os.environ["BRAVE_API_KEY"]
    client = create_client(api_key=brave_key)

    # The request is built from its JSON representation.
    payload = '{"model":"brave/web-search","query":"What is Rust programming language?","max_results":5}'
    search_request = SearchRequest.from_json(payload)

    search_response = await client.search(search_request)
    for hit in search_response.results:
        print(f"{hit.title}: {hit.url}")


asyncio.run(main())
import { createClient } from "@xberg-io/liter-llm";
// Create a client from the Brave API key found in the environment.
const braveKey = process.env.BRAVE_API_KEY!;
const client = createClient(braveKey);

// Ask the Brave web-search provider for at most five results.
const { results } = await client.search({
  model: "brave/web-search",
  query: "What is Rust programming language?",
  maxResults: 5,
});

// Print every result as "title: url".
for (const hit of results) {
  console.log(`${hit.title}: ${hit.url}`);
}
use liter_llm::{ClientConfigBuilder, DefaultClient, LlmClient, SearchRequest};
/// Runs one Brave web search and prints each result as `title: url`.
///
/// # Errors
///
/// Fails if `BRAVE_API_KEY` cannot be read, the client cannot be built,
/// or the search request returns an error.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let api_key = std::env::var("BRAVE_API_KEY")?;
    let client = DefaultClient::new(
        ClientConfigBuilder::new(api_key).build(),
        Some("brave/web-search"),
    )?;

    // Fields not listed here keep their `Default` values.
    let request = SearchRequest {
        model: "brave/web-search".into(),
        query: "What is Rust programming language?".into(),
        max_results: Some(5),
        ..Default::default()
    };
    let response = client.search(request).await?;

    response
        .results
        .iter()
        .for_each(|hit| println!("{}: {}", hit.title, hit.url));

    Ok(())
}
package main
import (
"encoding/json"
"fmt"
"os"
llm "github.com/xberg-io/liter-llm/packages/go"
)
// main runs one Brave web search and prints every result as "title: url".
// Any error from client creation, request decoding, or the search panics.
func main() {
	apiKey := os.Getenv("BRAVE_API_KEY")
	client, err := llm.CreateClient(apiKey, nil, nil, nil, nil)
	if err != nil {
		panic(err)
	}

	// The request is decoded from its JSON form into the binding's request type.
	payload := []byte(`{
"model": "brave/web-search",
"query": "What is Rust programming language?",
"max_results": 5
}`)
	var request llm.SearchRequest
	if err := json.Unmarshal(payload, &request); err != nil {
		panic(err)
	}

	response, err := client.Search(request)
	if err != nil {
		panic(err)
	}
	for _, hit := range response.Results {
		fmt.Printf("%s: %s\n", hit.Title, hit.URL)
	}
}
import io.xberg.literllm.*;
import java.util.Optional;
/** Runs one Brave web search and prints each result as "title: url". */
public class Main {
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the client when the block exits.
        try (var client = LiterLlm.createClient(System.getenv("BRAVE_API_KEY"))) {
            var request = SearchRequest.builder()
                    .withModel("brave/web-search")
                    .withQuery("What is Rust programming language?")
                    .withMaxResults(Optional.of(5))
                    .build();
            var searchResponse = client.search(request);
            for (var hit : searchResponse.results()) {
                System.out.printf("%s: %s%n", hit.title(), hit.url());
            }
        }
    }
}
using LiterLlm;
// Create a client from the Brave API key; all other options are left unset.
var braveKey = Environment.GetEnvironmentVariable("BRAVE_API_KEY")!;
using var client = LiterLlmLib.CreateClient(
    apiKey: braveKey,
    baseUrl: null,
    timeoutSecs: null,
    maxRetries: null,
    modelHint: null);

// Ask the Brave web-search provider for at most five results.
var request = new SearchRequest
{
    Model = "brave/web-search",
    Query = "What is Rust programming language?",
    MaxResults = 5
};
var searchResponse = await client.Search(request);

// Print every result as "title: url".
foreach (var hit in searchResponse.Results)
{
    Console.WriteLine($"{hit.Title}: {hit.Url}");
}
# frozen_string_literal: true
require 'liter_llm'
# ENV.fetch raises KeyError when BRAVE_API_KEY is not set.
client = LiterLlm.create_client(ENV.fetch('BRAVE_API_KEY'))

# Ask the Brave web-search provider for at most five results.
request = LiterLlm::SearchRequest.new(
  model: 'brave/web-search',
  query: 'What is Rust programming language?',
  max_results: 5
)
response = client.search_async(request)

# Print every result as "title: url".
response.results.each { |hit| puts "#{hit.title}: #{hit.url}" }
<?php
declare(strict_types=1);
use Liter\Llm\LiterLlm;
use Liter\Llm\SearchRequest;
// getenv() returns false when the variable is unset; fall back to an empty key.
$apiKey = getenv('BRAVE_API_KEY') ?: '';
$client = LiterLlm::createClient($apiKey);

// Ask the Brave web-search provider for at most five results.
$request = new SearchRequest(
    model: 'brave/web-search',
    query: 'What is Rust programming language?',
    maxResults: 5,
);
$response = $client->searchAsync($request);

// Print every result as "title: url".
foreach ($response->results as $hit) {
    echo "{$hit->title}: {$hit->url}" . PHP_EOL;
}
# Create a client from the Brave API key found in the environment.
api_key = System.get_env("BRAVE_API_KEY")
{:ok, client} = LiterLlm.create_client(api_key)

# The request is passed to the binding as a JSON string.
payload = %{
  model: "brave/web-search",
  query: "What is Rust programming language?",
  max_results: 5
}

request = Jason.encode!(payload)

{:ok, response} = LiterLlm.defaultclient_search_async(client, request)

# Print every result as "title: url".
Enum.each(response.results, fn hit ->
  IO.puts("#{hit.title}: #{hit.url}")
end)
import init, { createClient, WasmSearchRequest } from "@xberg-io/liter-llm-wasm";
// init() is awaited before any other binding is used.
await init();

const braveKey = process.env.BRAVE_API_KEY!;
const client = createClient(braveKey);

// Start from the default request and fill in the fields this search needs.
const searchRequest = WasmSearchRequest.default();
searchRequest.model = "brave/web-search";
searchRequest.query = "What is Rust programming language?";
searchRequest.maxResults = 5;

const { results } = await client.search(searchRequest);

// Print every result as "title: url".
for (const hit of results) {
  console.log(`${hit.title}: ${hit.url}`);
}
Search Parameters¶
| Parameter | Type | Description |
|---|---|---|
| `model` | string | Search provider (e.g. `"brave/web-search"`) |
| `query` | string | Search query |
| `max_results` | int | Maximum results to return |
| `search_domain_filter` | array | Restrict to specific domains |
| `country` | string | ISO country code for localized results |
Search Providers¶
The registry ships 12 search providers. Browse the full Providers capability matrix; the most common are:
| Provider | Prefix |
|---|---|
| Brave | brave/ |
| Tavily | tavily/ |
| Google PSE | google_pse/ |
| Serper | serper/ |
| DuckDuckGo | duckduckgo/ |
| Exa | exa_ai/ |
| Firecrawl | firecrawl/ |
| Linkup | linkup/ |
| Parallel AI | parallel_ai/ |
| Perplexity | perplexity/ |
| SearXNG | searxng/ |
| DataForSEO | dataforseo/ |
OCR¶
Extract text from documents via OCR across 4 providers (Mistral, Azure Doc Intelligence, etc.):
import asyncio
import os
from liter_llm import create_client
from liter_llm._internal_bindings import OcrRequest
async def main() -> None:
    """OCR one PDF by URL and print a 100-character preview of every page."""
    # The API key comes from the environment; a missing variable raises KeyError.
    mistral_key = os.environ["MISTRAL_API_KEY"]
    client = create_client(api_key=mistral_key)

    # The request is built from its JSON representation; the document is given by URL.
    payload = (
        '{"model":"mistral/mistral-ocr-latest",'
        '"document":{"type":"document_url","url":"https://example.com/invoice.pdf"}}'
    )
    ocr_request = OcrRequest.from_json(payload)

    ocr_response = await client.ocr(ocr_request)
    for ocr_page in ocr_response.pages:
        print(f"Page {ocr_page.index}: {ocr_page.markdown[:100]}...")


asyncio.run(main())
import { createClient } from "@xberg-io/liter-llm";
// Create a client from the Mistral API key found in the environment.
const mistralKey = process.env.MISTRAL_API_KEY!;
const client = createClient(mistralKey);

// OCR a document referenced by URL.
const { pages } = await client.ocr({
  model: "mistral/mistral-ocr-latest",
  document: { type: "document_url", url: "https://example.com/invoice.pdf" },
});

// Print the first 100 characters of each page's Markdown.
for (const ocrPage of pages) {
  console.log(`Page ${ocrPage.index}: ${ocrPage.markdown.slice(0, 100)}...`);
}
use liter_llm::{ClientConfigBuilder, DefaultClient, LlmClient, OcrDocument, OcrRequest};
/// OCRs one PDF by URL and prints a preview of at most 100 characters per page.
///
/// # Errors
///
/// Fails if `MISTRAL_API_KEY` cannot be read, the client cannot be built,
/// or the OCR request returns an error.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("MISTRAL_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("mistral/mistral-ocr-latest"))?;
    let response = client
        .ocr(OcrRequest {
            model: "mistral/mistral-ocr-latest".into(),
            document: OcrDocument::Url {
                url: "https://example.com/invoice.pdf".into(),
            },
            // Fields not listed here keep their `Default` values.
            ..Default::default()
        })
        .await?;
    for page in &response.pages {
        // Truncate by characters rather than bytes: a byte-range slice such as
        // `[..100]` panics when the text is shorter than 100 bytes or when
        // byte 100 falls inside a multi-byte UTF-8 character.
        let preview: String = page.markdown.chars().take(100).collect();
        println!("Page {}: {}...", page.index, preview);
    }
    Ok(())
}
package main
import (
"encoding/json"
"fmt"
"os"
llm "github.com/xberg-io/liter-llm/packages/go"
)
// main OCRs one PDF by URL and prints a preview of each page, truncated to
// 100 characters by the %.100s verb. Any error from client creation, request
// decoding, or the OCR call panics.
func main() {
	apiKey := os.Getenv("MISTRAL_API_KEY")
	client, err := llm.CreateClient(apiKey, nil, nil, nil, nil)
	if err != nil {
		panic(err)
	}

	// The request is decoded from its JSON form into the binding's request type.
	payload := []byte(`{
"model": "mistral/mistral-ocr-latest",
"document": {"type": "document_url", "url": "https://example.com/invoice.pdf"}
}`)
	var request llm.OcrRequest
	if err := json.Unmarshal(payload, &request); err != nil {
		panic(err)
	}

	response, err := client.Ocr(request)
	if err != nil {
		panic(err)
	}
	for _, ocrPage := range response.Pages {
		fmt.Printf("Page %d: %.100s...\n", ocrPage.Index, ocrPage.Markdown)
	}
}
import io.xberg.literllm.*;
/** OCRs one PDF by URL and prints a preview of each page, truncated by {@code %.100s}. */
public class Main {
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the client when the block exits.
        try (var client = LiterLlm.createClient(System.getenv("MISTRAL_API_KEY"))) {
            var request = OcrRequest.builder()
                    .withModel("mistral/mistral-ocr-latest")
                    .withDocument(new OcrDocument.Url("https://example.com/invoice.pdf"))
                    .build();
            var ocrResponse = client.ocr(request);
            for (var ocrPage : ocrResponse.pages()) {
                System.out.printf("Page %d: %.100s...%n", ocrPage.index(), ocrPage.markdown());
            }
        }
    }
}
using LiterLlm;
// Create a client from the Mistral API key; all other options are left unset.
var mistralKey = Environment.GetEnvironmentVariable("MISTRAL_API_KEY")!;
using var client = LiterLlmLib.CreateClient(
    apiKey: mistralKey,
    baseUrl: null,
    timeoutSecs: null,
    maxRetries: null,
    modelHint: null);

// OCR a document referenced by URL.
var request = new OcrRequest
{
    Model = "mistral/mistral-ocr-latest",
    Document = new OcrDocument.Url("https://example.com/invoice.pdf")
};
var ocrResponse = await client.Ocr(request);

foreach (var ocrPage in ocrResponse.Pages)
{
    // Only slice when the text is longer than the preview, so short pages are printed whole.
    var text = ocrPage.Markdown;
    var preview = text.Length > 100 ? text[..100] : text;
    Console.WriteLine($"Page {ocrPage.Index}: {preview}...");
}
# frozen_string_literal: true
require 'liter_llm'
# ENV.fetch raises KeyError when MISTRAL_API_KEY is not set.
client = LiterLlm.create_client(ENV.fetch('MISTRAL_API_KEY'))

# OCR a document referenced by URL.
request = LiterLlm::OcrRequest.new(
  model: 'mistral/mistral-ocr-latest',
  document: { 'type' => 'document_url', 'url' => 'https://example.com/invoice.pdf' }
)
response = client.ocr_async(request)

response.pages.each do |ocr_page|
  # Fall back to an empty string if the slice comes back nil.
  snippet = ocr_page.markdown[0, 100] || ''
  puts "Page #{ocr_page.index}: #{snippet}..."
end
<?php
declare(strict_types=1);
use Liter\Llm\LiterLlm;
use Liter\Llm\OcrRequest;
// getenv() returns false when the variable is unset; fall back to an empty key.
$apiKey = getenv('MISTRAL_API_KEY') ?: '';
$client = LiterLlm::createClient($apiKey);

// The request is built from its JSON representation; the document is given by URL.
$payload = json_encode([
    'model' => 'mistral/mistral-ocr-latest',
    'document' => ['type' => 'document_url', 'url' => 'https://example.com/invoice.pdf'],
]);
$request = OcrRequest::from_json($payload);

$response = $client->ocrAsync($request);

// Print a preview of each page; substr() counts bytes, not characters.
foreach ($response->pages as $ocrPage) {
    echo "Page {$ocrPage->index}: " . substr($ocrPage->markdown, 0, 100) . '...' . PHP_EOL;
}
# Create a client from the Mistral API key found in the environment.
api_key = System.get_env("MISTRAL_API_KEY")
{:ok, client} = LiterLlm.create_client(api_key)

# The request is passed to the binding as a JSON string; the document is given by URL.
payload = %{
  model: "mistral/mistral-ocr-latest",
  document: %{type: "document_url", url: "https://example.com/invoice.pdf"}
}

request = Jason.encode!(payload)

{:ok, response} = LiterLlm.defaultclient_ocr_async(client, request)

# Print the first 100 characters of each page's Markdown.
Enum.each(response.pages, fn ocr_page ->
  IO.puts("Page #{ocr_page.index}: #{String.slice(ocr_page.markdown, 0, 100)}...")
end)
import init, { createClient, WasmOcrRequest } from "@xberg-io/liter-llm-wasm";
// init() is awaited before any other binding is used.
await init();

const mistralKey = process.env.MISTRAL_API_KEY!;
const client = createClient(mistralKey);

// Start from the default request and fill in the fields this OCR call needs.
const ocrRequest = WasmOcrRequest.default();
ocrRequest.model = "mistral/mistral-ocr-latest";
ocrRequest.document = { type: "document_url", url: "https://example.com/invoice.pdf" };

const { pages } = await client.ocr(ocrRequest);

// Print the first 100 characters of each page's Markdown.
for (const ocrPage of pages) {
  console.log(`Page ${ocrPage.index}: ${ocrPage.markdown.slice(0, 100)}...`);
}
OCR Parameters¶
| Parameter | Type | Description |
|---|---|---|
| `model` | string | OCR provider (e.g. `"mistral/mistral-ocr-latest"`) |
| `document` | object | Document input (URL or base64) |
| `pages` | array | Specific pages to process (1-indexed) |
| `include_image_base64` | bool | Include extracted images |
Document Input Formats¶
URL:
`{"type": "document_url", "url": "https://example.com/invoice.pdf"}`
Base64: