Embeddings & Rerank¶
Embeddings¶
Generate vector embeddings from text. Embeddings are fixed-length numeric arrays that capture semantic meaning -- useful for search, clustering, and RAG.
import asyncio
import os

from liter_llm import LlmClient


async def main() -> None:
    # Raises KeyError if the key is unset — fail fast rather than send an empty key.
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    response = await client.embed(
        model="openai/text-embedding-3-small",
        input=["The quick brown fox jumps over the lazy dog"],
    )
    print(f"Dimensions: {len(response.data[0].embedding)}")
    print(f"First 5 values: {response.data[0].embedding[:5]}")


asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });

const response = await client.embed({
  model: "openai/text-embedding-3-small",
  input: ["The quick brown fox jumps over the lazy dog"],
});

console.log(`Dimensions: ${response.data[0].embedding.length}`);
console.log(`First 5 values: ${response.data[0].embedding.slice(0, 5)}`);
use liter_llm::{
    ClientConfigBuilder, DefaultClient, EmbeddingInput, EmbeddingRequest, LlmClient,
};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // `?` propagates a missing OPENAI_API_KEY as a VarError.
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?).build();
    let client = DefaultClient::new(config, Some("openai/text-embedding-3-small"))?;

    let request = EmbeddingRequest {
        model: "openai/text-embedding-3-small".into(),
        input: EmbeddingInput::Multiple(vec![
            "The quick brown fox jumps over the lazy dog".into(),
        ]),
        ..Default::default()
    };

    let response = client.embed(request).await?;
    let embedding = &response.data[0].embedding;
    println!("Dimensions: {}", embedding.len());
    println!("First 5 values: {:?}", &embedding[..5]);
    Ok(())
}
package main

import (
	"context"
	"fmt"
	"os"

	llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

func main() {
	client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))

	resp, err := client.Embed(context.Background(), &llm.EmbeddingRequest{
		Model: "openai/text-embedding-3-small",
		Input: llm.NewEmbeddingInputMultiple([]string{"The quick brown fox jumps over the lazy dog"}),
	})
	if err != nil {
		panic(err)
	}

	fmt.Printf("Dimensions: %d\n", len(resp.Data[0].Embedding))
	fmt.Printf("First 5 values: %v\n", resp.Data[0].Embedding[:5])
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;

import java.util.List;

public class Main {
    public static void main(String[] args) throws Exception {
        // try-with-resources releases the client's underlying transport on exit.
        try (var client = LlmClient.builder()
                .apiKey(System.getenv("OPENAI_API_KEY"))
                .build()) {
            var response = client.embed(new EmbeddingRequest(
                "openai/text-embedding-3-small",
                List.of("The quick brown fox jumps over the lazy dog")
            ));
            var embedding = response.data().getFirst().embedding();
            System.out.println("Dimensions: " + embedding.size());
            System.out.println("First 5 values: " + embedding.subList(0, 5));
        }
    }
}
using LiterLlm;

// await using disposes the client asynchronously when it leaves scope.
await using var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!);

var response = await client.EmbedAsync(new EmbeddingRequest(
    Model: "openai/text-embedding-3-small",
    Input: ["The quick brown fox jumps over the lazy dog"]
));

var embedding = response.Data[0].Embedding;
Console.WriteLine($"Dimensions: {embedding.Length}");
Console.WriteLine($"First 5 values: [{string.Join(", ", embedding[..5])}]");
# frozen_string_literal: true

require "liter_llm"
require "json"

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})

# The client exchanges JSON strings: encode the request, decode the reply.
response = JSON.parse(client.embed(JSON.generate(
  model: "openai/text-embedding-3-small",
  input: ["The quick brown fox jumps over the lazy dog"]
)))

embedding = response.dig("data", 0, "embedding")
puts "Dimensions: #{embedding.length}"
puts "First 5 values: #{embedding.first(5)}"
<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');

// The client exchanges JSON strings: encode the request, decode the reply.
$response = json_decode($client->embed(json_encode([
    'model' => 'openai/text-embedding-3-small',
    'input' => ['The quick brown fox jumps over the lazy dog'],
])), true);

$embedding = $response['data'][0]['embedding'];
echo 'Dimensions: ' . count($embedding) . PHP_EOL;
echo 'First 5 values: ' . json_encode(array_slice($embedding, 0, 5)) . PHP_EOL;
{:ok, response} =
  LiterLlm.embed(
    %{
      model: "openai/text-embedding-3-small",
      input: ["The quick brown fox jumps over the lazy dog"]
    },
    api_key: System.fetch_env!("OPENAI_API_KEY")
  )

embedding = hd(response["data"])["embedding"]
IO.puts("Dimensions: #{length(embedding)}")
IO.puts("First 5 values: #{inspect(Enum.take(embedding, 5))}")
import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

// The WASM module must be initialized before constructing a client.
await init();

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });

const response = await client.embed({
  model: "openai/text-embedding-3-small",
  input: ["The quick brown fox jumps over the lazy dog"],
});

console.log(`Dimensions: ${response.data[0].embedding.length}`);
console.log(`First 5 values: ${response.data[0].embedding.slice(0, 5)}`);
Embedding Parameters¶
| Parameter | Type | Description |
|---|---|---|
| `model` | string | Embedding model (e.g. `"openai/text-embedding-3-small"`) |
| `input` | string/array | Text(s) to embed |
| `encoding_format` | string | Output format (`"float"` or `"base64"`) |
| `dimensions` | int | Output dimensionality (model-dependent) |
Embedding Providers¶
| Provider | Prefix | Example Model |
|---|---|---|
| OpenAI | `openai/` | `text-embedding-3-small`, `text-embedding-3-large` |
| Cohere | `cohere/` | `embed-english-v3.0` |
| Voyage AI | `voyage/` | `voyage-3` |
| Mistral | `mistral/` | `mistral-embed` |
| Google Vertex AI | `vertex_ai/` | `text-embedding-004` |
| AWS Bedrock | `bedrock/` | `amazon.titan-embed-text-v2:0` |
| Ollama | `ollama/` | `nomic-embed-text` |
| LM Studio | `lmstudio/` | Depends on loaded model |
| vLLM | `vllm/` | `BAAI/bge-base-en-v1.5` |
| llama.cpp | `llamacpp/` | Depends on loaded GGUF |
| LocalAI | `localai/` | Depends on configuration |
| llamafile | `llamafile/` | Depends on loaded model |
| Jina AI | `jina_ai/` | `jina-embeddings-v3` |
See the Providers page for the complete capability matrix.
Rerank¶
Rerank documents by relevance to a query. Useful for improving retrieval quality in RAG pipelines:
import asyncio
import os

from liter_llm import LlmClient


async def main() -> None:
    # NOTE(review): model is Cohere but the key comes from OPENAI_API_KEY —
    # presumably the client routes keys per provider; confirm against the SDK docs.
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    response = await client.rerank(
        model="cohere/rerank-v3.5",
        query="What is the capital of France?",
        documents=[
            "Paris is the capital of France.",
            "Berlin is the capital of Germany.",
            "London is the capital of England.",
        ],
    )
    for result in response.results:
        print(f"Index: {result.index}, Score: {result.relevance_score:.4f}")


asyncio.run(main())
import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });

const response = await client.rerank({
  model: "cohere/rerank-v3.5",
  query: "What is the capital of France?",
  documents: [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "London is the capital of England.",
  ],
});

for (const result of response.results) {
  console.log(`Index: ${result.index}, Score: ${result.relevanceScore.toFixed(4)}`);
}
use liter_llm::{ClientConfigBuilder, DefaultClient, LlmClient, RerankRequest};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // `?` propagates a missing OPENAI_API_KEY as a VarError.
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?).build();
    let client = DefaultClient::new(config, Some("cohere/rerank-v3.5"))?;

    let response = client
        .rerank(RerankRequest {
            model: "cohere/rerank-v3.5".into(),
            query: "What is the capital of France?".into(),
            documents: vec![
                "Paris is the capital of France.".into(),
                "Berlin is the capital of Germany.".into(),
                "London is the capital of England.".into(),
            ],
            ..Default::default()
        })
        .await?;

    for result in &response.results {
        println!("Index: {}, Score: {:.4}", result.index, result.relevance_score);
    }
    Ok(())
}
package main

import (
	"context"
	"fmt"
	"os"

	llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

func main() {
	client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))

	resp, err := client.Rerank(context.Background(), &llm.RerankRequest{
		Model: "cohere/rerank-v3.5",
		Query: "What is the capital of France?",
		Documents: []string{
			"Paris is the capital of France.",
			"Berlin is the capital of Germany.",
			"London is the capital of England.",
		},
	})
	if err != nil {
		panic(err)
	}

	for _, result := range resp.Results {
		fmt.Printf("Index: %d, Score: %.4f\n", result.Index, result.RelevanceScore)
	}
}
import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;

import java.util.List;

public class Main {
    public static void main(String[] args) throws Exception {
        // try-with-resources releases the client's underlying transport on exit.
        try (var client = LlmClient.builder()
                .apiKey(System.getenv("OPENAI_API_KEY"))
                .build()) {
            var response = client.rerank(new RerankRequest(
                "cohere/rerank-v3.5",
                "What is the capital of France?",
                List.of(
                    "Paris is the capital of France.",
                    "Berlin is the capital of Germany.",
                    "London is the capital of England."
                )
            ));
            for (var result : response.results()) {
                System.out.printf("Index: %d, Score: %.4f%n",
                    result.index(), result.relevanceScore());
            }
        }
    }
}
using LiterLlm;

// await using disposes the client asynchronously when it leaves scope.
await using var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!);

var response = await client.RerankAsync(new RerankRequest(
    Model: "cohere/rerank-v3.5",
    Query: "What is the capital of France?",
    Documents: [
        "Paris is the capital of France.",
        "Berlin is the capital of Germany.",
        "London is the capital of England.",
    ]
));

foreach (var result in response.Results)
{
    Console.WriteLine($"Index: {result.Index}, Score: {result.RelevanceScore:F4}");
}
# frozen_string_literal: true

require "liter_llm"
require "json"

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})

# The client exchanges JSON strings: encode the request, decode the reply.
response = JSON.parse(client.rerank(JSON.generate(
  model: "cohere/rerank-v3.5",
  query: "What is the capital of France?",
  documents: [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "London is the capital of England."
  ]
)))

response["results"].each do |result|
  puts "Index: #{result["index"]}, Score: #{format("%.4f", result["relevance_score"])}"
end
<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');

// The client exchanges JSON strings: encode the request, decode the reply.
$response = json_decode($client->rerank(json_encode([
    'model' => 'cohere/rerank-v3.5',
    'query' => 'What is the capital of France?',
    'documents' => [
        'Paris is the capital of France.',
        'Berlin is the capital of Germany.',
        'London is the capital of England.',
    ],
])), true);

foreach ($response['results'] as $result) {
    echo "Index: {$result['index']}, Score: " . number_format($result['relevance_score'], 4) . PHP_EOL;
}
{:ok, response} =
  LiterLlm.rerank(
    %{
      model: "cohere/rerank-v3.5",
      query: "What is the capital of France?",
      documents: [
        "Paris is the capital of France.",
        "Berlin is the capital of Germany.",
        "London is the capital of England."
      ]
    },
    api_key: System.fetch_env!("OPENAI_API_KEY")
  )

for result <- response["results"] do
  IO.puts("Index: #{result["index"]}, Score: #{Float.round(result["relevance_score"], 4)}")
end
import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

// The WASM module must be initialized before constructing a client.
await init();

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });

const response = await client.rerank({
  model: "cohere/rerank-v3.5",
  query: "What is the capital of France?",
  documents: [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "London is the capital of England.",
  ],
});

for (const result of response.results) {
  console.log(`Index: ${result.index}, Score: ${result.relevanceScore.toFixed(4)}`);
}
Rerank Parameters¶
| Parameter | Type | Description |
|---|---|---|
| `model` | string | Rerank model (e.g. `"cohere/rerank-v3.5"`) |
| `query` | string | The query to rank documents against |
| `documents` | array | Documents to rerank |
| `top_n` | int | Number of top results to return |
| `return_documents` | bool | Include document text in results |