Embeddings & Rerank¶
Embeddings¶
Generate vector embeddings from text. Embeddings are fixed-length numeric arrays that capture semantic meaning -- useful for search, clustering, and RAG.
import asyncio
import os

from liter_llm import create_client
from liter_llm._internal_bindings import EmbeddingRequest

# JSON request body: the model to use and the list of texts to embed.
REQUEST_JSON = '{"model":"openai/text-embedding-3-small","input":["The quick brown fox jumps over the lazy dog"]}'


async def main() -> None:
    """Embed one sentence and print the vector's length and first five values."""
    api_key = os.environ["OPENAI_API_KEY"]
    client = create_client(api_key=api_key)

    response = await client.embed(EmbeddingRequest.from_json(REQUEST_JSON))

    # One input was sent, so only the first entry of `data` is inspected.
    vector = response.data[0].embedding
    print(f"Dimensions: {len(vector)}")
    print(f"First 5 values: {vector[:5]}")


asyncio.run(main())
import { createClient } from "@xberg-io/liter-llm";

// The key is read from the environment; `!` asserts that it is defined.
const apiKey = process.env.OPENAI_API_KEY!;
const client = createClient(apiKey);

const response = await client.embed({
  model: "openai/text-embedding-3-small",
  input: ["The quick brown fox jumps over the lazy dog"],
});

// One input was sent, so only the first entry of `data` is inspected.
const { embedding } = response.data[0];
console.log(`Dimensions: ${embedding.length}`);
console.log(`First 5 values: ${embedding.slice(0, 5)}`);
use liter_llm::{
    ClientConfigBuilder, DefaultClient, EmbeddingInput, EmbeddingRequest, LlmClient,
};

/// Model identifier passed to `DefaultClient::new` and used as the request's `model`.
const MODEL: &str = "openai/text-embedding-3-small";

/// Embeds one sentence and prints the vector's length and its first five values.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let api_key = std::env::var("OPENAI_API_KEY")?;
    let client = DefaultClient::new(ClientConfigBuilder::new(api_key).build(), Some(MODEL))?;

    let input = EmbeddingInput::Multiple(vec![
        "The quick brown fox jumps over the lazy dog".into(),
    ]);
    let request = EmbeddingRequest {
        model: MODEL.into(),
        input,
        ..Default::default()
    };

    let response = client.embed(request).await?;

    // One input was sent, so only the first entry of `data` is inspected.
    let vector = &response.data[0].embedding;
    println!("Dimensions: {}", vector.len());
    println!("First 5 values: {:?}", &vector[..5]);

    Ok(())
}
package main
import (
"encoding/json"
"fmt"
"os"
llm "github.com/xberg-io/liter-llm/packages/go"
)
func main() {
client, err := llm.CreateClient(os.Getenv("OPENAI_API_KEY"), nil, nil, nil, nil)
if err != nil {
panic(err)
}
var req llm.EmbeddingRequest
if err := json.Unmarshal([]byte(`{
"model": "openai/text-embedding-3-small",
"input": ["The quick brown fox jumps over the lazy dog"]
}`), &req); err != nil {
panic(err)
}
resp, err := client.Embed(req)
if err != nil {
panic(err)
}
fmt.Printf("Dimensions: %d\n", len(resp.Data[0].Embedding))
fmt.Printf("First 5 values: %v\n", resp.Data[0].Embedding[:5])
}
import io.xberg.literllm.*;
import java.util.List;

public class Main {
    /** Embeds one sentence and prints the vector's size and its first five values. */
    public static void main(String[] args) throws Exception {
        var request = EmbeddingRequest.builder()
                .withModel("openai/text-embedding-3-small")
                .withInput(EmbeddingInput.of(List.of("The quick brown fox jumps over the lazy dog")))
                .build();

        // try-with-resources closes the client when the block exits.
        try (var client = LiterLlm.createClient(System.getenv("OPENAI_API_KEY"))) {
            var response = client.embed(request);

            // One input was sent, so only the first entry of data() is inspected.
            var vector = response.data().getFirst().embedding();
            System.out.println("Dimensions: " + vector.size());
            System.out.println("First 5 values: " + vector.subList(0, 5));
        }
    }
}
using LiterLlm;

// The key is read from the environment; `!` suppresses the nullable warning.
var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")!;
using var client = LiterLlmLib.CreateClient(
    apiKey: apiKey,
    baseUrl: null, timeoutSecs: null, maxRetries: null, modelHint: null);

var request = new EmbeddingRequest
{
    Model = "openai/text-embedding-3-small",
    Input = EmbeddingInput.Of(new[] { "The quick brown fox jumps over the lazy dog" })
};
var response = await client.Embed(request);

// One input was sent, so only the first entry of Data is inspected.
var vector = response.Data[0].Embedding;
var preview = string.Join(", ", vector.Take(5));
Console.WriteLine($"Dimensions: {vector.Count}");
Console.WriteLine($"First 5 values: [{preview}]");
# frozen_string_literal: true

require 'liter_llm'

# ENV.fetch raises KeyError when the variable is not set.
api_key = ENV.fetch('OPENAI_API_KEY')
client = LiterLlm.create_client(api_key)

request = LiterLlm::EmbeddingRequest.new(
  model: 'openai/text-embedding-3-small',
  input: ['The quick brown fox jumps over the lazy dog']
)
response = client.embed_async(request)

# One input was sent, so only the first entry of `data` is inspected.
vector = response.data[0].embedding
puts "Dimensions: #{vector.length}"
puts "First 5 values: #{vector.first(5)}"
<?php

declare(strict_types=1);

use Liter\Llm\EmbeddingRequest;
use Liter\Llm\LiterLlm;

// getenv() returns false when the variable is unset; fall back to an empty string.
$apiKey = getenv('OPENAI_API_KEY') ?: '';
$client = LiterLlm::createClient($apiKey);

// The request object is built from a JSON-encoded payload.
$payload = json_encode([
    'model' => 'openai/text-embedding-3-small',
    'input' => ['The quick brown fox jumps over the lazy dog'],
]);
$response = $client->embedAsync(EmbeddingRequest::from_json($payload));

// One input was sent, so only the first entry of `data` is inspected.
$vector = $response->data[0]->embedding;
$preview = json_encode(array_slice($vector, 0, 5));
echo 'Dimensions: ' . count($vector) . PHP_EOL;
echo 'First 5 values: ' . $preview . PHP_EOL;
api_key = System.get_env("OPENAI_API_KEY")
{:ok, client} = LiterLlm.create_client(api_key)

# The request is handed to the client as a JSON-encoded string.
payload =
  %{
    model: "openai/text-embedding-3-small",
    input: ["The quick brown fox jumps over the lazy dog"]
  }
  |> Jason.encode!()

{:ok, response} = LiterLlm.defaultclient_embed_async(client, payload)

# One input was sent, so only the first entry of `data` is inspected.
vector = Enum.at(response.data, 0).embedding
preview = vector |> Enum.take(5) |> inspect()
IO.puts("Dimensions: #{length(vector)}")
IO.puts("First 5 values: #{preview}")
import init, { createClient, WasmEmbeddingRequest } from "@xberg-io/liter-llm-wasm";

// init() is awaited before any other export of the package is called.
await init();

const apiKey = process.env.OPENAI_API_KEY!;
const client = createClient(apiKey);

// Start from the default request, then fill in the input and the model.
const embeddingRequest = WasmEmbeddingRequest.default();
embeddingRequest.input = ["The quick brown fox jumps over the lazy dog"];
embeddingRequest.model = "openai/text-embedding-3-small";

const response = await client.embed(embeddingRequest);

// One input was sent, so only the first entry of `data` is inspected.
const { embedding } = response.data[0];
console.log(`Dimensions: ${embedding.length}`);
console.log(`First 5 values: ${embedding.slice(0, 5)}`);
Embedding Parameters¶
| Parameter | Type | Description |
|---|---|---|
| `model` | string | Embedding model (e.g. `"openai/text-embedding-3-small"`) |
| `input` | string/array | Text(s) to embed |
| `encoding_format` | string | Output format (`"float"` or `"base64"`) |
| `dimensions` | int | Output dimensionality (model-dependent) |
Embedding Providers¶
| Provider | Prefix | Example Model |
|---|---|---|
| OpenAI | `openai/` | `text-embedding-3-small`, `text-embedding-3-large` |
| Cohere | `cohere/` | `embed-english-v3.0` |
| Voyage AI | `voyage/` | `voyage-3` |
| Mistral | `mistral/` | `mistral-embed` |
| Google Vertex AI | `vertex_ai/` | `text-embedding-004` |
| AWS Bedrock | `bedrock/` | `amazon.titan-embed-text-v2:0` |
| Ollama | `ollama/` | `nomic-embed-text` |
| LM Studio | `lmstudio/` | Depends on loaded model |
| vLLM | `vllm/` | `BAAI/bge-base-en-v1.5` |
| llama.cpp | `llamacpp/` | Depends on loaded GGUF |
| LocalAI | `localai/` | Depends on configuration |
| llamafile | `llamafile/` | Depends on loaded model |
| Jina AI | `jina_ai/` | `jina-embeddings-v3` |
See the Providers page for the complete capability matrix.
Rerank¶
Rerank documents by relevance to a query. Useful for improving retrieval quality in RAG pipelines:
import asyncio
import json
import os

from liter_llm import create_client
from liter_llm._internal_bindings import RerankRequest

QUERY = "What is the capital of France?"

# Candidate documents to be scored against QUERY.
CANDIDATES = [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "London is the capital of England.",
]


async def main() -> None:
    """Rerank the candidate documents against the query and print each score."""
    client = create_client(api_key=os.environ["COHERE_API_KEY"])

    # The request object is built from a JSON-encoded payload.
    body = json.dumps(
        {"model": "cohere/rerank-v3.5", "query": QUERY, "documents": CANDIDATES}
    )
    response = await client.rerank(RerankRequest.from_json(body))

    for item in response.results:
        print(f"Index: {item.index}, Score: {item.relevance_score:.4f}")


asyncio.run(main())
import { createClient } from "@xberg-io/liter-llm";

// The key is read from the environment; `!` asserts that it is defined.
const apiKey = process.env.COHERE_API_KEY!;
const client = createClient(apiKey);

// Candidate documents to be scored against the query.
const documents = [
  "Paris is the capital of France.",
  "Berlin is the capital of Germany.",
  "London is the capital of England.",
];

const response = await client.rerank({
  model: "cohere/rerank-v3.5",
  query: "What is the capital of France?",
  documents,
});

for (const { index, relevanceScore } of response.results) {
  console.log(`Index: ${index}, Score: ${relevanceScore.toFixed(4)}`);
}
use liter_llm::{ClientConfigBuilder, DefaultClient, LlmClient, RerankRequest};

/// Reranks three candidate documents against a query with a Cohere model and
/// prints the index and relevance score of every result.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // The request targets a `cohere/` model, so the client must be given the
    // Cohere key rather than the OpenAI one.
    let config = ClientConfigBuilder::new(std::env::var("COHERE_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("cohere/rerank-v3.5"))?;

    let response = client
        .rerank(RerankRequest {
            model: "cohere/rerank-v3.5".into(),
            query: "What is the capital of France?".into(),
            documents: vec![
                "Paris is the capital of France.".into(),
                "Berlin is the capital of Germany.".into(),
                "London is the capital of England.".into(),
            ],
            // Remaining request fields keep their default values.
            ..Default::default()
        })
        .await?;

    for result in &response.results {
        println!("Index: {}, Score: {:.4}", result.index, result.relevance_score);
    }

    Ok(())
}
package main
import (
"encoding/json"
"fmt"
"os"
llm "github.com/xberg-io/liter-llm/packages/go"
)
func main() {
client, err := llm.CreateClient(os.Getenv("COHERE_API_KEY"), nil, nil, nil, nil)
if err != nil {
panic(err)
}
var req llm.RerankRequest
if err := json.Unmarshal([]byte(`{
"model": "cohere/rerank-v3.5",
"query": "What is the capital of France?",
"documents": [
"Paris is the capital of France.",
"Berlin is the capital of Germany.",
"London is the capital of England."
]
}`), &req); err != nil {
panic(err)
}
resp, err := client.Rerank(req)
if err != nil {
panic(err)
}
for _, r := range resp.Results {
fmt.Printf("Index: %d, Score: %.4f\n", r.Index, r.RelevanceScore)
}
}
import io.xberg.literllm.*;
import java.util.List;

public class Main {
    /**
     * Reranks three candidate documents against a query with a Cohere model
     * and prints the index and relevance score of every result.
     *
     * @param args unused
     * @throws Exception if the client reports a failure
     */
    public static void main(String[] args) throws Exception {
        // The request targets a cohere/ model, so the client must be given the
        // Cohere key rather than the OpenAI one.
        // try-with-resources closes the client when the block exits.
        try (var client = LiterLlm.createClient(System.getenv("COHERE_API_KEY"))) {
            var docs = List.of(
                RerankDocument.of("Paris is the capital of France."),
                RerankDocument.of("Berlin is the capital of Germany."),
                RerankDocument.of("London is the capital of England.")
            );

            var response = client.rerank(RerankRequest.builder()
                .withModel("cohere/rerank-v3.5")
                .withQuery("What is the capital of France?")
                .withDocuments(docs)
                .build());

            for (var result : response.results()) {
                System.out.printf("Index: %d, Score: %.4f%n",
                    result.index(), result.relevanceScore());
            }
        }
    }
}
using LiterLlm;

// Reranks three candidate documents against a query with a Cohere model and
// prints the index and relevance score of every result.

// The request targets a cohere/ model, so the client must be given the
// Cohere key rather than the OpenAI one.
using var client = LiterLlmLib.CreateClient(
    apiKey: Environment.GetEnvironmentVariable("COHERE_API_KEY")!,
    baseUrl: null, timeoutSecs: null, maxRetries: null, modelHint: null);

var response = await client.Rerank(new RerankRequest
{
    Model = "cohere/rerank-v3.5",
    Query = "What is the capital of France?",
    Documents =
    [
        RerankDocument.Of("Paris is the capital of France."),
        RerankDocument.Of("Berlin is the capital of Germany."),
        RerankDocument.Of("London is the capital of England."),
    ]
});

// F4 formats the score with four decimal places.
foreach (var result in response.Results)
{
    Console.WriteLine($"Index: {result.Index}, Score: {result.RelevanceScore:F4}");
}
# frozen_string_literal: true

require 'liter_llm'

# ENV.fetch raises KeyError when the variable is not set.
api_key = ENV.fetch('COHERE_API_KEY')
client = LiterLlm.create_client(api_key)

# Candidate documents to be scored against the query.
candidates = [
  'Paris is the capital of France.',
  'Berlin is the capital of Germany.',
  'London is the capital of England.'
]

request = LiterLlm::RerankRequest.new(
  model: 'cohere/rerank-v3.5',
  query: 'What is the capital of France?',
  documents: candidates
)
response = client.rerank_async(request)

response.results.each do |entry|
  score = format('%.4f', entry.relevance_score)
  puts "Index: #{entry.index}, Score: #{score}"
end
<?php

declare(strict_types=1);

use Liter\Llm\LiterLlm;
use Liter\Llm\RerankRequest;

// getenv() returns false when the variable is unset; fall back to an empty string.
$apiKey = getenv('COHERE_API_KEY') ?: '';
$client = LiterLlm::createClient($apiKey);

// Candidate documents to be scored against the query.
$candidates = [
    'Paris is the capital of France.',
    'Berlin is the capital of Germany.',
    'London is the capital of England.',
];

// The request object is built from a JSON-encoded payload.
$payload = json_encode([
    'model' => 'cohere/rerank-v3.5',
    'query' => 'What is the capital of France?',
    'documents' => $candidates,
]);
$response = $client->rerankAsync(RerankRequest::from_json($payload));

foreach ($response->results as $entry) {
    $score = number_format($entry->relevanceScore, 4);
    echo "Index: {$entry->index}, Score: " . $score . PHP_EOL;
}
api_key = System.get_env("COHERE_API_KEY")
{:ok, client} = LiterLlm.create_client(api_key)

# Candidate documents to be scored against the query.
candidates = [
  "Paris is the capital of France.",
  "Berlin is the capital of Germany.",
  "London is the capital of England."
]

# The request is handed to the client as a JSON-encoded string.
payload =
  %{
    model: "cohere/rerank-v3.5",
    query: "What is the capital of France?",
    documents: candidates
  }
  |> Jason.encode!()

{:ok, response} = LiterLlm.defaultclient_rerank_async(client, payload)

Enum.each(response.results, fn entry ->
  score = Float.round(entry.relevance_score, 4)
  IO.puts("Index: #{entry.index}, Score: #{score}")
end)
import init, { createClient, WasmRerankRequest } from "@xberg-io/liter-llm-wasm";

// init() is awaited before any other export of the package is called.
await init();

const apiKey = process.env.COHERE_API_KEY!;
const client = createClient(apiKey);

// Candidate documents to be scored against the query.
const candidates = [
  "Paris is the capital of France.",
  "Berlin is the capital of Germany.",
  "London is the capital of England.",
];

// Start from the default request, then fill in the fields this example needs.
const rerankRequest = WasmRerankRequest.default();
rerankRequest.documents = candidates;
rerankRequest.query = "What is the capital of France?";
rerankRequest.model = "cohere/rerank-v3.5";

const response = await client.rerank(rerankRequest);

for (const entry of response.results) {
  const score = entry.relevanceScore.toFixed(4);
  console.log(`Index: ${entry.index}, Score: ${score}`);
}
Rerank Parameters¶
| Parameter | Type | Description |
|---|---|---|
| `model` | string | Rerank model (e.g. `"cohere/rerank-v3.5"`) |
| `query` | string | The query to rank documents against |
| `documents` | array | Documents to rerank |
| `top_n` | int | Number of top results to return |
| `return_documents` | bool | Include document text in results |