Embeddings & Rerank

Embeddings

Generate vector embeddings from text. Embeddings are fixed-length numeric arrays that capture semantic meaning -- useful for search, clustering, and RAG.

Python

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    response = await client.embed(
        model="openai/text-embedding-3-small",
        input=["The quick brown fox jumps over the lazy dog"],
    )
    print(f"Dimensions: {len(response.data[0].embedding)}")
    print(f"First 5 values: {response.data[0].embedding[:5]}")

asyncio.run(main())

TypeScript

import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const response = await client.embed({
  model: "openai/text-embedding-3-small",
  input: ["The quick brown fox jumps over the lazy dog"],
});
console.log(`Dimensions: ${response.data[0].embedding.length}`);
console.log(`First 5 values: ${response.data[0].embedding.slice(0, 5)}`);

Rust

use liter_llm::{
    ClientConfigBuilder, DefaultClient, EmbeddingInput, EmbeddingRequest, LlmClient,
};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("openai/text-embedding-3-small"))?;

    let request = EmbeddingRequest {
        model: "openai/text-embedding-3-small".into(),
        input: EmbeddingInput::Multiple(vec![
            "The quick brown fox jumps over the lazy dog".into(),
        ]),
        ..Default::default()
    };

    let response = client.embed(request).await?;
    let embedding = &response.data[0].embedding;
    println!("Dimensions: {}", embedding.len());
    println!("First 5 values: {:?}", &embedding[..5]);
    Ok(())
}

Go

package main

import (
 "context"
 "fmt"
 "os"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

func main() {
 client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))
 resp, err := client.Embed(context.Background(), &llm.EmbeddingRequest{
  Model: "openai/text-embedding-3-small",
  Input: llm.NewEmbeddingInputMultiple([]string{"The quick brown fox jumps over the lazy dog"}),
 })
 if err != nil {
  panic(err)
 }
 fmt.Printf("Dimensions: %d\n", len(resp.Data[0].Embedding))
 fmt.Printf("First 5 values: %v\n", resp.Data[0].Embedding[:5])
}

Java

import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.List;

public class Main {
    public static void main(String[] args) throws Exception {
        try (var client = LlmClient.builder()
                .apiKey(System.getenv("OPENAI_API_KEY"))
                .build()) {
            var response = client.embed(new EmbeddingRequest(
                "openai/text-embedding-3-small",
                List.of("The quick brown fox jumps over the lazy dog")
            ));
            var embedding = response.data().getFirst().embedding();
            System.out.println("Dimensions: " + embedding.size());
            System.out.println("First 5 values: " + embedding.subList(0, 5));
        }
    }
}

C#

using LiterLlm;

await using var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!);

var response = await client.EmbedAsync(new EmbeddingRequest(
    Model: "openai/text-embedding-3-small",
    Input: ["The quick brown fox jumps over the lazy dog"]
));

var embedding = response.Data[0].Embedding;
Console.WriteLine($"Dimensions: {embedding.Length}");
Console.WriteLine($"First 5 values: [{string.Join(", ", embedding[..5])}]");

Ruby

# frozen_string_literal: true

require "liter_llm"
require "json"

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})

response = JSON.parse(client.embed(JSON.generate(
  model: "openai/text-embedding-3-small",
  input: ["The quick brown fox jumps over the lazy dog"]
)))

embedding = response.dig("data", 0, "embedding")
puts "Dimensions: #{embedding.length}"
puts "First 5 values: #{embedding.first(5)}"

PHP

<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');

$response = json_decode($client->embed(json_encode([
    'model' => 'openai/text-embedding-3-small',
    'input' => ['The quick brown fox jumps over the lazy dog'],
])), true);

$embedding = $response['data'][0]['embedding'];
echo 'Dimensions: ' . count($embedding) . PHP_EOL;
echo 'First 5 values: ' . json_encode(array_slice($embedding, 0, 5)) . PHP_EOL;

Elixir

{:ok, response} =
  LiterLlm.embed(
    %{
      model: "openai/text-embedding-3-small",
      input: ["The quick brown fox jumps over the lazy dog"]
    },
    api_key: System.fetch_env!("OPENAI_API_KEY")
  )

embedding = hd(response["data"])["embedding"]
IO.puts("Dimensions: #{length(embedding)}")
IO.puts("First 5 values: #{inspect(Enum.take(embedding, 5))}")

WASM

import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

await init();

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const response = await client.embed({
  model: "openai/text-embedding-3-small",
  input: ["The quick brown fox jumps over the lazy dog"],
});
console.log(`Dimensions: ${response.data[0].embedding.length}`);
console.log(`First 5 values: ${response.data[0].embedding.slice(0, 5)}`);
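
For the search and RAG use cases mentioned above, retrieval reduces to comparing the query's vector with each document's vector, most often by cosine similarity. A minimal Python sketch built on the same embed call; it assumes response.data preserves input order, as the data[0] indexing in the examples above implies:

import asyncio
import math
import os
from liter_llm import LlmClient

def cosine(a: list[float], b: list[float]) -> float:
    # Cosine similarity: dot product divided by the product of the vector norms.
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(y * y for y in b))
    return dot / (norm_a * norm_b)

async def main() -> None:
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    query = "capital of France"
    docs = [
        "Paris is the capital of France.",
        "The quick brown fox jumps over the lazy dog.",
    ]
    # Embed the query and the documents in a single call.
    response = await client.embed(
        model="openai/text-embedding-3-small",
        input=[query, *docs],
    )
    query_vec, *doc_vecs = [item.embedding for item in response.data]
    scores = [(cosine(query_vec, vec), doc) for doc, vec in zip(docs, doc_vecs)]
    for score, doc in sorted(scores, reverse=True):
        print(f"{score:.4f}  {doc}")

asyncio.run(main())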

Embedding Parameters

Parameter        Type          Description
model            string        Embedding model (e.g. "openai/text-embedding-3-small")
input            string/array  Text(s) to embed
encoding_format  string        Output format ("float" or "base64")
dimensions       int           Output dimensionality (model-dependent)
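
The optional parameters slot into the same call. A hedged Python sketch of requesting shortened vectors, assuming the client exposes these parameters as keyword arguments named exactly as in the table (dimensions is only honored by models that support it, such as text-embedding-3-small and -3-large):

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    response = await client.embed(
        model="openai/text-embedding-3-small",
        input=["The quick brown fox jumps over the lazy dog"],
        dimensions=256,           # truncate the output vectors to 256 dimensions
        encoding_format="float",  # "base64" returns a compact encoded payload instead
    )
    print(f"Dimensions: {len(response.data[0].embedding)}")  # expect 256

asyncio.run(main())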

Embedding Providers

Provider          Prefix      Example Model
OpenAI            openai/     text-embedding-3-small, text-embedding-3-large
Cohere            cohere/     embed-english-v3.0
Voyage AI         voyage/     voyage-3
Mistral           mistral/    mistral-embed
Google Vertex AI  vertex_ai/  text-embedding-004
AWS Bedrock       bedrock/    amazon.titan-embed-text-v2:0
Ollama            ollama/     nomic-embed-text
LM Studio         lmstudio/   Depends on loaded model
vLLM              vllm/       BAAI/bge-base-en-v1.5
llama.cpp         llamacpp/   Depends on loaded GGUF
LocalAI           localai/    Depends on configuration
llamafile         llamafile/  Depends on loaded model
Jina AI           jina_ai/    jina-embeddings-v3

See the Providers page for the complete capability matrix.
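
Switching providers changes only the model string; how credentials and local endpoints are configured is provider-specific and covered on the Providers page. As a sketch, inside main() from the Python example above, pointing at a local Ollama server (assumes Ollama is running locally with nomic-embed-text pulled):

    # Only the model prefix changes; the call shape stays the same.
    response = await client.embed(
        model="ollama/nomic-embed-text",
        input=["The quick brown fox jumps over the lazy dog"],
    )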

Rerank

Rerank documents by relevance to a query. Useful for improving retrieval quality in RAG pipelines:

Python

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    response = await client.rerank(
        model="cohere/rerank-v3.5",
        query="What is the capital of France?",
        documents=[
            "Paris is the capital of France.",
            "Berlin is the capital of Germany.",
            "London is the capital of England.",
        ],
    )
    for result in response.results:
        print(f"Index: {result.index}, Score: {result.relevance_score:.4f}")

asyncio.run(main())

TypeScript

import { LlmClient } from "@kreuzberg/liter-llm";

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const response = await client.rerank({
  model: "cohere/rerank-v3.5",
  query: "What is the capital of France?",
  documents: [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "London is the capital of England.",
  ],
});

for (const result of response.results) {
  console.log(`Index: ${result.index}, Score: ${result.relevanceScore.toFixed(4)}`);
}

Rust

use liter_llm::{ClientConfigBuilder, DefaultClient, LlmClient, RerankRequest};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("cohere/rerank-v3.5"))?;

    let response = client
        .rerank(RerankRequest {
            model: "cohere/rerank-v3.5".into(),
            query: "What is the capital of France?".into(),
            documents: vec![
                "Paris is the capital of France.".into(),
                "Berlin is the capital of Germany.".into(),
                "London is the capital of England.".into(),
            ],
            ..Default::default()
        })
        .await?;

    for result in &response.results {
        println!("Index: {}, Score: {:.4}", result.index, result.relevance_score);
    }
    Ok(())
}

Go

package main

import (
 "context"
 "fmt"
 "os"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

func main() {
 client := llm.NewClient(llm.WithAPIKey(os.Getenv("OPENAI_API_KEY")))
 resp, err := client.Rerank(context.Background(), &llm.RerankRequest{
  Model: "cohere/rerank-v3.5",
  Query: "What is the capital of France?",
  Documents: []string{
   "Paris is the capital of France.",
   "Berlin is the capital of Germany.",
   "London is the capital of England.",
  },
 })
 if err != nil {
  panic(err)
 }
 for _, result := range resp.Results {
  fmt.Printf("Index: %d, Score: %.4f\n", result.Index, result.RelevanceScore)
 }
}

Java

import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.List;

public class Main {
    public static void main(String[] args) throws Exception {
        try (var client = LlmClient.builder()
                .apiKey(System.getenv("OPENAI_API_KEY"))
                .build()) {
            var response = client.rerank(new RerankRequest(
                "cohere/rerank-v3.5",
                "What is the capital of France?",
                List.of(
                    "Paris is the capital of France.",
                    "Berlin is the capital of Germany.",
                    "London is the capital of England."
                )
            ));
            for (var result : response.results()) {
                System.out.printf("Index: %d, Score: %.4f%n",
                    result.index(), result.relevanceScore());
            }
        }
    }
}

C#

using LiterLlm;

await using var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!);

var response = await client.RerankAsync(new RerankRequest(
    Model: "cohere/rerank-v3.5",
    Query: "What is the capital of France?",
    Documents: [
        "Paris is the capital of France.",
        "Berlin is the capital of Germany.",
        "London is the capital of England.",
    ]
));

foreach (var result in response.Results)
{
    Console.WriteLine($"Index: {result.Index}, Score: {result.RelevanceScore:F4}");
}

Ruby

# frozen_string_literal: true

require "liter_llm"
require "json"

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})

response = JSON.parse(client.rerank(JSON.generate(
  model: "cohere/rerank-v3.5",
  query: "What is the capital of France?",
  documents: [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "London is the capital of England."
  ]
)))

response["results"].each do |result|
  puts "Index: #{result["index"]}, Score: #{format("%.4f", result["relevance_score"])}"
end

PHP

<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');

$response = json_decode($client->rerank(json_encode([
    'model' => 'cohere/rerank-v3.5',
    'query' => 'What is the capital of France?',
    'documents' => [
        'Paris is the capital of France.',
        'Berlin is the capital of Germany.',
        'London is the capital of England.',
    ],
])), true);

foreach ($response['results'] as $result) {
    echo "Index: {$result['index']}, Score: " . number_format($result['relevance_score'], 4) . PHP_EOL;
}

Elixir

{:ok, response} =
  LiterLlm.rerank(
    %{
      model: "cohere/rerank-v3.5",
      query: "What is the capital of France?",
      documents: [
        "Paris is the capital of France.",
        "Berlin is the capital of Germany.",
        "London is the capital of England."
      ]
    },
    api_key: System.fetch_env!("OPENAI_API_KEY")
  )

for result <- response["results"] do
  IO.puts("Index: #{result["index"]}, Score: #{Float.round(result["relevance_score"], 4)}")
end

WASM

import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

await init();

const client = new LlmClient({ apiKey: process.env.OPENAI_API_KEY! });
const response = await client.rerank({
  model: "cohere/rerank-v3.5",
  query: "What is the capital of France?",
  documents: [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "London is the capital of England.",
  ],
});

for (const result of response.results) {
  console.log(`Index: ${result.index}, Score: ${result.relevanceScore.toFixed(4)}`);
}

Rerank Parameters

Parameter         Type    Description
model             string  Rerank model (e.g. "cohere/rerank-v3.5")
query             string  The query to rank documents against
documents         array   Documents to rerank
top_n             int     Number of top results to return
return_documents  bool    Include document text in results
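
The optional parameters fit into the same call. A hedged Python sketch of trimming results with top_n and mapping them back onto the original documents for a RAG prompt; it assumes the client accepts these parameters as keyword arguments named as in the table, and that results arrive ordered by descending relevance_score:

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    client = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    documents = [
        "Paris is the capital of France.",
        "Berlin is the capital of Germany.",
        "London is the capital of England.",
    ]
    response = await client.rerank(
        model="cohere/rerank-v3.5",
        query="What is the capital of France?",
        documents=documents,
        top_n=2,                # keep only the two most relevant documents
        return_documents=True,  # include the document text in each result
    )
    # result.index points back into the original documents list.
    top_docs = [documents[result.index] for result in response.results]
    print(top_docs)

asyncio.run(main())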