Embeddings & Rerank¶

Embeddings¶

Generate vector embeddings from text. Embeddings are fixed-length numeric arrays that capture semantic meaning — useful for search, clustering, and retrieval-augmented generation (RAG).

Python | TypeScript | Rust | Go | Java | C# | Ruby | PHP | Elixir | WASM

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    """Embed one sentence and print the vector's length and first five values."""
    api_key = os.environ["OPENAI_API_KEY"]
    client = LlmClient(api_key=api_key)

    sentences = ["The quick brown fox jumps over the lazy dog"]
    result = await client.embed(
        model="openai/text-embedding-3-small",
        input=sentences,
    )

    # Only one input was sent, so read the first entry.
    vector = result.data[0].embedding
    print(f"Dimensions: {len(vector)}")
    print(f"First 5 values: {vector[:5]}")

asyncio.run(main())

import { LlmClient } from "@kreuzberg/liter-llm";

// Build a client from the key in the environment; `!` asserts the variable is set.
const apiKey = process.env.OPENAI_API_KEY!;
const client = new LlmClient({ apiKey });

const { data } = await client.embed({
  model: "openai/text-embedding-3-small",
  input: ["The quick brown fox jumps over the lazy dog"],
});

// Only one input was sent, so read the first entry.
const vector = data[0].embedding;
console.log(`Dimensions: ${vector.length}`);
console.log(`First 5 values: ${vector.slice(0, 5)}`);

use liter_llm::{
    ClientConfigBuilder, DefaultClient, EmbeddingInput, EmbeddingRequest, LlmClient,
};

/// Embeds a single sentence and prints the vector length and its first five values.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let api_key = std::env::var("OPENAI_API_KEY")?;
    let client = DefaultClient::new(
        ClientConfigBuilder::new(api_key).build(),
        Some("openai/text-embedding-3-small"),
    )?;

    let sentences = vec!["The quick brown fox jumps over the lazy dog".into()];
    let response = client
        .embed(EmbeddingRequest {
            model: "openai/text-embedding-3-small".into(),
            input: EmbeddingInput::Multiple(sentences),
            ..Default::default()
        })
        .await?;

    // Only one input was sent, so read the first entry.
    let vector = &response.data[0].embedding;
    println!("Dimensions: {}", vector.len());
    println!("First 5 values: {:?}", &vector[..5]);
    Ok(())
}

package main

import (
 "context"
 "fmt"
 "os"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

// main embeds one sentence and prints the vector length and its first five values.
func main() {
 apiKey := os.Getenv("OPENAI_API_KEY")
 client := llm.NewClient(llm.WithAPIKey(apiKey))

 request := &llm.EmbeddingRequest{
  Model: "openai/text-embedding-3-small",
  Input: llm.NewEmbeddingInputMultiple([]string{"The quick brown fox jumps over the lazy dog"}),
 }
 resp, err := client.Embed(context.Background(), request)
 if err != nil {
  panic(err)
 }

 // Only one input was sent, so read the first entry.
 vector := resp.Data[0].Embedding
 fmt.Printf("Dimensions: %d\n", len(vector))
 fmt.Printf("First 5 values: %v\n", vector[:5])
}

import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.List;

/** Embeds one sentence and prints the vector size and its first five values. */
public class Main {
    public static void main(String[] args) throws Exception {
        var builder = LlmClient.builder().apiKey(System.getenv("OPENAI_API_KEY"));
        // try-with-resources closes the client when the block exits.
        try (var client = builder.build()) {
            var request = new EmbeddingRequest(
                "openai/text-embedding-3-small",
                List.of("The quick brown fox jumps over the lazy dog")
            );
            // Only one input was sent, so read the first entry.
            var vector = client.embed(request).data().getFirst().embedding();
            System.out.println("Dimensions: " + vector.size());
            System.out.println("First 5 values: " + vector.subList(0, 5));
        }
    }
}

using LiterLlm;

// Read the key once; `!` asserts the environment variable is set.
var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")!;
await using var client = new LlmClient(apiKey: apiKey);

var request = new EmbeddingRequest(
    Model: "openai/text-embedding-3-small",
    Input: ["The quick brown fox jumps over the lazy dog"]
);
var response = await client.EmbedAsync(request);

// Only one input was sent, so read the first entry.
var vector = response.Data[0].Embedding;
Console.WriteLine($"Dimensions: {vector.Length}");
Console.WriteLine($"First 5 values: [{string.Join(", ", vector[..5])}]");

# frozen_string_literal: true

require "liter_llm"
require "json"

client = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})

# The request is passed as a JSON string and the reply is parsed from JSON.
payload = JSON.generate(
  model: "openai/text-embedding-3-small",
  input: ["The quick brown fox jumps over the lazy dog"]
)
reply = JSON.parse(client.embed(payload))

# Only one input was sent, so read the first entry.
vector = reply.dig("data", 0, "embedding")
puts "Dimensions: #{vector.length}"
puts "First 5 values: #{vector.first(5)}"

<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

// Fall back to an empty string when the variable is unset (getenv returns false).
$apiKey = getenv('OPENAI_API_KEY') ?: '';
$client = new LlmClient(apiKey: $apiKey);

// The request is passed as a JSON string and the reply is decoded into arrays.
$payload = json_encode([
    'model' => 'openai/text-embedding-3-small',
    'input' => ['The quick brown fox jumps over the lazy dog'],
]);
$reply = json_decode($client->embed($payload), true);

// Only one input was sent, so read the first entry.
$vector = $reply['data'][0]['embedding'];
echo 'Dimensions: ' . count($vector) . PHP_EOL;
echo 'First 5 values: ' . json_encode(array_slice($vector, 0, 5)) . PHP_EOL;

request = %{
  model: "openai/text-embedding-3-small",
  input: ["The quick brown fox jumps over the lazy dog"]
}

# Matching on :ok means any other return value raises a MatchError.
{:ok, response} = LiterLlm.embed(request, api_key: System.fetch_env!("OPENAI_API_KEY"))

# Only one input was sent, so read the first entry.
vector = hd(response["data"])["embedding"]
IO.puts("Dimensions: #{length(vector)}")
IO.puts("First 5 values: #{inspect(Enum.take(vector, 5))}")

import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

// Initialize the WASM module before constructing a client.
await init();

// `!` asserts the environment variable is set.
const apiKey = process.env.OPENAI_API_KEY!;
const client = new LlmClient({ apiKey });

const { data } = await client.embed({
  model: "openai/text-embedding-3-small",
  input: ["The quick brown fox jumps over the lazy dog"],
});

// Only one input was sent, so read the first entry.
const vector = data[0].embedding;
console.log(`Dimensions: ${vector.length}`);
console.log(`First 5 values: ${vector.slice(0, 5)}`);

Embedding Parameters¶

Parameter	Type	Description
`model`	string	Embedding model (e.g. `"openai/text-embedding-3-small"`)
`input`	string/array	Text(s) to embed
`encoding_format`	string	Output format (`"float"` or `"base64"`)
`dimensions`	int	Output dimensionality (model-dependent)

Embedding Providers¶

Provider	Prefix	Example Model
OpenAI	`openai/`	`text-embedding-3-small`, `text-embedding-3-large`
Cohere	`cohere/`	`embed-english-v3.0`
Voyage AI	`voyage/`	`voyage-3`
Mistral	`mistral/`	`mistral-embed`
Google Vertex AI	`vertex_ai/`	`text-embedding-004`
AWS Bedrock	`bedrock/`	`amazon.titan-embed-text-v2:0`
Ollama	`ollama/`	`nomic-embed-text`
LM Studio	`lmstudio/`	Depends on loaded model
vLLM	`vllm/`	`BAAI/bge-base-en-v1.5`
llama.cpp	`llamacpp/`	Depends on loaded GGUF
LocalAI	`localai/`	Depends on configuration
llamafile	`llamafile/`	Depends on loaded model
Jina AI	`jina_ai/`	`jina-embeddings-v3`

See the Providers page for the complete capability matrix.

Rerank¶

Rerank documents by relevance to a query. Each result carries an index and a relevance score, which is useful for improving retrieval quality in RAG pipelines:

Python | TypeScript | Rust | Go | Java | C# | Ruby | PHP | Elixir | WASM

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    """Rerank three candidate documents against a query and print each score."""
    # The model below is a Cohere model, so the client needs a Cohere key,
    # not an OpenAI one.
    client = LlmClient(api_key=os.environ["COHERE_API_KEY"])
    response = await client.rerank(
        model="cohere/rerank-v3.5",
        query="What is the capital of France?",
        documents=[
            "Paris is the capital of France.",
            "Berlin is the capital of Germany.",
            "London is the capital of England.",
        ],
    )
    # Each result exposes an index and a relevance score.
    for result in response.results:
        print(f"Index: {result.index}, Score: {result.relevance_score:.4f}")

asyncio.run(main())

import { LlmClient } from "@kreuzberg/liter-llm";

// The model below is a Cohere model, so the client needs a Cohere key,
// not an OpenAI one. `!` asserts the variable is set.
const client = new LlmClient({ apiKey: process.env.COHERE_API_KEY! });
const response = await client.rerank({
  model: "cohere/rerank-v3.5",
  query: "What is the capital of France?",
  documents: [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "London is the capital of England.",
  ],
});

// Each result exposes an index and a relevance score.
for (const result of response.results) {
  console.log(`Index: ${result.index}, Score: ${result.relevanceScore.toFixed(4)}`);
}

use liter_llm::{ClientConfigBuilder, DefaultClient, LlmClient, RerankRequest};

/// Reranks three candidate documents against a query and prints each score.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // The model below is a Cohere model, so the client needs a Cohere key,
    // not an OpenAI one.
    let config = ClientConfigBuilder::new(std::env::var("COHERE_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("cohere/rerank-v3.5"))?;

    let response = client
        .rerank(RerankRequest {
            model: "cohere/rerank-v3.5".into(),
            query: "What is the capital of France?".into(),
            documents: vec![
                "Paris is the capital of France.".into(),
                "Berlin is the capital of Germany.".into(),
                "London is the capital of England.".into(),
            ],
            ..Default::default()
        })
        .await?;

    // Each result exposes an index and a relevance score.
    for result in &response.results {
        println!("Index: {}, Score: {:.4}", result.index, result.relevance_score);
    }
    Ok(())
}

package main

import (
 "context"
 "fmt"
 "os"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

// main reranks three candidate documents against a query and prints each score.
func main() {
 // The model below is a Cohere model, so the client needs a Cohere key,
 // not an OpenAI one.
 client := llm.NewClient(llm.WithAPIKey(os.Getenv("COHERE_API_KEY")))
 resp, err := client.Rerank(context.Background(), &llm.RerankRequest{
  Model: "cohere/rerank-v3.5",
  Query: "What is the capital of France?",
  Documents: []string{
   "Paris is the capital of France.",
   "Berlin is the capital of Germany.",
   "London is the capital of England.",
  },
 })
 if err != nil {
  panic(err)
 }
 // Each result exposes an index and a relevance score.
 for _, result := range resp.Results {
  fmt.Printf("Index: %d, Score: %.4f\n", result.Index, result.RelevanceScore)
 }
}

import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.util.List;

/** Reranks three candidate documents against a query and prints each score. */
public class Main {
    public static void main(String[] args) throws Exception {
        // The model below is a Cohere model, so the client needs a Cohere key,
        // not an OpenAI one. try-with-resources closes the client on exit.
        try (var client = LlmClient.builder()
                .apiKey(System.getenv("COHERE_API_KEY"))
                .build()) {
            var response = client.rerank(new RerankRequest(
                "cohere/rerank-v3.5",
                "What is the capital of France?",
                List.of(
                    "Paris is the capital of France.",
                    "Berlin is the capital of Germany.",
                    "London is the capital of England."
                )
            ));
            // Each result exposes an index and a relevance score.
            for (var result : response.results()) {
                System.out.printf("Index: %d, Score: %.4f%n",
                    result.index(), result.relevanceScore());
            }
        }
    }
}

using LiterLlm;

// The model below is a Cohere model, so the client needs a Cohere key,
// not an OpenAI one. `!` asserts the variable is set.
await using var client = new LlmClient(
    apiKey: Environment.GetEnvironmentVariable("COHERE_API_KEY")!);

var response = await client.RerankAsync(new RerankRequest(
    Model: "cohere/rerank-v3.5",
    Query: "What is the capital of France?",
    Documents: [
        "Paris is the capital of France.",
        "Berlin is the capital of Germany.",
        "London is the capital of England.",
    ]
));

// Each result exposes an index and a relevance score.
foreach (var result in response.Results)
{
    Console.WriteLine($"Index: {result.Index}, Score: {result.RelevanceScore:F4}");
}

# frozen_string_literal: true

require "liter_llm"
require "json"

# The model below is a Cohere model, so the client needs a Cohere key,
# not an OpenAI one.
client = LiterLlm::LlmClient.new(ENV.fetch("COHERE_API_KEY"), {})

# The request is passed as a JSON string and the reply is parsed from JSON.
response = JSON.parse(client.rerank(JSON.generate(
  model: "cohere/rerank-v3.5",
  query: "What is the capital of France?",
  documents: [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "London is the capital of England."
  ]
)))

# Each result exposes an index and a relevance score.
response["results"].each do |result|
  puts "Index: #{result["index"]}, Score: #{format("%.4f", result["relevance_score"])}"
end

<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

// The model below is a Cohere model, so the client needs a Cohere key,
// not an OpenAI one. Falls back to '' when the variable is unset.
$client = new LlmClient(apiKey: getenv('COHERE_API_KEY') ?: '');

// The request is passed as a JSON string and the reply is decoded into arrays.
$response = json_decode($client->rerank(json_encode([
    'model' => 'cohere/rerank-v3.5',
    'query' => 'What is the capital of France?',
    'documents' => [
        'Paris is the capital of France.',
        'Berlin is the capital of Germany.',
        'London is the capital of England.',
    ],
])), true);

// Each result exposes an index and a relevance score.
foreach ($response['results'] as $result) {
    echo "Index: {$result['index']}, Score: " . number_format($result['relevance_score'], 4) . PHP_EOL;
}

# The model below is a Cohere model, so the call needs a Cohere key,
# not an OpenAI one. Matching on :ok raises a MatchError for any other return.
{:ok, response} =
  LiterLlm.rerank(
    %{
      model: "cohere/rerank-v3.5",
      query: "What is the capital of France?",
      documents: [
        "Paris is the capital of France.",
        "Berlin is the capital of Germany.",
        "London is the capital of England."
      ]
    },
    api_key: System.fetch_env!("COHERE_API_KEY")
  )

# Each result exposes an index and a relevance score.
for result <- response["results"] do
  IO.puts("Index: #{result["index"]}, Score: #{Float.round(result["relevance_score"], 4)}")
end

import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

// Initialize the WASM module before constructing a client.
await init();

// The model below is a Cohere model, so the client needs a Cohere key,
// not an OpenAI one. `!` asserts the variable is set.
const client = new LlmClient({ apiKey: process.env.COHERE_API_KEY! });
const response = await client.rerank({
  model: "cohere/rerank-v3.5",
  query: "What is the capital of France?",
  documents: [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "London is the capital of England.",
  ],
});

// Each result exposes an index and a relevance score.
for (const result of response.results) {
  console.log(`Index: ${result.index}, Score: ${result.relevanceScore.toFixed(4)}`);
}

Rerank Parameters¶

Parameter	Type	Description
`model`	string	Rerank model (e.g. `"cohere/rerank-v3.5"`)
`query`	string	The query to rank documents against
`documents`	array	Documents to rerank
`top_n`	int	Number of top results to return
`return_documents`	bool	Include document text in results