Skip to content

Embeddings & Rerank

Embeddings

Generate vector embeddings from text. Embeddings are fixed-length numeric arrays that capture semantic meaning, which makes them useful for search, clustering, and retrieval-augmented generation (RAG).

import asyncio
import os

from liter_llm import create_client
from liter_llm._internal_bindings import EmbeddingRequest


async def main() -> None:
    """Embed one sentence and print the vector's size and its leading values."""
    api_key = os.environ["OPENAI_API_KEY"]
    llm = create_client(api_key=api_key)

    # The request object is built from a raw JSON document.
    payload = '{"model":"openai/text-embedding-3-small","input":["The quick brown fox jumps over the lazy dog"]}'
    result = await llm.embed(EmbeddingRequest.from_json(payload))

    vector = result.data[0].embedding
    print(f"Dimensions: {len(vector)}")
    print(f"First 5 values: {vector[:5]}")


asyncio.run(main())
import { createClient } from "@xberg-io/liter-llm";

// Create a client authenticated with the OpenAI key from the environment.
const llm = createClient(process.env.OPENAI_API_KEY!);

// Embed a single sentence; `input` is passed as an array of texts.
const { data } = await llm.embed({
  model: "openai/text-embedding-3-small",
  input: ["The quick brown fox jumps over the lazy dog"],
});

const vector = data[0].embedding;
console.log(`Dimensions: ${vector.length}`);
console.log(`First 5 values: ${vector.slice(0, 5)}`);
use liter_llm::{
    ClientConfigBuilder, DefaultClient, EmbeddingInput, EmbeddingRequest, LlmClient,
};

/// Embeds a single sentence with `openai/text-embedding-3-small` and prints the
/// vector's dimensionality followed by a preview of its leading values.
///
/// # Errors
///
/// Returns an error if `OPENAI_API_KEY` cannot be read, the client cannot be
/// constructed, the embedding request fails, or the response holds no data.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("openai/text-embedding-3-small"))?;

    let request = EmbeddingRequest {
        model: "openai/text-embedding-3-small".into(),
        input: EmbeddingInput::Multiple(vec![
            "The quick brown fox jumps over the lazy dog".into(),
        ]),
        ..Default::default()
    };

    let response = client.embed(request).await?;
    // Surface an empty response as an error instead of panicking on `data[0]`.
    let embedding = &response
        .data
        .first()
        .ok_or("embedding response contained no data")?
        .embedding;
    println!("Dimensions: {}", embedding.len());
    // Clamp the preview so a vector shorter than five values (for example when
    // a small `dimensions` is requested) does not panic on the slice.
    let preview = &embedding[..embedding.len().min(5)];
    println!("First 5 values: {:?}", preview);
    Ok(())
}
package main

import (
    "encoding/json"
    "fmt"
    "os"

    llm "github.com/xberg-io/liter-llm/packages/go"
)

// main embeds a single sentence with openai/text-embedding-3-small and prints
// the vector's dimensionality followed by a preview of its leading values.
func main() {
    client, err := llm.CreateClient(os.Getenv("OPENAI_API_KEY"), nil, nil, nil, nil)
    if err != nil {
        panic(err)
    }

    // The request is populated by unmarshalling a JSON document.
    var req llm.EmbeddingRequest
    if err := json.Unmarshal([]byte(`{
        "model": "openai/text-embedding-3-small",
        "input": ["The quick brown fox jumps over the lazy dog"]
    }`), &req); err != nil {
        panic(err)
    }

    resp, err := client.Embed(req)
    if err != nil {
        panic(err)
    }

    embedding := resp.Data[0].Embedding
    fmt.Printf("Dimensions: %d\n", len(embedding))

    // Clamp the preview: slicing [:5] on a vector with fewer than five values
    // (for example when a small "dimensions" is requested) would panic.
    preview := embedding
    if len(preview) > 5 {
        preview = preview[:5]
    }
    fmt.Printf("First 5 values: %v\n", preview)
}
import io.xberg.literllm.*;
import java.util.List;

/**
 * Embeds one sentence with {@code openai/text-embedding-3-small} and prints the
 * vector size plus a preview of its first values.
 */
public class Main {
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the client when the block exits.
        try (var client = LiterLlm.createClient(System.getenv("OPENAI_API_KEY"))) {
            var response = client.embed(EmbeddingRequest.builder()
                .withModel("openai/text-embedding-3-small")
                .withInput(EmbeddingInput.of(List.of("The quick brown fox jumps over the lazy dog")))
                .build());
            var embedding = response.data().getFirst().embedding();
            System.out.println("Dimensions: " + embedding.size());
            // Clamp the preview: subList(0, 5) throws IndexOutOfBoundsException
            // when the vector holds fewer than five values.
            var preview = embedding.subList(0, Math.min(5, embedding.size()));
            System.out.println("First 5 values: " + preview);
        }
    }
}
using LiterLlm;

// Read the OpenAI key from the environment; every other client option is left unset.
var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")!;
using var llm = LiterLlmLib.CreateClient(
    apiKey: apiKey,
    baseUrl: null,
    timeoutSecs: null,
    maxRetries: null,
    modelHint: null);

// Embed a single sentence.
var request = new EmbeddingRequest
{
    Model = "openai/text-embedding-3-small",
    Input = EmbeddingInput.Of(new[] { "The quick brown fox jumps over the lazy dog" })
};
var result = await llm.Embed(request);

var vector = result.Data[0].Embedding;
Console.WriteLine($"Dimensions: {vector.Count}");
var preview = string.Join(", ", vector.Take(5));
Console.WriteLine($"First 5 values: [{preview}]");
# frozen_string_literal: true

require 'liter_llm'

# ENV.fetch raises KeyError when the key is not set.
llm = LiterLlm.create_client(ENV.fetch('OPENAI_API_KEY'))

# Embed a single sentence; `input` is given as an array of texts.
request = LiterLlm::EmbeddingRequest.new(
  model: 'openai/text-embedding-3-small',
  input: ['The quick brown fox jumps over the lazy dog']
)
response = llm.embed_async(request)

vector = response.data[0].embedding
puts "Dimensions: #{vector.length}"
puts "First 5 values: #{vector.first(5)}"
<?php

declare(strict_types=1);

use Liter\Llm\LiterLlm;
use Liter\Llm\EmbeddingRequest;

// getenv() returns false when the variable is unset; fall back to an empty key.
$apiKey = getenv('OPENAI_API_KEY') ?: '';
$llm = LiterLlm::createClient($apiKey);

// The request object is built from a JSON document.
$payload = json_encode([
    'model' => 'openai/text-embedding-3-small',
    'input' => ['The quick brown fox jumps over the lazy dog'],
]);
$embedRequest = EmbeddingRequest::from_json($payload);

$response = $llm->embedAsync($embedRequest);
$vector = $response->data[0]->embedding;
$preview = array_slice($vector, 0, 5);
echo 'Dimensions: ' . count($vector) . PHP_EOL;
echo 'First 5 values: ' . json_encode($preview) . PHP_EOL;
# Matching on {:ok, _} raises MatchError if the client cannot be created.
api_key = System.get_env("OPENAI_API_KEY")
{:ok, llm} = LiterLlm.create_client(api_key)

# The request is handed to the client as a JSON-encoded string.
payload =
  %{
    model: "openai/text-embedding-3-small",
    input: ["The quick brown fox jumps over the lazy dog"]
  }
  |> Jason.encode!()

{:ok, response} = LiterLlm.defaultclient_embed_async(llm, payload)

first = Enum.at(response.data, 0)
vector = first.embedding
IO.puts("Dimensions: #{length(vector)}")
IO.puts("First 5 values: #{inspect(Enum.take(vector, 5))}")
import init, { createClient, WasmEmbeddingRequest } from "@xberg-io/liter-llm-wasm";

// Initialise the WASM module before creating a client.
await init();

const llm = createClient(process.env.OPENAI_API_KEY!);

// Start from the default request and fill in the fields this call needs.
const embedRequest = WasmEmbeddingRequest.default();
embedRequest.model = "openai/text-embedding-3-small";
embedRequest.input = ["The quick brown fox jumps over the lazy dog"];

const { data } = await llm.embed(embedRequest);
const vector = data[0].embedding;
console.log(`Dimensions: ${vector.length}`);
console.log(`First 5 values: ${vector.slice(0, 5)}`);

Embedding Parameters

| Parameter | Type | Description |
| --- | --- | --- |
| model | string | Embedding model (e.g. "openai/text-embedding-3-small") |
| input | string/array | Text(s) to embed |
| encoding_format | string | Output format ("float" or "base64") |
| dimensions | int | Output dimensionality (model-dependent) |

Embedding Providers

| Provider | Prefix | Example Model |
| --- | --- | --- |
| OpenAI | openai/ | text-embedding-3-small, text-embedding-3-large |
| Cohere | cohere/ | embed-english-v3.0 |
| Voyage AI | voyage/ | voyage-3 |
| Mistral | mistral/ | mistral-embed |
| Google Vertex AI | vertex_ai/ | text-embedding-004 |
| AWS Bedrock | bedrock/ | amazon.titan-embed-text-v2:0 |
| Ollama | ollama/ | nomic-embed-text |
| LM Studio | lmstudio/ | Depends on loaded model |
| vLLM | vllm/ | BAAI/bge-base-en-v1.5 |
| llama.cpp | llamacpp/ | Depends on loaded GGUF |
| LocalAI | localai/ | Depends on configuration |
| llamafile | llamafile/ | Depends on loaded model |
| Jina AI | jina_ai/ | jina-embeddings-v3 |

See the Providers page for the complete capability matrix.

Rerank

Rerank documents by relevance to a query. This is useful for improving retrieval quality in RAG pipelines:

import asyncio
import json
import os

from liter_llm import create_client
from liter_llm._internal_bindings import RerankRequest


async def main() -> None:
    """Rerank three candidate sentences against a query and print each score."""
    llm = create_client(api_key=os.environ["COHERE_API_KEY"])

    candidates = [
        "Paris is the capital of France.",
        "Berlin is the capital of Germany.",
        "London is the capital of England.",
    ]
    # The request object is built from a JSON document.
    body = json.dumps(
        {
            "model": "cohere/rerank-v3.5",
            "query": "What is the capital of France?",
            "documents": candidates,
        }
    )
    ranking = await llm.rerank(RerankRequest.from_json(body))

    for item in ranking.results:
        print(f"Index: {item.index}, Score: {item.relevance_score:.4f}")


asyncio.run(main())
import { createClient } from "@xberg-io/liter-llm";

// Create a client authenticated with the Cohere key from the environment.
const llm = createClient(process.env.COHERE_API_KEY!);

const candidates = [
  "Paris is the capital of France.",
  "Berlin is the capital of Germany.",
  "London is the capital of England.",
];

// Score every candidate against the query.
const { results } = await llm.rerank({
  model: "cohere/rerank-v3.5",
  query: "What is the capital of France?",
  documents: candidates,
});

for (const { index, relevanceScore } of results) {
  console.log(`Index: ${index}, Score: ${relevanceScore.toFixed(4)}`);
}
use liter_llm::{ClientConfigBuilder, DefaultClient, LlmClient, RerankRequest};

/// Reranks three candidate sentences against a query with `cohere/rerank-v3.5`
/// and prints each result's index and relevance score.
///
/// # Errors
///
/// Returns an error if `COHERE_API_KEY` cannot be read, the client cannot be
/// constructed, or the rerank request fails.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // The request targets a Cohere model, so authenticate with the Cohere key
    // rather than the OpenAI one.
    let config = ClientConfigBuilder::new(std::env::var("COHERE_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("cohere/rerank-v3.5"))?;

    let response = client
        .rerank(RerankRequest {
            model: "cohere/rerank-v3.5".into(),
            query: "What is the capital of France?".into(),
            documents: vec![
                "Paris is the capital of France.".into(),
                "Berlin is the capital of Germany.".into(),
                "London is the capital of England.".into(),
            ],
            ..Default::default()
        })
        .await?;

    for result in &response.results {
        println!("Index: {}, Score: {:.4}", result.index, result.relevance_score);
    }
    Ok(())
}
package main

import (
    "encoding/json"
    "fmt"
    "os"

    llm "github.com/xberg-io/liter-llm/packages/go"
)

// main reranks three candidate sentences against a query with
// cohere/rerank-v3.5 and prints each result's index and relevance score.
func main() {
    apiKey := os.Getenv("COHERE_API_KEY")
    client, err := llm.CreateClient(apiKey, nil, nil, nil, nil)
    if err != nil {
        panic(err)
    }

    // The request is populated by unmarshalling this JSON document.
    const payload = `{
        "model": "cohere/rerank-v3.5",
        "query": "What is the capital of France?",
        "documents": [
            "Paris is the capital of France.",
            "Berlin is the capital of Germany.",
            "London is the capital of England."
        ]
    }`
    var rerankReq llm.RerankRequest
    if err := json.Unmarshal([]byte(payload), &rerankReq); err != nil {
        panic(err)
    }

    ranking, err := client.Rerank(rerankReq)
    if err != nil {
        panic(err)
    }
    for _, item := range ranking.Results {
        fmt.Printf("Index: %d, Score: %.4f\n", item.Index, item.RelevanceScore)
    }
}
import io.xberg.literllm.*;
import java.util.List;

/**
 * Reranks three candidate sentences against a query with
 * {@code cohere/rerank-v3.5} and prints each result's index and relevance score.
 */
public class Main {
    public static void main(String[] args) throws Exception {
        // The request targets a Cohere model, so authenticate with the Cohere key
        // rather than the OpenAI one. try-with-resources closes the client on exit.
        try (var client = LiterLlm.createClient(System.getenv("COHERE_API_KEY"))) {
            var docs = List.of(
                RerankDocument.of("Paris is the capital of France."),
                RerankDocument.of("Berlin is the capital of Germany."),
                RerankDocument.of("London is the capital of England.")
            );
            var response = client.rerank(RerankRequest.builder()
                .withModel("cohere/rerank-v3.5")
                .withQuery("What is the capital of France?")
                .withDocuments(docs)
                .build());
            for (var result : response.results()) {
                System.out.printf("Index: %d, Score: %.4f%n",
                    result.index(), result.relevanceScore());
            }
        }
    }
}
using LiterLlm;

// The request targets a Cohere model, so authenticate with the Cohere key
// rather than the OpenAI one. Every other client option is left unset.
using var client = LiterLlmLib.CreateClient(
    apiKey: Environment.GetEnvironmentVariable("COHERE_API_KEY")!,
    baseUrl: null, timeoutSecs: null, maxRetries: null, modelHint: null);

// Score every candidate document against the query.
var response = await client.Rerank(new RerankRequest
{
    Model = "cohere/rerank-v3.5",
    Query = "What is the capital of France?",
    Documents =
    [
        RerankDocument.Of("Paris is the capital of France."),
        RerankDocument.Of("Berlin is the capital of Germany."),
        RerankDocument.Of("London is the capital of England."),
    ]
});

// Print each result's index with its score formatted to four decimals.
foreach (var result in response.Results)
{
    Console.WriteLine($"Index: {result.Index}, Score: {result.RelevanceScore:F4}");
}
# frozen_string_literal: true

require 'liter_llm'

# ENV.fetch raises KeyError when the key is not set.
llm = LiterLlm.create_client(ENV.fetch('COHERE_API_KEY'))

candidates = [
  'Paris is the capital of France.',
  'Berlin is the capital of Germany.',
  'London is the capital of England.'
]

# Score every candidate against the query.
request = LiterLlm::RerankRequest.new(
  model: 'cohere/rerank-v3.5',
  query: 'What is the capital of France?',
  documents: candidates
)
response = llm.rerank_async(request)

response.results.each do |item|
  score = format('%.4f', item.relevance_score)
  puts "Index: #{item.index}, Score: #{score}"
end
<?php

declare(strict_types=1);

use Liter\Llm\LiterLlm;
use Liter\Llm\RerankRequest;

// getenv() returns false when the variable is unset; fall back to an empty key.
$apiKey = getenv('COHERE_API_KEY') ?: '';
$llm = LiterLlm::createClient($apiKey);

$candidates = [
    'Paris is the capital of France.',
    'Berlin is the capital of Germany.',
    'London is the capital of England.',
];

// The request object is built from a JSON document.
$payload = json_encode([
    'model' => 'cohere/rerank-v3.5',
    'query' => 'What is the capital of France?',
    'documents' => $candidates,
]);
$rerankRequest = RerankRequest::from_json($payload);

$response = $llm->rerankAsync($rerankRequest);
foreach ($response->results as $item) {
    $score = number_format($item->relevanceScore, 4);
    echo "Index: {$item->index}, Score: " . $score . PHP_EOL;
}
# Matching on {:ok, _} raises MatchError if the client cannot be created.
{:ok, client} = LiterLlm.create_client(System.get_env("COHERE_API_KEY"))

# The request is handed to the client as a JSON-encoded string.
request =
  Jason.encode!(%{
    model: "cohere/rerank-v3.5",
    query: "What is the capital of France?",
    documents: [
      "Paris is the capital of France.",
      "Berlin is the capital of Germany.",
      "London is the capital of England."
    ]
  })

{:ok, result} = LiterLlm.defaultclient_rerank_async(client, request)

# Print every score with exactly four decimals. Float.round/2 only rounds the
# value, so trailing zeros would be dropped (0.5 instead of 0.5000).
# Enum.each is used because the loop runs purely for its side effect.
Enum.each(result.results, fn r ->
  score = :erlang.float_to_binary(r.relevance_score, decimals: 4)
  IO.puts("Index: #{r.index}, Score: #{score}")
end)
import init, { createClient, WasmRerankRequest } from "@xberg-io/liter-llm-wasm";

// Initialise the WASM module before creating a client.
await init();

const llm = createClient(process.env.COHERE_API_KEY!);

const candidates = [
  "Paris is the capital of France.",
  "Berlin is the capital of Germany.",
  "London is the capital of England.",
];

// Start from the default request and fill in the fields this call needs.
const rerankRequest = WasmRerankRequest.default();
rerankRequest.model = "cohere/rerank-v3.5";
rerankRequest.query = "What is the capital of France?";
rerankRequest.documents = candidates;

const { results } = await llm.rerank(rerankRequest);
for (const { index, relevanceScore } of results) {
  console.log(`Index: ${index}, Score: ${relevanceScore.toFixed(4)}`);
}

Rerank Parameters

| Parameter | Type | Description |
| --- | --- | --- |
| model | string | Rerank model (e.g. "cohere/rerank-v3.5") |
| query | string | The query to rank documents against |
| documents | array | Documents to rerank |
| top_n | int | Number of top results to return |
| return_documents | bool | Include document text in results |

Edit this page on GitHub