Skip to content

Media (Images, Speech, Transcription)

Image Generation

Generate images from text prompts:

import asyncio
import os

from liter_llm import create_client
from liter_llm._internal_bindings import CreateImageRequest


async def main() -> None:
    """Generate one image with DALL-E 3 and print the URL of the first result."""
    llm = create_client(api_key=os.environ["OPENAI_API_KEY"])
    # The binding request object is built from its JSON representation.
    image_request = CreateImageRequest.from_json(
        '{"model":"openai/dall-e-3","prompt":"A sunset over mountains","n":1,"size":"1024x1024"}'
    )
    result = await llm.image_generate(image_request)
    first_image = result.data[0]
    print(first_image.url)


asyncio.run(main())
import { createClient } from "@xberg-io/liter-llm";

// Create the client from the API key in the environment.
const llm = createClient(process.env.OPENAI_API_KEY!);

// Request a single 1024x1024 image.
const result = await llm.imageGenerate({
  model: "openai/dall-e-3",
  prompt: "A sunset over mountains",
  n: 1,
  size: "1024x1024",
});

// Optional chaining yields `undefined` when there is no first image or URL.
const firstUrl = result.data?.[0]?.url;
console.log(firstUrl);
use liter_llm::{
    ClientConfigBuilder, CreateImageRequest, DefaultClient, LlmClient,
};

/// Generates one 1024x1024 image with DALL-E 3 and prints its URL.
///
/// Prints an empty line when the response holds no image, or when the first
/// image has no URL, instead of panicking on an out-of-bounds index.
///
/// # Errors
///
/// Returns an error if `OPENAI_API_KEY` is unset, the client cannot be
/// constructed, or the image request fails.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("openai/dall-e-3"))?;

    let response = client
        .image_generate(CreateImageRequest {
            model: "openai/dall-e-3".into(),
            prompt: "A sunset over mountains".into(),
            n: Some(1),
            size: Some("1024x1024".into()),
            ..Default::default()
        })
        .await?;

    // `first()` returns `None` on an empty list where `data[0]` would panic.
    let url = response
        .data
        .first()
        .and_then(|image| image.url.as_deref())
        .unwrap_or("");
    println!("{}", url);
    Ok(())
}
package main

import (
    "encoding/json"
    "fmt"
    "os"

    llm "github.com/xberg-io/liter-llm/packages/go"
)

// main generates one image with DALL-E 3 and prints its URL when one is present.
func main() {
    apiKey := os.Getenv("OPENAI_API_KEY")
    client, err := llm.CreateClient(apiKey, nil, nil, nil, nil)
    if err != nil {
        panic(err)
    }

    // The request struct is populated by decoding its JSON representation.
    const payload = `{
        "model": "openai/dall-e-3",
        "prompt": "A sunset over mountains",
        "n": 1,
        "size": "1024x1024"
    }`
    var imageReq llm.CreateImageRequest
    if decodeErr := json.Unmarshal([]byte(payload), &imageReq); decodeErr != nil {
        panic(decodeErr)
    }

    result, err := client.ImageGenerate(imageReq)
    if err != nil {
        panic(err)
    }
    // Nothing to print when the response carries no images.
    if len(result.Data) == 0 {
        return
    }
    if url := result.Data[0].URL; url != nil {
        fmt.Println(*url)
    }
}
import io.xberg.literllm.*;
import java.util.Optional;

/** Generates one image with DALL-E 3 and prints the URL of the first result. */
public class Main {
    public static void main(String[] args) throws Exception {
        String apiKey = System.getenv("OPENAI_API_KEY");
        // try-with-resources closes the client when the block exits.
        try (var llm = LiterLlm.createClient(apiKey)) {
            var request = CreateImageRequest.builder()
                .withPrompt("A sunset over mountains")
                .withModel(Optional.of("openai/dall-e-3"))
                .withN(Optional.of(1))
                .withSize(Optional.of("1024x1024"))
                .build();
            var result = llm.imageGenerate(request);
            var firstImage = result.data().getFirst();
            System.out.println(firstImage.url());
        }
    }
}
using LiterLlm;

// Create the client; the optional settings are all passed as null.
using var llm = LiterLlmLib.CreateClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!,
    baseUrl: null, timeoutSecs: null, maxRetries: null, modelHint: null);

// Request a single 1024x1024 image.
var imageRequest = new CreateImageRequest
{
    Model = "openai/dall-e-3",
    Prompt = "A sunset over mountains",
    N = 1,
    Size = "1024x1024"
};
var result = await llm.ImageGenerate(imageRequest);

var firstImage = result.Data[0];
Console.WriteLine(firstImage.Url);
# frozen_string_literal: true

require 'liter_llm'

# ENV.fetch raises KeyError when the API key is not set.
llm = LiterLlm.create_client(ENV.fetch('OPENAI_API_KEY'))

# Request a single 1024x1024 image.
image_request = LiterLlm::CreateImageRequest.new(
  model: 'openai/dall-e-3',
  prompt: 'A sunset over mountains',
  n: 1,
  size: '1024x1024'
)
response = llm.image_generate_async(image_request)

first_image = response.data[0]
puts first_image.url
<?php

declare(strict_types=1);

use Liter\Llm\LiterLlm;
use Liter\Llm\CreateImageRequest;

// getenv() returns false when the variable is unset; fall back to ''.
$llm = LiterLlm::createClient(getenv('OPENAI_API_KEY') ?: '');

// Request a single 1024x1024 image.
$imageRequest = new CreateImageRequest(
    prompt: 'A sunset over mountains',
    model: 'openai/dall-e-3',
    n: 1,
    size: '1024x1024',
);
$response = $llm->imageGenerateAsync($imageRequest);

$firstImage = $response->data[0];
echo $firstImage->url . PHP_EOL;
# fetch_env!/1 raises a descriptive error when the key is unset, instead of
# passing nil to create_client/1 as System.get_env/1 would.
{:ok, client} = LiterLlm.create_client(System.fetch_env!("OPENAI_API_KEY"))

# The request is passed to the binding as a JSON-encoded string.
request =
  Jason.encode!(%{
    model: "openai/dall-e-3",
    prompt: "A sunset over mountains",
    n: 1,
    size: "1024x1024"
  })

{:ok, result} = LiterLlm.defaultclient_image_generate_async(client, request)
IO.puts(Enum.at(result.data, 0).url)
import init, { createClient, WasmCreateImageRequest } from "@xberg-io/liter-llm-wasm";

// Initialise the WASM module before creating the client.
await init();

const llm = createClient(process.env.OPENAI_API_KEY!);

// Start from the default request and fill in the image parameters.
const imageRequest = WasmCreateImageRequest.default();
imageRequest.model = "openai/dall-e-3";
imageRequest.prompt = "A sunset over mountains";
imageRequest.n = 1;
imageRequest.size = "1024x1024";

const result = await llm.imageGenerate(imageRequest);
// Optional chaining yields `undefined` when there is no first image or URL.
const firstUrl = result.data?.[0]?.url;
console.log(firstUrl);

Image Parameters

Parameter Type Description
model string Image model (e.g. "openai/dall-e-3")
prompt string Text description of the image
n int Number of images to generate
size string Image size ("1024x1024", "1792x1024", "1024x1792")
quality string Quality level ("standard" or "hd")
style string Style ("vivid" or "natural")

Text-to-Speech

Generate audio from text:

import asyncio
import os
from pathlib import Path

from liter_llm import create_client
from liter_llm._internal_bindings import CreateSpeechRequest


async def main() -> None:
    """Synthesize a short phrase with tts-1 and save the audio to output.mp3."""
    llm = create_client(api_key=os.environ["OPENAI_API_KEY"])
    # The binding request object is built from its JSON representation.
    speech_request = CreateSpeechRequest.from_json(
        '{"model":"openai/tts-1","input":"Hello, world!","voice":"alloy"}'
    )
    audio = await llm.speech(speech_request)
    destination = Path("output.mp3")
    destination.write_bytes(audio)
    print(f"Wrote {len(audio)} bytes to output.mp3")


asyncio.run(main())
import { createClient } from "@xberg-io/liter-llm";
import { writeFileSync } from "node:fs";

const llm = createClient(process.env.OPENAI_API_KEY!);

// Synthesize the phrase; the result is written to disk as-is below.
const audio = await llm.speech({
  model: "openai/tts-1",
  input: "Hello, world!",
  voice: "alloy",
});

const outputPath = "output.mp3";
writeFileSync(outputPath, audio);
console.log(`Wrote ${audio.byteLength} bytes to output.mp3`);
use liter_llm::{ClientConfigBuilder, CreateSpeechRequest, DefaultClient, LlmClient};
use tokio::fs;

/// Synthesizes a short phrase with `openai/tts-1` and writes the audio to
/// `output.mp3`.
///
/// # Errors
///
/// Returns an error if `OPENAI_API_KEY` is unset, the client cannot be
/// constructed, the speech request fails, or the file cannot be written.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let api_key = std::env::var("OPENAI_API_KEY")?;
    let llm = DefaultClient::new(
        ClientConfigBuilder::new(api_key).build(),
        Some("openai/tts-1"),
    )?;

    // Fields not listed here keep their `Default` values.
    let request = CreateSpeechRequest {
        model: "openai/tts-1".into(),
        input: "Hello, world!".into(),
        voice: "alloy".into(),
        ..Default::default()
    };
    let audio = llm.speech(request).await?;

    fs::write("output.mp3", &audio).await?;
    println!("Wrote {} bytes to output.mp3", audio.len());
    Ok(())
}
package main

import (
    "encoding/json"
    "fmt"
    "os"

    llm "github.com/xberg-io/liter-llm/packages/go"
)

// main synthesizes a short phrase with tts-1 and writes the audio to output.mp3.
func main() {
    apiKey := os.Getenv("OPENAI_API_KEY")
    client, err := llm.CreateClient(apiKey, nil, nil, nil, nil)
    if err != nil {
        panic(err)
    }

    // The request struct is populated by decoding its JSON representation.
    const payload = `{
        "model": "openai/tts-1",
        "input": "Hello, world!",
        "voice": "alloy"
    }`
    var speechReq llm.CreateSpeechRequest
    if decodeErr := json.Unmarshal([]byte(payload), &speechReq); decodeErr != nil {
        panic(decodeErr)
    }

    audioData, err := client.Speech(speechReq)
    if err != nil {
        panic(err)
    }
    if writeErr := os.WriteFile("output.mp3", audioData, 0o644); writeErr != nil {
        panic(writeErr)
    }
    fmt.Printf("Wrote %d bytes to output.mp3\n", len(audioData))
}
import io.xberg.literllm.*;
import java.nio.file.Files;
import java.nio.file.Path;

/** Synthesizes a short phrase with tts-1 and writes the audio to output.mp3. */
public class Main {
    public static void main(String[] args) throws Exception {
        String apiKey = System.getenv("OPENAI_API_KEY");
        // try-with-resources closes the client when the block exits.
        try (var llm = LiterLlm.createClient(apiKey)) {
            var request = CreateSpeechRequest.builder()
                .withModel("openai/tts-1")
                .withInput("Hello, world!")
                .withVoice("alloy")
                .build();
            byte[] audio = llm.speech(request);
            Path destination = Path.of("output.mp3");
            Files.write(destination, audio);
            System.out.printf("Wrote %d bytes to output.mp3%n", audio.length);
        }
    }
}
using LiterLlm;

// Create the client; the optional settings are all passed as null.
using var llm = LiterLlmLib.CreateClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!,
    baseUrl: null, timeoutSecs: null, maxRetries: null, modelHint: null);

var speechRequest = new CreateSpeechRequest
{
    Model = "openai/tts-1",
    Input = "Hello, world!",
    Voice = "alloy"
};
var audio = await llm.Speech(speechRequest);

// Persist the audio, then report how many bytes were written.
await File.WriteAllBytesAsync("output.mp3", audio);
Console.WriteLine($"Wrote {audio.Length} bytes to output.mp3");
# frozen_string_literal: true

require 'liter_llm'

# ENV.fetch raises KeyError when the API key is not set.
llm = LiterLlm.create_client(ENV.fetch('OPENAI_API_KEY'))

speech_request = LiterLlm::CreateSpeechRequest.new(
  model: 'openai/tts-1',
  input: 'Hello, world!',
  voice: 'alloy'
)
audio = llm.speech_async(speech_request)

# pack('C*') turns the array of byte values into a binary string.
File.binwrite('output.mp3', audio.pack('C*'))
puts "Wrote #{audio.length} bytes to output.mp3"
<?php

declare(strict_types=1);

use Liter\Llm\LiterLlm;
use Liter\Llm\CreateSpeechRequest;

// getenv() returns false when the variable is unset; fall back to ''.
$llm = LiterLlm::createClient(getenv('OPENAI_API_KEY') ?: '');

$speechRequest = new CreateSpeechRequest(
    model: 'openai/tts-1',
    input: 'Hello, world!',
    voice: 'alloy',
);
$byteValues = $llm->speechAsync($speechRequest);

// speechAsync returns the raw audio as an array of byte values;
// pack('C*', ...) converts them into a binary string.
$audio = pack('C*', ...$byteValues);
file_put_contents('output.mp3', $audio);
echo 'Wrote ' . strlen($audio) . ' bytes to output.mp3' . PHP_EOL;
# fetch_env!/1 raises a descriptive error when the key is unset, instead of
# passing nil to create_client/1 as System.get_env/1 would.
{:ok, client} = LiterLlm.create_client(System.fetch_env!("OPENAI_API_KEY"))

# The request is passed to the binding as a JSON-encoded string.
request =
  Jason.encode!(%{
    model: "openai/tts-1",
    input: "Hello, world!",
    voice: "alloy"
  })

{:ok, audio_bytes} = LiterLlm.defaultclient_speech_async(client, request)
File.write!("output.mp3", audio_bytes)
IO.puts("Wrote #{byte_size(audio_bytes)} bytes to output.mp3")
import init, { createClient, WasmCreateSpeechRequest } from "@xberg-io/liter-llm-wasm";

// Initialise the WASM module before creating the client.
await init();

const llm = createClient(process.env.OPENAI_API_KEY!);

// Start from the default request and fill in the speech parameters.
const speechRequest = WasmCreateSpeechRequest.default();
speechRequest.model = "openai/tts-1";
speechRequest.input = "Hello, world!";
speechRequest.voice = "alloy";

const audioData = await llm.speech(speechRequest);
console.log(`Generated ${audioData.byteLength} bytes of audio`);

Speech Parameters

Parameter Type Description
model string TTS model (e.g. "openai/tts-1")
input string Text to synthesize
voice string Voice preset ("alloy", "echo", "fable", "onyx", "nova", "shimmer")
response_format string Audio format ("mp3", "opus", "aac", "flac")
speed float Playback speed (0.25-4.0)

Speech-to-Text

Transcribe audio to text:

import asyncio
import base64
import json
import os
from pathlib import Path

from liter_llm import create_client
from liter_llm._internal_bindings import CreateTranscriptionRequest


async def main() -> None:
    """Transcribe audio.mp3 with whisper-1 and print the resulting text."""
    llm = create_client(api_key=os.environ["OPENAI_API_KEY"])
    # The request carries the audio as a base64 string inside the JSON payload.
    raw_audio = Path("audio.mp3").read_bytes()
    payload = {
        "model": "openai/whisper-1",
        "file": base64.b64encode(raw_audio).decode("ascii"),
    }
    transcription_request = CreateTranscriptionRequest.from_json(json.dumps(payload))
    transcription = await llm.transcribe(transcription_request)
    print(transcription.text)


asyncio.run(main())
import { createClient } from "@xberg-io/liter-llm";
import { readFileSync } from "node:fs";

const llm = createClient(process.env.OPENAI_API_KEY!);

// The request carries the audio as a base64-encoded string, not raw bytes.
const audioBase64 = readFileSync("audio.mp3").toString("base64");
const transcription = await llm.transcribe({
  model: "openai/whisper-1",
  file: audioBase64,
});
console.log(transcription.text);
use base64::Engine;
use liter_llm::{ClientConfigBuilder, CreateTranscriptionRequest, DefaultClient, LlmClient};
use tokio::fs;

/// Transcribes `audio.mp3` with `openai/whisper-1` and prints the text.
///
/// # Errors
///
/// Returns an error if `OPENAI_API_KEY` is unset, the client cannot be
/// constructed, the audio file cannot be read, or the request fails.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let api_key = std::env::var("OPENAI_API_KEY")?;
    let llm = DefaultClient::new(
        ClientConfigBuilder::new(api_key).build(),
        Some("openai/whisper-1"),
    )?;

    // The request carries the audio as a base64-encoded string.
    let raw_audio = fs::read("audio.mp3").await?;
    let encoded_audio = base64::engine::general_purpose::STANDARD.encode(&raw_audio);

    // Fields not listed here keep their `Default` values.
    let request = CreateTranscriptionRequest {
        model: "openai/whisper-1".into(),
        file: encoded_audio,
        ..Default::default()
    };
    let transcription = llm.transcribe(request).await?;

    println!("{}", transcription.text);
    Ok(())
}
package main

import (
    "encoding/base64"
    "encoding/json"
    "fmt"
    "os"

    llm "github.com/xberg-io/liter-llm/packages/go"
)

// main transcribes audio.mp3 with whisper-1 and prints the resulting text.
// Every error, including a JSON encoding failure, aborts with a panic.
func main() {
    client, err := llm.CreateClient(os.Getenv("OPENAI_API_KEY"), nil, nil, nil, nil)
    if err != nil {
        panic(err)
    }

    audio, err := os.ReadFile("audio.mp3")
    if err != nil {
        panic(err)
    }

    // The request carries the audio as a base64 string inside the JSON payload.
    body, err := json.Marshal(map[string]string{
        "model": "openai/whisper-1",
        "file":  base64.StdEncoding.EncodeToString(audio),
    })
    if err != nil {
        // Checked like every other call here rather than discarded.
        panic(err)
    }

    var req llm.CreateTranscriptionRequest
    if err := json.Unmarshal(body, &req); err != nil {
        panic(err)
    }

    resp, err := client.Transcribe(req)
    if err != nil {
        panic(err)
    }
    fmt.Println(resp.Text)
}
import io.xberg.literllm.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Base64;

/** Transcribes audio.mp3 with whisper-1 and prints the resulting text. */
public class Main {
    public static void main(String[] args) throws Exception {
        String apiKey = System.getenv("OPENAI_API_KEY");
        // try-with-resources closes the client when the block exits.
        try (var llm = LiterLlm.createClient(apiKey)) {
            // The request carries the audio as a base64-encoded string.
            byte[] rawAudio = Files.readAllBytes(Path.of("audio.mp3"));
            String encodedAudio = Base64.getEncoder().encodeToString(rawAudio);
            var request = CreateTranscriptionRequest.builder()
                .withModel("openai/whisper-1")
                .withFile(encodedAudio)
                .build();
            var transcription = llm.transcribe(request);
            System.out.println(transcription.text());
        }
    }
}
using LiterLlm;

// Create the client; the optional settings are all passed as null.
using var llm = LiterLlmLib.CreateClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!,
    baseUrl: null, timeoutSecs: null, maxRetries: null, modelHint: null);

// The request carries the audio as a base64-encoded string.
var rawAudio = await File.ReadAllBytesAsync("audio.mp3");
var transcriptionRequest = new CreateTranscriptionRequest
{
    Model = "openai/whisper-1",
    File = Convert.ToBase64String(rawAudio)
};
var transcription = await llm.Transcribe(transcriptionRequest);
Console.WriteLine(transcription.Text);
# frozen_string_literal: true

require 'base64'
require 'liter_llm'

# ENV.fetch raises KeyError when the API key is not set.
llm = LiterLlm.create_client(ENV.fetch('OPENAI_API_KEY'))

# The request carries the audio as a base64-encoded string.
encoded_audio = Base64.strict_encode64(File.binread('audio.mp3'))
transcription_request = LiterLlm::CreateTranscriptionRequest.new(
  model: 'openai/whisper-1',
  file: encoded_audio
)
transcription = llm.transcribe_async(transcription_request)

puts transcription.text
<?php

declare(strict_types=1);

use Liter\Llm\LiterLlm;
use Liter\Llm\CreateTranscriptionRequest;

// getenv() returns false when the variable is unset; fall back to ''.
$client = LiterLlm::createClient(getenv('OPENAI_API_KEY') ?: '');

// file_get_contents() returns false on failure; under strict_types that would
// surface as an opaque TypeError from base64_encode(), so fail explicitly.
$audio = file_get_contents('audio.mp3');
if ($audio === false) {
    throw new \RuntimeException('Unable to read audio.mp3');
}

// The request carries the audio as a base64-encoded string.
$result = $client->transcribeAsync(new CreateTranscriptionRequest(
    model: 'openai/whisper-1',
    file: base64_encode($audio),
));

echo $result->text . PHP_EOL;
# fetch_env!/1 raises a descriptive error when the key is unset, instead of
# passing nil to create_client/1 as System.get_env/1 would.
{:ok, client} = LiterLlm.create_client(System.fetch_env!("OPENAI_API_KEY"))

# The request is a JSON-encoded string carrying the audio as base64.
request =
  Jason.encode!(%{
    model: "openai/whisper-1",
    file: Base.encode64(File.read!("audio.mp3"))
  })

{:ok, result} = LiterLlm.defaultclient_transcribe_async(client, request)
IO.puts(result.text)
import init, { createClient, WasmCreateTranscriptionRequest } from "@xberg-io/liter-llm-wasm";

// Initialise the WASM module before creating the client.
await init();

const client = createClient(process.env.OPENAI_API_KEY!);

// `file` is a base64-encoded string, not raw bytes.
const audioBytes = new Uint8Array(/* read your audio file */);

// Convert in fixed-size chunks: spreading the whole array into a single
// String.fromCharCode(...) call passes one argument per byte and throws a
// RangeError once the file exceeds the engine's argument limit.
const CHUNK_SIZE = 0x8000;
let binary = "";
for (let offset = 0; offset < audioBytes.length; offset += CHUNK_SIZE) {
  binary += String.fromCharCode(...audioBytes.subarray(offset, offset + CHUNK_SIZE));
}
const fileBase64 = btoa(binary);

const request = WasmCreateTranscriptionRequest.default();
request.model = "openai/whisper-1";
request.file = fileBase64;

const response = await client.transcribe(request);
console.log(response.text);

Transcription Parameters

Parameter Type Description
model string STT model (e.g. "openai/whisper-1")
file string Base64-encoded audio file data
language string ISO-639-1 language code
prompt string Optional context hint
temperature float Sampling temperature
response_format string Output format ("json", "text", "srt", "vtt")

Content Moderation

Classify content for policy violations:

import asyncio
import os

from liter_llm import create_client
from liter_llm._internal_bindings import ModerationRequest

# Attribute names that main() looks up on both `result.categories` and
# `result.category_scores` when reporting flagged categories.
CATEGORIES = (
    "sexual", "hate", "harassment", "self_harm", "sexual_minors",
    "hate_threatening", "violence_graphic", "self_harm_intent",
    "self_harm_instructions", "harassment_threatening", "violence",
)


async def main() -> None:
    """Moderate a sample message and print each flagged category with its score."""
    llm = create_client(api_key=os.environ["OPENAI_API_KEY"])
    # The binding request object is built from its JSON representation.
    moderation_request = ModerationRequest.from_json(
        '{"model":"openai/omni-moderation-latest","input":"This is a test message."}'
    )
    moderation = await llm.moderate(moderation_request)
    outcome = moderation.results[0]
    print(f"Flagged: {outcome.flagged}")
    for category in CATEGORIES:
        # Only categories whose flag is truthy are reported.
        if not getattr(outcome.categories, category):
            continue
        score = getattr(outcome.category_scores, category)
        print(f"  {category}: {score:.4f}")


asyncio.run(main())
import { createClient } from "@xberg-io/liter-llm";

const llm = createClient(process.env.OPENAI_API_KEY!);
const moderation = await llm.moderate({
  model: "openai/omni-moderation-latest",
  input: "This is a test message.",
});

const first = moderation.results[0];
console.log(`Flagged: ${first.flagged}`);

// View the category objects as string-keyed records so they can be iterated.
const flags = first.categories as Record<string, boolean | undefined>;
const scoreByCategory = first.categoryScores as Record<string, number | undefined>;
for (const name of Object.keys(flags)) {
  if (!flags[name]) continue;
  // A missing score is reported as 0.
  const score = scoreByCategory[name] ?? 0;
  console.log(`  ${name}: ${score.toFixed(4)}`);
}
use liter_llm::{ClientConfigBuilder, DefaultClient, LlmClient, ModerationInput, ModerationRequest};

/// Moderates a sample message and prints the overall flag, followed by the
/// score of each reported category (sexual, hate, self-harm, violence) that
/// was flagged.
///
/// # Errors
///
/// Returns an error if `OPENAI_API_KEY` is unset, the client cannot be
/// constructed, the request fails, or the response contains no results.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("openai/omni-moderation-latest"))?;

    let response = client
        .moderate(ModerationRequest {
            model: Some("openai/omni-moderation-latest".into()),
            input: ModerationInput::Single("This is a test message.".into()),
        })
        .await?;

    // Report an error instead of panicking when the result list is empty.
    let result = response
        .results
        .first()
        .ok_or("moderation response contained no results")?;
    println!("Flagged: {}", result.flagged);

    // (label, flagged, score) for each category this example reports.
    let reported = [
        ("sexual", result.categories.sexual, result.category_scores.sexual),
        ("hate", result.categories.hate, result.category_scores.hate),
        ("self-harm", result.categories.self_harm, result.category_scores.self_harm),
        ("violence", result.categories.violence, result.category_scores.violence),
    ];
    for (label, flagged, score) in reported {
        if flagged {
            println!("  {}: {:.4}", label, score);
        }
    }
    Ok(())
}
package main

import (
    "encoding/json"
    "fmt"
    "os"

    llm "github.com/xberg-io/liter-llm/packages/go"
)

// main moderates a sample message and prints whether it was flagged.
// Nothing is printed when the response contains no results.
func main() {
    client, err := llm.CreateClient(os.Getenv("OPENAI_API_KEY"), nil, nil, nil, nil)
    if err != nil {
        panic(err)
    }

    // The request struct is populated by decoding its JSON representation.
    var req llm.ModerationRequest
    if err := json.Unmarshal([]byte(`{
        "model": "openai/omni-moderation-latest",
        "input": "This is a test message."
    }`), &req); err != nil {
        panic(err)
    }

    resp, err := client.Moderate(req)
    if err != nil {
        panic(err)
    }
    // Guard the index, as the image example does, so an empty result list
    // does not cause an index-out-of-range panic.
    if len(resp.Results) > 0 {
        first := resp.Results[0]
        fmt.Printf("Flagged: %v\n", first.Flagged)
    }
}
import io.xberg.literllm.*;
import java.util.Optional;

/** Moderates a sample message and prints the overall flag plus two category flags. */
public class Main {
    public static void main(String[] args) throws Exception {
        String apiKey = System.getenv("OPENAI_API_KEY");
        // try-with-resources closes the client when the block exits.
        try (var llm = LiterLlm.createClient(apiKey)) {
            var request = ModerationRequest.builder()
                .withInput(ModerationInput.of("This is a test message."))
                .withModel(Optional.of("openai/omni-moderation-latest"))
                .build();
            var first = llm.moderate(request).results().getFirst();
            var categories = first.categories();
            System.out.println("Flagged: " + first.flagged());
            // ModerationCategories is a typed record with boolean fields (sexual,
            // hate, harassment, selfHarm, violence, ...); read them directly.
            if (categories.hate()) {
                System.out.println("  hate flagged");
            }
            if (categories.harassment()) {
                System.out.println("  harassment flagged");
            }
        }
    }
}
using LiterLlm;

// Create the client; the optional settings are all passed as null.
using var llm = LiterLlmLib.CreateClient(
    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")!,
    baseUrl: null, timeoutSecs: null, maxRetries: null, modelHint: null);

var moderationRequest = new ModerationRequest
{
    Model = "openai/omni-moderation-latest",
    Input = ModerationInput.Of("This is a test message.")
};
var moderation = await llm.Moderate(moderationRequest);

var first = moderation.Results[0];
var categories = first.Categories;
Console.WriteLine($"Flagged: {first.Flagged}");
// ModerationCategories is a typed class with bool fields (Sexual, Hate,
// Harassment, SelfHarm, Violence, ...); read them directly.
if (categories.Hate)
{
    Console.WriteLine("  hate flagged");
}
if (categories.Harassment)
{
    Console.WriteLine("  harassment flagged");
}
# frozen_string_literal: true

require 'liter_llm'

# ENV.fetch raises KeyError when the API key is not set.
llm = LiterLlm.create_client(ENV.fetch('OPENAI_API_KEY'))

moderation_request = LiterLlm::ModerationRequest.new(
  model: 'openai/omni-moderation-latest',
  input: 'This is a test message.'
)
moderation = llm.moderate_async(moderation_request)

# Report the overall flag of the first result.
outcome = moderation.results[0]
puts "Flagged: #{outcome.flagged}"
<?php

declare(strict_types=1);

use Liter\Llm\LiterLlm;
use Liter\Llm\ModerationRequest;

// getenv() returns false when the variable is unset; fall back to ''.
$llm = LiterLlm::createClient(getenv('OPENAI_API_KEY') ?: '');

// Build the request from its JSON representation.
$payload = json_encode([
    'model' => 'openai/omni-moderation-latest',
    'input' => 'This is a test message.',
]);
$moderationRequest = ModerationRequest::from_json($payload);

$response = $llm->moderateAsync($moderationRequest);
$firstResult = $response->results[0];
// Spell the boolean out; echoing false directly would print an empty string.
$flaggedLabel = $firstResult->flagged ? 'true' : 'false';
echo 'Flagged: ' . $flaggedLabel . PHP_EOL;
# fetch_env!/1 raises a descriptive error when the key is unset, instead of
# passing nil to create_client/1 as System.get_env/1 would.
{:ok, client} = LiterLlm.create_client(System.fetch_env!("OPENAI_API_KEY"))

# The request is passed to the binding as a JSON-encoded string.
request =
  Jason.encode!(%{
    model: "openai/omni-moderation-latest",
    input: "This is a test message."
  })

{:ok, result} = LiterLlm.defaultclient_moderate_async(client, request)
first = Enum.at(result.results, 0)
IO.puts("Flagged: #{first.flagged}")
import init, { createClient, WasmModerationRequest } from "@xberg-io/liter-llm-wasm";

// Initialise the WASM module before creating the client.
await init();

const llm = createClient(process.env.OPENAI_API_KEY!);

// Start from the default request and fill in the moderation parameters.
const moderationRequest = WasmModerationRequest.default();
moderationRequest.model = "openai/omni-moderation-latest";
moderationRequest.input = "This is a test message.";

const moderation = await llm.moderate(moderationRequest);
const first = moderation.results[0];
console.log(`Flagged: ${first.flagged}`);

// View the category objects as string-keyed records so they can be iterated.
const flags = first.categories as Record<string, boolean | undefined>;
const scoreByCategory = first.categoryScores as Record<string, number | undefined>;
for (const name of Object.keys(flags)) {
  if (!flags[name]) continue;
  // A missing score is reported as 0.
  const score = scoreByCategory[name] ?? 0;
  console.log(`  ${name}: ${score.toFixed(4)}`);
}

Moderation Parameters

Parameter Type Description
input string/array Content to classify
model string Moderation model (e.g. "openai/omni-moderation-latest")

Edit this page on GitHub