Media (Images, Speech, Transcription)¶

Image Generation¶

Generate images from text prompts:

Python | TypeScript | Rust | Go | Java | C# | Ruby | PHP | Elixir | WASM

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    """Request a single 1024x1024 image for a text prompt and print its URL."""
    llm = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    # Keep the request parameters together so they are easy to tweak.
    image_request = {
        "model": "openai/dall-e-3",
        "prompt": "A sunset over mountains",
        "n": 1,
        "size": "1024x1024",
    }
    result = await llm.image_generate(**image_request)
    first_image = result.data[0]
    print(first_image.url)

asyncio.run(main())

import { LlmClient } from "@kreuzberg/liter-llm";

// Read the key once, then build the client from it.
const apiKey = process.env.OPENAI_API_KEY!;
const llm = new LlmClient({ apiKey });

// Ask for one 1024x1024 image and log the URL of the first result.
const { data: images } = await llm.imageGenerate({
  model: "openai/dall-e-3",
  prompt: "A sunset over mountains",
  n: 1,
  size: "1024x1024",
});
console.log(images[0].url);

use liter_llm::{
    ClientConfigBuilder, CreateImageRequest, DefaultClient, LlmClient,
};

/// Generates one image from a text prompt and prints the URL of the result.
///
/// # Errors
///
/// Returns an error if `OPENAI_API_KEY` is not set, if the client cannot be
/// constructed, if the request fails, or if the response carries no image URL.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfigBuilder::new(std::env::var("OPENAI_API_KEY")?)
        .build();
    let client = DefaultClient::new(config, Some("openai/dall-e-3"))?;

    let response = client
        .image_generate(CreateImageRequest {
            model: "openai/dall-e-3".into(),
            prompt: "A sunset over mountains".into(),
            n: Some(1),
            size: Some("1024x1024".into()),
            ..Default::default()
        })
        .await?;

    // `data[0]` would panic on an empty list, and falling back to "" would
    // print a blank line when no URL is present; report both as an error.
    let url = response
        .data
        .first()
        .and_then(|image| image.url.as_deref())
        .ok_or("image response contained no URL")?;
    println!("{url}");
    Ok(())
}

package main

import (
 "context"
 "fmt"
 "os"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

// main generates one image from a text prompt and prints the URL of the result.
func main() {
 apiKey := os.Getenv("OPENAI_API_KEY")
 client := llm.NewClient(llm.WithAPIKey(apiKey))

 // Build the request up front so the call site stays short.
 req := &llm.CreateImageRequest{
  Model:  "openai/dall-e-3",
  Prompt: "A sunset over mountains",
  N:      1,
  Size:   "1024x1024",
 }

 images, err := client.ImageGenerate(context.Background(), req)
 if err != nil {
  panic(err)
 }
 fmt.Println(images.Data[0].URL)
}

import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;

/** Generates one image from a text prompt and prints the URL of the result. */
public class Main {
    public static void main(String[] args) throws Exception {
        var apiKey = System.getenv("OPENAI_API_KEY");
        // try-with-resources closes the client when the block exits.
        try (var llm = LlmClient.builder().apiKey(apiKey).build()) {
            var request = new CreateImageRequest(
                "openai/dall-e-3",
                "A sunset over mountains",
                1,
                "1024x1024"
            );
            var images = llm.imageGenerate(request).data();
            System.out.println(images.getFirst().url());
        }
    }
}

using LiterLlm;

// `!` suppresses the nullable warning; the variable is expected to be set.
var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")!;
await using var llm = new LlmClient(apiKey: apiKey);

// Describe the image to generate.
var request = new CreateImageRequest(
    Model: "openai/dall-e-3",
    Prompt: "A sunset over mountains",
    N: 1,
    Size: "1024x1024"
);

var images = await llm.ImageGenerateAsync(request);
Console.WriteLine(images.Data[0].Url);

# frozen_string_literal: true

require "liter_llm"
require "json"

llm = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})

# The request goes in as a JSON string and the reply is parsed from JSON text.
request = {
  model: "openai/dall-e-3",
  prompt: "A sunset over mountains",
  n: 1,
  size: "1024x1024"
}
raw_response = llm.image_generate(JSON.generate(request))
images = JSON.parse(raw_response)

puts images.dig("data", 0, "url")

<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

$apiKey = getenv('OPENAI_API_KEY') ?: '';
$llm = new LlmClient(apiKey: $apiKey);

// The request goes in as a JSON string and the reply is decoded from JSON text.
$request = [
    'model' => 'openai/dall-e-3',
    'prompt' => 'A sunset over mountains',
    'n' => 1,
    'size' => '1024x1024',
];
$rawResponse = $llm->imageGenerate(json_encode($request));
$images = json_decode($rawResponse, true);

echo $images['data'][0]['url'] . PHP_EOL;

# Request parameters and client options, bound separately for readability.
params = %{
  model: "openai/dall-e-3",
  prompt: "A sunset over mountains",
  n: 1,
  size: "1024x1024"
}

opts = [api_key: System.fetch_env!("OPENAI_API_KEY")]

# Match on the success tuple; any other result raises a MatchError.
{:ok, result} = LiterLlm.image_generate(params, opts)

first_image = hd(result["data"])
IO.puts(first_image["url"])

import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

// Initialise the WASM module before constructing the client.
await init();

const apiKey = process.env.OPENAI_API_KEY!;
const llm = new LlmClient({ apiKey });

// Ask for one 1024x1024 image and log the URL of the first result.
const { data: images } = await llm.imageGenerate({
  model: "openai/dall-e-3",
  prompt: "A sunset over mountains",
  n: 1,
  size: "1024x1024",
});
console.log(images[0].url);

Image Parameters¶

Parameter	Type	Description
`model`	string	Image model (e.g. `"openai/dall-e-3"`)
`prompt`	string	Text description of the image
`n`	int	Number of images to generate
`size`	string	Image size (`"1024x1024"`, `"1792x1024"`, `"1024x1792"`)
`quality`	string	Quality level (`"standard"` or `"hd"`)
`style`	string	Style (`"vivid"` or `"natural"`)

Text-to-Speech¶

Generate audio from text:

Python | TypeScript | Rust | Go | Java | C# | Ruby | PHP | Elixir | WASM

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    """Synthesize a short phrase with the "alloy" voice and save it as output.mp3."""
    llm = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    speech_request = {
        "model": "openai/tts-1",
        "input": "Hello, world!",
        "voice": "alloy",
    }
    audio = await llm.speech(**speech_request)
    # Binary mode: the payload is raw audio, not text.
    with open("output.mp3", "wb") as out_file:
        out_file.write(audio)
    print(f"Wrote {len(audio)} bytes to output.mp3")

asyncio.run(main())

import { LlmClient } from "@kreuzberg/liter-llm";
import { writeFileSync } from "node:fs";

const apiKey = process.env.OPENAI_API_KEY!;
const llm = new LlmClient({ apiKey });

// Synthesize the text, then persist the returned audio bytes.
const audio = await llm.speech({
  model: "openai/tts-1",
  input: "Hello, world!",
  voice: "alloy",
});
const outputPath = "output.mp3";
writeFileSync(outputPath, audio);
console.log(`Wrote ${audio.byteLength} bytes to output.mp3`);

use liter_llm::{ClientConfigBuilder, CreateSpeechRequest, DefaultClient, LlmClient};
use tokio::fs;

/// Synthesizes a short phrase and writes the returned audio to `output.mp3`.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let api_key = std::env::var("OPENAI_API_KEY")?;
    let client = DefaultClient::new(
        ClientConfigBuilder::new(api_key).build(),
        Some("openai/tts-1"),
    )?;

    // Fields not listed here keep their `Default` values.
    let request = CreateSpeechRequest {
        model: "openai/tts-1".into(),
        input: "Hello, world!".into(),
        voice: "alloy".into(),
        ..Default::default()
    };
    let audio = client.speech(request).await?;

    fs::write("output.mp3", &audio).await?;
    println!("Wrote {} bytes to output.mp3", audio.len());
    Ok(())
}

package main

import (
 "context"
 "fmt"
 "os"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

// main synthesizes a short phrase and writes the returned audio to output.mp3.
func main() {
 apiKey := os.Getenv("OPENAI_API_KEY")
 client := llm.NewClient(llm.WithAPIKey(apiKey))

 req := &llm.CreateSpeechRequest{
  Model: "openai/tts-1",
  Input: "Hello, world!",
  Voice: "alloy",
 }
 audio, err := client.Speech(context.Background(), req)
 if err != nil {
  panic(err)
 }

 // 0644: owner read/write, group and others read-only.
 if writeErr := os.WriteFile("output.mp3", audio, 0644); writeErr != nil {
  panic(writeErr)
 }
 fmt.Printf("Wrote %d bytes to output.mp3\n", len(audio))
}

import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.nio.file.Files;
import java.nio.file.Path;

/** Synthesizes a short phrase and writes the returned audio to output.mp3. */
public class Main {
    public static void main(String[] args) throws Exception {
        var apiKey = System.getenv("OPENAI_API_KEY");
        // try-with-resources closes the client when the block exits.
        try (var llm = LlmClient.builder().apiKey(apiKey).build()) {
            var request = new CreateSpeechRequest(
                "openai/tts-1",
                "Hello, world!",
                "alloy"
            );
            byte[] audio = llm.speech(request);
            var outputPath = Path.of("output.mp3");
            Files.write(outputPath, audio);
            System.out.printf("Wrote %d bytes to output.mp3%n", audio.length);
        }
    }
}

using LiterLlm;

// `!` suppresses the nullable warning; the variable is expected to be set.
var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")!;
await using var llm = new LlmClient(apiKey: apiKey);

var request = new CreateSpeechRequest(
    Model: "openai/tts-1",
    Input: "Hello, world!",
    Voice: "alloy"
);
var audio = await llm.SpeechAsync(request);

// Persist the returned bytes, then report how many were written.
const string outputPath = "output.mp3";
await File.WriteAllBytesAsync(outputPath, audio);
Console.WriteLine($"Wrote {audio.Length} bytes to output.mp3");

# frozen_string_literal: true

require "liter_llm"
require "json"

llm = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})

# The request is serialized to a JSON string before it is handed to the client.
request = {
  model: "openai/tts-1",
  input: "Hello, world!",
  voice: "alloy"
}
audio = llm.speech(JSON.generate(request))

# binwrite writes the bytes in binary mode.
File.binwrite("output.mp3", audio)
puts "Wrote #{audio.bytesize} bytes to output.mp3"

<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

$apiKey = getenv('OPENAI_API_KEY') ?: '';
$llm = new LlmClient(apiKey: $apiKey);

// The request is serialized to a JSON string before it is handed to the client.
$request = [
    'model' => 'openai/tts-1',
    'input' => 'Hello, world!',
    'voice' => 'alloy',
];
$audio = $llm->speech(json_encode($request));

file_put_contents('output.mp3', $audio);
$byteCount = strlen($audio);
echo 'Wrote ' . $byteCount . ' bytes to output.mp3' . PHP_EOL;

# Request parameters and client options, bound separately for readability.
params = %{
  model: "openai/tts-1",
  input: "Hello, world!",
  voice: "alloy"
}

opts = [api_key: System.fetch_env!("OPENAI_API_KEY")]

# Match on the success tuple; any other result raises a MatchError.
{:ok, audio} = LiterLlm.speech(params, opts)

File.write!("output.mp3", audio)
IO.puts("Wrote #{byte_size(audio)} bytes to output.mp3")

import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

// Initialise the WASM module before constructing the client.
await init();

const apiKey = process.env.OPENAI_API_KEY!;
const llm = new LlmClient({ apiKey });

// This variant only reports the size of the audio; nothing is written to disk.
const audio = await llm.speech({
  model: "openai/tts-1",
  input: "Hello, world!",
  voice: "alloy",
});
console.log(`Generated ${audio.byteLength} bytes of audio`);

Speech Parameters¶

Parameter	Type	Description
`model`	string	TTS model (e.g. `"openai/tts-1"`)
`input`	string	Text to synthesize
`voice`	string	Voice preset (`"alloy"`, `"echo"`, `"fable"`, `"onyx"`, `"nova"`, `"shimmer"`)
`response_format`	string	Audio format (`"mp3"`, `"opus"`, `"aac"`, `"flac"`)
`speed`	float	Playback speed (0.25-4.0)

Speech-to-Text¶

Transcribe audio to text:

Python | TypeScript | Rust | Go | Java | C# | Ruby | PHP | Elixir | WASM

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    """Transcribe the local file audio.mp3 and print the recognized text."""
    llm = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    audio_name = "audio.mp3"
    # Binary mode: the audio is sent as raw bytes.
    with open(audio_name, "rb") as audio_file:
        audio = audio_file.read()
    transcription = await llm.transcribe(
        model="openai/whisper-1",
        file=audio,
        filename=audio_name,
    )
    print(transcription.text)

asyncio.run(main())

import { LlmClient } from "@kreuzberg/liter-llm";
import { readFileSync } from "node:fs";

const apiKey = process.env.OPENAI_API_KEY!;
const llm = new LlmClient({ apiKey });

// Load the audio from disk and send it together with its file name.
const audioName = "audio.mp3";
const audio = readFileSync(audioName);
const { text } = await llm.transcribe({
  model: "openai/whisper-1",
  file: audio,
  filename: audioName,
});
console.log(text);

use liter_llm::{ClientConfigBuilder, CreateTranscriptionRequest, DefaultClient, LlmClient};
use tokio::fs;

/// Reads `audio.mp3`, sends it for transcription and prints the resulting text.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let api_key = std::env::var("OPENAI_API_KEY")?;
    let config = ClientConfigBuilder::new(api_key).build();
    let client = DefaultClient::new(config, Some("openai/whisper-1"))?;

    let audio = fs::read("audio.mp3").await?;
    // Fields not listed here keep their `Default` values.
    let request = CreateTranscriptionRequest {
        model: "openai/whisper-1".into(),
        file: audio,
        filename: "audio.mp3".into(),
        ..Default::default()
    };
    let transcription = client.transcribe(request).await?;

    println!("{}", transcription.text);
    Ok(())
}

package main

import (
 "context"
 "fmt"
 "os"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

// main reads audio.mp3, sends it for transcription and prints the resulting text.
func main() {
 apiKey := os.Getenv("OPENAI_API_KEY")
 client := llm.NewClient(llm.WithAPIKey(apiKey))

 audio, readErr := os.ReadFile("audio.mp3")
 if readErr != nil {
  panic(readErr)
 }

 req := &llm.CreateTranscriptionRequest{
  Model:    "openai/whisper-1",
  File:     audio,
  Filename: "audio.mp3",
 }
 transcription, err := client.Transcribe(context.Background(), req)
 if err != nil {
  panic(err)
 }
 fmt.Println(transcription.Text)
}

import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;
import java.nio.file.Files;
import java.nio.file.Path;

/** Reads audio.mp3, sends it for transcription and prints the resulting text. */
public class Main {
    public static void main(String[] args) throws Exception {
        var apiKey = System.getenv("OPENAI_API_KEY");
        // try-with-resources closes the client when the block exits.
        try (var llm = LlmClient.builder().apiKey(apiKey).build()) {
            byte[] audio = Files.readAllBytes(Path.of("audio.mp3"));
            var request = new CreateTranscriptionRequest(
                "openai/whisper-1",
                audio,
                "audio.mp3"
            );
            var transcription = llm.transcribe(request);
            System.out.println(transcription.text());
        }
    }
}

using LiterLlm;

// `!` suppresses the nullable warning; the variable is expected to be set.
var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")!;
await using var llm = new LlmClient(apiKey: apiKey);

// Load the audio from disk and send it together with its file name.
var audio = await File.ReadAllBytesAsync("audio.mp3");
var request = new CreateTranscriptionRequest(
    Model: "openai/whisper-1",
    File: audio,
    Filename: "audio.mp3"
);
var transcription = await llm.TranscribeAsync(request);
Console.WriteLine(transcription.Text);

# frozen_string_literal: true

require "liter_llm"
require "json"

llm = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})

audio = File.binread("audio.mp3")
# Metadata travels as a JSON string; the raw audio is a separate argument.
metadata = JSON.generate({ model: "openai/whisper-1", filename: "audio.mp3" })
transcription = JSON.parse(llm.transcribe(metadata, audio))

puts transcription["text"]

<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

$client = new LlmClient(apiKey: getenv('OPENAI_API_KEY') ?: '');

// file_get_contents() returns false instead of throwing when the file cannot
// be read; fail here with a clear message rather than passing false on.
$audioBytes = file_get_contents('audio.mp3');
if ($audioBytes === false) {
    throw new \RuntimeException('Unable to read audio.mp3');
}

// Metadata travels as a JSON string; the raw audio is a separate argument.
// JSON_THROW_ON_ERROR turns encode/decode failures into exceptions instead of
// silent false/null results.
$request = json_encode([
    'model' => 'openai/whisper-1',
    'filename' => 'audio.mp3',
], JSON_THROW_ON_ERROR);
$response = json_decode(
    $client->transcribe($request, $audioBytes),
    true,
    flags: JSON_THROW_ON_ERROR
);

echo $response['text'] . PHP_EOL;

# File.read!/1 raises if the file cannot be read.
audio = File.read!("audio.mp3")

params = %{
  model: "openai/whisper-1",
  file: audio,
  filename: "audio.mp3"
}

opts = [api_key: System.fetch_env!("OPENAI_API_KEY")]

# Match on the success tuple; any other result raises a MatchError.
{:ok, transcription} = LiterLlm.transcribe(params, opts)

IO.puts(transcription["text"])

import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

// Initialise the WASM module before constructing the client.
await init();

const apiKey = process.env.OPENAI_API_KEY!;
const llm = new LlmClient({ apiKey });

// Placeholder: supply the real audio bytes here.
const audio = new Uint8Array(/* audio file bytes */);
const { text } = await llm.transcribe({
  model: "openai/whisper-1",
  file: audio,
  filename: "audio.mp3",
});
console.log(text);

Transcription Parameters¶

Parameter	Type	Description
`model`	string	STT model (e.g. `"openai/whisper-1"`)
`file`	bytes	Audio file data
`filename`	string	Name of the audio file (e.g. `"audio.mp3"`)
`language`	string	ISO-639-1 language code
`prompt`	string	Optional context hint
`temperature`	float	Sampling temperature
`response_format`	string	Output format (`"json"`, `"text"`, `"srt"`, `"vtt"`)

Content Moderation¶

Classify content for policy violations:

Python | TypeScript | Rust | Go | Java | C# | Ruby | PHP | Elixir | WASM

import asyncio
import os
from liter_llm import LlmClient

async def main() -> None:
    """Moderate a sample message; print the verdict and every flagged category."""
    llm = LlmClient(api_key=os.environ["OPENAI_API_KEY"])
    moderation = await llm.moderate(
        model="openai/omni-moderation-latest",
        input="This is a test message.",
    )
    verdict = moderation.results[0]
    print(f"Flagged: {verdict.flagged}")
    # Only categories that were flagged are listed, each with its score.
    flagged_names = [name for name, hit in verdict.categories.items() if hit]
    for category in flagged_names:
        score = verdict.category_scores[category]
        print(f"  {category}: {score:.4f}")

asyncio.run(main())

import { LlmClient } from "@kreuzberg/liter-llm";

const apiKey = process.env.OPENAI_API_KEY!;
const llm = new LlmClient({ apiKey });
const { results } = await llm.moderate({
  model: "openai/omni-moderation-latest",
  input: "This is a test message.",
});

const verdict = results[0];
console.log(`Flagged: ${verdict.flagged}`);
// Print the score of every category that was flagged.
Object.entries(verdict.categories)
  .filter(([, flagged]) => flagged)
  .forEach(([category]) => {
    console.log(`  ${category}: ${verdict.categoryScores[category].toFixed(4)}`);
  });

use liter_llm::{ClientConfigBuilder, CreateModerationRequest, DefaultClient, LlmClient};

/// Moderates a sample message and prints the verdict plus each flagged
/// category with its score.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let api_key = std::env::var("OPENAI_API_KEY")?;
    let config = ClientConfigBuilder::new(api_key).build();
    let client = DefaultClient::new(config, Some("openai/omni-moderation-latest"))?;

    let request = CreateModerationRequest {
        model: Some("openai/omni-moderation-latest".into()),
        input: "This is a test message.".into(),
    };
    let moderation = client.moderate(request).await?;

    let verdict = &moderation.results[0];
    println!("Flagged: {}", verdict.flagged);
    for (category, &flagged) in &verdict.categories {
        // Skip categories that were not flagged.
        if !flagged {
            continue;
        }
        // A flagged category without a recorded score prints nothing.
        if let Some(&score) = verdict.category_scores.get(category) {
            println!("  {category}: {score:.4}");
        }
    }
    Ok(())
}

package main

import (
 "context"
 "fmt"
 "os"

 llm "github.com/kreuzberg-dev/liter-llm/packages/go"
)

// main moderates a sample message and prints the verdict plus each flagged
// category with its score.
func main() {
 apiKey := os.Getenv("OPENAI_API_KEY")
 client := llm.NewClient(llm.WithAPIKey(apiKey))

 req := &llm.CreateModerationRequest{
  Model: "openai/omni-moderation-latest",
  Input: "This is a test message.",
 }
 moderation, err := client.Moderate(context.Background(), req)
 if err != nil {
  panic(err)
 }

 verdict := moderation.Results[0]
 fmt.Printf("Flagged: %v\n", verdict.Flagged)
 for category, flagged := range verdict.Categories {
  // Skip categories that were not flagged.
  if !flagged {
   continue
  }
  fmt.Printf("  %s: %.4f\n", category, verdict.CategoryScores[category])
 }
}

import dev.kreuzberg.literllm.LlmClient;
import dev.kreuzberg.literllm.Types.*;

/** Moderates a sample message and prints the verdict plus each flagged category. */
public class Main {
    public static void main(String[] args) throws Exception {
        var apiKey = System.getenv("OPENAI_API_KEY");
        // try-with-resources closes the client when the block exits.
        try (var llm = LlmClient.builder().apiKey(apiKey).build()) {
            var request = new CreateModerationRequest(
                "openai/omni-moderation-latest",
                "This is a test message."
            );
            var verdict = llm.moderate(request).results().getFirst();
            System.out.println("Flagged: " + verdict.flagged());

            var scores = verdict.categoryScores();
            verdict.categories().forEach((category, flagged) -> {
                // Skip categories that were not flagged.
                if (!flagged) {
                    return;
                }
                System.out.printf("  %s: %.4f%n", category, scores.get(category));
            });
        }
    }
}

using LiterLlm;

// `!` suppresses the nullable warning; the variable is expected to be set.
var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")!;
await using var llm = new LlmClient(apiKey: apiKey);

var request = new CreateModerationRequest(
    Model: "openai/omni-moderation-latest",
    Input: "This is a test message."
);
var moderation = await llm.ModerateAsync(request);

var verdict = moderation.Results[0];
Console.WriteLine($"Flagged: {verdict.Flagged}");
foreach (var (category, flagged) in verdict.Categories)
{
    // Skip categories that were not flagged.
    if (!flagged)
    {
        continue;
    }

    Console.WriteLine($"  {category}: {verdict.CategoryScores[category]:F4}");
}

# frozen_string_literal: true

require "liter_llm"
require "json"

llm = LiterLlm::LlmClient.new(ENV.fetch("OPENAI_API_KEY"), {})

# The request goes in as a JSON string and the reply is parsed from JSON text.
request = {
  model: "openai/omni-moderation-latest",
  input: "This is a test message."
}
moderation = JSON.parse(llm.moderate(JSON.generate(request)))

verdict = moderation.dig("results", 0)
puts "Flagged: #{verdict["flagged"]}"
verdict["categories"].each do |category, flagged|
  # Skip categories that were not flagged.
  next unless flagged

  score = verdict["category_scores"][category]
  puts "  #{category}: #{format("%.4f", score)}"
end

<?php

declare(strict_types=1);

use LiterLlm\LlmClient;

// Fail fast when the key is missing: getenv() returns false for an unset
// variable, and an empty key would otherwise only surface later as an
// authentication failure from the API.
$apiKey = getenv('OPENAI_API_KEY');
if ($apiKey === false || $apiKey === '') {
    throw new \RuntimeException('OPENAI_API_KEY is not set');
}
$client = new LlmClient(apiKey: $apiKey);

// The request goes in as a JSON string and the reply is decoded from JSON
// text. JSON_THROW_ON_ERROR turns encode/decode failures into exceptions
// instead of silent false/null results.
$request = json_encode([
    'model' => 'openai/omni-moderation-latest',
    'input' => 'This is a test message.',
], JSON_THROW_ON_ERROR);
$response = json_decode($client->moderate($request), true, flags: JSON_THROW_ON_ERROR);

$result = $response['results'][0];
echo "Flagged: " . ($result['flagged'] ? 'true' : 'false') . PHP_EOL;
foreach ($result['categories'] as $category => $flagged) {
    // Only flagged categories are listed, each with its score to four decimals.
    if ($flagged) {
        echo "  {$category}: " . number_format($result['category_scores'][$category], 4) . PHP_EOL;
    }
}

# Moderate a sample message. Matching on {:ok, _} raises a MatchError for any
# other result.
{:ok, response} =
  LiterLlm.moderate(
    %{
      model: "openai/omni-moderation-latest",
      input: "This is a test message."
    },
    api_key: System.fetch_env!("OPENAI_API_KEY")
  )

result = hd(response["results"])
IO.puts("Flagged: #{result["flagged"]}")

# The literal `true` in the generator pattern keeps only flagged categories.
for {category, true} <- result["categories"] do
  score = result["category_scores"][category]
  # Print exactly four decimals. Float.round/2 drops trailing zeros and raises
  # on integer scores; multiplying by 1.0 coerces an integer to a float first.
  IO.puts("  #{category}: #{:erlang.float_to_binary(score * 1.0, decimals: 4)}")
end

import init, { LlmClient } from "@kreuzberg/liter-llm-wasm";

// Initialise the WASM module before constructing the client.
await init();

const apiKey = process.env.OPENAI_API_KEY!;
const llm = new LlmClient({ apiKey });
const { results } = await llm.moderate({
  model: "openai/omni-moderation-latest",
  input: "This is a test message.",
});

const verdict = results[0];
console.log(`Flagged: ${verdict.flagged}`);
// Print the score of every category that was flagged.
Object.entries(verdict.categories)
  .filter(([, flagged]) => flagged)
  .forEach(([category]) => {
    console.log(`  ${category}: ${verdict.categoryScores[category].toFixed(4)}`);
  });

Moderation Parameters¶

Parameter	Type	Description
`input`	string/array	Content to classify
`model`	string	Moderation model (e.g. `"openai/omni-moderation-latest"`)