
/audio/speech

LiteLLM Python SDK Usage

Quick Start

from pathlib import Path
from litellm import speech
import os

os.environ["OPENAI_API_KEY"] = "sk-.."

speech_file_path = Path(__file__).parent / "speech.mp3"
response = speech(
    model="openai/tts-1",
    voice="alloy",
    input="the quick brown fox jumped over the lazy dogs",
)
response.stream_to_file(speech_file_path)
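
Optional OpenAI TTS parameters can also be passed as keyword arguments. A minimal sketch, assuming response_format is forwarded to the provider unchanged:

from pathlib import Path
from litellm import speech
import os

os.environ["OPENAI_API_KEY"] = "sk-.."

# response_format selects the audio container (e.g. mp3, flac);
# assumed here to be passed through to the OpenAI TTS API
speech_file_path = Path(__file__).parent / "speech.flac"
response = speech(
    model="openai/tts-1",
    voice="alloy",
    input="the quick brown fox jumped over the lazy dogs",
    response_format="flac",
)
response.stream_to_file(speech_file_path)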

Async Usage

import litellm
from pathlib import Path
import os, asyncio

os.environ["OPENAI_API_KEY"] = "sk-.."

async def test_async_speech():
    speech_file_path = Path(__file__).parent / "speech.mp3"
    response = await litellm.aspeech(
        model="openai/tts-1",
        voice="alloy",
        input="the quick brown fox jumped over the lazy dogs",
        api_base=None,
        api_key=None,
        organization=None,
        project=None,
        max_retries=1,
        timeout=600,
        client=None,
        optional_params={},
    )
    response.stream_to_file(speech_file_path)

asyncio.run(test_async_speech())

LiteLLM Proxy Usage

LiteLLM provides an OpenAI-compatible /audio/speech endpoint for text-to-speech calls.

curl http://0.0.0.0:4000/v1/audio/speech \
  -H "Authorization: Bearer sk-1234" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "tts-1",
    "input": "The quick brown fox jumped over the lazy dog.",
    "voice": "alloy"
  }' \
  --output speech.mp3

Setup

model_list:
  - model_name: tts
    litellm_params:
      model: openai/tts-1
      api_key: os.environ/OPENAI_API_KEY

litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
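
Once the proxy is running, you can also call the endpoint with the OpenAI Python SDK pointed at the proxy. A minimal sketch, assuming the default local address and the example sk-1234 virtual key used above:

from pathlib import Path
from openai import OpenAI

# Point the OpenAI client at the LiteLLM proxy
client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

response = client.audio.speech.create(
    model="tts",  # the model_name defined in the proxy config above
    voice="alloy",
    input="The quick brown fox jumped over the lazy dog.",
)

# Write the returned audio bytes to disk
speech_file_path = Path(__file__).parent / "speech.mp3"
speech_file_path.write_bytes(response.content)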

Supported Providers

Provider      | Link to Usage
OpenAI        | Usage
Azure OpenAI  | Usage
Vertex AI     | Usage
Gemini        | Usage

/audio/speech to /chat/completions Bridge

LiteLLM allows you to use /chat/completions models to generate speech through the /audio/speech endpoint. This is useful for models such as Gemini's TTS-enabled models, which are only accessible via /chat/completions.

Gemini Text-to-Speech

Python SDK Usage

Gemini Text-to-Speech SDK Usage
import litellm
import os

# Set your Gemini API key
os.environ["GEMINI_API_KEY"] = "your-gemini-api-key"

def test_audio_speech_gemini():
    result = litellm.speech(
        model="gemini/gemini-2.5-flash-preview-tts",
        input="the quick brown fox jumped over the lazy dogs",
        api_key=os.getenv("GEMINI_API_KEY"),
    )

    # Save to file
    from pathlib import Path
    speech_file_path = Path(__file__).parent / "gemini_speech.mp3"
    result.stream_to_file(speech_file_path)
    print(f"Audio saved to {speech_file_path}")

test_audio_speech_gemini()

Async Usage

Gemini Text-to-Speech Async Usage
import litellm
import asyncio
import os
from pathlib import Path

os.environ["GEMINI_API_KEY"] = "your-gemini-api-key"

async def test_async_gemini_speech():
    speech_file_path = Path(__file__).parent / "gemini_speech.mp3"
    response = await litellm.aspeech(
        model="gemini/gemini-2.5-flash-preview-tts",
        input="the quick brown fox jumped over the lazy dogs",
        api_key=os.getenv("GEMINI_API_KEY"),
    )
    response.stream_to_file(speech_file_path)
    print(f"Audio saved to {speech_file_path}")

asyncio.run(test_async_gemini_speech())

LiteLLM Proxy Usage

Setup Config:

Gemini Proxy Configuration
model_list:
  - model_name: gemini-tts
    litellm_params:
      model: gemini/gemini-2.5-flash-preview-tts
      api_key: os.environ/GEMINI_API_KEY

Start Proxy:

Start LiteLLM Proxy
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000

Make Request:

Gemini TTS Request
curl http://0.0.0.0:4000/v1/audio/speech \
  -H "Authorization: Bearer sk-1234" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gemini-tts",
    "input": "The quick brown fox jumped over the lazy dog.",
    "voice": "alloy"
  }' \
  --output gemini_speech.mp3

Vertex AI Text-to-Speech

Python SDK Usage

Vertex AI Text-to-Speech SDK Usage
import litellm
import os
from pathlib import Path

# Set your Google credentials
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "path/to/service-account.json"

def test_audio_speech_vertex():
    result = litellm.speech(
        model="vertex_ai/gemini-2.5-flash-preview-tts",
        input="the quick brown fox jumped over the lazy dogs",
    )

    # Save to file
    speech_file_path = Path(__file__).parent / "vertex_speech.mp3"
    result.stream_to_file(speech_file_path)
    print(f"Audio saved to {speech_file_path}")

test_audio_speech_vertex()
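
An async variant follows the same pattern as the Gemini example above. A minimal sketch using litellm.aspeech with the same Vertex AI model:

import litellm
import asyncio
import os
from pathlib import Path

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "path/to/service-account.json"

async def test_async_vertex_speech():
    speech_file_path = Path(__file__).parent / "vertex_speech.mp3"
    response = await litellm.aspeech(
        model="vertex_ai/gemini-2.5-flash-preview-tts",
        input="the quick brown fox jumped over the lazy dogs",
    )
    response.stream_to_file(speech_file_path)
    print(f"Audio saved to {speech_file_path}")

asyncio.run(test_async_vertex_speech())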

LiteLLM Proxy Usage

Setup Config:

Vertex AI Proxy Configuration
model_list:
  - model_name: vertex-tts
    litellm_params:
      model: vertex_ai/gemini-2.5-flash-preview-tts
      vertex_project: your-project-id
      vertex_location: us-central1

Make Request:

Vertex AI TTS Request
curl http://0.0.0.0:4000/v1/audio/speech \
  -H "Authorization: Bearer sk-1234" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "vertex-tts",
    "input": "The quick brown fox jumped over the lazy dog.",
    "voice": "en-US-Wavenet-D"
  }' \
  --output vertex_speech.mp3

✨ Enterprise LiteLLM Proxy - Set Max Request File Size

Use this when you want to limit the file size of requests sent to /audio/transcriptions.

- model_name: whisper
  litellm_params:
    model: whisper-1
    api_key: sk-*******
    max_file_size_mb: 0.00001 # 👈 max file size in MB (Set this intentionally very small for testing)
  model_info:
    mode: audio_transcription

Make a test request with a valid file

curl --location 'http://localhost:4000/v1/audio/transcriptions' \
  --header 'Authorization: Bearer sk-1234' \
  --form 'file=@"/Users/ishaanjaffer/Github/litellm/tests/gettysburg.wav"' \
  --form 'model="whisper"'

Expect to see the following response:

{"error":{"message":"File size is too large. Please check your file size. Passed file size: 0.7392807006835938 MB. Max file size: 0.0001 MB","type":"bad_request","param":"file","code":500}}%