Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/green-donkeys-talk.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@ai-sdk/elevenlabs': minor
---

feat(provider/elevenlabs): add experimental realtime ElevenAgents support
31 changes: 31 additions & 0 deletions content/providers/01-ai-sdk-providers/90-elevenlabs.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -253,3 +253,34 @@ The following provider options are available:
| ------------------------ | ------------------- | ------------------- | ------------------- | ------------------- |
| `scribe_v1` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
| `scribe_v1_experimental` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |

## Realtime Models

You can create realtime voice-agent models using the `.experimental_realtime()` factory method.

The model id is an ElevenAgents `agent_id`, not a text-to-speech model id.

```ts
const model = elevenLabs.experimental_realtime('agent_123');
```

The realtime model connects to ElevenAgents over WebSockets and authenticates with signed URLs, so the ElevenLabs API key is not exposed to the browser.
It supports bidirectional audio and text, user transcripts, agent audio and text responses, and client tool calls.

You can pass ElevenAgents-specific session overrides using `providerOptions.elevenlabs`:

```ts
import type { ElevenLabsRealtimeModelOptions } from '@ai-sdk/elevenlabs';

const sessionConfig = {
instructions: 'You are a helpful support agent.',
voice: '21m00Tcm4TlvDq8ikWAM',
providerOptions: {
elevenlabs: {
dynamicVariables: {
user_name: 'John',
},
} satisfies ElevenLabsRealtimeModelOptions,
},
};
```
5 changes: 5 additions & 0 deletions examples/ai-e2e-next/.env.local.example
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
# Then get your OpenAI API Key here: https://platform.openai.com/account/api-keys
OPENAI_API_KEY=xxxxxxx

# Required for the ElevenLabs provider in the realtime example.
# Create an agent at https://elevenlabs.io/app and copy its Agent ID.
ELEVENLABS_API_KEY=xxxxxxx
ELEVENLABS_AGENT_ID=xxxxxxx

# You must first create an OpenAI Assistant here: https://platform.openai.com/assistants
# Then get your Assistant ID here: https://platform.openai.com/assistants
ASSISTANT_ID=xxxxxxx
Expand Down
12 changes: 12 additions & 0 deletions examples/ai-e2e-next/app/api/realtime/[...path]/route.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { openai } from '@ai-sdk/openai';
import { google } from '@ai-sdk/google';
import { xai } from '@ai-sdk/xai';
import { elevenLabs } from '@ai-sdk/elevenlabs';
import {
experimental_getRealtimeToolDefinitions as getRealtimeToolDefinitions,
gateway,
Expand Down Expand Up @@ -51,6 +52,10 @@ const providers: Record<string, { factory: RealtimeFactory; model: string }> = {
factory: xai.experimental_realtime,
model: 'grok-voice-latest',
},
elevenlabs: {
factory: elevenLabs.experimental_realtime,
model: process.env.ELEVENLABS_AGENT_ID ?? '',
},
gateway: {
factory: gateway.experimental_realtime,
model: 'openai/gpt-realtime-2',
Expand All @@ -75,6 +80,13 @@ export async function POST(
const toolDefs = await getRealtimeToolDefinitions({ tools });

const { factory, model } = providers[provider] ?? providers.openai;
if (!model) {
return Response.json(
{ error: `Missing model configuration for provider: ${provider}` },
{ status: 400 },
);
}

const tokenResult = await factory.getToken({
model,
sessionConfig: { ...sessionConfig, tools: toolDefs },
Expand Down
33 changes: 25 additions & 8 deletions examples/ai-e2e-next/app/realtime/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ import { experimental_useRealtime } from '@ai-sdk/react';
import { openai } from '@ai-sdk/openai';
import { google } from '@ai-sdk/google';
import { xai } from '@ai-sdk/xai';
import { elevenLabs } from '@ai-sdk/elevenlabs';
import { useState, useRef, useEffect, useMemo } from 'react';

type Provider = 'openai' | 'google' | 'xai';
type Provider = 'openai' | 'google' | 'xai' | 'elevenlabs';

type VoiceOption = { id: string; label: string };

Expand All @@ -23,6 +24,7 @@ const PROVIDER_CONFIG: Record<
createModel: (
modelId: string,
) => ReturnType<typeof openai.experimental_realtime>;
usesAgentConfiguration?: boolean;
sessionConfigOverrides?: Record<string, unknown>;
}
> = {
Expand Down Expand Up @@ -59,6 +61,17 @@ const PROVIDER_CONFIG: Record<
staticVoices: toVoiceOptions(['ara', 'eve', 'leo', 'rex', 'sal']),
createModel: modelId => xai.experimental_realtime(modelId),
},
elevenlabs: {
label: 'ElevenLabs',
defaultModel: 'configured-agent',
staticVoices: [],
createModel: modelId => elevenLabs.experimental_realtime(modelId),
usesAgentConfiguration: true,
sessionConfigOverrides: {
inputAudioFormat: { type: 'audio/pcm', rate: 16000 },
outputAudioFormat: { type: 'audio/pcm', rate: 16000 },
},
},
};

export default function RealtimePage() {
Expand Down Expand Up @@ -132,7 +145,7 @@ export default function RealtimePage() {
style={selectStyle}
>
{currentVoices.length === 0 ? (
<option>No voices available</option>
<option>Agent default</option>
) : (
currentVoices.map(v => (
<option key={v.id} value={v.id}>
Expand Down Expand Up @@ -185,12 +198,16 @@ function RealtimeChat({

const sessionConfig = useMemo(
() => ({
instructions:
'You are a helpful assistant. Be concise. ' +
'You have access to tools for weather and dice rolling.',
inputAudioTranscription: {},
voice,
turnDetection: { type: 'server-vad' as const },
...(config.usesAgentConfiguration
? {}
: {
instructions:
'You are a helpful assistant. Be concise. ' +
'You have access to tools for weather and dice rolling.',
inputAudioTranscription: {},
...(voice ? { voice } : {}),
turnDetection: { type: 'server-vad' as const },
}),
...config.sessionConfigOverrides,
}),
[voice, config],
Expand Down
1 change: 1 addition & 0 deletions examples/ai-e2e-next/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"@ai-sdk/azure": "workspace:*",
"@ai-sdk/cohere": "workspace:*",
"@ai-sdk/deepseek": "workspace:*",
"@ai-sdk/elevenlabs": "workspace:*",
"@ai-sdk/fireworks": "workspace:*",
"@ai-sdk/google": "workspace:*",
"@ai-sdk/google-vertex": "workspace:*",
Expand Down
37 changes: 37 additions & 0 deletions packages/elevenlabs/src/elevenlabs-provider.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import {
NoSuchModelError,
type Experimental_RealtimeFactoryV4 as RealtimeFactoryV4,
type Experimental_RealtimeFactoryV4GetTokenOptions as RealtimeFactoryV4GetTokenOptions,
type TranscriptionModelV4,
type SpeechModelV4,
type ProviderV4,
Expand All @@ -13,6 +15,7 @@ import { ElevenLabsTranscriptionModel } from './elevenlabs-transcription-model';
import type { ElevenLabsTranscriptionModelId } from './elevenlabs-transcription-options';
import { ElevenLabsSpeechModel } from './elevenlabs-speech-model';
import type { ElevenLabsSpeechModelId } from './elevenlabs-speech-options';
import { ElevenLabsRealtimeModel } from './realtime/elevenlabs-realtime-model';
import { VERSION } from './version';

export interface ElevenLabsProvider extends ProviderV4 {
Expand All @@ -33,6 +36,12 @@ export interface ElevenLabsProvider extends ProviderV4 {
*/
speech(modelId: ElevenLabsSpeechModelId): SpeechModelV4;

/**
* Creates an experimental realtime model for bidirectional audio/text
* communication over WebSocket.
*/
experimental_realtime: RealtimeFactoryV4;

/**
* @deprecated Use `embeddingModel` instead.
*/
Expand Down Expand Up @@ -92,6 +101,33 @@ export function createElevenLabs(
fetch: options.fetch,
});

/**
 * Builds an `ElevenLabsRealtimeModel` for the given model id.
 *
 * The model is configured with the fixed ElevenLabs API base URL, the
 * `getHeaders` function from the enclosing scope, and the caller-supplied
 * `options.fetch` (which may be undefined).
 */
const createRealtimeModel = (modelId: string) => {
  const realtimeConfig = {
    provider: `elevenlabs.realtime`,
    baseURL: 'https://api.elevenlabs.io',
    headers: getHeaders,
    fetch: options.fetch,
  };

  return new ElevenLabsRealtimeModel(modelId, realtimeConfig);
};

/**
 * Realtime factory exposed as `provider.experimental_realtime`.
 *
 * Calling it with a model id returns a realtime model; the attached
 * `getToken` helper creates a client secret for that model and returns its
 * `token` and `url`, plus `expiresAt` only when the secret carries one.
 */
const experimentalRealtimeFactory = Object.assign(
  (modelId: string) => createRealtimeModel(modelId),
  {
    async getToken(tokenOptions: RealtimeFactoryV4GetTokenOptions) {
      const {
        model: agentModelId,
        sessionConfig,
        expiresAfterSeconds,
      } = tokenOptions;

      const { token, url, expiresAt } = await createRealtimeModel(
        agentModelId,
      ).doCreateClientSecret({ sessionConfig, expiresAfterSeconds });

      // Leave `expiresAt` off the result entirely when it is null/undefined.
      return expiresAt != null ? { token, url, expiresAt } : { token, url };
    },
  },
) as RealtimeFactoryV4;

const provider = function (modelId: ElevenLabsTranscriptionModelId) {
return {
transcription: createTranscriptionModel(modelId),
Expand All @@ -103,6 +139,7 @@ export function createElevenLabs(
provider.transcriptionModel = createTranscriptionModel;
provider.speech = createSpeechModel;
provider.speechModel = createSpeechModel;
provider.experimental_realtime = experimentalRealtimeFactory;

provider.languageModel = (modelId: string) => {
throw new NoSuchModelError({
Expand Down
6 changes: 6 additions & 0 deletions packages/elevenlabs/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,16 @@ export type {
ElevenLabsProvider,
ElevenLabsProviderSettings,
} from './elevenlabs-provider';
export { ElevenLabsRealtimeModel as Experimental_ElevenLabsRealtimeModel } from './realtime/elevenlabs-realtime-model';
export type { ElevenLabsRealtimeModelConfig as Experimental_ElevenLabsRealtimeModelConfig } from './realtime/elevenlabs-realtime-model';
export type {
ElevenLabsSpeechModelId,
ElevenLabsSpeechVoiceId,
} from './elevenlabs-speech-options';
export type { ElevenLabsSpeechModelOptions } from './elevenlabs-speech-model-options';
export type { ElevenLabsTranscriptionModelOptions } from './elevenlabs-transcription-model-options';
export type {
ElevenLabsRealtimeModelId,
ElevenLabsRealtimeModelOptions,
} from './realtime/elevenlabs-realtime-model-options';
export { VERSION } from './version';
Loading