Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/green-colts-kiss.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@tanstack/ai-groq': minor
---

Add tree-shakeable Text-to-Speech (TTS) adapter for Groq API with English and Arabic voices, multiple output formats (default WAV), configurable speed and sample rate, new types, model metadata, and unit tests.
159 changes: 159 additions & 0 deletions packages/typescript/ai-groq/src/adapters/tts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import { BaseTTSAdapter } from '@tanstack/ai/adapters'
import { createGroqClient, generateId, getGroqApiKeyFromEnv } from '../utils'
import { validateAudioInput } from '../audio/audio-provider-options'
import type { GroqTTSModel } from '../model-meta'
import type {
GroqTTSFormat,
GroqTTSProviderOptions,
GroqTTSVoice,
} from '../audio/tts-provider-options'
import type { TTSOptions, TTSResult } from '@tanstack/ai'
import type Groq_SDK from 'groq-sdk'
import type { GroqClientConfig } from '../utils'

/**
 * Configuration accepted by the Groq TTS adapter.
 *
 * Declares no members of its own: it is `GroqClientConfig` exposed under a
 * TTS-specific name.
 */
export interface GroqTTSConfig extends GroqClientConfig {}

/**
 * Groq Text-to-Speech adapter.
 *
 * Tree-shakeable adapter for Groq TTS functionality.
 * Supports the `canopylabs/orpheus-v1-english` and
 * `canopylabs/orpheus-arabic-saudi` models.
 *
 * Features:
 * - English voices: autumn(f), diana(f), hannah(f), austin(m), daniel(m), troy(m)
 * - Arabic voices: fahad(m), sultan(m), lulwa(f), noura(f)
 * - Output formats: flac, mp3, mulaw, ogg, wav (only wav currently supported)
 * - Speed control
 * - Configurable sample rate
 * - Vocal direction support (English voices only)
 */
export class GroqTTSAdapter<TModel extends GroqTTSModel> extends BaseTTSAdapter<
  TModel,
  GroqTTSProviderOptions
> {
  readonly name = 'groq' as const

  /** Groq SDK client built once from the adapter configuration. */
  private readonly client: Groq_SDK

  /**
   * @param config - Client configuration, forwarded to the base adapter and to `createGroqClient`
   * @param model - The TTS model this adapter is bound to
   */
  constructor(config: GroqTTSConfig, model: TModel) {
    super(config, model)
    this.client = createGroqClient(config)
  }

  /**
   * Synthesizes speech for `options.text` and returns it base64-encoded.
   *
   * Defaults: `format` falls back to `'wav'`; `voice` falls back to a voice
   * that belongs to the requested model (see `getDefaultVoice`).
   * `modelOptions` is spread last into the request, so any key it carries
   * overrides the fields set before it.
   *
   * @param options - Text, model and optional voice/format/speed/modelOptions
   * @returns The generated id, model, base64 audio, format and MIME type
   * @throws Error if the text is rejected by `validateAudioInput`
   */
  async generateSpeech(
    options: TTSOptions<GroqTTSProviderOptions>,
  ): Promise<TTSResult> {
    const { model, text, format = 'wav', speed, modelOptions } = options

    validateAudioInput({ input: text, model })

    // A fixed English default would be sent to the Arabic model as well, so
    // the fallback voice is chosen per model instead.
    const voice = (options.voice ?? this.getDefaultVoice(model)) as GroqTTSVoice
    const responseFormat = format as GroqTTSFormat

    const request: Groq_SDK.Audio.Speech.SpeechCreateParams = {
      model,
      input: text,
      voice,
      response_format: responseFormat,
      speed,
      ...modelOptions,
    }

    const response = await this.client.audio.speech.create(request)

    // NOTE(review): `Buffer` is a Node.js global — confirm whether this
    // adapter is also expected to run in browser runtimes.
    const arrayBuffer = await response.arrayBuffer()
    const base64 = Buffer.from(arrayBuffer).toString('base64')

    return {
      id: generateId(this.name),
      model,
      audio: base64,
      format: responseFormat,
      contentType: this.getContentType(responseFormat),
    }
  }

  /**
   * Picks the fallback voice for a model: an Arabic voice for the Arabic
   * Orpheus model, the English `'autumn'` voice for everything else.
   */
  private getDefaultVoice(model: string): GroqTTSVoice {
    // Typed against GroqTTSModel so a typo in the identifier fails to compile.
    const arabicModel: GroqTTSModel = 'canopylabs/orpheus-arabic-saudi'
    return model === arabicModel ? 'fahad' : 'autumn'
  }

  /**
   * Maps an output format name to its MIME type, falling back to
   * `'audio/wav'` for names that are not in the table.
   */
  private getContentType(format: string): string {
    const contentTypes: Record<string, string> = {
      flac: 'audio/flac',
      mp3: 'audio/mpeg',
      mulaw: 'audio/basic',
      ogg: 'audio/ogg',
      wav: 'audio/wav',
    }
    return contentTypes[format] ?? 'audio/wav'
  }
}

/**
 * Builds a Groq speech adapter from an explicitly supplied API key.
 * The model type parameter is resolved at the call site.
 *
 * @param model - The model name (e.g., 'canopylabs/orpheus-v1-english')
 * @param apiKey - Your Groq API key
 * @param config - Optional extra client configuration, merged after `apiKey`
 * @returns A `GroqTTSAdapter` bound to `model`
 *
 * @example
 * ```typescript
 * const adapter = createGroqSpeech('canopylabs/orpheus-v1-english', "gsk_...");
 *
 * const result = await generateSpeech({
 *   adapter,
 *   text: 'Hello, world!',
 *   voice: 'autumn'
 * });
 * ```
 */
export function createGroqSpeech<TModel extends GroqTTSModel>(
  model: TModel,
  apiKey: string,
  config?: Omit<GroqTTSConfig, 'apiKey'>,
): GroqTTSAdapter<TModel> {
  const adapterConfig: GroqTTSConfig = { apiKey, ...config }
  return new GroqTTSAdapter(adapterConfig, model)
}

/**
 * Builds a Groq speech adapter using an API key obtained from the
 * environment via `getGroqApiKeyFromEnv`.
 * The model type parameter is resolved at the call site.
 *
 * `GROQ_API_KEY` is looked up in:
 * - `process.env` (Node.js)
 * - `window.env` (Browser with injected env)
 *
 * @param model - The model name (e.g., 'canopylabs/orpheus-v1-english')
 * @param config - Optional configuration (`apiKey` is excluded; it is auto-detected)
 * @returns A `GroqTTSAdapter` bound to `model`
 * @throws Error if GROQ_API_KEY is not found in environment
 *
 * @example
 * ```typescript
 * // Automatically uses GROQ_API_KEY from environment
 * const adapter = groqSpeech('canopylabs/orpheus-v1-english');
 *
 * const result = await generateSpeech({
 *   adapter,
 *   text: 'Welcome to TanStack AI!',
 *   voice: 'autumn',
 *   format: 'wav'
 * });
 * ```
 */
export function groqSpeech<TModel extends GroqTTSModel>(
  model: TModel,
  config?: Omit<GroqTTSConfig, 'apiKey'>,
): GroqTTSAdapter<TModel> {
  return createGroqSpeech(model, getGroqApiKeyFromEnv(), config)
}
25 changes: 25 additions & 0 deletions packages/typescript/ai-groq/src/audio/audio-provider-options.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/**
 * Request fields shared by Groq audio endpoints.
 */
export interface AudioProviderOptions {
  /**
   * Text to turn into audio.
   * At most 200 characters.
   * `[directions]` may be embedded for vocal control (English voices only).
   */
  input: string
  /**
   * Audio model used for generation.
   */
  model: string
}

/**
 * Guards against input text that is longer than the 200-character limit.
 *
 * Length is measured with `String.prototype.length` (UTF-16 code units).
 * Only `input` is inspected; `model` is not consulted.
 *
 * @param options - The audio request fields to check
 * @throws Error if input text exceeds 200 characters
 */
export const validateAudioInput = (options: AudioProviderOptions): void => {
  const maxInputLength = 200
  const { input } = options

  if (input.length <= maxInputLength) {
    return
  }

  throw new Error('Input text exceeds maximum length of 200 characters.')
}
49 changes: 49 additions & 0 deletions packages/typescript/ai-groq/src/audio/tts-provider-options.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/**
 * Voice names accepted by the English Groq TTS model.
 */
export type GroqTTSEnglishVoice =
  | 'autumn' | 'diana' | 'hannah'
  | 'austin' | 'daniel' | 'troy'

/**
 * Voice names accepted by the Arabic Groq TTS model.
 */
export type GroqTTSArabicVoice =
  | 'fahad'
  | 'sultan'
  | 'lulwa'
  | 'noura'

/**
 * Any Groq TTS voice, English or Arabic.
 */
export type GroqTTSVoice = GroqTTSEnglishVoice | GroqTTSArabicVoice

/**
 * Audio container/encoding names for Groq TTS output.
 * Only wav is currently supported.
 */
export type GroqTTSFormat =
  | 'flac'
  | 'mp3'
  | 'mulaw'
  | 'ogg'
  | 'wav'

/**
 * Sample rates (Hz) selectable for Groq TTS output.
 */
export type GroqTTSSampleRate =
  | 8000 | 16000 | 22050 | 24000
  | 32000 | 44100 | 48000

/**
 * Groq-specific TTS options.
 * Supplied through `modelOptions` when calling `generateSpeech`.
 */
export interface GroqTTSProviderOptions {
  /**
   * Sample rate of the generated audio, in Hz.
   */
  sample_rate?: GroqTTSSampleRate
}
22 changes: 20 additions & 2 deletions packages/typescript/ai-groq/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* @module @tanstack/ai-groq
*
* Groq provider adapter for TanStack AI.
* Provides tree-shakeable adapters for Groq's Chat Completions API.
* Provides tree-shakeable adapters for Groq's Chat Completions API and TTS API.
*/

// Text (Chat) adapter
Expand All @@ -14,15 +14,33 @@ export {
type GroqTextProviderOptions,
} from './adapters/text'

// TTS adapter - for text-to-speech
export {
GroqTTSAdapter,
createGroqSpeech,
groqSpeech,
type GroqTTSConfig,
} from './adapters/tts'
export type {
GroqTTSProviderOptions,
GroqTTSVoice,
GroqTTSEnglishVoice,
GroqTTSArabicVoice,
GroqTTSFormat,
GroqTTSSampleRate,
} from './audio/tts-provider-options'

// Types
export type {
GroqChatModelProviderOptionsByName,
GroqTTSModelProviderOptionsByName,
GroqModelInputModalitiesByName,
ResolveProviderOptions,
ResolveInputModalities,
GroqChatModels,
GroqTTSModel,
} from './model-meta'
export { GROQ_CHAT_MODELS } from './model-meta'
export { GROQ_CHAT_MODELS, GROQ_TTS_MODELS } from './model-meta'
export type {
GroqTextMetadata,
GroqImageMetadata,
Expand Down
65 changes: 61 additions & 4 deletions packages/typescript/ai-groq/src/model-meta.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type { GroqTextProviderOptions } from './text/text-provider-options'
import type { GroqTTSProviderOptions } from './audio/tts-provider-options'

/**
* Internal metadata structure describing a Groq model's capabilities and pricing.
Expand Down Expand Up @@ -351,14 +352,23 @@ export type GroqChatModelProviderOptionsByName = {
[K in (typeof GROQ_CHAT_MODELS)[number]]: GroqTextProviderOptions
}

/**
 * Type-level lookup table: every Groq TTS model name maps to
 * `GroqTTSProviderOptions`. Has no runtime representation.
 */
export type GroqTTSModelProviderOptionsByName = Record<
  GroqTTSModel,
  GroqTTSProviderOptions
>

/**
* Resolves the provider options type for a specific Groq model.
* Falls back to generic GroqTextProviderOptions for unknown models.
* Checks TTS models first, then chat models, then falls back to generic options.
*/
export type ResolveProviderOptions<TModel extends string> =
TModel extends keyof GroqChatModelProviderOptionsByName
? GroqChatModelProviderOptionsByName[TModel]
: GroqTextProviderOptions
TModel extends GroqTTSModel
? GroqTTSProviderOptions
: TModel extends keyof GroqChatModelProviderOptionsByName
? GroqChatModelProviderOptionsByName[TModel]
: GroqTextProviderOptions

/**
* Resolve input modalities for a specific model.
Expand All @@ -368,3 +378,50 @@ export type ResolveInputModalities<TModel extends string> =
TModel extends keyof GroqModelInputModalitiesByName
? GroqModelInputModalitiesByName[TModel]
: readonly ['text']

// ============================================================================
// TTS Models
// ============================================================================

/**
 * Metadata for the `canopylabs/orpheus-v1-english` TTS model:
 * text input, audio output, served on the `tts` endpoint, no extra features.
 */
const ORPHEUS_V1_ENGLISH = {
  name: 'canopylabs/orpheus-v1-english',
  pricing: {
    input: {
      // NOTE(review): the unit is not visible here — confirm against ModelMeta / Groq pricing.
      normal: 22,
    },
  },
  supports: {
    input: ['text'],
    output: ['audio'],
    endpoints: ['tts'],
    features: [],
  },
} as const satisfies ModelMeta<GroqTTSProviderOptions>

/**
 * Metadata for the `canopylabs/orpheus-arabic-saudi` TTS model:
 * text input, audio output, served on the `tts` endpoint, no extra features.
 */
const ORPHEUS_ARABIC_SAUDI = {
  name: 'canopylabs/orpheus-arabic-saudi',
  pricing: {
    input: {
      // NOTE(review): the unit is not visible here — confirm against ModelMeta / Groq pricing.
      normal: 40,
    },
  },
  supports: {
    input: ['text'],
    output: ['audio'],
    endpoints: ['tts'],
    features: [],
  },
} as const satisfies ModelMeta<GroqTTSProviderOptions>

/**
 * All supported Groq TTS model identifiers.
 *
 * Built from the `name` fields of the model metadata constants and frozen
 * with `as const`, so the element types stay string literals.
 */
export const GROQ_TTS_MODELS = [
  ORPHEUS_V1_ENGLISH.name,
  ORPHEUS_ARABIC_SAUDI.name,
] as const

/**
 * Union type of all supported Groq TTS model names,
 * derived from the elements of `GROQ_TTS_MODELS`.
 */
export type GroqTTSModel = (typeof GROQ_TTS_MODELS)[number]
5 changes: 0 additions & 5 deletions packages/typescript/ai-groq/src/tools/function-tool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,6 @@ export function convertFunctionToolToAdapterFormat(tool: Tool): FunctionTool {
required: [],
}) as JSONSchema

// Ensure object schemas always have properties (e.g. z.object({}) may produce { type: 'object' } without properties)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why did you remove this? This introduces a bug with groq and how they parse tools. I specifically added this in to fix that

if (inputSchema.type === 'object' && !inputSchema.properties) {
inputSchema.properties = {}
}

const jsonSchema = makeGroqStructuredOutputCompatible(
inputSchema,
inputSchema.required || [],
Expand Down
Loading
Loading