# This file was auto-generated by Fern from our API Definition.

import typing_extensions
from ..types.mode import Mode
from ..types.speech_to_text_language import SpeechToTextLanguage
from ..types.speech_to_text_model import SpeechToTextModel


class SpeechToTextJobParametersParams(typing_extensions.TypedDict):
    """
    Dictionary shape describing the parameters of a speech-to-text job.

    Every key is declared ``NotRequired``, so any subset of the keys
    (including an empty dict) satisfies this type. According to the field
    descriptions below, some keys only matter in combination with others:

    - ``mode`` is only applicable when ``model`` is ``saaras:v3``.
    - The additional ``language_code`` options (``as-IN`` through ``doi-IN``)
      are listed as ``saaras:v3`` only.
    - ``num_speakers`` is used when ``with_diarization`` is true.

    The defaults mentioned in the field descriptions are not set by this
    class (a TypedDict carries no default values); presumably they are applied
    by the API when a key is omitted -- confirm against the API reference.
    """

    # NOTE(review): the file header says this module is generated by Fern, so
    # hand edits are presumably overwritten on regeneration -- confirm before
    # changing field definitions here.
    #
    # The bare string literal following each field is that field's
    # description; it is a no-op expression statement and does not become a
    # key or value of the TypedDict.
    language_code: typing_extensions.NotRequired[SpeechToTextLanguage]
    """
    Specifies the language of the input audio in BCP-47 format.
    
    **Available Options:**
    - `unknown` (default): Use when the language is not known; the API will auto-detect.
    - `hi-IN`: Hindi
    - `bn-IN`: Bengali
    - `kn-IN`: Kannada
    - `ml-IN`: Malayalam
    - `mr-IN`: Marathi
    - `od-IN`: Odia
    - `pa-IN`: Punjabi
    - `ta-IN`: Tamil
    - `te-IN`: Telugu
    - `en-IN`: English
    - `gu-IN`: Gujarati
    
    **Additional Options (saaras:v3 only):**
    - `as-IN`: Assamese
    - `ur-IN`: Urdu
    - `ne-IN`: Nepali
    - `kok-IN`: Konkani
    - `ks-IN`: Kashmiri
    - `sd-IN`: Sindhi
    - `sa-IN`: Sanskrit
    - `sat-IN`: Santali
    - `mni-IN`: Manipuri
    - `brx-IN`: Bodo
    - `mai-IN`: Maithili
    - `doi-IN`: Dogri
    """

    model: typing_extensions.NotRequired[SpeechToTextModel]
    """
    Model to be used for speech to text.
    
    - **saarika:v2.5** (default): Transcribes audio in the spoken language.
    
    - **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.
    """

    mode: typing_extensions.NotRequired[Mode]
    """
    Mode of operation. **Only applicable when using saaras:v3 model.**
    
    Example audio: 'मेरा फोन नंबर है 9840950950'
    
    - **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
      - Output: `मेरा फोन नंबर है 9840950950`
    
    - **translate**: Translates speech from any supported Indic language to English.
      - Output: `My phone number is 9840950950`
    
    - **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
      - Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`
    
    - **translit**: Romanization - Transliterates speech to Latin/Roman script only.
      - Output: `mera phone number hai 9840950950`
    
    - **codemix**: Code-mixed text with English words in English and Indic words in native script.
      - Output: `मेरा phone number है 9840950950`
    """

    with_timestamps: typing_extensions.NotRequired[bool]
    """
    Whether to include timestamps in the response
    """

    with_diarization: typing_extensions.NotRequired[bool]
    """
    Enables speaker diarization, which identifies and separates different speakers in the audio. In beta mode
    """

    # Only meaningful together with `with_diarization` (see description below);
    # nothing in this type enforces that pairing.
    num_speakers: typing_extensions.NotRequired[int]
    """
    Number of speakers to be detected in the audio. This is used when with_diarization is true.
    """