o
    i2                     @  s  d Z ddlmZ ddlZddlmZ ddlmZmZm	Z	m
Z
mZ ddlmZ ddlmZmZmZmZmZ dZdd	d
ddddddddddZdddddddddddd d!Zee Zeee  Zg d"Zd#d$d%d&d'd(d)d*d+d,d-
ZdXd1d2Zi d3d4 e D i d5d(d6d(d7d'd8d'd9d'd:d*d;d*d<d&d=d&d>d#d?d%d@d,dAd,dBd)dCd)dDd)dEd+d+d+d$dFZdYdHdIZ G dJdK dKe!eZ"G dLdM dMe!eZ#G dNdO dOeZ$G dPdQ dQeZ%G dRdS dSeZ&G dTdU dUeZ'G dVdW dWeZ(dS )Zz
Pydantic schemas for TTS API requests/responses.

Ported from veena3srv/apps/api/serializers.py with full validation parity.
No Django dependencies - pure Pydantic for Modal deployment.
    )annotationsN)Enum)AnyDictListOptionalUnion)UUID)	BaseModel
ConfigDictFieldfield_validatormodel_validatoriP                          	   
      )lipakshivardanreetNandinikrishnaanikaadarshNilayAarviAshaBittuMirar   r   r   r!   r   r   r    r   r"   r#   r$   r%   )MitraAaranyaTaruNeerDhruvaIraVedaAriar"   r#   r$   r%   )
[angry]	[curious]	[excited][giggle][laughs harder][laughs]	[screams][sighs][sings]
[whispers]r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   )
z<angry>z	<curious>z	<excited>z<giggle>z<laugh_harder>z<laugh>z<scream>z<sigh>z<sing>z	<whisper>namestrreturnc                 C  s6   | t v rt |  S | tv r| S td|  ddt )a   
    Resolve friendly speaker name to internal name.
    
    Examples:
        resolve_speaker_name("Mitra") -> "lipakshi"
        resolve_speaker_name("lipakshi") -> "lipakshi"
    
    Raises:
        ValueError: if name is not a valid speaker name
    zInvalid speaker name: z. Valid names: , )FRIENDLY_SPEAKER_MAPINDIC_SPEAKERS
ValueErrorjoinALL_SPEAKER_NAMES)r8    rA   2/home/ubuntu/veenaModal/veena3modal/api/schemas.pyresolve_speaker_name;   s   rC   c                 C  s   i | ]
\}}| d |qS )z<>)strip).0kvrA   rA   rB   
<dictcomp>Q   s    rH   laughinglaughzlaughs harderzlaugh harderlaugh_hardersighssighgigglesgiggleangryexcitedwhisperswhisper	screamingscreamscreamssinging)singsingscurioustextc                 C  s4   d}dd }t ||| } d}dd }t ||| S )u   
    Normalize emotion tags to Spark TTS bracket format.
    
    Conversions:
    - <laugh> → [laughs]
    - [laughing] → [laughs]
    - etc.
    z<([a-z_]+)>c                 S  sJ   |  d  }|tv rt| S d| d}|tv rt| S d| dS )Nr   <>[])grouplowerrD   _EMOTION_TO_TAGLEGACY_EMOTION_MAP)matchemotionold_tagrA   rA   rB   replace_angleh   s   z-normalize_emotion_tags.<locals>.replace_anglez\[([^\]]+)\]c                 S  s   |  d  }d| d}|tv r|S |tv rt| S |dr2|d d tv r2t|d d  S |d tv r>t|d  S |S )Nr   r^   r_   s)r`   ra   rD   INDIC_EMOTION_TAGSrb   endswith)rd   re   bracket_tagrA   rA   rB   replace_bracketv   s   z/normalize_emotion_tags.<locals>.replace_bracket)resub)r[   angle_patternrg   bracket_patternrm   rA   rA   rB   normalize_emotion_tags\   s   
	rr   c                   @  s$   e Zd ZdZdZdZdZdZdZdS )AudioFormatzSupported audio output formats.wavopusmp3mulawflacN)	__name__
__module____qualname____doc__WAVOPUSMP3MULAWFLACrA   rA   rA   rB   rs      s    rs   c                   @  s   e Zd ZdZdZdZdS )OutputSampleRatezOutput sample rate options.16khz48khzN)ry   rz   r{   r|   SR_16KHZSR_48KHZrA   rA   rA   rB   r      s    r   c                   @  s.  e Zd ZU dZeddeddZded< edd	d
dZded< eddddZ	ded< edddZ
ded< edd	ddZded< edddddZded< edddZded < eejd!dZd"ed#< ed$d%dZd&ed'< ed(d)d*d+dZd,ed-< ed	dd.d/dZd&ed0< ed1d)d1d2dZd,ed3< ed4d5d4d6dZd&ed7< ed8d1d*d9dZd,ed:< ed;d<dZded=< edd>dZded?< ed;d@dZdedA< eejdBdZdCedD< dZdedE< dZdedF< ed;dGZ e!de"dYdJdKZ#e!de"dZdLdMZ$e!d'e"d[dNdOZ%e&dPdQd\dSdTZ'd]dUdVZ(d^d]dWdXZ)dS )_TTSGenerateRequestzx
    Request schema for /v1/tts/generate.
    
    Mirrors validation rules from veena3srv/apps/api/serializers.py.
    .r   z*Text to synthesize (max 50,000 characters))
min_length
max_lengthdescriptionr9   r[   N2   z#Speaker name (internal or friendly))r   r   Optional[str]speakeri  zBVoice description (for voiceDesign model, NOT currently supported)r   z9UUID of pre-created voice profile (for voiceDesign model))r   Optional[UUID]voice_idz0Model to use (optional). Default: indic_speakersmodelr   iz.Random seed for reproducibility (0-2147483647))geler   Optional[int]seedFzStream audio as it's generatedboolstreamzAudio output formatrs   format>  z"Sample rate in Hz (default: 16000)intsample_rateg?g        g       @zSampling temperature (0.0-2.0)floattemperatured   zTop-k sampling (1-100)top_kg      ?zNucleus sampling (0.0-1.0)top_pi      z"Maximum BiCodec tokens to generate
max_tokensg?zRepetition penalty (1.0-2.0)repetition_penaltyTz#Apply text normalization before TTS	normalizez2Return normalized text in X-Normalized-Text headernormalize_verbosez0Enable intelligent text chunking for long inputschunkingz9Output sample rate: '16khz' or '48khz' (super-resolution)r   output_original_text_resolved_speaker)use_enum_valuesrG   r:   c                 C  s2   |  }|s
tddd |D }|rtd|S )z0Validate text field: no empty, no control chars.z'Text cannot be empty or whitespace onlyc                 S  $   g | ]}t |d k r|dvr|qS     z
	ordrE   crA   rA   rB   
<listcomp>     $ z4TTSGenerateRequest.validate_text.<locals>.<listcomp>z(Text contains invalid control characters)rD   r>   )clsrG   strippedcontrol_charsrA   rA   rB   validate_text  s   z TTSGenerateRequest.validate_textc                 C  s&   |s|S dd |D }|rt d|S )zValidate description field.c                 S  r   r   r   r   rA   rA   rB   r   #  r   z;TTSGenerateRequest.validate_description.<locals>.<listcomp>z/Description contains invalid control charactersr>   )r   rG   r   rA   rA   rB   validate_description  s   z'TTSGenerateRequest.validate_descriptionc                 C  s"   g d}||vrt d| |S )z2Validate sample rate is one of the allowed values.)@  r   i"V  i]  iD  i  z$Invalid sample rate. Valid options: r   )r   rG   valid_ratesrA   rA   rB   validate_sample_rate(  s   z'TTSGenerateRequest.validate_sample_rateafter)mode'TTSGenerateRequest'c                 C  s   t | j}t | j}| jdu}|stddt | jtvr.td| j ddt t| j| _|s8|r<td| j	t
jkrJ| jdkrJd| _| S )z
        Cross-field validation: speaker required, speaker resolution, mu-law sample rate.
        
        NOTE: We assume indic_speakers model type (voiceDesign not supported yet).
        NzJParameter 'speaker' is required for indic_speakers model. Must be one of: r;   zInvalid speaker 'z'. Valid names: zParameters 'description' and 'voice_id' are only valid for voiceDesign model. Current model type is 'indic_speakers'. Use 'speaker' instead.r   )r   r   r   r   r>   r?   r@   rC   r   r   rs   r   r   )selfhas_speakerhas_descriptionhas_voice_idrA   rA   rB   cross_field_validation1  s*   



z)TTSGenerateRequest.cross_field_validationc                 C  s    | j r| j S | jrt| jS dS )z*Return the resolved internal speaker name. )r   r   rC   )r   rA   rA   rB   get_resolved_speakerY  s   z'TTSGenerateRequest.get_resolved_speakerc                 C  s*   | j }| jr|r|| _||}t|}|S )z
        Return normalized + emotion-normalized text.
        
        Args:
            normalizer_func: Optional custom normalizer (for testing).
                             If None, returns text with only emotion normalization.
        )r[   r   r   rr   )r   normalizer_funcr[   rA   rA   rB   get_normalized_text_  s   
z&TTSGenerateRequest.get_normalized_text)rG   r9   r:   r9   )rG   r   r:   r   )rG   r   r:   r   )r:   r   )r:   r9   )N)*ry   rz   r{   r|   r   MAX_TEXT_LENGTHr[   __annotations__r   r   r   r   r   r   rs   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   model_configr   classmethodr   r   r   r   r   r   r   rA   rA   rA   rB   r      s   
 
	
'r   c                   @  s   e Zd ZU dZded< ded< ded< ded< dZd	ed
< dZded< dZded< dZded< dZ	ded< dZ
ded< dZded< dZded< dZded< dZded< dS )TTSGenerateResponsez5Response metadata for non-streaming /v1/tts/generate.r9   
request_idr   r   r   r   Nr   r   r   r   r   	audio_urltokens_prompttokens_generatedzOptional[float]audio_duration_secondsaudio_bytesttfb_msrtfcredits_consumed)ry   rz   r{   r|   r   r   r   r   r   r   r   r   r   r   r   rA   rA   rA   rB   r   v  s    
 r   c                   @  sL   e Zd ZU dZded< ded< eedZded< dZd	ed
< dZ	d	ed< dS )ErrorDetailzError detail structure.r9   codemessage)default_factoryzDict[str, Any]detailsNr   r   documentation_url)
ry   rz   r{   r|   r   r   dictr   r   r   rA   rA   rA   rB   r     s   
 r   c                   @  s   e Zd ZU dZded< dS )ErrorResponsezStandard error response.r   errorNry   rz   r{   r|   r   rA   rA   rA   rB   r     s   
 r   c                   @  sB   e Zd ZU dZded< ded< ded< ded< ded	< ded
< dS )HealthResponsez%Response for /v1/tts/health endpoint.r9   statusr   model_loadedmodel_versionr   uptime_secondsgpu_availableapp_versionNr   rA   rA   rA   rB   r     s   
 r   )r8   r9   r:   r9   )r[   r9   r:   r9   ))r|   
__future__r   rn   enumr   typingr   r   r   r   r   uuidr	   pydanticr
   r   r   r   r   r   SPEAKER_MAPr<   listkeysr=   r@   rj   rc   rC   itemsrb   rr   r9   rs   r   r   r   r   r   r   rA   rA   rA   rB   <module>   s    


-	 ]	