o
    qmi6                     @   s   d dl Z d dlZd dlZd dlm  mZ ddlmZm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ejeeef ZG dd deZ G dd deZ!dS )    N   )EventEmitterMixin	EventType)parse_obj_as)AudioOutput)FlushSignal)ErrorResponse)EventResponse)ConfigureConnection)ConfigureConnectionData)
PingSignal)SendText)SendTextDatac                          e Zd Zdejf fddZdd Zdd Z				
	
						d.dedede	de	de	de
dededede
de
ddfddZd eddfd!d"Zd/d#d$Zd/d%d&Zdefd'd(Zd)ejddfd*d+Zd)ejddfd,d-Z  ZS )0&AsyncTextToSpeechStreamingSocketClient	websocketc                      t    || _d S Nsuper__init__
_websocketselfr   	__class__ c/home/ubuntu/.local/lib/python3.10/site-packages/sarvamai/text_to_speech_streaming/socket_client.pyr         

z/AsyncTextToSpeechStreamingSocketClient.__init__c                 C  s@   | j 2 z3 d H W }t|trt|n|}tt|V  q6 d S r   r   
isinstancestrjsonloadsr   )TextToSpeechStreamingSocketClientResponser   messager   r   r   	__aiter__   s
   z0AsyncTextToSpeechStreamingSocketClient.__aiter__c              
      s   |  tjd zTz%| j2 z3 dH W }t|trt|n|}tt	|}|  tj
| q6 W n tjyH } z|  tj| W Y d}~nd}~ww W |  tjd dS W |  tjd dS |  tjd w aH  
        Start listening for messages on the websocket connection.

        Emits events in the following order:
        - EventType.OPEN when connection is established
        - EventType.MESSAGE for each message received
        - EventType.ERROR if an error occurs
        - EventType.CLOSE when connection is closed
        N_emitr   OPENr   r    r!   r"   r#   r   r$   MESSAGE
websocketsWebSocketExceptionERRORCLOSEr   raw_messageparsedexcr   r   r   start_listening!   s&   

"z6AsyncTextToSpeechStreamingSocketClient.start_listeninganushka              ?"V  Fmp3128k2      target_language_codespeakerpitchpaceloudnessspeech_sample_rateenable_preprocessingoutput_audio_codecoutput_audio_bitratemin_buffer_sizemax_chunk_lengthreturnNc                    s>   t |||||||||	|
|d}t|d}| |I dH  dS a  
        Configuration message required as the first message after establishing the WebSocket connection.
        This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
        by sending a new config message. When a config update is sent, any text currently in the buffer
        will be automatically flushed and processed before applying the new configuration.

        :param target_language_code: The language of the text is BCP-47 format
        :param speaker: The speaker voice to be used for the output audio. Default: Anushka.
            Model Compatibility (bulbul:v2): Female: Anushka, Manisha, Vidya, Arya;
            Male: Abhilash, Karun, Hitesh
        :param pitch: Controls the pitch of the audio. Lower values result in a deeper voice,
            while higher values make it sharper. The suitable range is between -0.75
            and 0.75. Default is 0.0.
        :param pace: Controls the speed of the audio. Lower values result in slower speech,
            while higher values make it faster. The suitable range is between 0.5
            and 2.0. Default is 1.0.
        :param loudness: Controls the loudness of the audio. Lower values result in quieter audio,
            while higher values make it louder. The suitable range is between 0.3
            and 3.0. Default is 1.0.
        :param speech_sample_rate: Specifies the sample rate of the output audio. Supported values are
            8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
        :param enable_preprocessing: Controls whether normalization of English words and numeric entities
            (e.g., numbers, dates) is performed. Set to true for better handling
            of mixed-language text. Default is false.
        :param output_audio_codec: Audio codec (currently supports MP3 only, optimized for real-time playback)
        :param output_audio_bitrate: Audio bitrate (choose from 5 supported bitrate options)
        :param min_buffer_size: Minimum character length that triggers buffer flushing for TTS model processing
        :param max_chunk_length: Maximum length for sentence splitting (adjust based on content length)
        )r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   dataNr   r
   _send_modelr   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rL   r&   r   r   r   	configure:   s    +
z0AsyncTextToSpeechStreamingSocketClient.configuretextc                    s*   t |d}t|d}| |I dH  dS aC  
        Send text to be converted to speech. Text length should be 1-2500 characters.
        Recommended: <500 characters for optimal streaming performance.
        Real-time endpoints perform better with longer character counts.

        :param text: Text to be synthesized (1-2500 characters, recommended <500)
        )rQ   rK   Nr   r   rN   r   rQ   rL   r&   r   r   r   convertu   s   

z.AsyncTextToSpeechStreamingSocketClient.convertc                       t  }| |I dH  dS z
        Forces the text buffer to process immediately, regardless of the min_buffer_size threshold.
        Use this when you need to process remaining text that hasn't reached the minimum buffer size.
        Nr   rN   r%   r   r   r   flush      z,AsyncTextToSpeechStreamingSocketClient.flushc                    rV   z
        Send ping signal to keep the WebSocket connection alive. The connection automatically
        closes after one minute of inactivity.
        Nr   rN   r%   r   r   r   ping   rZ   z+AsyncTextToSpeechStreamingSocketClient.pingc                    s4   | j  I dH }t|trt|n|}tt|S )B
        Receive a message from the websocket connection.
        Nr   recvr    r!   r"   r#   r   r$   r   rL   r   r   r   r`      s   
z+AsyncTextToSpeechStreamingSocketClient.recvrL   c                    s,   t |trt|}| j|I dH  dS z=
        Send a message to the websocket connection.
        Nr    dictr"   dumpsr   sendra   r   r   r   _send   s   

z,AsyncTextToSpeechStreamingSocketClient._sendc                    s   |  | I dH  dS zD
        Send a Pydantic model to the websocket connection.
        Nrg   rd   ra   r   r   r   rN      s   z2AsyncTextToSpeechStreamingSocketClient._send_model
r6   r7   r8   r8   r9   Fr:   r;   r<   r=   rI   N)__name__
__module____qualname__r-   WebSocketClientProtocolr   r'   r5   r!   floatintboolrP   rU   rY   r]   r$   r`   typingAnyrg   rN   __classcell__r   r   r   r   r      Z    	

;

r   c                       r   )0!TextToSpeechStreamingSocketClientr   c                   r   r   r   r   r   r   r   r      r   z*TextToSpeechStreamingSocketClient.__init__c                 c   s6    | j D ]}t|trt|n|}tt|V  qd S r   r   r%   r   r   r   __iter__   s
   
z*TextToSpeechStreamingSocketClient.__iter__c              
   C   s   |  tjd zOz | jD ]}t|trt|n|}tt	|}|  tj
| qW n tjyB } z|  tj| W Y d}~nd}~ww W |  tjd dS W |  tjd dS |  tjd w r(   r)   r1   r   r   r   r5      s$   


"z1TextToSpeechStreamingSocketClient.start_listeningr6   r7   r8   r9   Fr:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   Nc                 C   s6   t |||||||||	|
|d}t|d}| | dS rJ   rM   rO   r   r   r   rP      s   +
z+TextToSpeechStreamingSocketClient.configurerQ   c                 C   s"   t |d}t|d}| | dS rR   rS   rT   r   r   r   rU     s   

z)TextToSpeechStreamingSocketClient.convertc                 C      t  }| | dS rW   rX   r%   r   r   r   rY        z'TextToSpeechStreamingSocketClient.flushc                 C   ry   r[   r\   r%   r   r   r   r]     rz   z&TextToSpeechStreamingSocketClient.pingc                 C   s,   | j  }t|trt|n|}tt|S )r^   r_   ra   r   r   r   r`   "  s   

z&TextToSpeechStreamingSocketClient.recvrL   c                 C   s$   t |tr
t|}| j| dS rb   rc   ra   r   r   r   rg   *  s   

z'TextToSpeechStreamingSocketClient._sendc                 C   s   |  |  dS rh   ri   ra   r   r   r   rN   2  s   z-TextToSpeechStreamingSocketClient._send_modelrj   rk   )rl   rm   rn   websockets_sync_connection
Connectionr   rx   r5   r!   rp   rq   rr   rP   rU   rY   r]   r$   r`   rs   rt   rg   rN   ru   r   r   r   r   rw      rv   rw   )"r"   rs   r-   websockets.sync.connectionsync
connectionr{   core.eventsr   r   core.pydantic_utilitiesr   types.audio_outputr   types.flush_signalr   types.error_responser   types.event_responser	   types.configure_connectionr
   types.configure_connection_datar   types.ping_signalr   types.send_textr   types.send_text_datar   Unionr$   r   rw   r   r   r   r   <module>   s&    