o
    i#                     @   s   d Z ddlmZ ddlmZmZmZmZ ddlZddl	m
Z
 ddlmZ ddlmZmZmZmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ dedee fddZeG dd deZG dd deZdS )zXTTS text-to-speech service implementation.

This module provides integration with Coqui XTTS streaming server for
text-to-speech synthesis using local Docker deployment.
    )	dataclass)AnyAsyncGeneratorDictOptionalN)logger)create_stream_resampler)
ErrorFrameFrame
StartFrameTTSAudioRawFrame)TTSSettings)
TTSService)Languageresolve_language)
traced_ttslanguagereturnc                 C   s   i t jdt jdt jdt jdt jdt jdt jdt jdt j	d	t j
d
t jdt jdt jdt jdt jdt jd}t| |ddS )zConvert a Language enum to XTTS language code.

    Args:
        language: The Language enum value to convert.

    Returns:
        The corresponding XTTS language code, or None if not supported.
    csdeenesfrhihuitjakonlplptrutrzzh-cnT)use_base_code)r   CSDEENESFRHIHUITJAKONLPLPTRUTRZHr   )r   LANGUAGE_MAP r5   M/home/ubuntu/.local/lib/python3.10/site-packages/pipecat/services/xtts/tts.pylanguage_to_xtts_language'   sD   		
r7   c                   @   s   e Zd ZdZdS )XTTSTTSSettingszSettings for XTTSService.N)__name__
__module____qualname____doc__r5   r5   r5   r6   r8   G   s    r8   c                       s   e Zd ZU dZeZeed< dejdddde	e
 de
dejded	e	e d
e	e f fddZdefddZdede	e
 fddZdef fddZede
de
deedf fddZ  ZS )XTTSServicezCoqui XTTS text-to-speech service.

    Provides text-to-speech synthesis using a locally running Coqui XTTS
    streaming server. Supports multiple languages and voice cloning through
    studio speakers configuration.
    	_settingsN)voice_idr   sample_ratesettingsr?   base_urlaiohttp_sessionr   r@   rA   c          	         s   | j ddtjd}|dur| dd ||_|dur#| dd ||_|dur,|| t jd|dd|d| || _	d| _
|| _t | _dS )	aY  Initialize the XTTS service.

        Args:
            voice_id: ID of the voice/speaker to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=XTTSService.Settings(voice=...)`` instead.

            base_url: Base URL of the XTTS streaming server.
            aiohttp_session: HTTP session for making requests to the server.
            language: Language for synthesis. Defaults to English.

                .. deprecated:: 0.0.106
                    Use ``settings=XTTSService.Settings(language=...)`` instead.

            sample_rate: Audio sample rate. If None, uses default.
            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to parent TTSService.
        N)modelvoicer   r?   rE   r   T)r@   push_start_framepush_stop_framesrA   r5   )Settingsr   r&   "_warn_init_param_moved_to_settingsrE   r   apply_updatesuper__init__	_base_url_studio_speakers_aiohttp_sessionr   
_resampler)	selfr?   rB   rC   r   r@   rA   kwargsdefault_settings	__class__r5   r6   rL   Y   s2    
	zXTTSService.__init__r   c                 C   s   dS )zCheck if this service can generate processing metrics.

        Returns:
            True, as XTTS service supports metrics generation.
        Tr5   )rQ   r5   r5   r6   can_generate_metrics   s   z XTTSService.can_generate_metricsc                 C   s   t |S )zConvert a Language enum to XTTS service language format.

        Args:
            language: The language to convert.

        Returns:
            The XTTS-specific language code, or None if not supported.
        )r7   )rQ   r   r5   r5   r6   language_to_service_language   s   	z(XTTSService.language_to_service_languageframec              	      s   t  |I dH  | jrdS | j| jd 4 I dH =}|jdkrF| I dH }| jd|j d| ddI dH  	 W d  I dH  dS |	 I dH | _W d  I dH  dS 1 I dH s_w   Y  dS )zStart the XTTS service and load studio speakers.

        Args:
            frame: The start frame containing initialization parameters.
        Nz/studio_speakers   z'Error getting studio speakers (status: 	, error: ))	error_msg)
rK   startrN   rO   getrM   statustext
push_errorjson)rQ   rX   rr`   rT   r5   r6   r]      s   
.zXTTSService.startr`   
context_idc              	   C  s&  t |  d| d | jst |  d dS | j| jj }| jd }|dddd| jj|d	 |d
 ddd}| j	j
||d4 I dH }|jdkro| I dH }td|j d| ddV  	 W d  I dH  dS | |I dH  | j}t }|j|2 zJ3 dH W }	t|	dkr|  I dH  ||	 t|dkr|dd }
|dd }| jt|
d| jI dH }t|| jd|d}|V  t|dksq6 t|dkr| jt|d| jI dH }t|| jd|d}|V  W d  I dH  dS W d  I dH  dS 1 I dH sw   Y  dS )a  Generate speech from text using XTTS streaming server.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        z: Generating TTS []z no studio speakers availableNz/tts_stream. *speaker_embeddinggpt_cond_latentF   )r`   r   ri   rj   add_wav_headerstream_chunk_size)rb   rY   zError getting audio (status: rZ   r[   )errorr   i  i]     )rd   )r   debugrN   rn   r>   rE   rM   replacer   rO   postr_   r`   r	   start_tts_usage_metrics
chunk_size	bytearraycontentiter_chunkedlenstop_ttfb_metricsextendrP   resamplebytesr@   r   )rQ   r`   rd   
embeddingsurlpayloadrc   
CHUNK_SIZEbufferchunkprocess_dataresampled_audiorX   r5   r5   r6   run_tts   sf   
	





%0zXTTSService.run_tts)r9   r:   r;   r<   r8   rH   __annotations__r   r&   r   straiohttpClientSessionintrL   boolrV   rW   r   r]   r   r   r
   r   __classcell__r5   r5   rT   r6   r=   N   s4   
 D(r=   )r<   dataclassesr   typingr   r   r   r   r   logurur   pipecat.audio.utilsr   pipecat.frames.framesr	   r
   r   r   pipecat.services.settingsr   pipecat.services.tts_servicer   pipecat.transcriptions.languager   r   (pipecat.utils.tracing.service_decoratorsr   r   r7   r8   r=   r5   r5   r5   r6   <module>   s   
 