o
    iB                  
   @   sN  d Z ddlmZmZ ddlmZmZmZmZm	Z	m
Z
 ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZ ddlmZ ddlmZmZmZ dd	lmZ dd
l m!Z! ddl"m#Z# zddl$Z$ddl%m&Z' ddl(m)Z) W n  e*y Z+ ze,de+  e,d e-de+ dZ+[+ww ed Z.eG dd deZ/G dd deZ0dS )zFish Audio text-to-speech service implementation.

This module provides integration with Fish Audio's real-time TTS WebSocket API
for streaming text-to-speech synthesis with customizable voice parameters.
    )	dataclassfield)AnyAsyncGeneratorLiteralMappingOptionalSelf)logger)	BaseModel)CancelFrameEndFrame
ErrorFrameFrameInterruptionFrame
StartFrameTTSAudioRawFrameTTSStoppedFrame)FrameDirection)	NOT_GIVENTTSSettings	_NotGiven)InterruptibleTTSService)Language)
traced_ttsN)connect)StatezException: zGIn order to use Fish Audio, you need to `pip install pipecat-ai[fish]`.zMissing module: )opusmp3pcmwavc                       s   e Zd ZU dZedd dZedB eB ed< edd dZ	e
dB eB ed< ed	d dZedB eB ed
< edd dZedB eB ed< edd dZedB eB ed< edd dZedB eB ed< edeeef def fddZ  ZS )FishAudioTTSSettingsa  Settings for FishAudioTTSService.

    Parameters:
        latency: Latency mode ("normal" or "balanced"). Defaults to "balanced".
        normalize: Whether to normalize audio output. Defaults to True.
        temperature: Controls randomness in speech generation (0.0-1.0).
        top_p: Controls diversity via nucleus sampling (0.0-1.0).
        prosody_speed: Speech speed multiplier (0.5-2.0). Defaults to 1.0.
        prosody_volume: Volume adjustment in dB (-20 to 20). Defaults to 0.
    c                   C      t S Nr    r%   r%   M/home/ubuntu/.local/lib/python3.10/site-packages/pipecat/services/fish/tts.py<lambda>=       zFishAudioTTSSettings.<lambda>)default_factoryNlatencyc                   C   r"   r#   r$   r%   r%   r%   r&   r'   >   r(   	normalizec                   C   r"   r#   r$   r%   r%   r%   r&   r'   ?   r(   temperaturec                   C   r"   r#   r$   r%   r%   r%   r&   r'   @   r(   top_pc                   C   r"   r#   r$   r%   r%   r%   r&   r'   A   r(   prosody_speedc                   C   r"   r#   r$   r%   r%   r%   r&   r'   B   r(   prosody_volumesettingsreturnc                    sN   t |}|dd}t|t r!|d|d |d|d t |S )zNConstruct settings from a plain dict, destructuring legacy nested ``prosody``.prosodyNr.   speedr/   volume)dictpop
isinstance
setdefaultgetsuperfrom_mapping)clsr0   flatnested	__class__r%   r&   r;   D   s   
z!FishAudioTTSSettings.from_mapping)__name__
__module____qualname____doc__r   r*   strr   __annotations__r+   boolr,   floatr-   r.   r/   intclassmethodr   r   r	   r;   __classcell__r%   r%   r?   r&   r!   0   s   
 (r!   c                       sp  e Zd ZU dZeZeed< G dd deZddddddddde	d	e
e	 d
e
e	 de
e	 dede
e de
e de
e f fddZdefddZdedee	ef f fddZdef fddZdef fddZdef fddZ fdd Z fd!d"Zd#d$ Zd%d& Zd4d'e
e	 fd(d)Zd*d+ Zde d,e!f fd-d.Z"d/d0 Z#e$d1e	d'e	de%e&df fd2d3Z'  Z(S )5FishAudioTTSServicea  Fish Audio text-to-speech service with WebSocket streaming.

    Provides real-time text-to-speech synthesis using Fish Audio's WebSocket API.
    Supports various audio formats, customizable prosody controls, and streaming
    audio generation with interruption handling.
    	_settingsc                   @   sd   e Zd ZU dZejZee ed< dZ	ee
 ed< dZee ed< dZee ed< d	Zee ed
< dS )zFishAudioTTSService.InputParamsa-  Input parameters for Fish Audio TTS configuration.

        .. deprecated:: 0.0.105
            Use ``settings=FishAudioTTSService.Settings(...)`` instead.

        Parameters:
            language: Language for synthesis. Defaults to English.
            latency: Latency mode ("normal" or "balanced"). Defaults to "normal".
            normalize: Whether to normalize audio output. Defaults to True.
            prosody_speed: Speech speed multiplier (0.5-2.0). Defaults to 1.0.
            prosody_volume: Volume adjustment in dB. Defaults to 0.
        languagenormalr*   Tr+         ?r.   r   r/   N)rA   rB   rC   rD   r   ENrN   r   rF   r*   rE   r+   rG   r.   rH   r/   rI   r%   r%   r%   r&   InputParamsZ   s   
 rR   Nr   )reference_idmodelmodel_idoutput_formatsample_rateparamsr0   api_keyrS   rT   rU   rV   rW   rX   r0   c                   sh  |r|rt d|r1ddl}
|
  |
d |
jdtdd W d   n1 s*w   Y  |}| jdddd	d
ddddd	}|durL| dd ||_|durY| dd ||_	|dur| d |s|j
durm|j
|_
|jdurv|j|_|jdur|j|_|jdur|j|_|dur|| t jdd
d
d
||d|	 || _d| _d| _d| _d| _|| _dS )aj  Initialize the Fish Audio TTS service.

        Args:
            api_key: Fish Audio API key for authentication.
            reference_id: Reference ID of the voice model to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=FishAudioTTSService.Settings(voice=...)`` instead.

            model: Deprecated. Reference ID of the voice model to use for synthesis.

                .. deprecated:: 0.0.74
                    The ``model`` parameter is deprecated and will be removed in version 0.1.0.
                    Use ``reference_id`` instead to specify the voice model.

            model_id: Specify which Fish Audio TTS model to use (e.g. "s1").

                .. deprecated:: 0.0.105
                    Use ``settings=FishAudioTTSService.Settings(model=...)`` instead.

            output_format: Audio output format. Defaults to "pcm".
            sample_rate: Audio sample rate. If None, uses default.
            params: Additional input parameters for voice customization.

                .. deprecated:: 0.0.105
                    Use ``settings=FishAudioTTSService.Settings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to the parent service.
        zHCannot specify both 'model' and 'reference_id'. Use 'reference_id' only.r   NalwayszdParameter 'model' is deprecated and will be removed in a future version. Use 'reference_id' instead.   )
stacklevelzs2-probalancedTrP   )	rT   voicerN   r*   r+   r,   r-   r.   r/   rS   r^   rU   rT   rX   )push_stop_framespush_start_framepause_frame_processingrW   r0   z wss://api.fish.audio/v1/tts/liver%   )
ValueErrorwarningscatch_warningssimplefilterwarnDeprecationWarningSettings"_warn_init_param_moved_to_settingsr^   rT   r*   r+   r.   r/   apply_updater:   __init___api_key	_base_url
_websocket_receive_task_fish_sample_rate_output_format)selfrY   rS   rT   rU   rV   rW   rX   r0   kwargsrc   default_settingsr?   r%   r&   rk   n   sx   -







	
zFishAudioTTSService.__init__r1   c                 C   s   dS )zCheck if this service can generate processing metrics.

        Returns:
            True, as Fish Audio service supports metrics generation.
        Tr%   rr   r%   r%   r&   can_generate_metrics   s   z(FishAudioTTSService.can_generate_metricsdeltac                    s8   t  |I dH }|r|  I dH  |  I dH  |S )a=  Apply a settings delta and reconnect if needed.

        Any change to voice or model triggers a WebSocket reconnect.

        Args:
            delta: A :class:`TTSSettings` (or ``FishAudioTTSService.Settings``) delta.

        Returns:
            Dict mapping changed field names to their previous values.
        N)r:   _update_settings_disconnect_connect)rr   rw   changedr?   r%   r&   rx      s   z$FishAudioTTSService._update_settingsframec                    s.   t  |I dH  | j| _|  I dH  dS )zStart the Fish Audio TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        N)r:   startrW   rp   rz   rr   r|   r?   r%   r&   r}      s   zFishAudioTTSService.startc                    &   t  |I dH  |  I dH  dS )zZStop the Fish Audio TTS service.

        Args:
            frame: The end frame.
        N)r:   stopry   r~   r?   r%   r&   r   
     zFishAudioTTSService.stopc                    r   )z_Cancel the Fish Audio TTS service.

        Args:
            frame: The cancel frame.
        N)r:   cancelry   r~   r?   r%   r&   r     r   zFishAudioTTSService.cancelc                    sL   t   I d H  |  I d H  | jr"| js$| | | j| _d S d S d S r#   )r:   rz   _connect_websocketrn   ro   create_task_receive_task_handler_report_errorru   r?   r%   r&   rz     s   zFishAudioTTSService._connectc                    sB   t   I d H  | jr| | jI d H  d | _|  I d H  d S r#   )r:   ry   ro   cancel_task_disconnect_websocketru   r?   r%   r&   ry   $  s   zFishAudioTTSService._disconnectc              
      s^  z| j r| j jtju rW d S td dd| j i}| jj|d< t	| j
|dI d H | _ | j| jj| j| jj| jj| jjd| jjd}| jjd urQ| jj|d< | jjd ur]| jj|d	< d
ddi|d}| j t|I d H  td | dI d H  W d S  ty } z!| jd| |dI d H  d | _ | d| I d H  W Y d }~d S d }~ww )NzConnecting to Fish AudioAuthorizationzBearer rT   )additional_headers)r3   r4   )rW   r*   formatr+   r2   rS   r,   r-   r}   text )eventrequestz Sent start message to Fish Audioon_connectedUnknown error occurred: 	error_msg	exceptionon_connection_error)rn   stater   OPENr
   debugrl   rM   rT   websocket_connectrm   rp   r*   rq   r+   r.   r/   r^   r,   r-   send	ormsgpackpackb_call_event_handler	Exception
push_error)rr   headersrequest_settingsstart_messageer%   r%   r&   r   -  s>   

"z&FishAudioTTSService._connect_websocketc              
      s   zez)|   I d H  | jr*td ddi}| jt|I d H  | j I d H  W n tyJ } z| j	d| |dI d H  W Y d }~nd }~ww W d | _| 
dI d H  d S W d | _| 
dI d H  d S d | _| 
dI d H  w )NzDisconnecting from Fish Audior   r   r   r   on_disconnected)stop_all_metricsrn   r
   r   r   r   r   closer   r   r   )rr   stop_messager   r%   r%   r&   r   Q  s*   
&z)FishAudioTTSService._disconnect_websocket
context_idc                    sP   t |  d | jr| jjtju rdS ddi}|  t	|I dH  dS )z@Flush any buffered audio by sending a flush event to Fish Audio.z: Flushing audio buffersNr   flush)
r
   tracern   r   r   CLOSED_get_websocketr   r   r   )rr   r   flush_messager%   r%   r&   flush_audio`  s   zFishAudioTTSService.flush_audioc                 C   s   | j r| j S td)NzWebsocket not connected)rn   r   ru   r%   r%   r&   r   h  s   z"FishAudioTTSService._get_websocket	directionc                    s(   t  ||I d H  |  I d H  d S r#   )r:   _handle_interruptionr   )rr   r|   r   r?   r%   r&   r   m  s   z(FishAudioTTSService._handle_interruptionc                    s  |   2 z3 d H W }z^t|trht|}t|trh|d}|dkrH|d}|rGt|dkrGt|| j	d}| 
|I d H  |  I d H  n |dkrh|dd}|dkr`| jd	d
I d H  ntd|  W q ty } z| jd| |dI d H  W Y d }~qd }~ww 6 d S )Nr   audioi      finishreasonunknownerrorz(Fish Audio server error during synthesis)r   zFish Audio session finished: r   r   )r   r7   bytesr   unpackbr5   r9   lenr   rW   
push_framestop_ttfb_metricsr   r
   r   r   )rr   messagemsgr   
audio_datar|   r   r   r%   r%   r&   _receive_messagesq  s6   




&z%FishAudioTTSService._receive_messagesr   c              
   C  s:  t |  d| d zt| jr| jjtju r|  I dH  d|d}z(|  t	
|I dH  | |I dH  ddi}|  t	
|I dH  W n/ tyz } z#td| d	V  t|d
V  |  I dH  |  I dH  W Y d}~nd}~ww dV  W dS  ty } ztd| d	V  W Y d}~dS d}~ww )a+  Generate speech from text using Fish Audio's streaming API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames and control frames for the synthesized speech.
        z: Generating Fish TTS: []Nr   )r   r   r   r   r   )r   )r   )r
   r   rn   r   r   r   rz   r   r   r   r   start_tts_usage_metricsr   r   r   ry   )rr   r   r   text_messager   r   r%   r%   r&   run_tts  s2    zFishAudioTTSService.run_ttsr#   ))rA   rB   rC   rD   r!   rh   rF   r   rR   rE   r   FishAudioOutputFormatrI   rk   rG   rv   r   r5   r   rx   r   r}   r   r   r   r   rz   ry   r   r   r   r   r   r   r   r   r   r   r   r   rK   r%   r%   r?   r&   rL   O   sX   
 	
w
			$(rL   )1rD   dataclassesr   r   typingr   r   r   r   r   r	   logurur
   pydanticr   pipecat.frames.framesr   r   r   r   r   r   r   r   "pipecat.processors.frame_processorr   pipecat.services.settingsr   r   r   pipecat.services.tts_servicer   pipecat.transcriptions.languager   (pipecat.utils.tracing.service_decoratorsr   r   websockets.asyncio.clientr   r   websockets.protocolr   ModuleNotFoundErrorr   r   r   r   r!   rL   r%   r%   r%   r&   <module>   s2    (

