o
    iK                     @   s   d Z ddlZddlmZmZ ddlmZmZmZm	Z	m
Z
 ddlZddlmZ ddlmZ ddlmZmZmZmZ ddlmZmZmZ dd	lmZ dd
lmZmZ ddlmZ dede	e  fddZ!eG dd deZ"G dd deZ#dS )zMiniMax text-to-speech service implementation.

This module provides integration with MiniMax's T2A (Text-to-Audio) API
for streaming text-to-speech synthesis.
    N)	dataclassfield)AnyAsyncGeneratorMappingOptionalSelf)logger)	BaseModel)
ErrorFrameFrame
StartFrameTTSAudioRawFrame)	NOT_GIVENTTSSettings	_NotGiven)
TTSService)Languageresolve_language)
traced_ttslanguagereturnc                 C   sN  i t jdt jdt jdt jdt jdt jdt jdt jdt j	d	t j
d
t jdt jdt jdt jdt jdt jdt jdi t jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt j d t j!d!t j"d"t j#d#t j$d$t j%d%t j&d&t j'd't j(d(i}t)| |d)d*S )+zConvert a Language enum to MiniMax language format.

    Args:
        language: The Language enum value to convert.

    Returns:
        The corresponding MiniMax language name, or None if not supported.
    	AfrikaansArabic	BulgarianCatalanCzechDanishGermanGreekEnglishSpanishPersianFinnishFilipinoFrenchHebrewHindiCroatian	Hungarian
IndonesianItalianJapaneseKoreanMalay	NorwegianNynorskDutchPolish
PortugueseRomanianRussianSlovak	SlovenianSwedishTamilThaiTurkish	Ukrainian
VietnamesezChinese,YueChineseF)use_base_code)*r   AFARBGCACSDADEELENESFAFIFILFRHEHIHRHUIDITJAKOMSNBNNNLPLPTRORUSKSLSVTATHTRUKVIYUEZHr   )r   LANGUAGE_MAP ri   P/home/ubuntu/.local/lib/python3.10/site-packages/pipecat/services/minimax/tts.pylanguage_to_minimax_language!   s   		
 !"#+rk   c                       s
  e Zd ZU dZedd dZedB eB ed< edd dZ	edB eB ed< ed	d dZ
edB eB ed
< edd dZedB eB ed< edd dZedB eB ed< edd dZedB eB ed< edd dZedB eB ed< edeeef def fddZ  ZS )MiniMaxTTSSettingsa  Settings for MiniMaxHttpTTSService.

    Parameters:
        speed: Speech speed (range: 0.5 to 2.0).
        volume: Speech volume (range: 0 to 10).
        pitch: Pitch adjustment (range: -12 to 12).
        emotion: Emotional tone (options: "happy", "sad", "angry", "fearful",
            "disgusted", "surprised", "calm", "fluent").
        text_normalization: Enable text normalization (Chinese/English).
        latex_read: Enable LaTeX formula reading.
        language_boost: Language boost string for multilingual support.
    c                   C      t S Nr   ri   ri   ri   rj   <lambda>g       zMiniMaxTTSSettings.<lambda>)default_factoryNspeedc                   C   rm   rn   ro   ri   ri   ri   rj   rp   h   rq   volumec                   C   rm   rn   ro   ri   ri   ri   rj   rp   i   rq   pitchc                   C   rm   rn   ro   ri   ri   ri   rj   rp   j   rq   emotionc                   C   rm   rn   ro   ri   ri   ri   rj   rp   k   rq   text_normalizationc                   C   rm   rn   ro   ri   ri   ri   rj   rp   l   rq   
latex_readc                   C   rm   rn   ro   ri   ri   ri   rj   rp   m   rq   language_boostsettingsr   c                    s   t |}|dd}t|t rE|d|d |d|d |d|d |d|d |d|d |d	|d	 t |S )
u   Construct settings from a plain dict, destructuring legacy nested dicts.

        Handles ``voice_setting`` (with ``vol`` → ``volume`` rename) and
        ``audio_setting`` (with prefixed field mapping).
        voice_settingNrs   rt   volru   rv   rw   rx   )dictpop
isinstance
setdefaultgetsuperfrom_mapping)clsrz   flatvoice	__class__ri   rj   r   o   s   
zMiniMaxTTSSettings.from_mapping)__name__
__module____qualname____doc__r   rs   floatr   __annotations__rt   ru   intrv   strrw   boolrx   ry   classmethodr   r   r   r   __classcell__ri   ri   r   rj   rl   X   s   
 (rl   c                       s   e Zd ZU dZeZeed< G dd deZddddddddd	e	d
e	de	de
e	 de
e	 dejde
e dede
e de
e f fddZdefddZdede
e	 fddZdef fddZede	de	deedf fd d!Z  ZS )"MiniMaxHttpTTSServicea  Text-to-speech service using MiniMax's T2A (Text-to-Audio) API.

    Provides streaming text-to-speech synthesis using MiniMax's HTTP API
    with support for various voice settings, emotions, and audio configurations.
    Supports real-time audio streaming with configurable voice parameters.

    Platform documentation:
    https://www.minimax.io/platform/document/T2A%20V2?key=66719005a427f0c8a5701643
    	_settingsc                   @   s   e Zd ZU dZejZee ed< dZ	ee
 ed< dZee
 ed< dZee ed< dZee ed	< dZee ed
< dZee ed< dZee ed< dZee ed< dS )z!MiniMaxHttpTTSService.InputParamsa  Configuration parameters for MiniMax TTS.

        .. deprecated:: 0.0.105
            Use ``MiniMaxHttpTTSService.Settings`` directly via the ``settings`` parameter instead.

        Parameters:
            language: Language for TTS generation. Supports 40 languages.
                Note: Filipino, Tamil, and Persian require speech-2.6-* models.
            speed: Speech speed (range: 0.5 to 2.0).
            volume: Speech volume (range: 0 to 10).
            pitch: Pitch adjustment (range: -12 to 12).
            emotion: Emotional tone (options: "happy", "sad", "angry", "fearful",
                "disgusted", "surprised", "calm", "fluent").
            english_normalization: Deprecated; use `text_normalization` instead

                .. deprecated:: 0.0.96
                    The `english_normalization` parameter is deprecated and will be removed in a future version.
                    Use the `text_normalization` parameter instead.

            text_normalization: Enable text normalization (Chinese/English).
            latex_read: Enable LaTeX formula reading.
            exclude_aggregated_audio: Whether to exclude aggregated audio in final chunk.
        r         ?rs   rt   r   ru   Nrv   english_normalizationrw   rx   exclude_aggregated_audio)r   r   r   r   r   rH   r   r   r   rs   r   rt   ru   r   rv   r   r   r   rw   rx   r   ri   ri   ri   rj   InputParams   s   
 r   z https://api.minimax.io/v1/t2a_v2NT)base_urlmodelvoice_idsample_ratestreamparamsrz   api_keyr   group_idr   r   aiohttp_sessionr   r   r   rz   c       
            s  | j ddddddddddd
}|dur| dd ||_|dur)| dd	 ||_|	dur| d
 |
s|	j|_|	j|_|	j|_|	j|_|	jrR| 	|	j}|rR||_
|	jrog d}|	j|v rc|	j|_ntd|	j d|  |	jdurddl}|  |d |dt W d   n1 sw   Y  |	j|_|	jdur|	j|_|
dur||
 t jd|dd|d| || _|| _|| _| d| | _|| _d| _d| _d| _d| _ dS )a  Initialize the MiniMax TTS service.

        Args:
            api_key: MiniMax API key for authentication.
            base_url: API base URL, defaults to MiniMax's T2A endpoint.
                Global: https://api.minimax.io/v1/t2a_v2
                Mainland China: https://api.minimaxi.chat/v1/t2a_v2
                Western United States: https://api-uw.minimax.io/v1/t2a_v2
            group_id: MiniMax Group ID to identify project.
            model: TTS model name. Defaults to "speech-02-turbo". Options include:
                "speech-2.6-hd", "speech-2.6-turbo" (latest, supports Filipino/Tamil/Persian),
                "speech-02-hd", "speech-02-turbo",
                "speech-01-hd", "speech-01-turbo".

                .. deprecated:: 0.0.105
                    Use ``settings=MiniMaxHttpTTSService.Settings(model=...)`` instead.

            voice_id: Voice identifier. Defaults to "Calm_Woman".

                .. deprecated:: 0.0.105
                    Use ``settings=MiniMaxHttpTTSService.Settings(voice=...)`` instead.

            aiohttp_session: aiohttp.ClientSession for API communication.
            sample_rate: Output audio sample rate in Hz. If None, uses pipeline default.
            stream: Whether to use streaming mode. Defaults to True.
            params: Additional configuration parameters.

                .. deprecated:: 0.0.105
                    Use ``settings=MiniMaxHttpTTSService.Settings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to parent TTSService.
        zspeech-02-turbo
Calm_WomanNr   r   )
r   r   r   rs   rt   ru   ry   rv   rw   rx   r   r   r   r   )happysadangryfearful	disgusted	surprisedneutralfluentzUnsupported emotion: z. Supported emotions: alwayszzParameter `english_normalization` is deprecated and will be removed in a future version. Use `text_normalization` instead.T)r   push_start_framepush_stop_framesrz   z	?GroupId=i  pcm   ri   )!Settings"_warn_init_param_moved_to_settingsr   r   rs   rt   ru   rx   r   language_to_service_languagery   rv   r	   warningr   warningscatch_warningssimplefilterwarnDeprecationWarningrw   apply_updater   __init___api_key	_group_id_stream	_base_url_session_audio_bitrate_audio_format_audio_channel_audio_sample_rate)selfr   r   r   r   r   r   r   r   r   rz   kwargsdefault_settingsservice_langsupported_emotionsr   r   ri   rj   r      s   2









zMiniMaxHttpTTSService.__init__r   c                 C   s   dS )zCheck if this service can generate processing metrics.

        Returns:
            True, as MiniMax service supports metrics generation.
        Tri   )r   ri   ri   rj   can_generate_metricsD  s   z*MiniMaxHttpTTSService.can_generate_metricsr   c                 C   s   t |S )zConvert a Language enum to MiniMax service language format.

        Args:
            language: The language to convert.

        Returns:
            The MiniMax-specific language name, or None if not supported.
        )rk   )r   r   ri   ri   rj   r   L  s   	z2MiniMaxHttpTTSService.language_to_service_languageframec                    s2   t  |I dH  | j| _td| j  dS )zStart the MiniMax TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        Nz*MiniMax TTS initialized with sample_rate: )r   startr   r   r	   debug)r   r   r   ri   rj   r   W  s   zMiniMaxHttpTTSService.starttext
context_idc                 C  s  t |  d| d ddd| j d}| jj| jj| jj| jjd}| jjdur1| jj|d	< | jj	dur=| jj	|d
< | jj
durI| jj
|d< | j| j| j| jd}| j||| jj|d}| jjdurk| jj|d< z}zJ| jj| j||d4 I dH -}|jdkrd|j }t|dV  	 W d  I dH  W W |  I dH  dS | |I dH  t }	| j}
|j|
2 z3 dH W }|sq|	| d|	v r|	d}|	d|d }|dkr|dkr|	|d }	n|	|| }|	|d }	zt |dd !d}d|v rt d W q|"di }|sW q|"d}|s"W qt#dt$||
d D ]I}||||
d   }|s=q,zt%&|}|rW|  I dH  t'|| j(d|dV  W q, t)yu } zt *d|  W Y d}~q,d}~ww W n# tj+y } zt *d | d!|dd"   W Y d}~qd}~ww d|	v sq6 W d  I dH  n1 I dH sw   Y  W n t,y } ztd#| |d$V  W Y d}~nd}~ww W |  I dH  dS W |  I dH  dS |  I dH  w )%a  Generate TTS audio from text using MiniMax's streaming API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        z: Generating TTS []z!application/json, text/plain, */*zapplication/jsonzBearer )acceptzContent-TypeAuthorization)r   rs   r|   ru   Nrv   rw   rx   )bitrateformatchannelr   )r   r{   audio_settingr   r   ry   )headersjson   zMiniMax TTS error: HTTP )errors   data:   r   zutf-8
extra_infoz$Received final chunk with extra infodataaudio   r   )r   r   num_channelsr   z Error converting hex to binary: zError decoding JSON: z, data: d   zUnknown error occurred: )r   	exception)-r	   r   r   r   r   rs   rt   ru   rv   rw   rx   r   r   r   r   r   r   ry   r   postr   statusr   stop_ttfb_metricsstart_tts_usage_metrics	bytearray
chunk_sizecontentiter_chunkedextendfindr   loadsdecoder   rangelenbytesfromhexr   r   
ValueErrorr   JSONDecodeError	Exception)r   r   r   r   r{   r   payloadresponseerror_messagebuffer
CHUNK_SIZEchunkr   
next_start
data_blockr   
chunk_data
audio_datai	hex_chunkaudio_chunkeri   ri   rj   run_ttsa  s   
	
T






*Q "zMiniMaxHttpTTSService.run_tts)r   r   r   r   rl   r   r   r
   r   r   r   aiohttpClientSessionr   r   r   r   r   r   r   r   r   r   r   r  r   ri   ri   r   rj   r      sN   
 
'	
 
(r   )$r   r   dataclassesr   r   typingr   r   r   r   r   r  logurur	   pydanticr
   pipecat.frames.framesr   r   r   r   pipecat.services.settingsr   r   r   pipecat.services.tts_servicer   pipecat.transcriptions.languager   r   (pipecat.utils.tracing.service_decoratorsr   r   rk   rl   r   ri   ri   ri   rj   <module>   s    7+