o
    êi_                     @   s  d Z ddlZddlmZ ddlmZmZmZmZm	Z	m
Z
 ddlZddlmZmZmZmZ ddlmZmZ dd	lmZmZmZmZ dd
lmZmZ ddlmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' de&de(de'fddZ)de
e(e%ef de
e%ef fddZ*G dd dZ+G dd dZ,dS )z&TTS (Text-to-Speech) namespace client.    N)ThreadPoolExecutor)AsyncIterableIterableIteratorListOptionalUnion)AsyncWebSocketSessionWebSocketSessionaconnect_ws
connect_ws   )aiter_websocket_audioiter_websocket_audio   )AsyncClientWrapperClientWrapperRequestOptionsWebSocketOptions)AsyncAudioStreamAudioStream)AudioFormat
CloseEvent
FlushEventLatencyModeModelProsodyReferenceAudio
StartEvent	TextEvent	TTSConfig
TTSRequestconfigtextreturnc                 C   s   t di d|d| jd| jd| jd| jd| jd| jd| jd	| jd
| j	d| j
d| jd| jd| jd| jd| jd| jd| jS )z*Convert TTSConfig to TTSRequest with text.r#   chunk_lengthformatsample_ratemp3_bitrateopus_bitrate
referencesreference_id	normalizelatencyprosodytop_ptemperaturemax_new_tokensrepetition_penaltymin_chunk_lengthcondition_on_previous_chunksearly_stop_thresholdN )r!   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   )r"   r#   r6   r6   K/home/ubuntu/.local/lib/python3.10/site-packages/fishaudio/resources/tts.py_config_to_tts_request   sJ   	
r8   itemc                 C   s   t | ttfr	| S t| dS )zHNormalize string input to TextEvent, pass through event types unchanged.r#   )
isinstancer   r   )r9   r6   r6   r7   _normalize_to_event4   s   
r<   c                   @   sb  e Zd ZdZdefddZddddde dddded	ee d
ee	e
  dee dee dee dededee defddZddddde dddded	ee d
ee	e
  dee dee dee dededee defddZddddde dddd	deeeeef  d	ee d
ee	e
  dee dee dee dedededee dee fddZdS )	TTSClientzSynchronous TTS operations.client_wrapperc                 C   
   || _ d S N_clientselfr>   r6   r6   r7   __init__@      
zTTSClient.__init__Ns1r+   r*   r&   r-   speedr"   modelrequest_optionsr#   r+   r*   r&   r-   rI   r"   rJ   rK   r$   c       	            s   t ||}
|dur||
_|dur||
_|dur||
_|dur!||
_|dur.tj||jd|
_|
jdd}| j	j
ddd|dt||	d	  fd
d}t| S )a  
        Stream text-to-speech audio chunks.

        Args:
            text: Text to synthesize
            reference_id: Voice reference ID (overrides config.reference_id if provided)
            references: Reference audio samples (overrides config.references if provided)
            format: Audio format - "mp3", "wav", "pcm", or "opus" (overrides config.format if provided)
            latency: Latency mode - "normal" or "balanced" (overrides config.latency if provided)
            speed: Speech speed multiplier, e.g. 1.5 for 1.5x speed (overrides config.prosody.speed if provided)
            config: TTS configuration (audio settings, voice, model parameters)
            model: TTS model to use
            request_options: Request-level overrides

        Returns:
            AudioStream object that can be iterated for audio chunks

        Example:
            ```python
            from fishaudio import FishAudio

            client = FishAudio(api_key="...")

            # Stream and process chunks
            for chunk in client.tts.stream(text="Hello world"):
                process_audio_chunk(chunk)

            # Or collect all at once
            audio = client.tts.stream(text="Hello world").collect()
            ```
        NbaseTexclude_nonePOST/v1/ttsapplication/msgpackzContent-TyperJ   headerscontentrK   c                  3   s       D ]} | r| V  qd S r@   )
iter_byteschunkresponser6   r7   _stream   s   z!TTSClient.stream.<locals>._stream)r8   r+   r*   r&   r-   r   from_speed_overrider.   
model_dumprB   request	ormsgpackpackbr   rD   r#   r+   r*   r&   r-   rI   r"   rJ   rK   r_   payloadr\   r6   rZ   r7   streamC   s*   
-	
zTTSClient.streamc       	   
      C   s    | j |||||||||	d	 S )as  
        Convert text to speech and return complete audio as bytes.

        This is a convenience method that streams all audio chunks and combines them.
        For chunk-by-chunk processing, use stream() instead.

        Args:
            text: Text to synthesize
            reference_id: Voice reference ID (overrides config.reference_id if provided)
            references: Reference audio samples (overrides config.references if provided)
            format: Audio format - "mp3", "wav", "pcm", or "opus" (overrides config.format if provided)
            latency: Latency mode - "normal" or "balanced" (overrides config.latency if provided)
            speed: Speech speed multiplier, e.g. 1.5 for 1.5x speed (overrides config.prosody.speed if provided)
            config: TTS configuration (audio settings, voice, model parameters)
            model: TTS model to use
            request_options: Request-level overrides

        Returns:
            Complete audio as bytes

        Example:
            ```python
            from fishaudio import FishAudio
            from fishaudio.utils import play, save

            client = FishAudio(api_key="...")

            # Get complete audio
            audio = client.tts.convert(text="Hello world")

            # Play it
            play(audio)

            # Or save it
            save(audio, "output.mp3")
            ```
        	r#   r+   r*   r&   r-   rI   r"   rJ   rK   rd   collect)
rD   r#   r+   r*   r&   r-   rI   r"   rJ   rK   r6   r6   r7   convert   s   2
zTTSClient.convert
   )	r+   r*   r&   r-   rI   r"   rJ   max_workers
ws_optionstext_streamrj   rk   c       	      	   #   s*   t |dd|dur|_|dur|_|dur|_|dur#|_|dur0tj||jd_|
r6|
 ni }t	|	d}zPt
	d| jj| jd|id|# fd	d
}||}tD ]}|V  qc|  W d   n1 sww   Y  W |jdd dS W |jdd dS |jdd w )a  
        Stream text and receive audio in real-time via WebSocket.

        Perfect for conversational AI, live captioning, and streaming applications.

        Args:
            text_stream: Iterator of text chunks to stream
            reference_id: Voice reference ID (overrides config.reference_id if provided)
            references: Reference audio samples (overrides config.references if provided)
            format: Audio format - "mp3", "wav", "pcm", or "opus" (overrides config.format if provided)
            latency: Latency mode - "normal" or "balanced" (overrides config.latency if provided)
            speed: Speech speed multiplier, e.g. 1.5 for 1.5x speed (overrides config.prosody.speed if provided)
            config: TTS configuration (audio settings, voice, model parameters)
            model: TTS model to use
            max_workers: ThreadPoolExecutor workers for concurrent sender
            ws_options: WebSocket connection options for configuring timeouts, message size limits, etc.
                Useful for long-running generations that may exceed default timeout values.
                See WebSocketOptions class for available parameters.

        Returns:
            Iterator of audio bytes

        Example:
            ```python
            from fishaudio import FishAudio, TTSConfig, ReferenceAudio, WebSocketOptions

            client = FishAudio(api_key="...")

            def text_generator():
                yield "Hello, "
                yield "this is "
                yield "streaming text!"

            # Simple usage with defaults
            with open("output.mp3", "wb") as f:
                for audio_chunk in client.tts.stream_websocket(text_generator()):
                    f.write(audio_chunk)

            # With format and speed parameters
            with open("output.wav", "wb") as f:
                for audio_chunk in client.tts.stream_websocket(
                    text_generator(),
                    format="wav",
                    speed=1.3
                ):
                    f.write(audio_chunk)

            # With reference_id parameter
            with open("output.mp3", "wb") as f:
                for audio_chunk in client.tts.stream_websocket(text_generator(), reference_id="your_model_id"):
                    f.write(audio_chunk)

            # With references parameter
            with open("output.mp3", "wb") as f:
                for audio_chunk in client.tts.stream_websocket(
                    text_generator(),
                    references=[ReferenceAudio(audio=audio_bytes, text="sample")]
                ):
                    f.write(audio_chunk)

            # With WebSocket options for long-running generations
            # Useful if you're generating very long responses that may take >20 seconds
            ws_options = WebSocketOptions(keepalive_ping_timeout_seconds=60.0)
            with open("output.mp3", "wb") as f:
                for audio_chunk in client.tts.stream_websocket(
                    text_generator(),
                    ws_options=ws_options
                ):
                    f.write(audio_chunk)

            # Parameters override config values
            config = TTSConfig(format="mp3", latency="balanced")
            with open("output.wav", "wb") as f:
                for audio_chunk in client.tts.stream_websocket(
                    text_generator(),
                    format="wav",  # Parameter wins
                    config=config
                ):
                    f.write(audio_chunk)
            ```
         r:   NrL   )rj   /v1/tts/liverJ   clientrU   c                     sZ    ttd   D ]} t| } t|  q tt   d S N)r_   
send_bytesr`   ra   r   r^   r<   r   r9   eventrl   tts_requestwsr6   r7   senderU  s   z*TTSClient.stream_websocket.<locals>.senderF)waitrn   )r8   r+   r*   r&   r-   r   r]   r.   to_httpx_ws_kwargsr   r   rB   rp   get_headerssubmitr   resultshutdown)rD   rl   r+   r*   r&   r-   rI   r"   rJ   rj   rk   	ws_kwargsexecutorry   sender_futureaudio_chunkr6   rv   r7   stream_websocket   sF   `



zTTSClient.stream_websocket)__name__
__module____qualname____doc__r   rE   r    strr   r   r   r   r   floatr   r   r   rd   bytesrh   r   r   r   r   intr   r   r   r6   r6   r6   r7   r=   =   s    
	

V
	

B
	
r=   c                   @   sT  e Zd ZdZdefddZddddde dddded	ee d
ee	e
  dee dee dee dededee defddZddddde dddded	ee d
ee	e
  dee dee dee dededee defddZddddde ddddeeeeef  d	ee d
ee	e
  dee dee dee dededee fddZdS )AsyncTTSClientzAsynchronous TTS operations.r>   c                 C   r?   r@   rA   rC   r6   r6   r7   rE   m  rF   zAsyncTTSClient.__init__NrG   rH   r#   r+   r*   r&   r-   rI   r"   rJ   rK   r$   c       	            s   t ||}
|dur||
_|dur||
_|dur||
_|dur"||
_|dur/tj||jd|
_|
jdd}| j	j
ddd|dt||	d	I dH   fd
d}t| S )a9  
        Stream text-to-speech audio chunks (async).

        Args:
            text: Text to synthesize
            reference_id: Voice reference ID (overrides config.reference_id if provided)
            references: Reference audio samples (overrides config.references if provided)
            format: Audio format - "mp3", "wav", "pcm", or "opus" (overrides config.format if provided)
            latency: Latency mode - "normal" or "balanced" (overrides config.latency if provided)
            speed: Speech speed multiplier, e.g. 1.5 for 1.5x speed (overrides config.prosody.speed if provided)
            config: TTS configuration (audio settings, voice, model parameters)
            model: TTS model to use
            request_options: Request-level overrides

        Returns:
            AsyncAudioStream object that can be iterated for audio chunks

        Example:
            ```python
            from fishaudio import AsyncFishAudio

            client = AsyncFishAudio(api_key="...")

            # Stream and process chunks
            async for chunk in await client.tts.stream(text="Hello world"):
                await process_audio_chunk(chunk)

            # Or collect all at once
            stream = await client.tts.stream(text="Hello world")
            audio = await stream.collect()
            ```
        NrL   TrN   rP   rQ   rR   rS   rT   c                    s(      2 z3 d H W } | r| V  q6 d S r@   )aiter_bytesrX   rZ   r6   r7   r\     s   z&AsyncTTSClient.stream.<locals>._stream)r8   r+   r*   r&   r-   r   r]   r.   r^   rB   r_   r`   ra   r   rb   r6   rZ   r7   rd   p  s,   
.	
zAsyncTTSClient.streamc       	            s2   | j |||||||||	d	I dH }
|
 I dH S )a  
        Convert text to speech and return complete audio as bytes (async).

        This is a convenience method that streams all audio chunks and combines them.
        For chunk-by-chunk processing, use stream() instead.

        Args:
            text: Text to synthesize
            reference_id: Voice reference ID (overrides config.reference_id if provided)
            references: Reference audio samples (overrides config.references if provided)
            format: Audio format - "mp3", "wav", "pcm", or "opus" (overrides config.format if provided)
            latency: Latency mode - "normal" or "balanced" (overrides config.latency if provided)
            speed: Speech speed multiplier, e.g. 1.5 for 1.5x speed (overrides config.prosody.speed if provided)
            config: TTS configuration (audio settings, voice, model parameters)
            model: TTS model to use
            request_options: Request-level overrides

        Returns:
            Complete audio as bytes

        Example:
            ```python
            from fishaudio import AsyncFishAudio
            from fishaudio.utils import play, save

            client = AsyncFishAudio(api_key="...")

            # Get complete audio
            audio = await client.tts.convert(text="Hello world")

            # Play it
            play(audio)

            # Or save it
            save(audio, "output.mp3")
            ```
        re   Nrf   )rD   r#   r+   r*   r&   r-   rI   r"   rJ   rK   rd   r6   r6   r7   rh     s   2zAsyncTTSClient.convert)r+   r*   r&   r-   rI   r"   rJ   rk   rl   rk   c             	     s  t |dd|dur|_|dur|_|dur|_|dur#|_|dur0tj||jd_|	r6|	 ni }
t		d
| j
j| j
d|id|
4 I dH . fdd	}t| }t2 z	3 dH W }|V  qb6 |I dH  W d  I dH  dS 1 I dH sw   Y  dS )aw  
        Stream text and receive audio in real-time via WebSocket (async).

        Perfect for conversational AI, live captioning, and streaming applications.

        Args:
            text_stream: Async iterator of text chunks to stream
            reference_id: Voice reference ID (overrides config.reference_id if provided)
            references: Reference audio samples (overrides config.references if provided)
            format: Audio format - "mp3", "wav", "pcm", or "opus" (overrides config.format if provided)
            latency: Latency mode - "normal" or "balanced" (overrides config.latency if provided)
            speed: Speech speed multiplier, e.g. 1.5 for 1.5x speed (overrides config.prosody.speed if provided)
            config: TTS configuration (audio settings, voice, model parameters)
            model: TTS model to use
            ws_options: WebSocket connection options for configuring timeouts, message size limits, etc.
                Useful for long-running generations that may exceed default timeout values.
                See WebSocketOptions class for available parameters.

        Returns:
            Async iterator of audio bytes

        Example:
            ```python
            from fishaudio import AsyncFishAudio, TTSConfig, ReferenceAudio, WebSocketOptions

            client = AsyncFishAudio(api_key="...")

            async def text_generator():
                yield "Hello, "
                yield "this is "
                yield "async streaming!"

            # Simple usage with defaults
            async with aiofiles.open("output.mp3", "wb") as f:
                async for audio_chunk in client.tts.stream_websocket(text_generator()):
                    await f.write(audio_chunk)

            # With format and speed parameters
            async with aiofiles.open("output.wav", "wb") as f:
                async for audio_chunk in client.tts.stream_websocket(
                    text_generator(),
                    format="wav",
                    speed=1.3
                ):
                    await f.write(audio_chunk)

            # With reference_id parameter
            async with aiofiles.open("output.mp3", "wb") as f:
                async for audio_chunk in client.tts.stream_websocket(text_generator(), reference_id="your_model_id"):
                    await f.write(audio_chunk)

            # With references parameter
            async with aiofiles.open("output.mp3", "wb") as f:
                async for audio_chunk in client.tts.stream_websocket(
                    text_generator(),
                    references=[ReferenceAudio(audio=audio_bytes, text="sample")]
                ):
                    await f.write(audio_chunk)

            # With WebSocket options for long-running generations
            # Useful if you're generating very long responses that may take >20 seconds
            ws_options = WebSocketOptions(keepalive_ping_timeout_seconds=60.0)
            async with aiofiles.open("output.mp3", "wb") as f:
                async for audio_chunk in client.tts.stream_websocket(
                    text_generator(),
                    ws_options=ws_options
                ):
                    await f.write(audio_chunk)

            # Parameters override config values
            config = TTSConfig(format="mp3", latency="balanced")
            async with aiofiles.open("output.wav", "wb") as f:
                async for audio_chunk in client.tts.stream_websocket(
                    text_generator(),
                    format="wav",  # Parameter wins
                    config=config
                ):
                    await f.write(audio_chunk)
            ```
        rm   r:   NrL   rn   rJ   ro   c                     sx    ttd I d H   2 z3 d H W } t| } t| I d H  q6  tt  I d H  d S rq   rr   rt   rv   r6   r7   ry     s   
 z/AsyncTTSClient.stream_websocket.<locals>.senderr{   )r8   r+   r*   r&   r-   r   r]   r.   r|   r   rB   rp   r}   asynciocreate_taskr   )rD   rl   r+   r*   r&   r-   rI   r"   rJ   rk   r   ry   sender_taskr   r6   rv   r7   r     s>   ^
.zAsyncTTSClient.stream_websocket)r   r   r   r   r   rE   r    r   r   r   r   r   r   r   r   r   r   rd   r   rh   r   r   r   r   r   r   r6   r6   r6   r7   r   j  s    
	

W
	

C
	
r   )-r   r   concurrent.futuresr   typingr   r   r   r   r   r   r`   httpx_wsr	   r
   r   r   realtimer   r   corer   r   r   r   core.iteratorsr   r   typesr   r   r   r   r   r   r   r   r   r    r!   r   r8   r<   r=   r   r6   r6   r6   r7   <module>   s(     4

	  /