o
    it'                     @   s   d Z ddlZddlZddlmZ G dd dZdededed	efd
dZded	efddZded	ee	ef fddZ
ded	ee	ef fddZdS )z
TTS Text Builder and Utilities

Provides helper methods for constructing TTS text with pronunciation, pause,
and speed controls for Deepgram's Text-to-Speech API.
    N)Tuplec                   @   s   e Zd ZdZdd Zdedd fddZded	edd fd
dZdedd fddZ	dedd fddZ
deddfddZdefddZdS )TextBuilderu  
    Fluent builder for constructing TTS text with pronunciation and pause controls.
    
    Example:
        text = TextBuilder() \
            .text("Take ") \
            .pronunciation("azathioprine", "ˌæzəˈθaɪəpriːn") \
            .text(" twice daily with ") \
            .pronunciation("dupilumab", "duːˈpɪljuːmæb") \
            .text(" injections") \
            .pause(500) \
            .text(" Do not exceed prescribed dosage.") \
            .build()
    c                 C   s   g | _ d| _d| _d| _dS )zInitialize empty text builder.r   N)_parts_pronunciation_count_pause_count_char_count)self r	   Q/home/ubuntu/.local/lib/python3.10/site-packages/deepgram/helpers/text_builder.py__init__   s   
zTextBuilder.__init__contentreturnc                 C   s&   |r| j | |  jt|7  _| S )z
        Add plain text. Returns self for chaining.

        Args:
            content: Plain text to add

        Returns:
            Self for method chaining
        )r   appendr   len)r   r   r	   r	   r
   text$   s   
zTextBuilder.textwordipac                 C   sn   t |\}}|st|| jdkrtdtj||ddd}| j| |  jd7  _|  jt|7  _| S )a  
        Add a word with custom pronunciation.
        Formats as: {"word": "word", "pronounce":"ipa"}
        Returns self for chaining.

        Args:
            word: The word to be pronounced
            ipa: IPA pronunciation string

        Returns:
            Self for method chaining

        Raises:
            ValueError: If pronunciation limit exceeded or validation fails
          z/Maximum 500 pronunciations per request exceededr   	pronounceFensure_ascii   )	validate_ipa
ValueErrorr   jsondumpsr   r   r   r   )r   r   r   is_valid	error_msgpronunciation_jsonr	   r	   r
   pronunciation3   s   
zTextBuilder.pronunciationduration_msc                 C   sP   t |\}}|st|| jdkrtd| jd| d |  jd7  _| S )a  
        Add a pause in milliseconds.
        Formats as: {pause:duration_ms}
        Valid range: 500-5000ms in 100ms increments.
        Returns self for chaining.

        Args:
            duration_ms: Pause duration in milliseconds (500-5000, increments of 100)

        Returns:
            Self for method chaining

        Raises:
            ValueError: If pause limit exceeded or validation fails
        2   z&Maximum 50 pauses per request exceeded{pause:}r   )validate_pauser   r   r   r   )r   r!   r   r   r	   r	   r
   pauseT   s   
zTextBuilder.pause	ssml_textc                 C   s&   t |}|r| j| | | | S )u  
        Parse SSML and convert to Deepgram's inline format.
        Supports:
        - <phoneme alphabet="ipa" ph="...">word</phoneme> → pronunciation()
        - <break time="500ms"/> → pause()
        - Plain text → text()
        Returns self for chaining.

        Args:
            ssml_text: SSML-formatted text

        Returns:
            Self for method chaining
        )ssml_to_deepgramr   r   _update_counts_from_text)r   r'   	convertedr	   r	   r
   	from_ssmls   s
   
zTextBuilder.from_ssmlr   Nc                 C   sv   d}t ||}|  jt|7  _d}t ||}|  jt|7  _t |d|}t |d|}|  jt|7  _dS )z*Update internal counters from parsed text.z/\{"word":\s*"[^"]*",\s*"pronounce":\s*"[^"]*"\}z\{pause:\d+\} N)refindallr   r   r   subr   )r   r   pronunciation_patternpronunciationspause_patternpauses
clean_textr	   r	   r
   r)      s   z$TextBuilder._update_counts_from_textc                 C   s,   d | j}| jdkrtd| j d|S )z
        Return the final formatted text string.

        Returns:
            The complete formatted text ready for TTS

        Raises:
            ValueError: If character limit exceeded
        r,   i  z,Text exceeds 2000 character limit (current: z characters))joinr   r   r   )r   resultr	   r	   r
   build   s   

zTextBuilder.build)__name__
__module____qualname____doc__r   strr   r    intr&   r+   r)   r7   r	   r	   r	   r
   r      s    !r   r   r   r   r   c                 C   sT   t |\}}|st|tj||ddd}dt| d }tj||| dd}|S )u  
    Replace word in text with pronunciation control.

    Args:
        text: Source text containing the word
        word: Word to replace
        ipa: IPA pronunciation string

    Returns:
        Text with word replaced by {"word": "word", "pronounce":"ipa"}

    Example:
        text = "Take azathioprine twice daily with dupilumab injections."
        text = add_pronunciation(text, "azathioprine", "ˌæzəˈθaɪəpriːn")
        text = add_pronunciation(text, "dupilumab", "duːˈpɪljuːmæb")
    r   Fr   z\br   )count)r   r   r   r   r-   escaper/   )r   r   r   r   r   r   patternr6   r	   r	   r
   add_pronunciation   s   rA   r'   c                 C   st   |   } d}t|| tj}|r|d} d}dd }t||| } d}dd }t||| } td	d
| } |   S )u0  
    Convert SSML markup to Deepgram's inline JSON format.

    Supports:
    - <phoneme alphabet="ipa" ph="...">word</phoneme>
    - <break time="500ms"/> or <break time="0.5s"/>
    - Strips <speak> wrapper tags

    Args:
        ssml_text: SSML-formatted text

    Returns:
        Deepgram-formatted text

    Example:
        ssml = '''<speak>
            Take <phoneme alphabet="ipa" ph="ˌæzəˈθaɪəpriːn">azathioprine</phoneme>
            <break time="500ms"/> Do not exceed dosage.
        </speak>'''
        text = ssml_to_deepgram(ssml)
    z<speak[^>]*>(.*?)</speak>r   zI<phoneme\s+alphabet=["\']ipa["\']\s+ph=["\'](.*?)["\']\s*>(.*?)</phoneme>c                 S   s(   |  d}|  d}tj||dddS )Nr      r   Fr   )groupr   r   )matchr   r   r	   r	   r
   replace_phoneme   s   

z)ssml_to_deepgram.<locals>.replace_phonemez2<break\s+time=["\'](\d+(?:\.\d+)?)(ms|s)["\']\s*/>c                 S   sn   t | d}| d}|dkrt|d }nt|}t|\}}|s1tdtdt|d d }d| d	S )
Nr   rB   si  r     d   r#   r$   )floatrC   r=   r%   maxminround)rD   valueunitr!   r   r   r	   r	   r
   replace_break   s   
z'ssml_to_deepgram.<locals>.replace_breakz<[^>]+>r,   )stripr-   searchDOTALLrC   r/   )r'   speak_patternspeak_matchphoneme_patternrE   break_patternrO   r	   r	   r
   r(      s   
r(   c                 C   sZ   | sdS t | tsdS g d}|D ]}|| v r"ddt| f  S qt| dkr+dS dS )	z
    Validate IPA string format.

    Args:
        ipa: IPA pronunciation string

    Returns:
        Tuple of (is_valid, error_message)
    )Fz!IPA pronunciation cannot be empty)Fz"IPA pronunciation must be a string)"\
	Fz.IPA pronunciation contains invalid character: rH   )Fz-IPA pronunciation exceeds 100 character limitTr,   )
isinstancer<   reprr   )r   invalid_charscharr	   r	   r
   r     s   

r   r!   c                 C   s:   t | tsdS | dk rdS | dkrdS | d dkrdS d	S )
z
    Validate pause duration (500-5000ms, 100ms increments).

    Args:
        duration_ms: Pause duration in milliseconds

    Returns:
        Tuple of (is_valid, error_message)
    )Fz!Pause duration must be an integerr   )Fz%Pause duration must be at least 500msrG   )Fz%Pause duration must not exceed 5000msrH   r   )Fz*Pause duration must be in 100ms incrementsr\   )r]   r=   )r!   r	   r	   r
   r%   6  s   

r%   )r;   r   r-   typingr   r   r<   rA   r(   boolr   r=   r%   r	   r	   r	   r
   <module>   s     # J