o
    i%                     @   sv  d Z ddlZddlmZ ddlmZmZ i dd e D i ddd	dd
ddddddddddddddddddddddddddddd d d d!d"Zd#e	d$e	fd%d&Z
d#e	d$ee	ef fd'd(Zed)krg d*Zed+ ed, ed+ eD ]2Zed-e  e
eZed.e  eeZed/ red0ed1  d2 qed3ed4   qdS dS )5u   
Emotion Tag Normalizer for Spark TTS

Converts old Orpheus emotion tags to Spark TTS bracket format.
From: <emotion> → To: [emotion]

Also provides backward compatibility for legacy formats.
    N)Dict)LEGACY_EMOTION_MAPINDIC_EMOTION_TAGSc                 C   s   i | ]
\}}| d |qS )z<>)strip).0kv r	   D/home/ubuntu/veenaModal/veena3modal/processing/emotion_normalizer.py
<dictcomp>   s    r   laughingz[laughs]laughzlaughs harderz[laughs harder]zlaugh harderlaugh_hardersighsz[sighs]sighgigglesz[giggle]giggleangryz[angry]excitedz	[excited]whispersz
[whispers]whisper	screamingz	[screams]screamscreamssingingz[sings]z	[curious])singsingscurioustextreturnc                 C   s8   d}dd }t ||| } d}dd }t ||| }|S )u  
    Normalize emotion tags to Spark TTS bracket format.
    
    Conversions:
    - <laugh> → [laughs]
    - <sigh> → [sighs]
    - <giggle> → [giggle]
    - etc.
    
    Also handles natural language:
    - [laughing] → [laughs]
    - [singing] → [sings]
    
    Args:
        text: Text with potential emotion tags (old or new format)
    
    Returns:
        Text with normalized [emotion] tags for Spark TTS
    z<([a-z_]+)>c                 S   s`   |  d  }|tv rt| S d| d}|tv rt| S td| d| d d| dS )N   <>u   ⚠️  Unknown old emotion: <z> - converting to [][)grouplowerr   EMOTION_TO_TAGr   print)matchemotion_textold_tagr	   r	   r
   replace_angle_emotionE   s   z5normalize_emotion_tags.<locals>.replace_angle_emotion\[([^\]]+)\]c                 S   s   |  d  }d| d}|tv r|S |tv rt| S |dr0|d d }|tv r0t| S |d }|tv r<t| S td| d |S )Nr    r$   r#   su   ⚠️  Unknown emotion: [z] - keeping as-is)r%   r&   r   r   r'   endswithr(   )r)   r*   bracket_tagsingularplural_formr	   r	   r
   replace_bracket_emotionZ   s   
z7normalize_emotion_tags.<locals>.replace_bracket_emotion)resub)r   angle_patternr,   bracket_patternr4   normalized_textr	   r	   r
   normalize_emotion_tags.   s   r:   c                    sz   t d| }t d| }tt  fdd|D } fdd|D }t|dkdd |D dd |D ||t|dkd	S )
z
    Validate that text has proper emotion tag format for Spark TTS.
    
    Returns:
        Dict with validation results
    r-   z	<([^>]+)>c                    s(   g | ]}d | d v rd | dqS r$   r#   r	   r   tagvalid_emotion_tagsr	   r
   
<listcomp>      ( z,validate_normalized_text.<locals>.<listcomp>c                    s(   g | ]}d | d vrd | dqS r;   r	   r<   r>   r	   r
   r@      rA   r   c                 S      g | ]}d | dqS )r!   r"   r	   r<   r	   r	   r
   r@          c                 S   rB   r;   r	   r<   r	   r	   r
   r@      rC   )has_angle_bracketsangle_bracketsbracket_emotionsr?   invalid_emotion_tagsis_valid)r5   findallsetr   len)r   square_bracketsrE   
valid_tagsinvalid_tagsr	   r>   r
   validate_normalized_text{   s   

rO   __main__)zDAnd of course, the so-called 'easy' hack didn't work at all [sighs].zThat's so silly [giggles].z:[angry] I cannot believe this happened for the third time.z[whispers] Look over there.z-I'm so happy I could just [singing] la-la-la!z)And then he did it again [laughs harder]!z)Old format <laugh> text needs conversion.z%Old format <sigh> and <giggle> mixed.zP================================================================================z+Emotion Tag Normalization Tests (Spark TTS)z
Original:   zNormalized: rH   u   ✅ Valid (emotion tags: r?   )u   ⚠️  Has angle brackets: rE   )__doc__r5   typingr   veena3modal.core.constantsr   r   itemsr'   strr:   anyrO   __name__
test_casesr(   r   
normalized
validationr	   r	   r	   r
   <module>   s|    		
M