o
    i                     @   s   U d Z ddlZddlZddlmZ ddlmZmZ ddlm	Z	m
Z
mZ ddlmZ e
d Ze	ed< d	ed
eddfddZG dd dejZG dd deZG dd deZG dd deZdS )z5Manages words count mismatches for the espeak backend    N)Logger)ListTuple)	TypeAliasLiteralUnion)	Separator)warnignoreWordMismatchmodeloggerreturnBaseWordsMismatchc                 C   sJ   t ttd}z||  |W S  ty$   td|  dd|  dw )aO  Returns a word count mismatch processor according to `mode`

    The `mode` can be one of the following:
    - `ignore` to ignore words mismatches
    - `warn` to display a warning on each mismatched utterance
    - `remove` to remove any utterance containing a words mismatch

    Raises a RuntimeError if the `mode` is unknown.

    )r
   r	   removezmode z invalid, must be in z, N)IgnoreWarnRemoveKeyErrorRuntimeErrorjoinkeys)r   r   
processors r   \/home/ubuntu/.local/lib/python3.10/site-packages/phonemizer/backend/espeak/words_mismatch.pyget_words_mismatch_processor   s   r   c                	   @   s   e Zd ZdZedZdefddZe	efde
e deeejf de
e fd	d
Zde
eeeef  fddZdedefddZde
e fddZde
e defddZejde
e de
e fddZdS )r   z4The base class of all word count mismatch processorsz\s+r   c                 C   s   || _ g | _g | _d S N)_logger
_count_txt
_count_phn)selfr   r   r   r   __init__:   s   
zBaseWordsMismatch.__init__textwordsepr   c                    s(   t  tjst   fdd|D S )z;Return the number of words contained in each line of `text`c              	      s*   g | ]}t d d t | D qS )c                 S   s   g | ]}|r|qS r   r   ).0wr   r   r   
<listcomp>I       z=BaseWordsMismatch._count_words.<locals>.<listcomp>.<listcomp>)lenresplitstripr$   liner#   r   r   r&   H   s    z2BaseWordsMismatch._count_words.<locals>.<listcomp>)
isinstancer)   Patternescape)clsr"   r#   r   r.   r   _count_words?   s
   

zBaseWordsMismatch._count_wordsc                 C   sP   t | jt | jkrtdt | j dt | j dd tt| j| jD S )zReturns a list of (num_line, nwords_input, nwords_output)

        Consider only the lines where nwords_input != nwords_output. Raises a
        RuntimeError if input and output do not have the same number of lines.

        zBnumber of lines in input and output must be equal, we have: input=z	, output=c                 S   s&   g | ]\}\}}||kr|||fqS r   r   )r$   ntpr   r   r   r&   Y   s
    
z7BaseWordsMismatch._mismatched_lines.<locals>.<listcomp>)r(   r   r   r   	enumeratezip)r    r   r   r   _mismatched_linesL   s   z#BaseWordsMismatch._mismatched_lines	nmismatchnlinesc                 C   s,   |r| j dt|| dd || dS dS )z$Logs a high level undetailed warningz1words count mismatch on %s%% of the lines (%s/%s)   d   N)r   warninground)r    r:   r;   r   r   r   _resume^   s   zBaseWordsMismatch._resumec                 C   s   |  || _dS )z-Stores the number of words in each input lineN)r3   r   r    r"   r   r   r   
count_texte   s   zBaseWordsMismatch.count_text	separatorc                 C   s   |  ||j| _dS )z.Stores the number of words in each output lineN)r3   wordr   )r    r"   rC   r   r   r   count_phonemizedi   s   z"BaseWordsMismatch.count_phonemizedc                 C   s   dS )zDetects and process word count misatches according to the mode

        This method is called at the very end of phonemization, during
        post-processing.

        Nr   rA   r   r   r   processm   s    zBaseWordsMismatch.processN)__name__
__module____qualname____doc__r)   compile
_RE_SPACESr   r!   classmethodr   strr   r0   intr3   r   r9   r@   rB   r   rE   abcabstractmethodrF   r   r   r   r   r   6   s&    
 c                   @   *   e Zd ZdZdee dee fddZdS )r   zIgnores word count mismatchesr"   r   c                 C   s   |  t|  t| |S r   )r@   r(   r9   rA   r   r   r   rF   z   s   zIgnore.processNrG   rH   rI   rJ   r   rN   rF   r   r   r   r   r   w       r   c                   @   rR   )r   z Warns on every mismatch detectedr"   r   c                 C   sF   |   }|D ]\}}}| jd|d || q| t|t| |S )Nz>words count mismatch on line %s (expected %s words but get %s)   )r9   r   r>   r@   r(   )r    r"   mismatchnumntxtnphnr   r   r   rF      s   
zWarn.processNrS   r   r   r   r   r      rT   r   c                   @   rR   )r   z6Removes any utterance containing a word count mismatchr"   r   c                 C   sH   dd |   D }| t|t| | jd |D ]}d||< q|S )Nc                 S   s   g | ]}|d  qS )r   r   r,   r   r   r   r&      r'   z"Remove.process.<locals>.<listcomp>zremoving the mismatched lines )r9   r@   r(   r   r>   )r    r"   rV   indexr   r   r   rF      s   
zRemove.processNrS   r   r   r   r   r      rT   r   )rJ   rP   r)   loggingr   typingr   r   typing_extensionsr   r   r   phonemizer.separatorr   r   __annotations__r   ABCr   r   r   r   r   r   r   r   <module>   s   A