o
    i
                     @   s   d dl mZmZ d dlmZmZ d dlZd dlmZ	 d dlm
Z
 d dlmZ G dd deZG dd	 d	eZdd
e
dee defddZG dd deZdd Zdd ZG dd deZdS )    )ABCabstractmethod)DictListN)Tensor)	TokenSpanc                   @   s.   e Zd Zedee deee  fddZdS )
ITokenizer
transcriptreturnc                 C      dS )a  Tokenize the given transcript (list of word)

        .. note::

           The toranscript must be normalized.

        Args:
            transcript (list of str): Transcript (list of word).

        Returns:
            (list of int): List of token sequences
        N selfr	   r   r   \/home/ubuntu/vllm_env/lib/python3.10/site-packages/torchaudio/pipelines/_wav2vec2/aligner.py__call__       zITokenizer.__call__N)__name__
__module____qualname__r   r   strr   r   r   r   r   r   
   s    $r   c                   @   s@   e Zd Zdeeef fddZdee deee  fddZdS )		Tokenizer
dictionaryc                 C   
   || _ d S Nr   )r   r   r   r   r   __init__      
zTokenizer.__init__r	   r
   c                    s    fdd|D S )Nc                    s   g | ]} fd d|D qS )c                    s   g | ]} j | qS r   r   ).0cr   r   r   
<listcomp>    s    z1Tokenizer.__call__.<locals>.<listcomp>.<listcomp>r   )r   wordr   r   r   r        s    z&Tokenizer.__call__.<locals>.<listcomp>r   r   r   r   r   r      s   zTokenizer.__call__N)	r   r   r   r   r   intr   r   r   r   r   r   r   r      s    "r   emissiontokensblankc                 C   sZ   | j }| d} tj|gtj|d}tj| ||d\}}| }|d |d }}||fS )Nr   )dtypedevicer%   )r'   	unsqueezetorchtensorint32Fforced_alignexp)r#   r$   r%   r'   targetsaligned_tokensscoresr   r   r   _align_emission_and_tokens#   s   
r3   c                	   @   s6   e Zd Zededeee  deee  fddZdS )IAlignerr#   r$   r
   c                 C   r   )a  Generate list of time-stamped token sequences

        Args:
            emission (Tensor): Sequence of token probability distributions in log-domain.
                Shape: `(time, tokens)`.
            tokens (list of integer sequence): Tokenized transcript.
                Output from :py:class:`torchaudio.pipelines.Wav2Vec2FABundle.Tokenizer`.

        Returns:
            (list of TokenSpan sequence): Tokens with time stamps and scores.
        Nr   )r   r#   r$   r   r   r   r   0   r   zIAligner.__call__N)	r   r   r   r   r   r   r"   r   r   r   r   r   r   r4   /   s    ,r4   c                 C   sH   t | t|ks
J d}g }|D ]}|| |||   ||7 }q|S )Nr   )lensumappend)list_lengthsiretlr   r   r   
_unflatten?   s   
r=   c                 C   s   dd | D S )Nc                 S   s   g | ]	}|D ]}|qqS r   r   )r   r8   itemr   r   r   r    J   s    z_flatten.<locals>.<listcomp>r   )nested_listr   r   r   _flattenI   s   r@   c                   @   s:   e Zd Zdd Zdedeee  deee  fddZdS )	Alignerc                 C   r   r   r(   )r   r%   r   r   r   r   N   r   zAligner.__init__r#   r$   r
   c                 C   sP   |j dkrtd|j t|t|| j\}}t||}t|dd |D S )N   z&The input emission must be 2D. Found: c                 S   s   g | ]}t |qS r   )r5   )r   tsr   r   r   r    W   s    z$Aligner.__call__.<locals>.<listcomp>)	ndim
ValueErrorshaper3   r@   r%   r-   merge_tokensr=   )r   r#   r$   r1   r2   spansr   r   r   r   Q   s
   
zAligner.__call__N)	r   r   r   r   r   r   r"   r   r   r   r   r   r   rA   M   s    *rA   )r   )abcr   r   typingr   r   r*   torchaudio.functional
functionalr-   r   r   r   r   r"   r3   r4   r=   r@   rA   r   r   r   r   <module>   s    
