o
    i)                     @   s*  d Z ddlZddlmZmZmZmZmZ ddlZ	ddl
Z
ddlmZ ddlmZmZ de
jdeded	e
jfd
dZdee dededee d	ee f
ddZdee dee d	efddZdee dee d	ee fddZdee de
jde
jded	ee f
ddZdeee eeef f d ed!ed"ed	eee eeef f f
d#d$Zdeee eeef f d"ed	eee eeef f fd%d&Zd'e
jjfd(d)Zdee d	ee fd*d+Zdee d,ed	ee fd-d.Z d/eee
j  d0ed,ed	eee
j  fd1d2Z!d3d4 Z"d;d6ed7e
jjd8efd9d:Z#dS )<z(Utility functions for Transducer models.    N)AnyDictListOptionalUnion)pad_list)ExtendedHypothesis
Hypothesislabelsblank_id	ignore_idreturnc                    sJ   | j }fdd| D }| d |g t fdd|D ||}|S )zPrepare decoder input.

    Args:
        labels: Label ID sequences. (B, L)

    Returns:
        decoder_input: Label ID sequences with blank prefix. (B, U)

    c                    s   g | ]}|| k qS  r   .0label)r   r   `/home/ubuntu/.local/lib/python3.10/site-packages/espnet/nets/pytorch_backend/transducer/utils.py
<listcomp>       z%get_decoder_input.<locals>.<listcomp>r   c                    s   g | ]}t j |gd dqS )r   )dim)torchcatr   )blankr   r   r      s    )devicenewr   to)r
   r   r   r   labels_unpaddecoder_inputr   )r   r   r   get_decoder_input   s   r   aux_layer_idenc_num_layersuse_symm_kl_div_loss	subsamplec           	         s   t | tr| rtdd | D stdt| tdd}tt fdd|}||kr0td  |rj| g7 }td	t|D ]+}|||d	  d	 || d	  }d
d |D }d|v ritd||d	  || f q>|S )a  Check whether provided auxiliary encoder layer IDs are valid.

    Return the valid list sorted with duplicates removed.

    Args:
        aux_layer_id: Auxiliary encoder layer IDs.
        enc_num_layers: Number of encoder layers.
        use_symm_kl_div_loss: Whether symmetric KL divergence loss is used.
        subsample: Subsampling rate per layer.

    Returns:
        valid: Valid list of auxiliary encoder layers.

    c                 s   s    | ]}t |tV  qd S N)
isinstanceintr   layerr   r   r   	<genexpr><   s    z2valid_aux_encoder_output_layers.<locals>.<genexpr>zlaux-transducer-loss-enc-output-layers option takes a list of layer IDs. Correct argument format is: '[0, 1]'Fkeyreversec                    s   d|   ko	 k S   S )Nr   r   xr    r   r   <lambda>D   r   z1valid_aux_encoder_output_layers.<locals>.<lambda>zgProvided argument for aux-transducer-loss-enc-output-layers is incorrect. IDs should be between [0, %d]   c                 S   s   g | ]
}|d kr
dndqS )r0   FTr   )r   nr   r   r   r   Q       z3valid_aux_encoder_output_layers.<locals>.<listcomp>zEncoder layers %d and %d have different shape due to subsampling. Symmetric KL divergence loss doesn't cover such case for now.)	r$   listall
ValueErrorsortedr%   filterrangelen)	r   r    r!   r"   sorted_listvalidr1   	sub_rangevalid_shaper   r.   r   valid_aux_encoder_output_layers%   s<   
 r>   r-   prefc                 C   sH   t |t | kr
dS tt |d ddD ]}|| | | kr! dS qdS )zCheck if pref is a prefix of x.

    Args:
        x: Label ID sequence.
        pref: Prefix label ID sequence.

    Returns:
        : Whether pref is a prefix of x.

    Fr0   T)r9   r8   )r-   r?   ir   r   r   	is_prefix]   s   rB   subsetc                    s4   g }| D ] t  fdd|D rq|  q|S )zRemove elements of subset if corresponding label ID sequence already exist in x.

    Args:
        x: Set of hypotheses.
        subset: Subset of x.

    Returns:
       final: New set of hypotheses.

    c                 3   s    | ]	} j |j kV  qd S r#   yseq)r   subx_r   r   r(      s    zsubtract.<locals>.<genexpr>)anyappend)r-   rC   finalr   rG   r   subtractr   s   rL   hyps	topk_idxs
topk_logpsgammac                    sv   g }t | D ]2\}fddt|| || D }t|dd dd |tt fdd|dd d	d
 q|S )a  Return K hypotheses candidates for expansion from a list of hypothesis.

    K candidates are selected according to the extended hypotheses probabilities
    and a prune-by-value method. Where K is equal to beam_size + beta.

    Args:
        hyps: Hypotheses.
        topk_idxs: Indices of candidates hypothesis.
        topk_logps: Log-probabilities for hypotheses expansions.
        gamma: Allowed logp difference for prune-by-value method.

    Return:
        k_expansions: Best K expansion hypotheses candidates.

    c                    s&   g | ]\}}t | jt| fqS r   )r%   scorefloat)r   kv)hypr   r   r      s    z'select_k_expansions.<locals>.<listcomp>c                 S      | d S Nr0   r   r,   r   r   r   r/          z%select_k_expansions.<locals>.<lambda>)r*   r0   c                    s     | d kS rW   r   r,   )rP   
k_best_expr   r   r/      s    c                 S   rV   rW   r   r,   r   r   r   r/      rX   Tr)   )	enumeratezipmaxrJ   r6   r7   )rM   rN   rO   rP   k_expansionsrA   hyp_ir   )rP   rU   rY   r   select_k_expansions   s   
r_   	lm_statesidx	lm_layers	is_wordlmc                    sP   |r  }|S i } fddt |D |d<  fddt |D |d< |S )a  Get ID state from LM hidden states.

    Args:
        lm_states: LM hidden states.
        idx: LM state ID to extract.
        lm_layers: Number of LM layers.
        is_wordlm: Whether provided LM is a word-level LM.

    Returns:
       idx_state: LM hidden state for given ID.

    c                       g | ]
}d  |   qS cr   r&   ra   r`   r   r   r      r2   z#select_lm_state.<locals>.<listcomp>rf   c                    rd   hr   r&   rg   r   r   r      r2   ri   r8   )r`   ra   rb   rc   	idx_stater   rg   r   select_lm_state   s   rl   c                    sD   |r S i } fddt |D |d<  fddt |D |d< |S )zCreate LM hidden states.

    Args:
        lm_states: LM hidden states.
        lm_layers: Number of LM layers.
        is_wordlm: Whether provided LM is a word-level LM.

    Returns:
        new_states: LM hidden states.

    c                    $   g | ] t  fd dD qS )c                       g | ]}|d    qS re   r   r   stater'   r   r   r      r   5create_lm_batch_states.<locals>.<listcomp>.<listcomp>r   stackr   r`   rq   r   r          z*create_lm_batch_states.<locals>.<listcomp>rf   c                    rm   )c                    rn   rh   r   ro   rq   r   r   r      r   rr   rs   ru   rv   rq   r   r      rw   ri   rj   )r`   rb   rc   
new_statesr   rv   r   create_lm_batch_states   s   



ry   lm_modelc                    sj   t | j}| j}| j t|   fddt|D }d|i}|dkr3 fddt|D |d< |S )zInitialize LM hidden states.

    Args:
        lm_model: LM module.

    Returns:
        lm_state: Initial LM hidden states.

    c                    $   g | ]}t  jjjd qS )r   dtyper   zerosr   r   r}   r   _lm_unitspr   r   r      rw   z!init_lm_state.<locals>.<listcomp>ri   lstmc                    r{   r|   r~   r   r   r   r   r     rw   rf   )r9   rnntypn_unitsnext
parametersr8   )rz   rb   lm_units_typri   lm_stater   r   r   init_lm_state   s   


r   c                 C   s\   g }| D ]'}dd |D }|j |v r&||j }t|| j|j|| _q|| q|S )zRecombine hypotheses with same label ID sequence.

    Args:
        hyps: Hypotheses.

    Returns:
       final: Recombined hypotheses.

    c                 S   s   g | ]}|j r|j qS r   rD   )r   fr   r   r   r     r   z"recombine_hyps.<locals>.<listcomp>)rE   indexnp	logaddexprQ   rJ   )rM   rK   rU   	seq_finalseq_posr   r   r   recombine_hyps  s   

r   pad_idc                    s*   t dd | D   fdd| D }|S )zLeft pad label ID sequences.

    Args:
        labels: Label ID sequence.
        pad_id: Padding symbol ID.

    Returns:
        final: Padded label ID sequences.

    c                 s   s    | ]}t |V  qd S r#   r9   r   r-   r   r   r   r(   1      zpad_sequence.<locals>.<genexpr>c                    s"   g | ]}g t |  | qS r   r   r   maxlenr   r   r   r   3     " z pad_sequence.<locals>.<listcomp>)r\   )r
   r   rK   r   r   r   pad_sequence&  s   r   rp   max_lenc           
         s   du s|dk sd  d|krS d  d}||krCtd  d| }tD ]\}}|dd|dddf |< q-S t}d  d}d||f  fddt|D }	tD ]\}}||	| dd|| d |ddf< qd|	S )a!  Check decoder hidden states and left pad or trim if necessary.

    Args:
        state: Decoder hidden states. [N x (?, D_dec)]
        max_len: maximum sequence length.
        pad_id: Padding symbol ID.

    Returns:
        final: Decoder hidden states. [N x (1, max_len, D_dec)]

    Nr0   r      c                    s"   g | ]}d  j j  qS )r   )datar   fill_r   
final_dimsr   rp   r   r   r   U  r   zcheck_state.<locals>.<listcomp>)sizer%   rZ   r9   r8   )
rp   r   r   curr_lentrim_valrA   slayersddimrK   r   r   r   check_state8  s   " 
*r   c                 C   s   t | || d df}| d jj| |}t| D ]0\}}|d}||k r6||||| |ddf< q||| dddf ||ddddf< q|S )a&  Check decoder hidden states and left pad or trim if necessary.

    Args:
        state: Decoder hidden states. [N x (B, ?, D_dec)]
        max_len: maximum sequence length.
        pad_id: Padding symbol ID.

    Returns:
        final: Decoder hidden states. [N x (B, max_len, dec_dim)]

    r   r0   N)r9   r   r   r   r   rZ   )statesr   r   r   rK   rA   r   r   r   r   r   check_batch_states_  s   
,r   T
model_pathmodeltrainingc                    sh   dt j| v rtj| dd dd }n	tj| dd d}|s,d  fdd	| D }|| ~d
S )zLoad Transducer model with training-only modules and parameters removed.

    Args:
        model_path: Model path.
        model: Transducer model.

    snapshotc                 S      | S r#   r   storagelocr   r   r   r/         z#custom_torch_load.<locals>.<lambda>)map_locationr   c                 S   r   r#   r   r   r   r   r   r/     r   )mlpctc_linkl_divlm_linerror_calculatorc                    s,   i | ]\ }t  fd dD s |qS )c                 3   s    | ]}| v V  qd S r#   r   )r   modrS   r   r   r(     r   z/custom_torch_load.<locals>.<dictcomp>.<genexpr>)rI   )r   rT   	task_keysr   r   
<dictcomp>  s    z%custom_torch_load.<locals>.<dictcomp>N)ospathbasenamer   loaditemsload_state_dict)r   r   r   model_state_dictr   r   r   custom_torch_loady  s    

r   )T)$__doc__r   typingr   r   r   r   r   numpyr   r   &espnet.nets.pytorch_backend.nets_utilsr   (espnet.nets.transducer_decoder_interfacer   r	   Tensorr%   r   boolr>   rB   rL   rR   r_   strrl   ry   nnModuler   r   r   r   r   r   r   r   r   r   <module>   s    

8

(

 
' 