o
    iwN                     @   s   d dl Z d dlZd dlmZ d dlZd dlmZ d dlm	Z
 d dlZd dlZd dlm  m  mZ d dlmZ d dlmZ dZdZG dd dejZd	d
 ZdS )    N)	Namespace)CTCPrefixScore)
end_detectg      ?   c                       sN   e Zd ZdZ					d fdd	Zdd Zd	d
 ZdddZdd Z  Z	S )DecoderaM  Decoder layer.

    Args:
        eprojs (int): Dimension of input variables from encoder.
        odim (int): The output dimension.
        dtype (str): Decoder type.
        dlayers (int): Number of layers for decoder.
        dunits (int): Dimension of input vector of decoder.
        sos (int): Number to indicate the start of sequences.
        eos (int): Number to indicate the end of sequences.
        att (Module): Attention module defined at
            `espnet.espnet.nets.chainer_backend.attentions`.
        verbose (int): Verbosity level.
        char_list (List[str]): List of all characters.
        labeldist (numpy.array): Distributed array of counted transcript length.
        lsm_weight (float): Weight to use when calculating the training loss.
        sampling_probability (float): Threshold for scheduled sampling.

    r   N        c              	      s  t t|   |  K t||| _|dkrt|| |nt	|| || _
tjd|D ]}t| d| |dkr@t||nt	|| q/t||| _W d    n1 sYw   Y  || _d | _|| _|| _|| _|| _|| _|	| _|
| _|| _d | _|| _|| _d S )Nlstm   rnn%d)superr   __init__
init_scopeDLEmbedIDembedLStatelessLSTMStatelessGRUrnn0sixmovesrangesetattrLinearoutputdtypelossattdlayersdunitssoseosverbose	char_list	labeldist
vlabeldist
lsm_weightsampling_probability)selfeprojsodimr   r   r   r    r!   r   r"   r#   r$   r&   r'   i	__class__ \/home/ubuntu/.local/lib/python3.10/site-packages/espnet/nets/chainer_backend/rnn/decoders.pyr   (   s<   


zDecoder.__init__c              	   C   s  | j dkr=| |d |d |\|d< |d< tjd| jD ]}| d|  || || ||d  \||< ||< q||fS |d d u rq| j}tjj	
| j t|j|jd | jf|j d|d< W d    n1 slw   Y  | |d ||d< tjd| jD ]O}|| d u r| j}tjj	
| j# t|j||d  jd | jf||d  j d||< W d    n1 sw   Y  | d|  || ||d  ||< q||fS )Nr   r   r	   r
   )r   )r   r   r   r   r   r   xpchainerbackendscudaget_device_from_id
_device_idVariablezerosshaper   )r(   eyz_listc_listz_prevc_prevr+   r0   r.   r.   r/   rnn_forwardX   s8   
"


$zDecoder.rnn_forwardc                    s  d_ jjgd jjgdfdd|D } fdd|D }tj|jd}tj|dd}|jd }|jd	 }t	j
jd
 tjdd |D   t	j
jd tjdd |D   dg}	dg}
tjd	jD ]}|	d |
d qzd}g }j  |}tj|d	d}tj|D ]V}||
d |\}}|dkrt jk rt	d |d }tjt|d	d}|}t||f}n	t|| |f}||
|	|
|	\}
}	||
d  qtj|d	d|| j}|}t |t!|_  j t"#dd |D d	 9  _ tj$|t!|dd}t	dtj j%  j&dkrj'dur|||d}|}t(t)|j%|j%D ]V\\}}}|t*krh nJjj||dk d	d}||dk }fdd|D }fdd|D }d+|,dd}d+|,dd}t	d| |  t	d| |  q[j-durj.du rt/0j1j-_.t2tj3t|j.d	d t4| }dj5 j  j5|  _ j |fS )a  Core function of Decoder layer.

        Args:
            hs (list of chainer.Variable | N-dimension array):
                Input variable from encoder.
            ys (list of chainer.Variable | N-dimension array):
                Input variable of decoder.

        Returns:
            chainer.Variable: A variable holding a scalar array of the training loss.
            chainer.Variable: A variable holding a scalar array of the accuracy.

        Nr+   c                       g | ]}t j |gd dqS r   axisFconcat.0yr    r.   r/   
<listcomp>       z$Decoder.__call__.<locals>.<listcomp>c                       g | ]}t j| gd dqS r@   rC   rF   r!   r.   r/   rJ      rK   paddingr   r	   z input lengths:  c                 S      g | ]}|j d  qS r   r8   )rG   hr.   r.   r/   rJ          z output lengths: c                 S   rQ   rR   rS   rF   r.   r.   r/   rJ      rU   rA   z scheduled sampling c                 S   s   g | ]}t |qS r.   )lenrG   xr.   r.   r/   rJ      s    )ignore_labelz	att loss:c                       g | ]	} j t| qS r.   r#   intrG   idxr(   r.   r/   rJ          c                    rZ   r.   r[   r]   r_   r.   r/   rJ      r`    <space> zgroundtruth[%d]: zprediction [%d]:       ?)6r   r0   arrayr!   r    rD   pad_sequencer8   logginginfor-   __name__strr   r   r   r   appendr   resetr   separaterandomr'   r   argmaxlog_softmaxhstackr>   stackreshaper   softmax_cross_entropyflattennpmeanaccuracydatar"   r#   zip	enumerateMAX_DECODER_OUTPUTjoinreplacer$   r%   r1   r6   asarraysumscalerV   r&   )r(   hsysys_inys_out	pad_ys_in
pad_ys_outbatcholengthr;   r:   _att_wz_alleysr+   att_cz_outr9   y_allaccy_haty_truey_hat_y_true_idx_hatidx_trueseq_hatseq_trueloss_regr.   )r!   r(   r    r/   __call__t   s   







"

zDecoder.__call__c           '         s   t dt|jd   dg}dg}tjd| jD ]}|d |d qd}	| j	
  |j}
|j}|j}| jd| jd}|jdkrK|jd }ntdt|j|jd  }t|j|jd  }t dt|  t dt|  |rd|g|||	dd	}n	d|g|||	d
}|durt|d| j| j}| |d< d|d< |dkrt|jd t|
t }n|jd }|g}g }tj|D ]^}t dt|  g }|D ]a}| |d | }| 	|g|d d |d \}}t||f}| ||||d |d \}}t|  |d j!}|r(|"|d |d | \}}||j#|  }n|}|dur| jj$|ddddddf d| }||d ||d \}} d| |dd|f  |||d    }|rr||j#|dd|f  7 }| jj$|ddddddf d|
 }!|dd|!f }"||! }n| jj$|ddddddf d|
 }|dd|f }"tj|
D ]s}#i }$|dd |$d< |dd |$d< ||$d< |d |"d|#f  |$d< dgdt%|d   |$d< |d |$d dt%|d < | jd||# d|$d t%|d < |r||$d< |dur | |!|#  |$d< ||!|#  |$d< ||$ qt&|dd ddd|
 }q|}t dtt%|  t dd' fdd|d d dd D (d d!  ||d krt d" |D ]}|d | jd| jd qmg }%|D ]A}|d d | jkrt%|d |kr|d  |d | 7  < |r|d  |j#|)|d  7  < || q|%| qt*||r|jdkrt d#|  nI|%}t%|dkrt d$tt%|  nt d%  n-|D ]}t d&d' fd'd|d dd D (d d!  qt d(tt%|  qt&|d)d dddtt%||j+ }&t%|&dkr\t ,d* t-d.i t.|}td|jd+ |_| /||| |S t d,t|&d d   t d-t|&d d t%|&d d    |&S )/a  Beam search implementation.

        Args:
            h (chainer.Variable): One of the output from the encoder.
            lpz (chainer.Variable | None): Result of net propagation.
            recog_args (Namespace): The argument.
            char_list (List[str]): List of all characters.
            rnnlm (Module): RNNLM module. Defined at `espnet.lm.chainer_backend.lm`

        Returns:
            List[Dict[str,Any]]: Result of recognition.

        zinput lengths: r   Nr	   r+   zmax output length: zmin output length: r   )scoreyseqr=   r<   a_prev
rnnlm_prev)r   r   r=   r<   r   ctc_state_prevctc_score_prevrd   rP   z	position r   r<   r   r=   r   rA   r   c                 S      | d S Nr   r.   rX   r.   r.   r/   <lambda>h      z(Decoder.recognize_beam.<locals>.<lambda>T)keyreverseznumber of pruned hypotheses: zbest hypo: ra   c                       g | ]} t | qS r.   r\   rW   r#   r.   r/   rJ   p      z*Decoder.recognize_beam.<locals>.<listcomp>rb   rc   z-adding <eos> in the last position in the loopzend detected at %dzremaining hypotheses: zno hypothesis. Finish decoding.zhypo: c                    r   r.   r   rW   r   r.   r/   rJ     r   znumber of ended hypotheses: c                 S   r   r   r.   r   r.   r.   r/   r     r   zOthere is no N-best results, perform recognition again with smaller minlenratio.g?ztotal log probability: znormalized log probability: r.   )0rg   rh   rj   r8   r   r   r   r   rk   r   rl   	beam_sizepenalty
ctc_weightr0   fullr    maxlenratiomaxr\   minlenratior   r!   initial_stateminCTC_SCORING_RATIOdebugr   rD   rq   r>   rp   r   ry   predict	lm_weightargsortrV   sortedr}   r~   finalr   nbestwarningr   varsrecognize_beam)'r(   rT   lpz
recog_argsr#   rnnlmr;   r:   r   abeamr   r   rH   maxlenminlenhypctc_prefix_scorectc_beamhyps
ended_hypsr+   hyps_best_keptr9   r   r   local_att_scoresrnnlm_statelocal_lm_scoreslocal_scoreslocal_best_ids
ctc_scores
ctc_statesjoint_best_idslocal_best_scoresjnew_hypremained_hyps
nbest_hypsr.   r   r/   r      sJ  





 





(
 


$zDecoder.recognize_beamc                    sF  | j | jgd | j | jgdfdd|D } fdd|D }tj|| jd}tj|dd}|jd }dg}dg}	tj	d| j
D ]}
|d |	d qFd}g }| j  | |}tj|dd	}tj	|D ]&}| ||	d
 |\}}t|| |f}| ||	||	|\}	}|| qntj|dd	}|  |jS )aZ  Calculate all of attentions.

        Args:
            hs (list of chainer.Variable | N-dimensional array):
                Input variable from encoder.
            ys (list of chainer.Variable | N-dimensional array):
                Input variable of decoder.

        Returns:
            chainer.Variable: List of attention weights.

        r+   c                    r?   r@   rC   rF   rI   r.   r/   rJ     rK   z4Decoder.calculate_all_attentions.<locals>.<listcomp>c                    rL   r@   rC   rF   rM   r.   r/   rJ     rK   rN   rP   r	   NrA   r   )r0   re   r!   r    rD   rf   r8   r   r   r   r   rk   r   rl   r   rm   rq   r>   rr   to_cpury   )r(   r   r   r   r   r   r   r   r;   r:   r   r   att_wsr   r+   r   r9   r.   )r!   r    r/   calculate_all_attentions  s2   



z Decoder.calculate_all_attentions)r   NNr   r   )N)
ri   
__module____qualname____doc__r   r>   r   r   r   __classcell__r.   r.   r,   r/   r      s    0
g `r   c                 C   s0   t | j|| j| j| j|||| j| j|| j| jS )a  Return the decoding layer corresponding to the args.

    Args:
        args (Namespace): The program arguments.
        odim (int): The output dimension.
        sos (int): Number to indicate the start of sequences.
        eos (int) Number to indicate the end of sequences.
        att (Module):
            Attention module defined at `espnet.nets.chainer_backend.attentions`.
        labeldist (numpy.array): Distributed array of length od transcript.

    Returns:
        chainer.Chain: The decoder module.

    )	r   r)   r   r   r   r"   r#   r&   r'   )argsr*   r    r!   r   r$   r.   r.   r/   decoder_for  s   r   )rg   rn   argparser   r1   chainer.functions	functionsrD   chainer.linkslinksr   numpyrv   r   2espnet.nets.chainer_backend.deterministic_embed_idnetschainer_backenddeterministic_embed_idr   espnet.nets.ctc_prefix_scorer   espnet.nets.e2e_asr_commonr   r   r|   Chainr   r   r.   r.   r.   r/   <module>   s$       `