o
    i                     @   s   d Z ddlZddlZddlZddlmZ ddlZddlZddl	Z	ddl
m  mZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ dZdZG dd de	jjeZ dd Z!dS )zRNN decoder module.    N)	Namespace)CTCPrefixScore)CTCPrefixScoreTH)ScorerInterface
end_detect)mask_by_length)pad_list)th_accuracy)	to_device)att_to_numpy   g      ?c                   @   s   e Zd ZdZ									d"ddZd	d
 Zdd Zd#ddZd$ddZ				d%ddZ	d#ddZ
edd Zedd Zedd Zedd Zdd Zd d! ZdS )&DecoderaF  Decoder module

    :param int eprojs: encoder projection units
    :param int odim: dimension of outputs
    :param str dtype: gru or lstm
    :param int dlayers: decoder layers
    :param int dunits: decoder units
    :param int sos: start of sequence symbol id
    :param int eos: end of sequence symbol id
    :param torch.nn.Module att: attention module
    :param int verbose: verbose level
    :param list char_list: list of character strings
    :param ndarray labeldist: distribution of label smoothing
    :param float lsm_weight: label smoothing weight
    :param float sampling_probability: scheduled sampling probability
    :param float dropout: dropout rate
    :param float context_residual: if True, use context vector for token generation
    :param float replace_sos: use for multilingual (speech/text) translation
    r   N        F   c                 C   s  t jj|  || _|| _|| _|| _t j||| _	t jj
|d| _t j | _t j | _|  j| jdkr@t j|| |nt j|| |g7  _|  jt jj
|dg7  _tjd| jD ](}|  j| jdkrrt j||nt j||g7  _|  jt jj
|dg7  _qad| _|rt j|| || _nt j||| _d | _|| _|| _|| _|| _|| _|	| _|
| _|| _d | _|| _ || _!|| _"|| _#|| _$d| _%d S )N)plstmr   g    _)&torchnnModule__init__dtypedunitsdlayerscontext_residual	EmbeddingembedDropoutdropout_emb
ModuleListdecoderdropout_decLSTMCellGRUCellsixmovesrange	ignore_idLinearoutputlossattsoseosodimverbose	char_list	labeldist
vlabeldist
lsm_weightsampling_probabilitydropoutnum_encsreplace_soslogzero)selfeprojsr/   r   r   r   r-   r.   r,   r0   r1   r2   r4   r5   r6   r   r8   r7   _ r=   ]/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/language_model/rnn/decoders.pyr   0   sT   


zDecoder.__init__c                 C   s   | |d| jS Nr   )	new_zerossizer   )r:   hs_padr=   r=   r>   
zero_state~   s   zDecoder.zero_statec                 C   s   | j dkrG| jd ||d |d f\|d< |d< tjd| jD ]!}| j| | j|d  ||d  || || f\||< ||< q!||fS | jd ||d |d< tjd| jD ]}| j| | j|d  ||d  || ||< q[||fS )Nr   r   r   )r   r!   r%   r&   r'   r   r"   )r:   eyz_listc_listz_prevc_previr=   r=   r>   rnn_forward   s   
(&0zDecoder.rnn_forwardc           )   
      s  j dkr|g}gfdd|D }t|tjd }fddtj D d_|d jg |d jgj	rMdd t
||D }n	fdd|D } fd	d|D }	t|j}
t|	j}|d}|d}tj D ]}tjjd
j |d |   qztjjd tdd |	D   |d g}|d g}tjdjD ]}||d  ||d  qg }j dkrd}j|   ndgj d  }dgj  }tj d D ]	}j|   q|
}tj|D ]}j dkr/j| |d d jd |d |\}}nOtj D ] }j| || | jd |d || \||< ||< q4tj|dd}j gt| }jj  ||jd |d |j  \}|j < |dkrt j k rtd !|d }t"j#|$ % dd}t&|d |}tj'||fdd}ntj'|dd|ddf |fdd}(|||||\}}j)r|tj'jd |d |fdd q|jd |d  qtj|dd*|| d}!|}t+j,||*djdd_t-.j/ } jt"0dd |D d 9  _t1||jd}tdd2tj/ 3d  j4dkrȈj5dur|*||d} |}!t
t6| $ % 7 |!$ % 7 D ]O\\}}"}#|t8kr nCt"j#|"|#jk dd}$|#|#jk }%fdd|$D }&fdd|%D }'d2|&}&d2|'}'td| |'  td| |&  qxj9durj:du rt&|d t;j9_:tj<t+j=|ddj: *ddd t| }(dj> j j>|(  _j||fS )a  Decoder forward

        :param torch.Tensor hs_pad: batch of padded hidden state sequences (B, Tmax, D)
                                    [in multi-encoder case,
                                    list of torch.Tensor,
                                    [(B, Tmax_1, D), (B, Tmax_2, D), ..., ] ]
        :param torch.Tensor hlens: batch of lengths of hidden state sequences (B)
                                   [in multi-encoder case, list of torch.Tensor,
                                   [(B), (B), ..., ]
        :param torch.Tensor ys_pad: batch of padded character id sequence tensor
                                    (B, Lmax)
        :param int strm_idx: stream index indicates the index of decoding stream.
        :param torch.Tensor lang_ids: batch of target language id tensor (B, 1)
        :return: attention loss value
        :rtype: torch.Tensor
        :return: accuracy
        :rtype: float
        r   c                       g | ]	}|| j k qS r=   r(   .0yr:   r=   r>   
<listcomp>       z#Decoder.forward.<locals>.<listcomp>c                       g | ]}t tt | qS r=   listmapintrN   idxhlensr=   r>   rQ          Nr   c                 S   "   g | ]\}}t j||gd dqS r   dimr   catrN   rY   rO   r=   r=   r>   rQ         " c                       g | ]}t j |gd dqS r^   ra   rM   r-   r=   r>   rQ      r\   c                       g | ]}t j| gd dqS r^   ra   rM   r.   r=   r>   rQ      r\   /Number of Encoder:{}; enc{}: input lengths: {}.z output lengths: c                 S   s   g | ]}| d qS r   rA   rM   r=   r=   r>   rQ      s    r_   z scheduled sampling r   )axismean)ignore_index	reductionc                 S      g | ]}t |qS r=   lenrN   xr=   r=   r>   rQ         )ignore_labelz	att loss: 
c                       g | ]	} j t| qS r=   r1   rW   rX   rP   r=   r>   rQ   "  rR   c                    ry   r=   rz   rX   rP   r=   r>   rQ   #  rR   zgroundtruth[%d]: zprediction [%d]:       ?)?r7   minrr   r,   r'   r+   newr.   r-   r8   zipr	   r(   rA   logginginfo	__class____name__formatstrrC   r%   r&   r   appendresetr   r   r"   r   stackrandomr5   r*   npargmaxdetachcpur   rb   rJ   r   viewFcross_entropymathexpitemrm   r
   joinsplitr0   r1   	enumeratenumpyMAX_DECODER_OUTPUTr2   r3   
from_numpysumlog_softmaxr4   ))r:   rB   r[   ys_padstrm_idxlang_idsysatt_idxys_inys_out	ys_in_pad
ys_out_padbatcholengthrY   rF   rE   r<   z_allatt_w
att_w_list
att_c_listeysrI   att_c
hs_pad_han	hlens_hanz_outrD   y_allpplaccys_hatys_truey_haty_trueidx_hatidx_trueseq_hatseq_trueloss_regr=   )r.   r[   r:   r-   r>   forward   s   






$
"$ 


zDecoder.forwardc           *         s
  	j dkrgg	j dkrdu rg	j  t	j D ]}td	j |d d d qt|t	jd }		d 
dg}			d 
dg}
tjd	jD ]}|			d 
d |
		d 
d q\	j dkrd}	j|   n'dg	j d  }dg	j d   dg	j  }t	j d D ]	}	j|   q|j}|j}t|dd}d dur߈	j dkr|jt|j }tddd	d
 |D   ndg}	jr|jr|j}n	j}tdt|  td|   d d }tfdd
t	j D }|jdkr,t dt!|j| }t!|j"| }tdt|  tdt|  |rSd|g|	|
|dd}n	d|g|	|
|d}d dur	fdd
t	j D fdd
t	j D |d< dg	j  |d< |dkrtd j#d t!|t$ }nd j#d }|g}g }tj|D ]B}t%dt|  g }|D ]R}|d | |d< 	&	'|}	j dkr	j| d 
dd dg	j(d |d d |d \}}nXt	j D ]+}	j| | 
d| dg	j(d |d d |d | \||<  |< qt)j*|dd}	j	j  |	j g	j(d |d d |d 	j  \} 	j < t)j+||fdd}	,||
|	|d |d \}
}		j-r	.t)j+	j(d |
d |fdd} n	.	j(d |
d } t/j0| dd}!|r|1|d |\}"}#|!|j2|#  }$n|!}$d durRt)j3|!|dd\}%}&dg	j  dg	j  t	j D ]}| |d |&d |d | \|< |< qd| |!dd|&d f  }$	j dkr|$|t)4d |d d   7 }$nt	j D ]}|$|||  t)4| |d |   7 }$q|r=|$|j2|#dd|&d f  7 }$t)j3|$|dd\}%|&ddd f }&n
t)j3|$|dd\}%}&tj|D ]i }'|
dd |'d< |	dd |'d< 	j dkr|dd |'d< n fd d
t	j d D |'d< |d! |%df  |'d!< dgdt|d   |'d< |d |'d dt|d < t!|&df |'d t|d < |r|"|'d< d durfd"d
t	j D |'d< fd#d
t	j D |'d< ||' qbt5|d$d% d&d'd| }q|}t%d(tt|  t%d)d*fd+d
|d d dd D   ||d krPtd, |D ]}|d 	j6 qDg }(|D ]A}|d d 	j6krt|d |kr|d!  |d | 7  < |r|d!  |j2|7|d  7  < || qT|(| qTt8||r|jdkrtd-|  nF|(}t|dkrt%d.tt|  ntd/  n*|D ]}t%d0d*fd1d
|d dd D   qt%d2tt|  qt5|d3d% d&d'dtt||j9 })t|)dkr=t:d4 t;d8i t<|}t d|j"d5 |_"	j dkr4	=d d ||S 	=||S td6t|)d d!   td7t|)d d! t|)d d    |)S )9a-  beam search implementation

        :param torch.Tensor h: encoder hidden state (T, eprojs)
                                [in multi-encoder case, list of torch.Tensor,
                                [(T1, eprojs), (T2, eprojs), ...] ]
        :param torch.Tensor lpz: ctc log softmax output (T, odim)
                                [in multi-encoder case, list of torch.Tensor,
                                [(T1, odim), (T2, odim), ...] ]
        :param Namespace recog_args: argument Namespace containing options
        :param char_list: list of character strings
        :param torch.nn.Module rnnlm: language module
        :param int strm_idx:
            stream index for speaker parallel attention in multi-speaker case
        :return: N-best decoding results
        :rtype: list of dicts
        r   Nri   r   
ctc_weightFctc weights (decoding):  c                 S   rp   r=   r   rs   r=   r=   r>   rQ   n  ru   z*Decoder.recognize_beam.<locals>.<listcomp>r{   <sos> index: <sos> mark: c                    s   g | ]	} |  d qS rj   rk   rX   )hr=   r>   rQ   {  rR   max output length: min output length: r   )scoreyseqrH   rG   a_prev
rnnlm_prev)r   r   rH   rG   r   c                    s(   g | ]}t  |   d jtqS rj   )r   r   r   r.   r   rX   )lpzr:   r=   r>   rQ         c                    s   g | ]} |   qS r=   )initial_staterX   )ctc_prefix_scorer=   r>   rQ     s    ctc_state_prevctc_score_prevr   	position r   rG   r   r_   rH   r   c                    s   g | ]
} | d d  qS Nr=   rX   )r   r=   r>   rQ         r   c                        g | ]} | d f  qS rj   r=   rX   )
ctc_statesjjoint_best_idsr=   r>   rQ         c                    r   rj   r=   rX   )
ctc_scoresr   r   r=   r>   rQ     r   c                 S      | d S Nr   r=   rt   r=   r=   r>   <lambda>      z(Decoder.recognize_beam.<locals>.<lambda>Tkeyreverseznumber of pruned hypotheses: zbest hypo: rw   c                       g | ]} t | qS r=   rW   rs   r1   r=   r>   rQ         z-adding <eos> in the last position in the loopzend detected at %dzremaining hypotheses: zno hypothesis. Finish decoding.zhypo: c                    r   r=   r   rs   r   r=   r>   rQ   8  r   znumber of ended hypotheses: c                 S   r   r   r=   r   r=   r=   r>   r   <  r   zOthere is no N-best results, perform recognition again with smaller minlenratio.g?ztotal log probability: znormalized log probability: r=   )>r7   r'   r   r   r   rA   r|   rr   r,   rC   	unsqueezer%   r&   r   r   r   	beam_sizepenaltygetattrweights_ctc_decr   r   r   r8   tgt_langindexr-   r   r@   longaminmaxlenratiomaxrW   minlenratioshapeCTC_SCORING_RATIOdebugr   r   r"   r   r   rb   rJ   r   r*   r   r   predict	lm_weighttopkr   sortedr.   finalr   nbestwarningr   varsrecognize_beam)*r:   r   r   
recog_argsr1   rnnlmr   rY   r   rF   rE   r<   ar   beamr   r   r   rO   vymaxlenminlenhypctc_beamhyps
ended_hypsrI   hyps_best_keptrD   r   r   h_hanlogitslocal_att_scoresrnnlm_statelocal_lm_scoreslocal_scoreslocal_best_scoreslocal_best_idsnew_hypremained_hyps
nbest_hypsr=   )
r   r1   r   r   r   r   r   r   r   r:   r>   r   3  s  

 











  

2
 

2zDecoder.recognize_beamTc
           @   
      s  j dkr	g	
g
gj dkrd u rgj  t|tjd }
tj D ] }tdj |d 	| d t		| 
| d	|< q,t
d  j
j}tdd}d| }tddd d urj dkrjtj }tdd	d
d |D   ndg}  t	d t  dd}t
fddtj D }jdkr|}n
tdtj| }tj| }tdt|  tdt|  	fddtjD }	fddtjD }	fddtjD 	fddtjD t	d t }d }j dkr?d g}d gd gd g}}}j|
   n5d gj d  }d gj d  }d gj  }d gj  d gj  }}tj d D ]
}j|   qijrjrtdt j  tdj  fddt!j"D }n5d urfddt!j"D }n!tdtj#  tdj#   fddt!j"D }fddt!j"D }dd t!j" D dd t!j" D }dd t D  
fddtj D fddtj D 	fddtj D 	fd dtj D d d urgt|dkrOd j$sOtt% ndd d} 
fd!dtj D }t!j"|D ]at&d"t  t	d t'(|}!)*|!}"j dkrj|
 d d j+d |d |d \}#}$|$g}nKtj D ] }j| | | j+d |d || \||< ||< qtj,|dd#}%jj  |%j g j+d |d |j  \}#|j < tj-|"|#fdd#}".|"||\j/r(0tj-j+d d |#fdd#}&n0j+d d }&|t1j2|&dd# }'|rP|3||!\}}(|'j4|(  }'|d rj5|'d d df< | dkrmtj6|'| dd#d nd })tj D ]-}|| }$t7|$tj8r|$n|$d }*|| ||| |)|*\}+||< |'|||  |+  }'qt|' j9}'dkrj5|'d d dd d d f< |'d d d d j:f | },| d;ddj9}j5|d d d d j:f< ||'  d}t6|d\}-}.t<|.j9dj=> ? }|.j9 | dj=> ? }/|d d  d d  }0@||/}A||}|-}t	d t'|/g }j dkrAj nj d }1t|1D ]u}t7|| tj8rmtB|| jg|| jCdd  R  d}2nLt7|| tDrfd$d|| D }2n7tB|| d dd}3tB|| d d dd}4tB|| d d dd}5|3|4|5ff}2|E|2 qJfd%dtjD }fd&dtjD }|krd}6d | }7|-d d df }8t!j" D ] r|6 }6qt!j"D ]z}9d }:|,|9f |8 kr>|0|6 d d  };t|;t
fd'd(tj D kr=|, |9 |7 }:n|d krU||6 d d  };| |9 |7 }:|:r|;Ej: |rm|:j4|jF||6d) 7 }:|:j=> G }< E|;|:|<d* |6d }6q
qfd+dt!j" D tDtH}=t|=dkr|=d r n&|rI|d}|d rtj D ]}|| J|| |.||< qqm|jK}>|>jLd,krtjMK  tjMN  W d    n	1 sw   Y  j#j:gtOtPd- gd.gfd/dt!j" D |r4t!j" D ] D ]}?|?d0  t|?d1   < q"qfd2dt!j" D }|S )3Nr   ri   r   r   r   r{   ctc_window_marginr   r   c                 S   rp   r=   r   rs   r=   r=   r>   rQ     ru   z0Decoder.recognize_beam_batch.<locals>.<listcomp>r   c                    s   g | ]}t  | qS r=   )r   rX   rZ   r=   r>   rQ     r   r   r   c                    $   g | ]}t  d  tjqS rj   r   r   zerosr   rN   r<   r   n_bbr:   r=   r>   rQ        $ c                    r  rj   r  r  r  r=   r>   rQ     r  c                    r  rj   r  r  r  r=   r>   rQ     r  c                    r  rj   r  r  r  r=   r>   rQ     r  r   r   c                    s   g | ]	}  jgqS r=   )r   r   r  )r1   r   r=   r>   rQ     rR   c                    s   g | ]
} |j   gqS r=   )r   )rN   b)r   r   r=   r>   rQ     r   c                    s   g | ]} j gqS r=   rf   r  rP   r=   r>   rQ     ru   c                    s   g | ]} j qS r=   rf   r  rP   r=   r>   rQ     s    c                 S   s   g | ]}d qS )Fr=   r  r=   r=   r>   rQ         c                 S      g | ]}g qS r=   r=   r  r=   r=   r>   rQ     r  c                 S   r  r=   r=   r  r=   r=   r>   rQ     r  c                    s.   g | ]}|   d d qS )r   r   )repeatr   	transpose
contiguousrX   )r   r   r[   r=   r>   rQ     s     c                    s   g | ]} |  d  qS )r   )r   tolistrX   )	exp_hlensr=   r>   rQ     r\   c                    s*   g | ]}|  d d  d d  qS )r   )r   r  r   rX   )r   r   r=   r>   rQ     s    c                    s6   g | ]} |  |  d  |  d qS )r      )r   rA   rX   )exp_hr   r  r=   r>   rQ     s    (c              	      s(   g | ]}t | | d j dqS )r   )margin)r   r.   rX   )
ctc_marginr[   r   r:   r=   r>   rQ     s    r   r_   c                    s"   g | ]}t | d dqS r   r   r   index_selectr   )rN   	att_w_one)r  vidxr=   r>   rQ   -      c                    s&   g | ]}t |  d dqS r'  r(  rN   li)r  r+  rE   r=   r>   rQ   8      c                    s&   g | ]}t  | d dqS r'  r(  r-  )rF   r  r+  r=   r>   rQ   ;  r/  c                 3   s    | ]	} |  V  qd S r   r=   rX   )r[   samp_ir=   r>   	<genexpr>L  s    z/Decoder.recognize_beam_batch.<locals>.<genexpr>)r   )r   vscorer   c                    s"   g | ]}| pt  | qS r=   r   rN   r0  )r  rI   stop_searchr=   r>   rQ   \  r,  cudainf)r   r   c                    s(   g | ]}t | d kr| n qS rj   rq   r3  )
dummy_hypsr  r=   r>   rQ   r  r   r   r   c                    s:   g | ]}t  | d d dddtt | j qS )c                 S   r   r   r=   r   r=   r=   r>   r   |  r   z9Decoder.recognize_beam_batch.<locals>.<listcomp>.<lambda>Tr   N)r   r|   rr   r   r3  )r  r   r=   r>   rQ   {  s    )Qr7   r|   rr   r,   r'   r   r   r   rA   r   r   r   r   r   r   r   r   r   r   aranger   r   r   r   rW   r   r   r   r  r   r8   r   r   r%   r&   r-   is_cudar   r   
LongTensor_get_last_yseqr   r   r"   r   rb   rJ   r   r*   r   r   buff_predictr   r9   r   
isinstanceTensorr/   r.   r  fmoddatar   r!  _index_select_list_append_idsr)  r   rU   r   r   r   set_index_select_lm_stateindex_select_statedevicetyper5  empty_cachearrayfloat)@r:   r   r[   r   r   r1   r   normalize_scorer   r   r   rY   r   r   
att_weightr   pad_bmax_hlenr  r  rH   rG   vscoresr  r   r   
ctc_scorer	ctc_stater   r   accum_odim_idsr  scoring_numr   rD   r   r   	exp_h_hanr	  r  r  part_idsatt_w_local_ctc_scoreseos_vscoresaccum_best_scoresaccum_best_idsaccum_padded_beam_idsy_prevnum_atts_a_prev_a_prev__h_prev__c_prev_k	penalty_ithrbeam_j_vscoreyk_scorestop_search_summaryrF  rt   r=   )r   r   rF   r1   r&  r7  r  r$  r"  r   r[   rI   r   r   r  r   r0  r:   r4  r+  rE   r>   recognize_beam_batchV  s  
  

$"

(
 
&""

(


"

zDecoder.recognize_beam_batchc                    sJ  j dkr|g}gfdd|D }t|tjd }fddtj D d_|d jg |d jgj	rMdd t
||D }n	fdd|D } fd	d|D }	t|j}
t|	j}|d}|d g}|d g}tjdjD ]}||d  ||d  qg }j dkrd}j|   ndgj d  }dgj  }tj d D ]	}j|   qň|
}tj|D ]}j dkrj| |d d jd |d |\}}|| nVtj D ] }j| || | jd |d || \||< ||< qtj|dd
}j gt| }jj  ||jd |d |j  \}|j < ||  tj|dd|ddf |fdd
}|||||\}}q݈j dkrt|j| }|S g }tt
| D ]\}}t|j| }|| q|}|S )a  Calculate all of attentions

        :param torch.Tensor hs_pad: batch of padded hidden state sequences
                                    (B, Tmax, D)
                                    in multi-encoder case, list of torch.Tensor,
                                    [(B, Tmax_1, D), (B, Tmax_2, D), ..., ] ]
        :param torch.Tensor hlen: batch of lengths of hidden state sequences (B)
                                    [in multi-encoder case, list of torch.Tensor,
                                    [(B), (B), ..., ]
        :param torch.Tensor ys_pad:
            batch of padded character id sequence tensor (B, Lmax)
        :param int strm_idx:
            stream index for parallel speaker attention in multi-speaker case
        :param torch.Tensor lang_ids: batch of target language id tensor (B, 1)
        :return: attention weights with the following shape,
            1) multi-head case => attention weights (B, H, Lmax, Tmax),
            2) multi-encoder case =>
                [(B, Lmax, Tmax1), (B, Lmax, Tmax2), ..., (B, Lmax, NumEncs)]
            3) other case => attention weights (B, Lmax, Tmax).
        :rtype: float ndarray
        r   c                    rK   r=   rL   rM   rP   r=   r>   rQ     rR   z4Decoder.calculate_all_attentions.<locals>.<listcomp>c                    rS   r=   rT   rX   )hlenr=   r>   rQ     r\   Nr   c                 S   r]   r^   ra   rc   r=   r=   r>   rQ     rd   c                    re   r^   ra   rM   rf   r=   r>   rQ     r\   c                    rg   r^   ra   rM   rh   r=   r>   rQ     r\   r_   )r7   r|   rr   r,   r'   r+   r}   r.   r-   r8   r~   r	   r(   rA   rC   r%   r&   r   r   r   r   r   r"   r   r   copyrb   rJ   r   r   )r:   rB   rk  r   r   r   r   r   r   r   r   r   r   rF   rE   r<   att_wsr   r   r   rY   r   rI   r   r   hlen_hanrD   _att_wswsr=   )r.   rk  r:   r-   r>   calculate_all_attentions  s~   



$z Decoder.calculate_all_attentionsc                 C   s    g }| D ]	}| |d  q|S )Nr   r   )exp_yseqlasty_seqr=   r=   r>   r;    s   zDecoder._get_last_yseqc                 C   sR   t |trt|D ]\}}| | | q	| S tt| D ]	}| | | q| S r   )r=  rU   r   r   r'   rr   )r   idsrI   r   r=   r=   r>   rB    s   
zDecoder._append_idsc                 C   s(   g }|D ]}| | | d d   q|S r   rr  )r   lstnew_yseqrI   r=   r=   r>   rA    s   zDecoder._index_select_listc                    sr   t | tri }|  D ]\}} fdd|D ||< q|S t | tr7g }D ]}|| t| d d   q'|S )Nc                    s   g | ]	}t | qS r=   )r   r)  )rN   vir`   r+  r=   r>   rQ     rR   z2Decoder._index_select_lm_state.<locals>.<listcomp>)r=  dictitemsrU   r   rW   )r  r`   r+  	new_staterb  vrI   r=   rz  r>   rD  
  s   

zDecoder._index_select_lm_statec           	      C   s  | j dkr|g}| |d dg}| |d dg}tjd| jD ]}|| |d d || |d d q&d}t|t	| j
d }| j dkr^d }| j
|   nd g| j d  }t| j d D ]	}| j
|   qmt|d d  |d d  ||||fdS )Nr   r   rH   rG   r   	workspace)r7   rC   r   r%   r&   r'   r   r   r|   rr   r,   r   r{  )	r:   rt   rF   rE   r<   
strm_indexr   r   rY   r=   r=   r>   
init_state  s*   



zDecoder.init_statec                 C   s
  | j dkr|g}|d \}}}|d d}| | |}| j dkrF| j| |d d|d dg| jd |d d |d \}	}
ned g| j d  }
d g| j  }t| j D ]*}| j| || d|| dg| jd |d d |d | \||< |
|< qYtj	|dd}| j| j  || j g| jd |d d |d | j  \}	|
| j < tj
||	fdd}| ||||d |d \}}| jr| tj
| jd |d |	fdd}n| | jd |d }tj|ddd}|t|d d  |d d  |
|||fd	fS )
Nr   r  r   r   rG   r   r_   rH   r  )r7   r   r   r   r,   rA   r"   r'   r   r   rb   rJ   r   r*   r   r   squeezer{  )r:   r   statert   r   rE   rF   r   rD   r   r   r   rY   r  r	  logpr=   r=   r>   r   3  sT   




(

zDecoder.score)	r   NNr   r   r   FFr   )r   Nr?   )NTr   N)r   
__module____qualname____doc__r   rC   rJ   r   r   rj  rq  staticmethodr;  rB  rA  rD  r  r   r=   r=   r=   r>   r      sJ    
N
 
&  ,
  
0n

	

r   c                 C   sR   t | j|| j| j| j|||| j| j|| j| j| j	t
| ddt
| ddt
| ddS )Nr   Fr8   r7   r   )r   r;   r   r   r   r0   r1   r4   r5   dropout_rate_decoderr   )argsr/   r-   r.   r,   r2   r=   r=   r>   decoder_forf  s&   


r  )"r  r   r   r   argparser   r   r   r%   r   torch.nn.functionalr   
functionalr   8funasr.models.transformer.utils.scorers.ctc_prefix_scorer   r   8funasr.models.transformer.utils.scorers.scorer_interfacer   funasr.metricsr   *funasr.models.transformer.utils.nets_utilsr   r	   funasr.metrics.compute_accr
   r   +funasr.models.language_model.rnn.attentionsr   r   r   r   r   r  r=   r=   r=   r>   <module>   s<            S