o
    i#                     @   sh   d Z ddlmZmZmZmZmZmZ ddlZddl	m
Z
 ddlmZ ddlmZmZ G dd deZdS )	z$(RNN-)Transducer decoder definition.    )AnyDictListOptionalTupleUnionN)check_argument_types)
AbsDecoder)ExtendedHypothesis
Hypothesisc                       s  e Zd ZdZ						d0deded	ed
edededef fddZdej	fddZ
dedeejeej f fddZdejdeejeej f deejeejeej f f fddZdejdejfddZdedeeef deejeejeej f ejf fd d!Zd"eee ee f d#eejeej f deeef d$edeejeejejf ejf f
d%d&Zd'eejeej f d(edeejeej f fd)d*Z	+d1d'eejeej f d,eeejeej f  d-ee deeejeej f  fd.d/Z  ZS )2TransducerDecodera  (RNN-)Transducer decoder module.

    Args:
        vocab_size: Output dimension.
        layers_type: (RNN-)Decoder layers type.
        num_layers: Number of decoder layers.
        hidden_size: Number of decoder units per layer.
        dropout: Dropout rate for decoder layers.
        dropout_embed: Dropout rate for embedding layer.
        embed_pad: Embed/Blank symbol ID.

    lstm   @          r   
vocab_sizernn_type
num_layershidden_sizedropoutdropout_embed	embed_padc                    s   t  sJ |dvrtd| t   tjj||d| _tjj|d| _	|dkr/tjj
ntjj tj fddt|D | _tjfddt|D | _|| _| _|| _|| _d	| _|| _t|  j| _d S )
N>   grur   zNot supported: rnn_type=)padding_idxpr   c                    s   g | ]
} d ddqS )r   T)batch_first .0_)dec_netr   r   Z/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/asr/decoder/transducer_decoder.py
<listcomp>1   s    z.TransducerDecoder.__init__.<locals>.<listcomp>c                    s   g | ]	}t jj d qS )r   )torchnnDropoutr   )r   r   r"   r#   7   s    )r   
ValueErrorsuper__init__r$   r%   	Embeddingembedr&   r   LSTMGRU
ModuleListrangedecoderdropout_decdlayersdunitsdtypeodim	ignore_idblank_idnext
parametersdevice)selfr   r   r   r   r   r   r   	__class__)r!   r   r   r"   r*      s,   


zTransducerDecoder.__init__r;   c                 C   s
   || _ dS )zNSet GPU device to use.

        Args:
            device: Device ID.

        Nr;   )r<   r;   r   r   r"   
set_deviceD   s   
zTransducerDecoder.set_device
batch_sizereturnc                 C   sJ   t j| j|| j| jd}| jdkr!t j| j|| j| jd}||fS |dfS )zInitialize decoder states.

        Args:
            batch_size: Batch size.

        Returns:
            : Initial decoder hidden states. ((N, B, D_dec), (N, B, D_dec))

        r?   r   N)r$   zerosr3   r4   r;   r5   )r<   rA   h_nc_nr   r   r"   
init_stateM   s   
zTransducerDecoder.init_statesequencestatec                 C   s   |\}}|  |d\}}t| jD ]M}| jdkrB| j| ||||d  |||d  fd\}\|||d < |||d < n| j| ||||d  d\}|||d < | j| |}q|||ffS )aR  Encode source label sequences.

        Args:
            sequence: RNN input sequences. (B, D_emb)
            state: Decoder hidden states. ((N, B, D_dec), (N, B, D_dec))

        Returns:
            sequence: RNN output sequences. (B, D_dec)
            (h_next, c_next): Decoder hidden states. (N, B, D_dec), (N, B, D_dec))

        r   r   r   )hx)rF   sizer0   r3   r5   r1   r2   )r<   rG   rH   h_prevc_prevh_nextc_nextlayerr   r   r"   rnn_forwardl   s   
 zTransducerDecoder.rnn_forwardlabelsc                 C   s4   |  |d}| | |}| ||\}}|S )zEncode source label sequences.

        Args:
            labels: Label ID sequences. (B, L)

        Returns:
            dec_out: Decoder output sequences. (B, T, U, D_dec)

        r   )rF   rJ   r   r,   rP   )r<   rQ   rF   	dec_embeddec_outr    r   r   r"   forward   s   
zTransducerDecoder.forwardhypcachec                 C   s   t jd|jd t j| jd}dttt|j}||v r$|| \}}n| 	|}| 
||j\}}||f||< |d d ||d fS )a_  One-step forward hypothesis.

        Args:
            hyp: Hypothesis.
            cache: Pairs of (dec_out, state) for each label sequence. (key)

        Returns:
            dec_out: Decoder output sequence. (1, D_dec)
            new_state: Decoder hidden states. ((N, 1, D_dec), (N, 1, D_dec))
            label: Label ID for LM. (1,)

        )r   r   r'   )r5   r;   r    r   )r$   fullyseqlongr;   joinlistmapstrr,   rP   	dec_state)r<   rU   rV   label
str_labelsrS   r^   dec_embr   r   r"   score   s   
zTransducerDecoder.scorehyps
dec_statesuse_lmc                 C   s  t |}g }dg| }t|D ]&\}}	dttt|	j}
|
|v r)||
 ||< q||
|	jd |	jf q|rct	j
dd |D | jd}| | |ddd |D }| |}| ||\}}d}t|D ]&}|| du r| ||}|| |f||< || |f||| d < |d	7 }qit	jd
d |D dd}| |dd |D }|rt	j
dd |D | jd|d	}|||fS ||dfS )a  One-step forward hypotheses.

        Args:
            hyps: Hypotheses.
            states: Decoder hidden states. ((N, B, D_dec), (N, B, D_dec))
            cache: Pairs of (dec_out, dec_states) for each label sequences. (keys)
            use_lm: Whether to compute label ID sequences for LM.

        Returns:
            dec_out: Decoder output sequences. (B, D_dec)
            dec_states: Decoder hidden states. ((N, B, D_dec), (N, B, D_dec))
            lm_labels: Label ID sequences for LM. (B,)

        Nr    r'   c                 S   s   g | ]}|d  gqS r   r   r   r   r   r   r"   r#          z1TransducerDecoder.batch_score.<locals>.<listcomp>r?   r   c                 S      g | ]}|d  qS )   r   rg   r   r   r"   r#          r   c                 S   ri   r   r   r   dr   r   r"   r#      rk   dimc                 S   ri   rf   r   rm   r   r   r"   r#      rk   c                 S   s   g | ]}|j d  qS )r'   )rX   )r   hr   r   r"   r#      rh   )len	enumeraterZ   r[   r\   r]   rX   appendr^   r$   
LongTensorr;   create_batch_statesrF   rJ   r,   rP   r0   select_statecatview)r<   rc   rd   rV   re   final_batchprocessdoneirU   r`   rQ   p_dec_statesra   rS   
new_statesjrH   	lm_labelsr   r   r"   batch_score   sB   



zTransducerDecoder.batch_scorestatesidxc                 C   sT   |d dd||d ddf | j dkr'|d dd||d ddf fS dfS )a+  Get specified ID state from decoder hidden states.

        Args:
            states: Decoder hidden states. ((N, B, D_dec), (N, B, D_dec))
            idx: State ID to extract.

        Returns:
            : Decoder hidden state for given ID.
              ((N, 1, D_dec), (N, 1, D_dec))

        r   Nr   r   )r5   )r<   r   r   r   r   r"   rw      s
    *zTransducerDecoder.select_stateNr   
check_listc                 C   s@   t jdd |D dd| jdkrt jdd |D ddfS dfS )a*  Create decoder hidden states.

        Args:
            states: Decoder hidden states. ((N, B, D_dec), (N, B, D_dec))
            new_states: Decoder hidden states. [N x ((1, D_dec), (1, D_dec))]

        Returns:
            states: Decoder hidden states. ((N, B, D_dec), (N, B, D_dec))

        c                 S   ri   rl   r   r   sr   r   r"   r#   #  rk   z9TransducerDecoder.create_batch_states.<locals>.<listcomp>r   ro   r   c                 S   ri   rf   r   r   r   r   r"   r#   $  rk   N)r$   rx   r5   )r<   r   r   r   r   r   r"   rv     s   
z%TransducerDecoder.create_batch_states)r   r   r   r   r   r   )N)__name__
__module____qualname____doc__intr]   floatr*   r$   r;   r@   r   Tensorr   tensorrF   rP   rT   r   r   r   rb   r   r   r
   boolr   rw   rv   __classcell__r   r   r=   r"   r      s    *	

$
 


A
r   )r   typingr   r   r   r   r   r   r$   	typeguardr   espnet2.asr.decoder.abs_decoderr	   -espnet2.asr.transducer.beam_search_transducerr
   r   r   r   r   r   r"   <module>   s     