o
    i
                     @   s8   d Z ddlZddlZddlmZ G dd dejjZdS )zPositional encoding modules.    N)	_pre_hookc                	       sl   e Zd ZdZ	ddedededdf fd	d
ZddejdeddfddZ	ddejdedejfddZ
  ZS )RelPositionalEncodingzRelative positional encoding.

    Args:
        size: Module size.
        max_len: Maximum input length.
        dropout_rate: Dropout rate.

              sizedropout_ratemax_lenreturnNc                    sL   t    || _d| _tjj|d| _| t	d
d| | t dS )z.Construct a RelativePositionalEncoding object.N)pr      )super__init__r   petorchnnDropoutdropout	extend_petensorexpand"_register_load_state_dict_pre_hookr   )selfr   r   r   	__class__ n/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/asr_transducer/encoder/modules/positional_encoding.pyr      s   
zRelPositionalEncoding.__init__r   xleft_contextc                 C   s  | d| }| jdur3| j d|d d kr3| jj|jks&| jj|jkr1| jj|j|jd| _dS t|| j }t|| j }tjd|tjd	d}t
tjd| j dtjdtd| j    }t|| |dddddf< t|| |dddddf< t|dg	d}td| | |dddddf< td| | |dddddf< |dd 	d}tj||gdd	j|j|jd
| _dS )zReset positional encoding.

        Args:
            x: Input sequences. (B, T, ?)
            left_context: Number of frames in left context.

        r   N   )devicedtyper   )r    g     @)dim)r    r   )r   r   r    r   tor   zerosarangefloat32	unsqueezeexpmathlogsincosflipcat)r   r   r   time1pe_positivepe_negativepositiondiv_termr   r   r   r   "   s.   
  $$zRelPositionalEncoding.extend_pec                 C   sj   | j ||d |d| }| jdd| jdd | d | jdd |d f }| |}|S )zCompute positional encoding.

        Args:
            x: Input sequences. (B, T, ?)
            left_context: Number of frames in left context.

        Returns:
            pos_enc: Positional embedding sequences. (B, 2 * (T - 1), ?)

        )r   r   Nr   )r   r   r   r   )r   r   r   r/   pos_encr   r   r   forwardG   s   8
zRelPositionalEncoding.forward)r   r   )r   )__name__
__module____qualname____doc__intfloatr   r   Tensorr   r5   __classcell__r   r   r   r   r   
   s    
$%r   )r9   r)   r   1espnet.nets.pytorch_backend.transformer.embeddingr   r   Moduler   r   r   r   r   <module>   s
    