o
    iF(                     @   s  d Z ddlmZmZmZmZmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ 													d8dededede de!dededee dee de!d ed!e d"e d#eeef fd$d%Z"d&e d'eeef d#efd(d)Z#d*e d'eeeee f f d#efd+d,Z$d'eeeef  d-eeef d#efd.d/Z%d'eeeef  d-eeef d#efd0d1Z&d'eeeef  d2e!d#efd3d4Z'd'eeeef  d-eeef d5e d#efd6d7Z(dS )9z8Set of methods to build Transducer encoder architecture.    )AnyDictListOptionalUnion)get_activation)Branchformer)	Conformer)Conv1d	ConvInput)RelPositionMultiHeadedAttention)ConformerConvolutionConvolutionalSpatialGatingUnit)MultiBlocks)get_normalization)RelPositionalEncoding)PositionwiseFeedForwardswish          F
layer_normN      ?   pos_wise_act_typeconv_mod_act_typepos_enc_dropout_ratepos_enc_max_lensimplified_att_score	norm_typeconv_mod_norm_typeafter_norm_epsafter_norm_partialdynamic_chunk_trainingshort_chunk_thresholdshort_chunk_sizeleft_chunk_sizereturnc                 K   s   i }t | fi ||d< t |fi ||d< ||d< ||d< ||d< ||d< ||d< t|||d\|d	< |d
< |	|d< td|
|d< td||d< td||d< |S )a  Build encoder main parameters.

    Args:
        pos_wise_act_type: Conformer position-wise feed-forward activation type.
        conv_mod_act_type: Conformer convolution module activation type.
        pos_enc_dropout_rate: Positional encoding dropout rate.
        pos_enc_max_len: Positional encoding maximum length.
        simplified_att_score: Whether to use simplified attention score computation.
        norm_type: X-former normalization module type.
        conv_mod_norm_type: Conformer convolution module normalization type.
        after_norm_eps: Epsilon value for the final normalization.
        after_norm_partial: Value for the final normalization with RMSNorm.
        dynamic_chunk_training: Whether to use dynamic chunk training.
        short_chunk_threshold: Threshold for dynamic chunk selection.
        short_chunk_size: Minimum number of frames during dynamic chunk training.
        left_chunk_size: Number of frames in left context.
        **activations_parameters: Parameters of the activation functions.
                                    (See espnet2/asr_transducer/activation.py)

    Returns:
        : Main encoder parameters

    pos_wise_actconv_mod_actr   r   r   r   r    epspartialafter_norm_classafter_norm_argsr#   r   r$   r%   r&   )r   r   max)r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   activation_parametersmain_params r2   [/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/asr_transducer/encoder/building.pybuild_main_parameters   s2   '

r4   
block_sizeconfigurationc                 C   s   t | |dd|dddS )zBuild positional encoding block.

    Args:
        block_size: Input/output size.
        configuration: Positional encoding configuration.

    Returns:
        : Positional encoding module.

    r   r   r   r   )max_len)r   get)r5   r6   r2   r2   r3   build_positional_encodinga   s
   

r9   
input_sizec                 C   s"   t | |d |d |d |d dS )zBuild encoder input block.

    Args:
        input_size: Input size.
        configuration: Input block configuration.

    Returns:
        : ConvInput block function.

    	conv_sizesubsampling_factorvgg_likeoutput_size)r=   r>   r   )r:   r6   r2   r2   r3   build_input_blocku   s   r?   r1   c                    s   | d | d |  ddt|d |  d|  dd\}}| d	 |||d
 f |  dd|  dd|d ft|d |  d|  dd\ fddS )zBuild Branchformer block.

    Args:
        configuration: Branchformer block configuration.
        main_params: Encoder main parameters.

    Returns:
        : Branchformer block function.

    hidden_sizelinear_sizedropout_rater   r    conv_mod_norm_epsconv_mod_norm_partialr*   conv_mod_kernel_sizer#   heads   att_dropout_rater   r   norm_epsnorm_partialc                	      s   t t t  dS )N
norm_class	norm_argsrB   )r   r   r   r2   conv_mod_argsrB   r@   rA   mult_att_argsrM   rL   r2   r3   <lambda>   s    z*build_branchformer_block.<locals>.<lambda>r8   r   )r6   r1   conv_mod_norm_classconv_mod_norm_argsr2   rN   r3   build_branchformer_block   s4   




rU   c                    s    d  d }|  dd|d f  dd  dd	d
} d |d ||d f  dd  dd|d ft|d   d  dd\ fddS )zBuild Conformer block.

    Args:
        configuration: Conformer block configuration.
        main_params: Encoder main parameters.

    Returns:
        : Conformer block function.

    r@   rA   pos_wise_dropout_rater   r(   rC   gh㈵>conv_mod_norm_momentumg?)r+   momentumrE   r)   r#   rF   rG   rH   r   r   rI   rJ   r*   c                      s0   t t t t t  dddS )NrB   r   rK   )r	   r   r   r   r8   r2   r6   rO   r@   rP   rM   rL   pos_wise_argsr2   r3   rQ      s    
z'build_conformer_block.<locals>.<lambda>rR   )r6   r1   rA   rT   r2   rY   r3   build_conformer_block   s6   



	

r[   causalc                    s    fddS )zBuild Conv1d block.

    Args:
        configuration: Conv1d block configuration.

    Returns:
        : Conv1d block function.

    c                      sb   t d d d dddddddd	d
d	dd dddS )Nr:   r>   kernel_sizestride   dilationgroupsbiasTrelu
batch_normFrB   r   )r^   r`   ra   rb   rc   rd   r\   rB   )r
   r8   r2   r\   r6   r2   r3   rQ     s    






z$build_conv1d_block.<locals>.<lambda>r2   )r6   r\   r2   re   r3   build_conv1d_block  s   rf   r>   c                    s   g }g }| D ]   ddur | d  fdd D g 7 }q| g7 }qt|D ]/\}  d }|dkr<t |}n|dkrFt |}n|dkrRt |d	 }nt|| q*td
d |D ||d |d dS )a  Build encoder body blocks.

    Args:
        configuration: Body blocks configuration.
        main_params: Encoder main parameters.
        output_size: Architecture output size.

    Returns:
        MultiBlocks function encapsulation all encoder blocks.

    
num_blocksNc                    s   i | ]}|d kr| | qS )rg   r2   ).0c_icr2   r3   
<dictcomp>6  s    z%build_body_blocks.<locals>.<dictcomp>
block_typebranchformer	conformerconv1dr#   c                 S   s   g | ]}| qS r2   r2   )rh   fnr2   r2   r3   
<listcomp>J  s    z%build_body_blocks.<locals>.<listcomp>r-   r.   )rL   rM   )r8   	enumeraterU   r[   rf   NotImplementedErrorappendr   )r6   r1   r>   
fn_modulesextended_confirm   moduler2   rj   r3   build_body_blocks   s0   
rz   )r   r   r   r   Fr   r   NNFr   r   r   ))__doc__typingr   r   r   r   r   !espnet2.asr_transducer.activationr   2espnet2.asr_transducer.encoder.blocks.branchformerr   /espnet2.asr_transducer.encoder.blocks.conformerr	   ,espnet2.asr_transducer.encoder.blocks.conv1dr
   0espnet2.asr_transducer.encoder.blocks.conv_inputr   0espnet2.asr_transducer.encoder.modules.attentionr   2espnet2.asr_transducer.encoder.modules.convolutionr   r   3espnet2.asr_transducer.encoder.modules.multi_blocksr   4espnet2.asr_transducer.encoder.modules.normalizationr   :espnet2.asr_transducer.encoder.modules.positional_encodingr   Aespnet.nets.pytorch_backend.transformer.positionwise_feed_forwardr   strfloatintboolr4   r9   r?   rU   r[   rf   rz   r2   r2   r2   r3   <module>   s    	


F




:

>

