o
    i?                  !   @   s  d Z ddlmZmZmZmZmZ ddlZddlm	Z	 ddl
mZ ddlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZmZmZ ddlmZ ddlmZ ddl m!Z! ddl"m#Z# de$dee$ef de%dee%e%f fddZ&de$de%deee$ef  de'de'dee$ef fddZ(de$deee$ef  dee% fddZ)de$de$de$deeeeef eeef f fd d!Z*dee$ef d"ej+j,d#e%deee#eej+j-f e%f fd$d%Z.de$dee$ef d&e$d'e$deeef f
d(d)Z/dee$ef d*e$d&e$d'e$d+e$defd,d-Z0dee$ef d.e$defd/d0Z1		1	2	3	4	4	5	5	6d@de$d7e%de$deee$ef  d8e%de$d9e$d:e$d;e$d+e$d<e'd=e'd#e%deee#eej+j-f e!e%e%f fd>d?Z2dS )Az-Set of methods to create custom architecture.    )AnyDictListTupleUnionN)ConvolutionModule)EncoderLayer)get_activation)CausalConv1dConv1d)TransformerDecoderLayer)VGG2L)MultiHeadedAttentionRelPositionMultiHeadedAttention)PositionalEncodingRelPositionalEncodingScaledPositionalEncoding)PositionwiseFeedForward)MultiSequential)Conv2dSubsamplingnet_partblock	num_blockreturnc                 C   s  | d}|du rtd|| f|dkrh d}nI|dkr<h d}| dkr)td	| d
ddu r;d|vr;td| n&|dkrMh d}| dkrLtdn|dkr^h d}| dkr]tdntd||sqtd|| ||f |dv r|d |d f}|S |d |d f}|S )zVerify block arguments are valid.

    Args:
        net_part: Network part, either 'encoder' or 'decoder'.
        block: Block parameters.
        num_block: Block ID.

    Return:
        block_io: Input and output dimension of the block.

    typeNz'Block %d in %s doesn't a type assigned.transformer>   d_ffheadsd_hidden	conformer>   r   r   r   use_conv_modmacaron_styledecoderz%Decoder does not support 'conformer'.r    Tconv_mod_kernelzHBlock %d: 'use_conv_mod' is True but  'conv_mod_kernel' is not specifiedcausal-conv1d>   idimodimkernel_sizeencoderz)Encoder does not support 'causal-conv1d'.conv1dz"Decoder does not support 'conv1d.'zRWrong type. Currently supported: causal-conv1d, conformer, conv-nd or transformer.z_%s in %s in position %d: Expected block arguments : %s. See tutorial page for more information.r   r   r   r%   r&   )get
ValueErrorNotImplementedErrorissubset)r   r   r   
block_type	argumentsblock_io r2   a/home/ubuntu/.local/lib/python3.10/site-packages/espnet/nets/pytorch_backend/transducer/blocks.pyverify_block_arguments"   sT   



r4   input_layer_type	feats_dimblocksdropout_ratepos_enc_dropout_ratec                 C   s~   i }|d  dd}|dkrd|d< n| |d< ||d< ||d< ||d< |d	v r3|d  d
d|d< |S |d  dd|d< |S )ar  Prepare input layer arguments.

    Args:
        input_layer_type: Input layer type.
        feats_dim: Dimension of input features.
        blocks: Blocks parameters for network part.
        dropout_rate: Dropout rate for input layer.
        pos_enc_dropout_rate: Dropout rate for input layer pos. enc.

    Return:
        input_block: Input block parameters.

    r   r   Nr$   c-embeddropout-ratepos-dropout-rater%   r*   r   r&   )r+   )r5   r6   r7   r8   r9   input_blockfirst_block_typer2   r2   r3   prepare_input_layerl   s   
r?   c                    s    fddt |D }ddhdd |D krt d tdt|D ]}||d  d || d	 kr>td
||d  f q#|d d S )zPrepare model body blocks.

    Args:
        net_part: Network part, either 'encoder' or 'decoder'.
        blocks: Blocks parameters for network part.

    Return:
        : Network output dimension.

    c                    s    g | ]\}}t  ||d  qS )   )r4   ).0ibr   r2   r3   
<listcomp>   s    z&prepare_body_model.<locals>.<listcomp>r   r   c                 S   s   h | ]}|d  qS )r   r2   )rA   rC   r2   r2   r3   	<setcomp>   s    z%prepare_body_model.<locals>.<setcomp>zO: transformer and conformer blocks can't be used together in the same net part.r@   r   z4Output/Input mismatch between blocks %d and %d in %s)	enumerater-   rangelenr,   )r   r7   cmp_iorB   r2   rD   r3   prepare_body_model   s    
rL   pos_enc_typeself_attn_typec                 C   sj   |dkrt }n|dkrt}n|dkr!| dkr|dkrtdt}ntd|dkr/t}||fS t}||fS )aS  Get positional encoding and self attention module class.

    Args:
        net_part: Network part, either 'encoder' or 'decoder'.
        pos_enc_type: Positional encoding type.
        self_attn_type: Self-attention type.

    Return:
        pos_enc_class: Positional encoding class.
        self_attn_class: Self-attention class.

    abs_posscaled_abs_posrel_posr(   rel_self_attnz1'rel_pos' is only compatible with 'rel_self_attn'zFpos_enc_type should be either 'abs_pos', 'scaled_abs_pos' or 'rel_pos')r   r   r,   r   r-   r   r   )r   rM   rN   pos_enc_classself_attn_classr2   r2   r3   get_pos_enc_and_att_class   s    rU   rS   padding_idxc           	   	   C   s$  | d }| d }| d }| d }| d }|j dkr|||}nd}|dkrDtjtj||tj|tj|tj |||d	fS |d
krQt||||dfS |dkr]t	|||dfS |dkrttjtjj
|||d|||d	fS |dkrtjtjj
|||dtj|d	fS td| )a2  Build input layer.

    Args:
        block: Architecture definition of input layer.
        pos_enc_class: Positional encoding class.
        padding_idx: Padding symbol ID for embedding layer (if provided).

    Returns:
        : Input layer module.
        subsampling_factor: Subsampling factor.

    r   r%   r&   r;   r<   r   Nlinearr@   conv2d   vgg2lembed)rV   r:   zCInvalid input layer: %s. Supported: linear, conv2d, vgg2l and embed)__name__torchnn
SequentialLinear	LayerNormDropoutReLUr   r   	Embeddingr-   )	r   rS   rV   
input_typer%   r&   r8   pos_dropout_ratepos_enc_class_subsamplingr2   r2   r3   build_input_layer   sR   




rh   pw_layer_typepw_activation_typec                    sn   d  dd dd dd |dkrtd| dkr%tn| d	kr+t fd
dS )a_  Build function for transformer block.

    Args:
        net_part: Network part, either 'encoder' or 'decoder'.
        block: Transformer block parameters.
        pw_layer_type: Positionwise layer type.
        pw_activation_type: Positionwise activation type.

    Returns:
        : Function to create transformer (encoder or decoder) block.

    r   r;           r<   att-dropout-raterW   z7Transformer block only supports linear pointwise layer.r(   r"   c                	      s,   t d  td tS )Nr   r   )r   r   r	   r2   att_dropout_rater   r   r8   rf   rj   transformer_layer_classr2   r3   <lambda>D  s    z)build_transformer_block.<locals>.<lambda>)r+   r-   r   r   )r   r   ri   rj   r2   rm   r3   build_transformer_block"  s   rq   rT   conv_mod_activation_typec                    s   d d } dd dd} dd d d |d	kr1t	||t|f
ntd
rAt||t|frNtd t|f 	
fddS )a  Build function for conformer block.

    Args:
        block: Conformer block parameters.
        self_attn_type: Self-attention module type.
        pw_layer_type: Positionwise layer type.
        pw_activation_type: Positionwise activation type.
        conv_mod_activation_type: Convolutional module activation type.

    Returns:
        : Function to create conformer (encoder) block.

    r   r   r;   rk   r<   rl   r!   r    rW   z)Conformer block only supports linear yet.r#   c                      s>   t d  	
 r nd r S d S )Nr   )ConformerEncoderLayerr2   rn   r   conv_modconv_mod_argsr   r8   macaron_netmacaron_net_argsr!   pw_layerpw_layer_argsrT   r    r2   r3   rp     s    
z'build_conformer_block.<locals>.<lambda>)r+   r   r	   r-   r   )r   rT   ri   rj   rr   r   rf   r2   rt   r3   build_conformer_blockQ  s:   $r{   r/   c              	      s   |dkrt ntdddddddd dd	d
d	dd f	ddS )zBuild function for causal conv1d block.

    Args:
        block: CausalConv1d or Conv1D block parameters.

    Returns:
        : Function to create conv1d (encoder) or causal conv1d (decoder) block.

    r)   strider@   dilationgroupsbiasTzuse-batch-normFzuse-relur;   rk   c                      s(   d d d  d
S )Nr%   r&   r'   )r|   r}   r~   r   relu
batch_normr8   r2   r2   	r   r   
conv_classr}   r8   r~   r|   use_batch_normuse_relur2   r3   rp     s    z$build_conv1d_block.<locals>.<lambda>)r   r
   r+   )r   r/   r2   r   r3   build_conv1d_block  s   
r   	self_attnrO   rW   r   rk   rG   r%   repeat_blockpositional_encoding_typepositionwise_layer_typepositionwise_activation_typeinput_layer_dropout_rate input_layer_pos_enc_dropout_ratec                 C   s   g }t | ||\}}t||||
|}t| |}t|||\}}tt|D ]5}|| d }|dv r9t|| |}n|dkrHt|| ||||	}n|dkrUt| || ||}|	| q%|dkrc|| }|t
dd |D  ||fS )a  Build custom model blocks.

    Args:
        net_part: Network part, either 'encoder' or 'decoder'.
        idim: Input dimension.
        input_layer: Input layer type.
        blocks: Blocks parameters for network part.
        repeat_block: Number of times provided blocks are repeated.
        positional_encoding_type: Positional encoding layer type.
        positionwise_layer_type: Positionwise layer type.
        positionwise_activation_type: Positionwise activation type.
        conv_mod_activation_type: Convolutional module activation type.
        input_layer_dropout_rate: Dropout rate for input layer.
        input_layer_pos_enc_dropout_rate: Dropout rate for input layer pos. enc.
        padding_idx: Padding symbol ID for embedding layer.

    Returns:
        in_layer: Input layer
        all_blocks: Encoder/Decoder network.
        out_dim: Network output dimension.
        conv_subsampling_factor: Subsampling factor in frontend CNN.

    r   )r$   r)   r   r   r@   c                 S   s   g | ]}| qS r2   r2   )rA   fnr2   r2   r3   rE     s    z build_blocks.<locals>.<listcomp>)rU   r?   rL   rh   rI   rJ   r   r{   rq   appendr   )r   r%   r5   r7   r   rN   r   r   r   rr   r   r   rV   
fn_modulesrS   rT   r=   out_diminput_layerconv_subsampling_factorrB   r/   moduler2   r2   r3   build_blocks  sX   (
r   )	r   r   rO   rW   r   r   rk   rk   rG   )3__doc__typingr   r   r   r   r   r]   1espnet.nets.pytorch_backend.conformer.convolutionr   3espnet.nets.pytorch_backend.conformer.encoder_layerr   rs   &espnet.nets.pytorch_backend.nets_utilsr	   2espnet.nets.pytorch_backend.transducer.conv1d_netsr
   r   @espnet.nets.pytorch_backend.transducer.transformer_decoder_layerr   ,espnet.nets.pytorch_backend.transducer.vgg2lr   1espnet.nets.pytorch_backend.transformer.attentionr   r   1espnet.nets.pytorch_backend.transformer.embeddingr   r   r   5espnet.nets.pytorch_backend.transformer.encoder_layerAespnet.nets.pytorch_backend.transformer.positionwise_feed_forwardr   .espnet.nets.pytorch_backend.transformer.repeatr   3espnet.nets.pytorch_backend.transformer.subsamplingr   strintr4   floatr?   rL   rU   r^   Moduler_   rh   rq   r{   r   r   r2   r2   r2   r3   <module>   s   


J

)
"

'

D


/

D+	
