o
    i                     @   s   d dl Z d dlmZ d dlm  mZ d dlmZ d dlm	Z	 e	
ddG dd dejZe	
ddG dd	 d	ejZe	
dd
G dd
 d
ejZdS )    N)make_pad_mask)tablesadaptor_classesLinearc                       s,   e Zd Zddef fddZdd Z  ZS )r      ffn_dimc                    sP   t    || _|| _|| _t| j| j || _t | _	t|| j| _
d S N)super__init__kencoder_dimllm_dimnnr   linear1ReLUrelulinear2selfdownsample_rater   r   r   kwargs	__class__ Q/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/llm_asr/adaptor.pyr
      s   

zLinear.__init__c                 C   s   |  \}}}|| j }|dkr|d d d | d d f }| d}| }|||| j || j }| |}| |}| |}|S )Nr      )sizer   
contiguousviewr   r   r   )r   x
batch_sizeseq_lendimnum_frames_to_discardr   r   r   forward   s   




zLinear.forwardr   __name__
__module____qualname__intr
   r$   __classcell__r   r   r   r   r   	   s    	QFormerc                       s4   e Zd Zd	def fddZdd Zdd Z  ZS )
EncoderProjectorQFormerr   r   c           	         s   t    || _|| _ddlm}m} | }| j|_d|_d| _	t
td| j	|j| _| jjjddd ||| _t
|j| j| _t
j| jd	d
| _d| _d| _d S )Nr   )Blip2QFormerConfigBlip2QFormerModel   @   r           g      ?)meanstdgh㈵>)epsgF]kSU?)r	   r
   r   r   transformersr.   r/   encoder_hidden_sizenum_hidden_layers	query_lenr   	Parametertorchzeroshidden_sizequerydatanormal_qformerr   linear	LayerNormnormsecond_per_framesecond_stride)	r   r   r   r   r   r   r.   r/   configurationr   r   r   r
   %   s   


z EncoderProjectorQFormer.__init__c                 C   s   |j \}}}t|| j d }t|| j d }d|f}d|f}|ddd}tjjj	||dd|d}|j \}	}	}
|
|d|d |
}t|g d}|d|d |}tj| d d tj|jd}||fS )	Ng      >@r   r0   r   )kernel_sizedilationpaddingstride)r      r0   r   )dtypedevice)shaperoundrE   rF   	transpose	unsqueezer;   r   
functionalunfoldr   permutereshapeonesr   longrO   )r   speech_embedsBTCkernelrK   speech_embeds_trspeech_embeds_overlap_Lspeech_attsr   r   r   split_frames:   s    z$EncoderProjectorQFormer.split_framesc           
      C   sr   |  \}}}| |\}}| j|jd dd}| j|||dd}| | |j}	|		|d|	 d
 }	|	S )Nr   rL   T)query_embedsencoder_hidden_statesencoder_attention_maskreturn_dictr0   )r   rd   r>   expandrP   rA   rD   rB   last_hidden_stater   r   )
r   r   r[   r\   r]   encoder_out_featattention_maskr>   query_output
query_projr   r   r   r$   I   s   zEncoderProjectorQFormer.forwardr%   )r'   r(   r)   r*   r
   rd   r$   r+   r   r   r   r   r-   #   s    r-   Transformerc                       s0   e Zd Z	ddef fddZdd	d
Z  ZS )ro   r0         r   r   c                    s   t    || _|| _| _t| j| j || _t | _	t|| j| _
ddlm  ddlm ddlm d | _dddkr\t fddtddD | _d S d S )	Nr   )EncoderLayer)MultiHeadedAttention)PositionwiseFeedForwardn_layerr0   c                    sN   g | ]#}  d d ddd  dd ddqS )attention_heads   attention_dropout_rater2      dropout_rate)get).0irr   rs   rt   r   r   r   r   
<listcomp>o   s"    



z(Transformer.__init__.<locals>.<listcomp>)r	   r
   r   r   r   r   r   r   r   r   r   !funasr.models.transformer.encoderrr   #funasr.models.transformer.attentionrs   3funasr.models.transformer.positionwise_feed_forwardrt   blocksr{   
ModuleListranger   r   r~   r   r
   ^   s$   


zTransformer.__init__Nc                 C   s   |  \}}}|d | j d }|| j | }tj|ddd|ddfdd}| d}| }||||| j }| |}| |}| |}d }|d | j d }t	|d d d d d f  
|j}	| jd urzt| jD ]\}
}|||	\}}	qn||fS )Nr   r   r2   )value)r   r   Fpadr   r   r   r   r   r   torO   r   	enumerate)r   r   ilensr    r!   r"   	chunk_numpad_numolensmaskslayerblockr   r   r   r$      s"   



$
zTransformer.forward)r0   rp   rq   r   r   r&   r   r   r   r   ro   \   s    $)r;   torch.nnr   torch.nn.functionalrT   r   *funasr.models.transformer.utils.nets_utilsr   funasr.registerr   registerModuler   r-   ro   r   r   r   r   <module>   s    


8