o
    i                     @   s   d dl Z d dlmZmZmZ d dlZd dlmZ d dlm  mZ	 d dl
Z
d dlmZ d dlmZ d dlmZ eddG d	d dejZdS )
    N)OptionalTupleUnion)nn)make_pad_mask)SpecAug)tablesencoder_classesOpenAIWhisperEncoderWarpc                       s   e Zd ZdZ						ddededed	ed
edeedf f fddZ		dde
jde
jde
jfddZdefddZ	dde
jde
jde
jdee
je
jee
j f fddZ  ZS )r
   znTransformer-based Speech Encoder from OpenAI's Whisper Model:

    URL: https://github.com/openai/whisper
            smallNFdropout_ratewhisper_modeldownload_diruse_specauguse_padmaskspecaug_confc                    sx   t    tj|| _|t v sJ tj||dd}t	
|j| _| j  ~|r4tdi || _nd | _|| _d S )Ncpu)download_rootdevice )super__init__torchr   Dropoutdropoutwhisperavailable_models
load_modelcopydeepcopyencoderencoderstrainr   specaugr   )selfr   r   r   r   r   r   _model	__class__r   U/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/whisper_lid/encoder.pyr      s   
	

z!OpenAIWhisperEncoderWarp.__init__inputilensreturnc           
      C   s|  t | j|}t | j|}|ddd}|d}| jjd}||kr?|| jjd |dd d f  |j	}n|d d d |d d f | jj }|d urwd|| jjj
d  d| jjjd   | jjjd   }tj||d}nd }| jrt|d d d d d f  |j}nd }| |}t| jjD ]\}}	|	|}|t| jjd k r| |}q| j|}||fS )Nr         )max)Fgelur"   conv1conv2permutesizepositional_embeddingtodtypekernel_sizepaddingstrider   clampr   r   r   r   	enumerateblockslenln_post)
r%   r*   r+   xn_framesmax_posolenspadding_masklayerblockr   r   r)   whisper_encode3   s8   
,""&

z'OpenAIWhisperEncoderWarp.whisper_encodec                 C   s   | j jjjd S )Nr   )r"   r3   weightshape)r%   r   r   r)   output_size^   s   z$OpenAIWhisperEncoderWarp.output_sizexs_padprev_statesc                 C   sb   ||}}| j d ur$| jjr$t|dd}|  ||\}}t|dd}| ||\}}||d fS )Nr.   r-   )r$   r"   trainingr   	transposerH   )r%   rL   r+   rM   feats
feats_lensrD   r   r   r)   forwardb   s   

z OpenAIWhisperEncoderWarp.forward)r   r   NFFN)N)__name__
__module____qualname____doc__floatstrboolr   dictr   r   TensorrH   intrK   r   r   rR   __classcell__r   r   r'   r)   r
      sP    

+)r   typingr   r   r   r   r   torch.nn.functional
functionalr0   r   *funasr.models.transformer.utils.nets_utilsr   funasr.models.specaug.specaugr   funasr.registerr   registerModuler
   r   r   r   r)   <module>   s   
