o
    i                     @   s   d dl mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlZd dl	m
Z
 d dlmZ d dlmZ eejed	krDd d
lmZ nedddZG dd de
jZdS )    )contextmanager)LooseVersion)Dict)Optional)TupleN)AbsFrontend)force_gatherablez1.6.0)autocastTc                 c   s    d V  d S N )enabledr   r   S/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/data2vec/data2vec.pyr	      s   
r	   c                
       s   e Zd ZdZ					d fdd	Zdejdejdeejee	ejf ejf fdd	Z
dejdejdee	ejf fd
dZdejdejfddZdejdejdeejejf fddZdd Zdd Z  ZS )Data2VecPretrainModelzData2Vec Pretrain modelNc                    s2   t    || _|| _|| _|| _|| _d| _d S )Nr   )super__init__frontendspecaug	normalize
preencoderencodernum_updates)selfr   r   r   r   r   	__class__r   r   r   $   s   
	
zData2VecPretrainModel.__init__speechspeech_lengthsreturnc                 C   s   |j d |j d ksJ |j |j f| j| j | ||}|d }t| }|d }| | }t|d }t|d }t|d }	tt	
| |||	d}
t||
|f|j\}}
}||
|fS )zFrontend + Encoder + Calc loss
        Args:
            speech: (Batch, Length, ...)
            speech_lengths: (Batch, )
        r   lossessample_size
target_varpred_var	ema_decay)lossr   r    r!   )shaper   set_num_updatesr   encodesumvaluesfloatdicttorchclonedetachr   device)r   r   r   encoder_outr   r"   r   r   r    r!   statsweightr   r   r   forward6   s$   $
zData2VecPretrainModel.forwardc                 C   s   |  ||\}}||dS )N)featsfeats_lengths)_extract_featsr   r   r   r2   r3   r   r   r   collect_feats[   s   
z#Data2VecPretrainModel.collect_featsc                 C   s   t d- | ||\}}| jdur| jr| ||\}}| jdur*| ||\}}W d   n1 s4w   Y  | jdurF| ||\}}t|t|krPd}| j||ddd}|S )zyFrontend + Encoder.
        Args:
            speech: (Batch, Length, ...)
            speech_lengths: (Batch, )
        FNT)maskfeatures_only)	r	   r4   r   trainingr   r   minmaxr   )r   r   r   r2   r3   r.   r   r   r   r%   a   s   



zData2VecPretrainModel.encodec                 C   sb   |  dksJ |j|d d d | f }| jd ur(| ||\}}||fS ||}}||fS )N   )dimr#   r;   r   r5   r   r   r   r4      s   

z$Data2VecPretrainModel._extract_featsc                 C   s
   || _ d S r
   r   )r   r   r   r   r   r$      s   
z%Data2VecPretrainModel.set_num_updatesc                 C   s   | j S r
   r>   )r   r   r   r   get_num_updates   s   z%Data2VecPretrainModel.get_num_updates)NNNNN)__name__
__module____qualname____doc__r   r*   Tensorr   r   strr1   r6   r%   r4   r$   r?   __classcell__r   r   r   r   r   !   sH    
%

!
r   )T)
contextlibr   distutils.versionr   typingr   r   r   r*   torch.nnnnfunasr.frontends.abs_frontendr   funasr.train_utils.device_funcsr   __version__torch.cuda.ampr	   Moduler   r   r   r   r   <module>   s   