o
    iIQ                  	   @   sn  d dl Z d dlZd dlZd dlZd dlmZ d dlm  mZ	 d dl
mZ d dlmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ e eZed
g dZedg dZG dd dejZ dd Z!dee fddZ"dej#dedej#fddZ$dej#dedej#fddZ%		d*de&de&d e&d!e'fd"d#Z(		d*d$d%Z)d&d' Z*d(d) Z+dS )+    N)
namedtuple)	dataclass)partial)MISSINGII)OptionalCallable)compute_mask_indices)GradMultiply)	index_putMaskSeedseedupdateidsMaskInfo
x_unmaskedmaskids_restoreids_keepc                       s  e Zd Zdedejdejdeej deej dejdejdeeeeeege	j
f  f fd	d
Zdd Zdd ZdefddZdd Z			d*dedee	j
 fddZ			d*dedededee	j
 fddZdd Zd ee fd!d"Zd+d#d$Zd%d& Zd,d(d)Z  ZS )-ModalitySpecificEncoder	embed_dimlocal_encoderproject_featuresfixed_positional_encoderrelative_positional_encodercontext_encoderdecoderget_alibi_biasc
                    sx  t    || _|| _|| _|| _|| _|| _|| _|j	r|	nd | _
| jj| _d | _|jdkr]ttd|j|| _|jsFtj| j n| jddkr]tj| jd d dd f  d | _| j
d urtjtj|jrr|j|j ndd|jr{| jjndddf|jtjd|jd| _|jr| j
d ur|j d usJ | j
d|j |jdtjdd}
t|
| _!t"t#| j!d| _
d S d S d S )	Nr      )dtype)requires_grad      ?cpu)
batch_size
time_stepsheadsscaler!   device)
alibi_bias)$super__init__modality_cfgr   r   r   r   r   r   use_alibi_encoderr   local_grad_multextra_tokensnum_extra_tokensnn	Parametertorchzerosinit_extra_token_zeroinitnormal_sizealibi_scalefulllearned_alibi_scale_per_layerprenet_depthmodel_depthlearned_alibi_scale_per_headnum_alibi_headsfloatlearned_alibi_scalelearned_alibialibi_max_posr*   r   _learned_alibi_bias)selfr-   r   r   r   r   r   r   r   r   r*   	__class__ R/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/emotion2vec/base.pyr,      sf   




z ModalitySpecificEncoder.__init__c                 C   s8   | d}||v r||   dkr|| d||< |S )Nz.alibi_scale   r   )dim	unsqueeze)rF   
state_dictnamekrI   rI   rJ   upgrade_state_dict_namedh   s   
z0ModalitySpecificEncoder.upgrade_state_dict_namedc                 C   s   |S NrI   )rF   xpadding_maskrI   rI   rJ   convert_padding_masko      z,ModalitySpecificEncoder.convert_padding_mask	mask_infoc           	      C   s(  | j jj}|dkrtj||| jdd}| j j}|d urr|jjd |jd  | }|	|
d||
dd| j j}tj|d d |d f |gdd}tj|d|jd}| j jjrq| jd usaJ | |d }|||jd  }n
|d d |d f }| j jjr| jd usJ || |d  }||fS )Nr   T)traininginplacer    rL   rL   index)r-   r   input_dropoutFdropoutrX   r1   r   shape	new_emptyr9   r8   mask_noise_stdr4   catgatheradd_positions_maskedr   r   rM   add_positions_all)	rF   rS   rW   inp_drop	num_extra
num_maskedmask_tokensx_posrI   rI   rJ   decoder_inputr   s2   
"

z%ModalitySpecificEncoder.decoder_inputc                 C   sv   | j dkr| j dkr| |}n$t| || j }nt  | |}W d    n1 s/w   Y  | |}|S )Nr   r#   )r/   r   r
   applyr4   no_gradr   )rF   featuresrS   rI   rI   rJ   local_features   s   



z&ModalitySpecificEncoder.local_featuresr    Nclone_batch
mask_seedsc                    s(  |d ur
|  ||}|}|r|dkr| }|j\}	}
}|	}d }d }| jd ur/|| || }|r|dkr||d} d ur| fddt|d D }tdg|  	dd} j
}||d}|	d||| }|	d}t j j|d |d ur||d}| j|| | jd up| |d\}}| jd ur| |}|}|r|r|j}|d ur|t|| }|d ur| rt||}| sd }nd }n|d ur|| }d }| j}| jd ur$| j||
| jjtj|jd}|d ur|d}|ddkr||d| }d }|dkr||d}|d ur$|r$t||}| j d ur^| j d}tj!| j "|ddd|gdd	}|d urOt#$||df}|d ur^t#$||d|df}| %||||d urp|d | jj& nd }|||||d ur|ddkr|| jj&d  |d
S ||d
S )Nr    r   c                    s"   g | ]}t t j|fd  qS )g    _B)inthashr   ).0indrt   rI   rJ   
<listcomp>   s    zCModalitySpecificEncoder.contextualized_features.<locals>.<listcomp>rZ   r   )	mask_seedro   precomputed_mask)r%   r&   r'   r!   r)   r[   )rS   rr   rT   r*   r:   encoder_mask)'rU   clonera   r   repeat_interleaveranger4   tensorlongviewr   tor   r   r   compute_maskr   r   gather_unmaskedanygather_unmasked_maskr:   r   r-   r@   float32r)   	clamp_minr9   squeezetype_asmasked_alibir0   rd   expandr_   padr   r=   )rF   rS   rT   r   remove_maskedrs   rt   r|   rr   orig_Borig_T_
pre_mask_BrW   x_pos
clone_hashidmasked_padding_maskr*   r:   numrI   ry   rJ   contextualized_features   s   











$

z/ModalitySpecificEncoder.contextualized_featuresr   r   c           	   	   C   s    |  |}| |||||||S rR   )rr   r   )	rF   rq   rT   r   r   rs   rt   r|   rS   rI   rI   rJ   forward  s   

zModalitySpecificEncoder.forwardc                 C   s   d S rR   rI   )rF   rI   rI   rJ   reset_parameters0  rV   z(ModalitySpecificEncoder.reset_parametersr{   c                 C   s0  |d ur|}|  ||}n|j\}}	}
| j}|j}|jd ur0|jdkr0|j|k r0tj|j|}|dkr|jdkr@t	|||}nL| jj
rHd| }t||	f|||jdd|j|j|d ur]|jnd |d ure|jnd |d urm|jnd d}t|j|jd}| jj
rd| }|  ||}nd }|r| ||}||fS )Nr   r    T)	min_masksrequire_same_masksmask_dropout	add_masksr   epochindicesr)   )make_maskinfora   r-   	mask_probmask_prob_minnprandomuniformmask_lengthrandom_maskinginverse_maskr	   r   r   r   r   r   r4   
from_numpyr   r)   
apply_mask)rF   rS   rT   r{   ro   r|   r   rW   BTCcfgr   rI   rI   rJ   r   3  sH   



z$ModalitySpecificEncoder.compute_maskc                 C   s   |d u r|j \}}}n|\}}}|tj}|jdd}|jddddd|}||d   }	| jj	dkrF|	t
|t|	 | jj	 7 }	|d d d |	f }
|d urWd }n|
ddd|}
tj|d|
d}t||||
d}|S )Nr    r[   rZ   r   r\   r   )ra   r   r4   uint8argsortrM   r   sumr-   keep_masked_pctroundru   re   r   )rF   rS   r   ra   r   r   Dids_shuffler   len_keepr   r   rW   rI   rI   rJ   r   l  s*   
z%ModalitySpecificEncoder.make_maskinfoc                 C   s   | j }|j\}}}|d ur:|j}|jr |d||d  }n|  }|||	d
d|j}	t|||	}|jdkrat||fd |j|j}
t|
|jdd|d}
t||
d}|S )Nr    rZ   r   )r-   ra   r   encoder_zero_maskr   rM   r   itemrb   r9   r8   rc   r   mask_channel_probr	   mask_channel_lengthr4   r   r   r)   r   )rF   rS   rW   r   r   r   r   r   	num_masksmasksmask_channelrI   rI   rJ   r     s(   
 z"ModalitySpecificEncoder.apply_maskFc                 C   s   |sd | _ d S d S rR   )r   )rF   keep_decoderrI   rI   rJ   remove_pretraining_modules  s   
z2ModalitySpecificEncoder.remove_pretraining_modules)r    NNrR   )F)__name__
__module____qualname__ru   r2   Moduler   r   strr4   Tensorr,   rQ   rU   r   rn   rr   r   boolr   r   r   r   r   r   r   __classcell__rI   rI   rG   rJ   r      sd    	
J 
 


9r   c                 C   s,   ||kr|S ||  }d||  }|||  S )Nr    rI   )startend	curr_steptotal_stepsrpct_remainingrI   rI   rJ   get_annealed_rate  s
   r   r{   c                 C   s(  | j \}}}t|d|  }d }|d ur2tt|j|j|j  fd }tj	| j
d}|| tj|||| j
d}	|	jdd}
|
jdd}|
d d d |f }|ddd|}tj| d|d}tj||g| j| j
d}d	|d d d |f< tj|d|d}|ddd|}t||||d
S )Nr    g    .Ar   )	generatorr)   r[   rZ   r\   r!   r)   r   r   )ra   ru   rv   r   r   r   r   r   r4   	Generatorr)   manual_seedrandr   rM   r   re   onesr!   r   )rS   
mask_ratior{   NLr   r   r   r   noiser   r   r   r   r   rI   rI   rJ   r     s$   $
r   rS   rW   returnc                 C   s   t j| d|jdS )Nr    r\   r4   re   r   rS   rW   rI   rI   rJ   r     s
   r   c                 C   s   t j| d|jd dS )Nr    .r   r\   r   r   rI   rI   rJ   r     s
   r   r    	manhattanmax_positionsattention_headsdimsdistancec              	      sP   fdd | }|}t  |}|dkr+t t |dt |d d }ni|dkr|dkr8dd	 }n|d
kr@dd	 }t| }	|	 sMJ |	t|	}	t 	| | f}t
|	D ]/}
t
|	D ](}t
|	D ]!}t
|	D ]}|
|	 | }||	 | }||
||| |||f< qnqhqbq\ntd| |dd|d|dd }|S )Nc                    s\   dd }t |  r|| S dt t |  }|| d| dd d d | |   S )Nc                    s6   ddt | d       fddt| D S )N      c                    s   g | ]} |  qS rI   rI   )rw   iratior   rI   rJ   rz         zPget_alibi.<locals>.get_slopes.<locals>.get_slopes_power_of_2.<locals>.<listcomp>)mathlog2r   )nrI   r   rJ   get_slopes_power_of_2  s   z<get_alibi.<locals>.get_slopes.<locals>.get_slopes_power_of_2r   r   )r   r   
is_integerfloor)r   r   closest_power_of_2
get_slopesrI   rJ   r     s   	 zget_alibi.<locals>.get_slopesr    r   rZ   r   r   c                 S   s   t | | t ||  S rR   )absx1y1x2y2rI   rI   rJ   <lambda>  r   zget_alibi.<locals>.<lambda>	euclideanc                 S   s   t | | d || d  S )Nr   )r   sqrtr   rI   rI   rJ   r   	  s    z"unsupported number of alibi dims: )r4   r   r   arangerM   r   r   r   ru   r5   r   	Exceptionr   )r   r   r   r   maxpos
attn_headsslopespos_biasdfr   r   jrP   lnew_xnew_yr*   rI   r   rJ   	get_alibi  s<   (

	$r	  c                 C   s   | d| d| }|  |d }	|| }
|	d u s0|	d|
k s0|	d|k s0|	j|ks0|	j|krdt||	d ur;|	dnd}t|
|	d urI|	dnd| }t||||dj||d|dd}	|	| |< |	d |
d |d |f }|||||}|S )Nr   r   r    )r   r   r   )	getr9   r!   r)   maxr	  r   repeatr   )alibi_biasesr%   r&   r'   r!   r)   r   r   	cache_keybufferedtarget_sizebtbnbrI   rI   rJ   r   !  s&   


 
r   c                 C   s   |  d|ksJ | j| j|ksJ | j| j|ks J | j|  d|k r?t||  d d }tj| ||||fdd} | |ddd| } | dd |d |f S )Nr    rZ   r   	replicate)mode.)	r9   ra   r!   r)   r   ceilr_   r   r   )r*   r%   r&   r'   r(   r!   r)   pszrI   rI   rJ   rE   G  s   	rE   c                 C   sv   |  d}| }|jdd d}tj|d|d|d|j dd} tj| d|ddd||  ddd} | S )Nr    r   rZ   r\   )r9   r   rM   r4   re   r   r   	transpose)r*   rW   H	orig_biasr]   rI   rI   rJ   r   \  s   
r   )r    r   ),loggingr   numpyr   r4   torch.nnr2   torch.nn.functional
functionalr_   collectionsr   dataclassesr   	functoolsr   	omegaconfr   r   typingr   r   )funasr.models.emotion2vec.fairseq_modulesr	   r
   r   	getLoggerr   loggerr   r   r   r   r   r   r   r   r   ru   r   r	  r   rE   r   rI   rI   rI   rJ   <module>   sR   
   	 
G
&