o
    i                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZ dd	 ZG d
d dejZdS )    N)	Namespace)Optional)Tuple)Union)Frontend)pad_listc                 C   sf   t | dd | _t | dd | _t | dd| _t | dd | _t | dd | _t | dd| _t | dd	| _| S )
Nupstream_feature_selectionupstream_model_configupstream_refreshFupstream_ckpt	init_ckptverbosetile_factor   )getattrr   r	   r
   r   r   r   r   )args r   J/home/ubuntu/.local/lib/python3.10/site-packages/funasr/frontends/s3prl.pybase_s3prl_setup   s   r   c                	       s   e Zd ZdZ				ddeeef dee dede	f fd	d
Z
dd Zdd ZdefddZdejdejdeejejf fddZdd Z  ZS )S3prlFrontendz<Speech Pretrained Representation frontend structure for ASR.>  NFfsfrontend_confdownload_dirmultilayer_featurec                    s~   t    t|trt|}|d urtj| || _	| 
|\| _| _t| j | _| jj| _d| _| jd| _d S )Ns3prlkey)super__init__
isinstancestrhumanfriendly
parse_sizetorchhubset_dirr   _get_upstreamupstream
featurizercopydeepcopy
state_dictpretrained_params
output_dimfrontend_typeget_downsample_rates
hop_length)selfr   r   r   r   	__class__r   r   r      s   



zS3prlFrontend.__init__c           
      C   s   t tdi |ddi}|| _d}tjddd}|D ]}|dr)|} nq|dus0J tj	j
||j|j|j|jdd	d}t|d
ddurW|jjjdv rWd|jj_ddlm} | jdu red}nd}|||dd}	||	fS )zGet S3PRL upstream model.devicecpuN
PYTHONPATHz(None):r   local)ckptmodel_configrefreshsourcemodel)Wav2Vec2ModelHubertModel        r   )
Featurizerlast_hidden_statehidden_states)r'   feature_selectionupstream_devicer   )r   r   r   osenvirongetsplitendswithr#   r$   loadr'   r   r	   r
   tor   r=   r3   __name__encoder	layerdrops3prl.upstream.interfacesrA   r   )
r1   r   
s3prl_args
s3prl_pathpython_path_listps3prl_upstreamrA   rD   s3prl_featurizerr   r   r   r&   4   sN   
	

zS3prlFrontend._get_upstreamc                 C   s\   t |jdksJ d|j|dd| jj}||d|d| jj |d}|S )a  Tile up the representations by `tile_factor`.
        Input - sequence of representations
                shape: (batch_size, seq_len, feature_dim)
        Output - sequence of tiled representations
                 shape: (batch_size, seq_len * factor, feature_dim)
           z.Input argument `feature` has invalid shape: {}r   r      )lenshapeformatrepeatr   r   reshapesize)r1   featuretiled_featurer   r   r   _tile_representationsb   s    z#S3prlFrontend._tile_representationsreturnc                 C   s   | j S N)r-   r1   r   r   r   output_sizer   s   zS3prlFrontend.output_sizeinputinput_lengthsc                    s    fddt |D }| j  t  | |}W d    n1 s$w   Y  | ||}| jjdkr:| |}t	|d}tj
dd |D tjd}~||fS )Nc                    s    g | ]\}}|d  |  qS rc   r   ).0iwavrg   r   r   
<listcomp>x   s     z)S3prlFrontend.forward.<locals>.<listcomp>r   r@   c                 S   s   g | ]}|j d  qS )r   )rZ   )rh   fr   r   r   rl      s    )dtype)	enumerater'   evalr#   no_gradr(   r   r   ra   r   tensorlong)r1   rf   rg   wavsfeatsinput_feats
feats_lensr   rk   r   forwardu   s   



zS3prlFrontend.forwardc                 C   s   | j | j td d S )Nz4Pretrained S3PRL frontend model parameters reloaded!)r'   load_state_dictr,   logginginford   r   r   r   reload_pretrained_parameters   s   z*S3prlFrontend.reload_pretrained_parameters)r   NNF)rM   
__module____qualname____doc__r   intr    r   dictboolr   r&   ra   re   r#   Tensorr   rx   r|   __classcell__r   r   r2   r   r      s4    
.
r   )r)   rz   rF   argparser   typingr   r   r   r!   r#   torch.nnnnfunasr.frontends.utils.frontendr   *funasr.models.transformer.utils.nets_utilsr   r   Moduler   r   r   r   r   <module>   s    