o
    im0                     @   s   d dl Z d dlmZ d dlmZ d dlmZ d dlZd dlZd dlZd dl	m
Z
 zd dlmZ W n   ed Y d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ eddeddG dd de
jZG dd de
jZdS )    N)Optional)Tuple)Union)ComplexTensorz$Please install torch_complex firstly)LogMel)Stft)Frontend)make_pad_mask)tablesfrontend_classesDefaultFrontendEspnetFrontendc                       s   e Zd ZdZ																d$d
ededededee dedededededededee dedef fddZ	defddZ
dejdeejef deejejf fd d!Zdejdejdejfd"d#Z  ZS )%r   sConventional frontend structure for ASR.
    Stft -> WPE -> MVDR-Beamformer -> Power-spec -> Mel-Fbank -> CMVN
    >     N   hannTFP   fsn_fft
win_length
hop_lengthwindowcenter
normalizedonesidedn_melsfminfmaxhtkfrontend_conf
apply_stftuse_channelc              	      s   t    t|}|| _|| _|rt|||||||d| _nd | _|| _|d ur8t	dd|d d i|| _
nd | _
t|||	|
||d| _|	| _|| _d| _d S )Nr   r   r   r   r   r   r   idim      r   r   r   r   r   r   default )super__init__copydeepcopyr   r   r   stftr!   r   frontendr   logmelr   r"   frontend_type)selfr   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   kwargs	__class__r)   L/home/ubuntu/.local/lib/python3.10/site-packages/funasr/frontends/default.pyr+      s<   




zDefaultFrontend.__init__returnc                 C      | j S Nr   r2   r)   r)   r6   output_sizeV      zDefaultFrontend.output_sizeinputinput_lengthsc           
      C   s@  t |tr
t|}|jtjkr| }| jd ur"| ||\}}nt	|d |d }|}| j
d urFt |t	s=J t|| 
||\}}}| dkr| jr|| jd urd|d d d d | jd d f }n&tj|d}|d d d d |d d f }n|d d d d dd d f }|jd |jd  }| ||\}	}|	|fS )N.r   .r&      r%   r   )
isinstancelisttorchtensordtypefloat64floatr.   _compute_stftr   r/   typedimtrainingr"   nprandomrandintsizerealimagr0   )
r2   r>   r?   
input_stft
feats_lens_maskchinput_powerinput_featsr)   r)   r6   forwardY   s*   




 zDefaultFrontend.forwardc                 C   X   |  ||\}}| dksJ |j|jd dksJ |jt|d |d }||fS NrB   r%   r@   rA   r.   rL   shaper   r2   r>   r?   rT   rU   r)   r)   r6   rJ      
   zDefaultFrontend._compute_stft)r   r   Nr   r   TFTr   NNFNTN)__name__
__module____qualname____doc__intr   strbooldictr+   r<   rE   Tensorr   rD   r   r[   rJ   __classcell__r)   r)   r4   r6   r      sr    	
9
$,c                +       s  e Zd ZdZ																							d,d
ededededededee dedededededededee dedededededef* fdd Z	d!efd"d#Z
d$ejd%ejd!eejejf fd&d'Zd$ejd%ejd!ejfd(d)Zd*d+ Z  ZS )-MultiChannelFrontendr   r   r   Nr   TFr   r&   r   r   r   r   frame_lengthframe_shiftr   r   r   r   r   r   r   r   r    r!   r"   lfr_mlfr_n	cmvn_filemcc              	      sj  t    t|}|d u r|d u r|d | _|d | _n|d u r.|d u r.| j| _| j| _n	td td |rHt	|| j| j|||	|
d| _
nd | _
|| _|d uratdd|d d i|| _nd | _t||||||d| _|| _|| _|| _| js| jd urtd| j  ntd	 || _| jd ur| | j\}}| d
t| | dt| d| _d S )N   zOOnly one of (win_length, hop_length) and (frame_length, frame_shift)can be set.r&   r#   r$   r%   r'   zuse the channel %dzrandom select channelmeanstdmultichannelfrontendr)   )r*   r+   r,   r-   r   r   loggingerrorexitr   r.   r!   r   r/   r   r0   r   r"   rs   inforr   
_load_cmvnregister_bufferrE   
from_numpyr1   )r2   r   r   r   r   rn   ro   r   r   r   r   r   r   r   r   r    r!   r"   rp   rq   rr   rs   ru   rv   r4   r)   r6   r+      s`   









zMultiChannelFrontend.__init__r7   c                 C   r8   r9   r:   r;   r)   r)   r6   r<      r=   z MultiChannelFrontend.output_sizer>   r?   c                 C   s,  | j d ur| ||\}}nt|d |d }|}| jd ur2t|ts)J t|| ||\}}}| dkry| jsy| jrk| j	d urS|d d d d | j	d d f }n&t
j|d}|d d d d |d d f }n|d d d d dd d f }|jd |jd  }| ||\}	}| jr|	 dkr|	d}
|	d}|	dd|
| dd }	|d| }nd}|	||fS | jd ur|d u r|	|	dg|	d}| j|	j|	j| _| j|	j|	j| _t||	d}|	jr|	| j }	n|	| j7 }	|	jr|	|d	}	n|	 |d	 |	| j9 }	|	|fS )
Nr@   rA   rB   r%   r   r&   r^   r   g        )!r.   rJ   r   r/   rC   rK   rL   rs   rM   r"   rN   rO   rP   rQ   rR   rS   r0   	transposereshape
contiguousrepeatsqueezerr   new_fullru   todevicerG   rv   r	   requires_gradmasked_fillmasked_fill_)r2   r>   r?   rT   rU   rV   rW   rX   rY   rZ   btchannel_sizer)   r)   r6   r[      sN   


 




zMultiChannelFrontend.forwardc                 C   r\   r]   r_   ra   r)   r)   r6   rJ   3  rb   z"MultiChannelFrontend._compute_stftc                 C   s  t |ddd}| }W d    n1 sw   Y  g }g }tt|D ]O}||  }|d dkrQ||d   }|d dkrP|dt|d  }t|}q%q%|d d	krt||d   }|d dkrt|dt|d  }	t|	}q%q%t|tj	}
t|tj	}|
|fS )
Nrzutf-8)encodingr   z
<AddShift>r&   z<LearnRateCoef>   z	<Rescale>)
open	readlinesrangelensplitrD   rN   arrayastyperI   )r2   rr   flines
means_list	vars_listi	line_itemadd_shift_linerescale_linemeansvarsr)   r)   r6   r|   ?  s0   
zMultiChannelFrontend._load_cmvn)r   r   NNNNr   TFTr   NNFNTNr&   r&   NT)rc   rd   re   rf   rg   r   rh   ri   rj   r+   r<   rE   rk   r   r[   rJ   r|   rl   r)   r)   r4   r6   rm      s    	
Q
Hrm   )r,   typingr   r   r   rx   numpyrN   rE   torch.nnnntorch_complex.tensorr   printfunasr.frontends.utils.log_melr   funasr.frontends.utils.stftr   funasr.frontends.utils.frontendr   *funasr.models.transformer.utils.nets_utilsr	   funasr.registerr
   registerModuler   rm   r)   r)   r)   r6   <module>   s*    


z