o
    ߥi(                     @   s   d dl Z d dlmZ d dlZd dlmZ d dlm  mZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ dd	lmZmZ dd
lmZ ejeje
jdG dd deZG dd dejZdd ZdddZ dS )    N)Dict)Models)
TorchModel)Tensor)MODELS)	ModelFileTasks   )	ConviSTFTConvSTFT)UNet)module_namec                       sH   e Zd ZdZdef fddZdeeef deeef fddZ  Z	S )	FRCRNDecoratorz@ A decorator of FRCRN for integrating into modelscope framework 	model_dirc                    s   t  j|g|R i | t|i || _tj|tj}tj	|rIt
j|t
dd}t|tr?d|v r?| |d  dS | jj|dd dS dS )zzinitialize the frcrn model from the `model_dir` path.

        Args:
            model_dir (str): the model path.
        cpu)map_location
state_dictF)strictN)super__init__FRCRNmodelospathjoinr   TORCH_MODEL_BIN_FILEexiststorchloaddevice
isinstancedictload_state_dict)selfr   argskwargsmodel_bin_file
checkpoint	__class__ U/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/audio/ans/frcrn.pyr      s   
zFRCRNDecorator.__init__inputsreturnc                    s   | j |d }|d |d |d |d |d |d d}d	|v r`| j j|d |d	 |d
d |  | j j|d |d	 |dd|  fdd D |d< |d fddD  |S )Nnoisyr   r	               )spec_l1wav_l1mask_l1spec_l2wav_l2mask_l2cleanMix)modeSiSNRc                       i | ]	}| |   qS r*   item.0k)
mix_resultr*   r+   
<dictcomp>>   s    z*FRCRNDecorator.forward.<locals>.<dictcomp>log_varsc                    r=   r*   r>   r@   )sisnr_resultr*   r+   rD   @   s    )r   forwardlossupdate)r#   r,   result_listoutputr*   )rC   rF   r+   rG   ,   s0   


zFRCRNDecorator.forward)
__name__
__module____qualname____doc__strr   r   r   rG   __classcell__r*   r*   r(   r+   r      s    *r   c                       sX   e Zd ZdZ				d fdd	Zdd	 Zd
d ZdddZdddZdddZ	  Z
S )r   z Frequency Recurrent CRN   d      hannc
                    s   t    |d d | _|| _|| _|| _|	| _d}t| j| j| j| jd|d| _t	| j| j| j| jd|d| _
td||||d| _td||||d| _dS )a  
        Args:
            complex: Whether to use complex networks.
            model_complexity: define the model complexity with the number of layers
            model_depth: Only two options are available : 10, 20
            log_amp: Whether to use log amplitude to estimate signals
            padding_mode: Encoder's convolution filter. 'zeros', 'reflect'
            win_len: length of window used for defining one frame of sample points
            win_inc: length of window shifting (equivalent to hop_size)
            fft_len: number of Short Time Fourier Transform (STFT) points
            win_type: windowing type used in STFT, eg. 'hanning', 'hamming'
        r/   r	   Tcomplex)feature_typefix)rV   model_complexitymodel_depthpadding_modeN)r   r   feat_dimwin_lenwin_incfft_lenwin_typer   stftr
   istftr   unetunet2)r#   rV   rY   rZ   log_ampr[   r]   r^   r_   r`   r%   rX   r(   r*   r+   r   H   sJ   
zFRCRN.__init__c              	   C   s  g }|  |}t|d}t|d d d d d | jd d f |d d d d | jd d d f gd}t|d}t|dd}| |}t|}| |}t|}| 	||\}}	}
|
| |
|	 |
|
 || }| 	||\}}	}
|
| |
|	 |
|
 |S )Nr	   r1   )ra   r   	unsqueezecatr\   	transposerc   tanhrd   
apply_maskappend)r#   r,   out_listcmp_spec	unet1_out	cmp_mask1	unet2_out	cmp_mask2est_specest_wavest_maskr*   r*   r+   rG      s2   
  









zFRCRN.forwardc                 C   s  t |d d d d d d d d df |d d d d d d d d df  |d d d d d d d d df |d d d d d d d d df   |d d d d d d d d df |d d d d d d d d df  |d d d d d d d d df |d d d d d d d d df   gd}t |d d dd d d d f |d d dd d d d f gd}t |d}t |d d d d d d df |d d d d d d df gd}| |}t |d}|||fS )Nr   r	   )r   rg   squeezerb   )r#   rm   cmp_maskrr   rs   r*   r*   r+   rj      s    BBBB@@

zFRCRN.apply_mask        c                 C   sR   g g }}|   D ]\}}d|v r||g7 }q	||g7 }q	||d|ddg}|S )Nbias)paramsweight_decayrw   )named_parameters)r#   rz   weightsbiasesnameparamry   r*   r*   r+   
get_params   s   
zFRCRN.get_paramsr:   c                 C   s  |dkr=d}|t |k r8|| }|d }|| }|d }|| }|d }|dkr2| ||||||}	|t |k st|	dS |dkrd}|t |k r~|| }|d }|| }|d }|| }|d }|dkrx| ||||||\}
}}|
| | }	|t |k sIt|	|
|dS d S )Nr<   r   r	   r0   )sisnrr:   )rH   amp_loss
phase_loss)lenloss_1layerr!   )r#   r.   labelsrl   r;   countrr   rs   rt   rH   r   r   
SiSNR_lossr*   r*   r+   rH      s@   


z
FRCRN.lossc                 C   s
  |dkr"|  dkrt|d}|  dkrt|d}t|| S |dkr|  dkr3t|d}|  dkr?t|d}t|| }| \}}	}
| |}|ddd| jddf }|dd| jdddf }| |}|ddd| jddf }|dd| jdddf }|d |d  }t|| ||  |d  || ||  |d  gd}d||dk< d||d	k < t	|ddd| jddf |ddd| jddf |	 }t	|dd| jdddf |dd| jdddf |	 }|||fS dS )
z Compute the loss by mode
        mode == 'Mix'
            est: [B, F*2, T]
            labels: [B, F*2,T]
        mode == 'SiSNR'
            est: [B, T]
            labels: [B, T]
        r<   r0   r	   r:   Nr/   :0yE>)
dimr   ru   si_snrsizera   r\   rg   Fmse_loss)r#   r.   estrs   r   rv   r;   r   bdtSSrSiYYrYiY_powgth_maskr   r   r*   r*   r+   r      sN   	



zFRCRN.loss_1layer)rR   rS   rT   rU   )rw   )r:   )rL   rM   rN   rO   r   rG   rj   r   rH   r   rQ   r*   r*   r(   r+   r   E   s    ; 

r   c                 C   s   t j| | ddd}|S )Nr   T)keepdim)r   sum)s1s2normr*   r*   r+   l2_norm  s   r   r   c           
      C   sd   t | |}t ||}|||  | }| | }t ||}t ||}dt|||  |  }	t|	S )N
   )r   r   log10mean)
r   r   eps
s1_s2_norm
s2_s2_norms_targete_noisetarget_norm
noise_normsnrr*   r*   r+   r     s   




r   )r   )!r   typingr   r   torch.nnnntorch.nn.functional
functionalr   modelscope.metainfor   modelscope.modelsr   modelscope.models.baser   modelscope.models.builderr   modelscope.utils.constantr   r   	conv_stftr
   r   rc   r   register_moduleacoustic_noise_suppressionspeech_frcrn_ans_cirm_16kr   Moduler   r   r   r*   r*   r*   r+   <module>   s*   0 G