o
    ॵi_                     @   s   d dl Z d dlZd dlZd dlZd dlmZmZ d dlZd dlZ	d dl
Zd dlZd dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZmZ d	Zd
ZdZd
ZdZd
Z ej!ej"ej#dG dd deZ$dS )    N)AnyDict)File)	Pipelines)
OutputKeys)InputPipeline)	PIPELINES)	ModelFileTasksi  i  hammingi   )module_namec                       s   e Zd ZdZdZ fddZdedeee	f fddZ
d	d
 Zdeee	f deee	f fddZdd Zdeee	f deee	f fddZ  ZS )ANSDFSMNPipelineap  ANS (Acoustic Noise Suppression) inference pipeline based on DFSMN model.

    Args:
        stream_mode: set its work mode, default False
        In stream model, it accepts bytes as pipeline input that should be the audio data in PCM format.
        In normal model, it accepts str and treat it as the path of local wav file or the http link of remote wav file.
    i  c           	         s  t  jdd|i| tj| jjtj}tj	|r)t
j|| jd}| j| | j  |dd| _| jrmtt| jjd   d }tj|d| _ttd D ]}| jd qQtd t d | _d	| _tt d | _t
jtd| jd
  fdd}dd }|| _|| _d S )Nmodel)map_locationstream_modeF      )maxlen    T)periodicdevicec              	      s   t j| tttd ddS )NF)centerwindowreturn_complex)torchstftN_FFT
HOP_LENGTHSTFT_WIN_LEN)xr    a/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/audio/ans_dfsmn_pipeline.pyr   B   s   z'ANSDFSMNPipeline.__init__.<locals>.stftc                 S   s   t j| tttd|dS )NF)
hop_length
win_lengthr   r   length)librosaistftr   r   WINDOW_NAME_HAM)r    slenr"   r"   r#   r(   L   s   z(ANSDFSMNPipeline.__init__.<locals>.istftr"   ) super__init__ospathjoinr   	model_dirr
   TORCH_MODEL_BIN_FILEexistsr   loadr   load_state_dictevalgetr   WINLENSTRIDElordercollectionsdequebufferrangeappendbyte_length_remainfirst_forwardtensor_give_up_lengthhamming_windowr   r   r(   )	selfr   kwargsmodel_bin_file
checkpointbyte_buffer_lengthir   r(   	__class__r!   r#   r,   )   s2   

	
zANSDFSMNPipeline.__init__inputsreturnc                 K   s  | j rt|tstdt|| jjkr"tdt| d| jj g }d}| jt| | t	d krt	d | j }t
||| D ]}| j|| jdtjdd qAt }| jD ]}|| qZtj| tjd	}	t|	tj}
||
 d| _||7 }| jt| | t	d ks3t
|t|D ]}| j|| jdtjdd |  jd7  _qd
|iS t|trt|}nt|tr|}n
tdt| d| |}
d
|
iS )Nz"Only support bytes in stream mode.zinputs length too large: z > r   r   r   F)	byteordersigneddtypeaudiozUnsupported type .)r   
isinstancebytes	TypeErrorlenr<   r   
ValueErrorr?   r8   r=   r>   to_bytessysrM   ioBytesIOwritenp
frombuffer	getbufferint16r   
from_numpytypeFloatTensorstrr   readbytes2tensor)rC   rK   preprocess_paramstensor_listcurrent_indexbyte_length_to_addrH   bytes_iobdatadata_tensor
data_bytesr"   r"   r#   
preprocessX   sf   






zANSDFSMNPipeline.preprocessc                 C   sv   t t|\}}|tj}t|jdkr|d d df }|| j	kr,t
||| j	}|d }t|tj}|S )Nr   r   i   )sfre   rZ   r[   astyper]   float32rV   shapeSAMPLE_RATEr'   resampler   ra   rb   rc   )rC   
file_bytesdata1fsrm   rn   r"   r"   r#   rf      s   
zANSDFSMNPipeline.bytes2tensorc                 K   s   | j rBt }|d D ]1}| |}| jr!|d | j  }d| _n|t d  }|| j| j  }||t	j
  q| }n|d }| |}|t	j
 }tj|iS )NrQ   F)r   rZ   r[   _forwardr@   rA   r7   r\   rr   r]   r`   tobytesgetvaluer   
OUTPUT_PCM)rC   rK   forward_paramsrk   origin_audio
masked_sigoutputsr"   r"   r#   forward   s"   



zANSDFSMNPipeline.forwardc           
   
   C   s   t  < |d}dd l}|jjj|dddd| jtd}|d}| 	|}| 
|}|ddd}||  }W d    n1 sCw   Y  |  }|d d d d df d	|d d d d df   }| |t|}	|	S )
Nr   g      ?g      D@g      4@x   )ditherframe_lengthframe_shiftnum_mel_binssample_frequencywindow_typer   r   y              ?)r   no_grad	unsqueeze
torchaudio
compliancekaldifbankru   r)   r   r   permutecpudetachnumpyr(   rV   )
rC   r   audio_inr   fbanksmasksspectrummasked_specmasked_spec_complexr   r"   r"   r#   rz      s,   




0zANSDFSMNPipeline._forwardc                 K   s<   | j sd| v rt|d tj|tj tjd| j	 |S )Noutput_pathrO   )
r   keysrq   r\   r]   r^   r   r}   r`   ru   )rC   rK   rD   r"   r"   r#   postprocess   s   zANSDFSMNPipeline.postprocess)__name__
__module____qualname____doc__ru   r,   r   r   rd   r   rp   rf   r   rz   r   __classcell__r"   r"   rI   r#   r      s    /(

*r   )%r:   rZ   r-   rY   typingr   r   r'   r   r]   	soundfilerq   r   modelscope.fileior   modelscope.metainfor   modelscope.outputsr   modelscope.pipelines.baser   r   modelscope.pipelines.builderr	   modelscope.utils.constantr
   r   r   r   r)   r   r7   r8   register_moduleacoustic_noise_suppressionspeech_dfsmn_ans_psm_48k_causalr   r"   r"   r"   r#   <module>   s4   