o
    ॵi
                     @   s   d dl Z d dlZd dlmZmZ d dlZd dlm  m	Z
 d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ e Zd
ZdZdd Zej ej!ej"dG dd deZ#dS )    N)AnyDict)	Pipelines)
OutputKeys)Pipeline)	PIPELINES)LinearAECAndFbank)	ModelFileTasks)
get_loggerzfeature.DEY.mvn.txtzdey_mini.yamlc                 C   s*   t | d }t|| d di | d S )a+  According to config items, load specific module dynamically with params.
        1. Load the module corresponding to the "module" param.
        2. Call function (or instantiate class) corresponding to the "main" param.
        3. Send the param (in "args") into the function (or class) when calling ( or instantiating).

    Args:
        module_cfg (dict): config items, eg:
            {
                "module": "models.model",
                "main": "Model",
                "args": {...}
            }

    Returns:
        the module loaded.
    modulemainargsN )	importlibimport_modulegetattr)
module_cfgr   r   r   b/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/audio/linear_aec_pipeline.pyinitialize_config   s   r   )module_namec                       st   e Zd ZdZ fddZdd Zdeeef deeef fdd	Z	deeef deeef fd
dZ
dd Z  ZS )LinearAECPipelinea  AEC Inference Pipeline only support 16000 sample rate.

    When invoke the class with pipeline.__call__(), you should provide two params:
        Dict[str, Any]
            the path of wav files, eg:{
            "nearend_mic": "/your/data/near_end_mic_audio.wav",
            "farend_speech": "/your/data/far_end_speech_audio.wav"}
        output_path (str, optional): "/your/output/audio_after_aec.wav"
            the file path to write generate audio.
    c                    s  t  jdd|i| tj | _ttj	| j
tdd}t| | _tj	| j
t| jd d< W d   n1 s=w   Y  |   t| jd | _| jd d d	 | jd d d
  tjdd fdd} fdd}|| _|| _dS )z
        use `model` and `preprocessor` to create a kws pipeline for prediction
        Args:
            model: model id on modelscope hub.
        modelzutf-8)encodingiomvnNlossr   n_fft
hop_lengthF)periodicc                    s&   t t j|  d| jddS )NFT)centerwindowreturn_complex)torchview_as_realstfttodevice)xr   r   r!   winlenr   r   r%   T   s   
z(LinearAECPipeline.__init__.<locals>.stftc              	      s&   t jt |  | jd|dS )NF)r!   r    length)r#   istftview_as_complexr&   r'   )r(   slenr)   r   r   r,   _   s   
z)LinearAECPipeline.__init__.<locals>.istftr   )super__init__r#   cudais_availableuse_cudaopenospathjoinr   CONFIG_YAMLyaml	full_loadreadconfigFEATURE_MVN_init_modelr   preprocessorhamming_windowr%   r,   )selfr   kwargsfr%   r,   	__class__r)   r   r0   ?   s&   

zLinearAECPipeline.__init__c                 C   sN   t jtj| jtjdd}t| j	d | _| j
r| j | _| j| d S )Ncpu)map_locationnnet)r#   loadr5   r6   r7   r   r	   TORCH_MODEL_BIN_FILEr   r<   r3   r1   load_state_dict)rA   
checkpointr   r   r   r>   l   s   zLinearAECPipeline._init_modelinputsreturnc                 C   s.   |  |d |d }|tj }tj|iS )a  The AEC process.

        Args:
            inputs: dict={'feature': Tensor, 'base': Tensor}
                'feature' feature of input audio.
                'base' the base audio to mask.

        Returns:
            output_pcm: generated audio array
        featurebase)_processastypenpint16tobytesr   
OUTPUT_PCM)rA   rM   output_datar   r   r   forwardu   s   
zLinearAECPipeline.forwardc                 K   s8   d|  v rt|d | jjtj|tj tj	d |S )a[  The post process. Will save audio to file, if the output_path is given.

        Args:
            inputs: a dict contains following keys:
                - output_pcm: generated audio array
            kwargs: accept 'output_path' which is the path to write generated audio

        Returns:
            output_pcm: generated audio array
        output_path)dtype)
keyswavwriter?   SAMPLE_RATErS   
frombufferr   rV   rT   )rA   rM   rB   r   r   r   postprocess   s   zLinearAECPipeline.postprocessc                 C   s   | j r| }| }| jjr5t  | |d\}}|g d}W d    n1 s/w   Y  n#t  | |d}|g d}W d    n1 sSw   Y  | |}|| }| 	|t
|  }|S )Nr   )      r   )r3   r1   r   vadr#   no_grad	unsqueezepermuter%   r,   lenrF   numpy)rA   fbanksmixturemasksrc   spectrummasked_spec
masked_sigr   r   r   rQ      s"   


zLinearAECPipeline._process)__name__
__module____qualname____doc__r0   r>   r   strr   rX   r`   rQ   __classcell__r   r   rD   r   r   0   s    -"	"r   )$r   r5   typingr   r   rh   rS   scipy.io.wavfiler   wavfiler\   r#   r9   modelscope.metainfor   modelscope.outputsr   modelscope.pipelines.baser   modelscope.pipelines.builderr   modelscope.preprocessorsr   modelscope.utils.constantr	   r
   modelscope.utils.loggerr   loggerr=   r8   r   register_moduleacoustic_echo_cancellationspeech_dfsmn_aec_psm_16kr   r   r   r   r   <module>   s.   