o
    ॵi#                     @   s   d dl Z d dlmZmZmZmZmZmZ d dlZd dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZmZ d dlmZ e ZdgZ ej!ej"ej#dG dd deZ$dS )    N)AnyDictListSequenceTupleUnion)	asr_utils)	Pipelines)Model)
OutputKeys)Pipeline)	PIPELINES)generate_scp_from_urlupdate_local_model)
Frameworks	ModelFileTasks)
get_loggerSeparationPipeline)module_namec                       s   e Zd ZdZ		ddeeef def fddZ					ddeee	f d	ed
ededede
deeef fddZdeeef fddZdeeef deeef fddZdeeef deeef fddZdd Z  ZS )r   aQ  Speech Separation Inference Pipeline
    use `model` to create a speech separation pipeline for prediction.

    Args:
        model: A model instance, or a model local dir, or a model id in the model hub.
        kwargs (dict, `optional`):
            Extra kwargs passed into the preprocessor's constructor.

    Example:
        >>> from modelscope.pipelines import pipeline
        >>> pipeline = pipeline(
        >>>    task=Tasks.speech_separation, model='damo/speech_separation_mossformer_8k_pytorch')
        >>> audio_in = 'mix_speech.wav'
        >>> print(pipeline(audio_in))

    N   modelngpuc                    s   t  jdd|i| tj|tj}| |||| _ddl	m
} |jd| jd | jd || jd | jd | jd | jd	 | jd
 | jd | jd | jd | jd d|| _dS )zKuse `model` to create an speech separation pipeline for prediction
        r   r   )ss_inference_launchmode
batch_size	log_levelss_infer_configss_model_file
output_dirdtypeseednum_workersnum_spks
param_dict)r   r   r   r   r   r   r   r    r!   r"   r#   r$   N )super__init__ospathjoinr   CONFIGURATIONget_cmdcmd
funasr.binr   inference_launchfunasr_infer_modelscope)selfr   r   kwargsconfig_pathr   	__class__r%   i/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/audio/speech_separation_pipeline.pyr'   ,   s(   zSeparationPipeline.__init__audio_inaudio_fs
recog_typeaudio_formatr   r$   returnc                 K   sj  d| _ d| _|| _|| _d| _d}|dur|| jd< |dur#|| jd< t|tr1t|\| _ | _n-t|t	r=|| _ d| _n!ddl
}	ddl}
t||
jrRd| _ || _nt||	jr^d| _ || _|dure|| _|du sm|du r|tj| j ||d\| _| _| _ ttdr| j durt| j | j}|dur|| _|dur|| jd d< n| j| jd d< | j| j fi |}|S )	a  
        Decoding the input audios
        Args:
            audio_in('str' or 'bytes'):
                - A string containing a local path to a wav file
                - A string containing a local path to a scp
                - A string containing a wav url
                - A bytes input
            audio_fs('int'):
                frequency of sample
            recog_type('str'):
                recog type for wav file or datasets file ('wav', 'test', 'dev', 'train')
            audio_format('str'):
                audio format ('pcm', 'scp', 'kaldi_ark', 'tfrecord')
            output_dir('str'):
                output dir
            param_dict('dict'):
                extra kwargs
        Return:
            A dictionary of result or a list of dictionary of result.

            The dictionary contain the following keys:
            - **text** ('str') --The vad result.
        Nr   r$   r   )r7   r9   r:   sample_rate_checkingfsr8   )r7   
raw_inputsr9   r:   r8   r-   
isinstancestrr   bytesnumpytorchTensorndarrayr   type_checkinghasattrr<   forward)r1   r7   r8   r9   r:   r   r$   r2   checking_audio_fsrB   rC   outputr%   r%   r6   __call__G   sZ    




zSeparationPipeline.__call__c                 C   s8  t t| }tj|}tj||d d d }tj||d d d }|d d d }d }	tj|rPt|dd}
t	
|
}|
  d|v rP|d }	t|d d || |d	d	d
||d ddddd d d dd}|	d ur}d|	v r}|	d |d d< g d}|D ]}||v r||d ur|| ||< ||= q|S )Nr   model_configss_model_namess_model_configr   zutf-8)encodingfrontend_confr   ERRORfloat32r      )model_fsr8   )r   r   r   r   r   r   r   r    r!   r"   r#   r$   r=   r=   rT   )r   r   r   r   r$   r"   r=   )jsonloadsopenreadr(   r)   dirnamer*   existsyaml	full_loadcloser   get)r1   r3   
extra_args
model_path	model_cfg	model_dirss_model_pathrN   r   rP   config_filerootr-   user_args_dict	user_argsr%   r%   r6   r,      sZ   
zSeparationPipeline.get_cmdinputsc                 K   s   |S )Nr%   )r1   rh   post_paramsr%   r%   r6   postprocess   s   zSeparationPipeline.postprocessc                 K   s   t d t| jtr| jddg}nt| jtr| jddg}n| jdur&d}|| jd< | j| jd< | j| jd< | j| jfi |}|S )	zDecoding
        z Speech Separation Processing ...speechrA   soundNname_and_typer>   r7   )	loggerinfor?   r7   rA   r@   r>   r-   run_inference)r1   r7   r2   data_cmd	ss_resultr%   r%   r6   rH      s   


zSeparationPipeline.forwardc                 K   sL   g }| j tjkr"| jd|d |d |d |d |d d|}|S td)	Nrm   r>   r   r=   r$   )data_path_and_name_and_typer>   output_dir_v2r=   r$   zmodel type is mismatchingr%   )	frameworkr   rC   r0   
ValueError)r1   r-   r2   rr   r%   r%   r6   rp      s   
z SeparationPipeline.run_inference)Nr   )NNNNN)__name__
__module____qualname____doc__r   r
   r@   intr'   rA   dictr   r   rK   r,   rj   rH   rp   __classcell__r%   r%   r4   r6   r      sF    



R5

")%r(   typingr   r   r   r   r   r   rU   r[   funasr.utilsr   modelscope.metainfor	   modelscope.modelsr
   modelscope.outputsr   modelscope.pipelines.baser   modelscope.pipelines.builderr   "modelscope.utils.audio.audio_utilsr   r   modelscope.utils.constantr   r   r   modelscope.utils.loggerr   rn   __all__register_modulespeech_separationfunasr_speech_separationr   r%   r%   r%   r6   <module>   s&    