o
    ॵi0+                     @   s   d dl Z d dlmZmZmZmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZmZmZ d d
lmZ d dlmZ dgZe Zej ej!e
j"dG dd deZ#dS )    N)AnyDictOptionalUnion)	Pipelines)Model)
OutputKeys)Pipeline)	PIPELINES)generate_scp_from_urlupdate_local_model)
Frameworks	ModelFileTasks)snapshot_download)
get_loggerLauraCodecTTSPipeline)module_namec                       s(  e Zd ZdZ				ddeeef deeeef  dee def fdd	Z						dd
ee
eef dee
eef dee
eef dededeeef fddZdedeeef fddZdd Zdeeef fddZ			dd
ee
eef dee
eef dee
eef defddZdd Z  ZS ) r   aa  Laura-style Codec-based TTS Inference Pipeline
    use `model` to create a TTS pipeline.

    Args:
        model (LauraCodecTTSPipeline): A model instance, or a model local dir, or a model id in the model hub.
        kwargs (dict, `optional`):
            Extra kwargs passed into the preprocessor's constructor.
    Examples:
        >>> from modelscope.pipelines import pipeline
        >>> from modelscope.utils.constant import Tasks
        >>> my_pipeline = pipeline(
        >>>    task=Tasks.text_to_speech,
        >>>    model='damo/speech_synthesizer-laura-en-libritts-16k-codec_nq2-pytorch'
        >>> )
        >>> text='nothing was to be done but to put about, and return in disappointment towards the north.'
        >>> prompt_text='one of these is context'
        >>> prompt_speech='example/prompt.wav'
        >>> print(my_pipeline(text))

    N   modelcodec_modelcodec_model_revisionngpuc                    sh  t  jdd|i| | j | _|| _|| _| ||| _ddl	m
} |jdi d| jd d| jd d| jd d| jd d|d	| jd	 d
| jd
 d| jd d| jd d| jd d| jd d| jd d| jd d| jd d| jd d| jd d| jd d| jd d| jd d| jd d| jd d| jd d| jd | _dS )z=use `model` to create an asr pipeline for prediction
        r   r   )text2audio_inferencemode
output_dir
batch_sizedtyper   seednum_workers	log_levelkey_fileconfig_file
model_file	model_tagallow_variable_data_keys	streamingtext_emb_model	beam_sizesampling	continualtokenize_to_phoneexclude_promptcodec_config_filecodec_model_file
param_dictN )super__init__r   forward	model_cfgr   r   get_cmdcmdfuncodec.binr   inference_funcfunasr_infer_modelscope)selfr   r   r   r   kwargsr   	__class__r0   m/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/audio/codec_based_synthesis_pipeline.pyr2   1   sj   








	















zLauraCodecTTSPipeline.__init__textprompt_textprompt_audior   r/   returnc                 C   sL   t |dkr
td|d ur|| jd< || jd< | |||}| |}|S )Nr   zThe input should not be null.r   r/   )len
ValueErrorr6   r3   postprocess)r:   r?   r@   rA   r   r/   outputresultr0   r0   r>   __call__Y   s   


zLauraCodecTTSPipeline.__call__inputsc                 C   sr   i }t t|D ].}t|dkr(|dkr(|d d d }|  d |tj< q|| d d ||| d < q|S )zPostprocessing
        r   r   valuegenkey)rangerC   cpunumpyr   
OUTPUT_WAV)r:   rI   rsti	recon_wavr0   r0   r>   rE   i   s   z!LauraCodecTTSPipeline.postprocessc                 C   s   | j d ur\| j dkr^tj| j r| j }nt| j | jd}td| tj	|t
j}tt| }tj|}tj	||d d d |d< tj	||d d d |d	< d S d S d S )
N )revisionz loading codec model from {0} ...r   model_configr#   r.   r"   r-   )r   ospathexistsr   r   loggerinfoformatjoinr   CONFIGURATIONjsonloadsopenreaddirname)r:   r6   r   config_pathr4   	model_dirr0   r0   r>   load_codec_modelv   s"   z&LauraCodecTTSPipeline.load_codec_modelc           
      C   s  | j d d }tj| j d | j d d }tj| j d | j d d }t| j d || i d|dd ddd	d
dddddddddd d|d|dd dddddddddd dddd d d d}g d}| j d }|dr| jd u r|d | _|dr| jd u r|d | _| | |D ]9}	|	| j d v r| j d |	 d urt	||	 t
rt	| j d |	 t
r||	 | j d |	  q| j d |	 ||	< q|D ]0}	|	|v r||	d urt	||	 t
rt	||	 t
r||	 ||	  n||	 ||	< ||	= q|S )NrV   r   model_workspacer#   r"   r   r   r   r   float32r   r   r   r   r    ERRORr!   r$   r%   Tr&   Fr(   r)      r'   )r*   r+   r,   r.   r-   r/   )r   r   r   r    r%   r&   r   sampling_rate	bit_width	use_scaler/   r   r   )r4   rW   rX   r]   r   __contains__r   r   rf   
isinstancedictupdateget)
r:   
extra_args
model_pathr   _model_path_model_configr6   user_args_dictrV   	user_argsr0   r0   r>   r5      s   	







zLauraCodecTTSPipeline.get_cmdc                 K   sN  t |trtd| d d\}}t|ddkr$t|dg}n|g}|dur|durt|ddkrC|t|d n|| t |trmt|ddkra|t|d n4t|\}}|| n(ddl	}	t ||	j
r||  nt |tjr|| n
tdt| d	|| jd
< || jd< | | j}
|
S )zDecoding
        zGenerate speech for: z ...)NN,   Nr   zUnsupported prompt audio type .name_and_type
raw_inputs)ro   strrZ   r[   rC   splittupleappendr   torchTensorrO   npndarray	TypeErrortyper6   run_inference)r:   r?   r@   rA   forward_paramsdata_cmdr}   
audio_path_r   rG   r0   r0   r>   r3      s6   




zLauraCodecTTSPipeline.forwardc                 C   s:   | j tjkr| j|d |d |d |d d}|S td)Nr|   r}   r   r/   )data_path_and_name_and_typer}   output_dir_v2r/   zmodel type is mismatching)	frameworkr   r   r9   rD   )r:   r6   	sv_resultr0   r0   r>   r   
  s   z#LauraCodecTTSPipeline.run_inference)NNNr   )NNNNN)NNN)__name__
__module____qualname____doc__r   r   r~   r   intr2   r   r   rp   r   rH   listrE   rf   r5   r3   r   __classcell__r0   r0   r<   r>   r      sb    
)

U
0)$rW   typingr   r   r   r   r_   rO   r   modelscope.metainfor   modelscope.modelsr   modelscope.outputsr   modelscope.pipelines.baser	   modelscope.pipelines.builderr
   "modelscope.utils.audio.audio_utilsr   r   modelscope.utils.constantr   r   r   modelscope.utils.hubr   modelscope.utils.loggerr   __all__rZ   register_moduletext_to_speechlaura_codec_tts_inferencer   r0   r0   r0   r>   <module>   s&   