o
    }oiO                  	   @   sl  d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	 d dl
Z
d dlmZ d dlmZ d dlmZmZmZmZ d dlmZmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dl m!Z!m"Z"m#Z#m$Z$ d dl%m&Z& z$d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0m1Z1 dZ2W n e3e4fy   dZ2Y nw dgZ5eG dd dZ6G dd dee.eZ7dS )    N)	dataclass)AnyDictListOptionalUnion)instantiate)Trainer)
DictConfig
ListConfig	OmegaConf	open_dict)
AutoConfig	AutoModelAutoTokenizer)CTCG2PBPEDataset)G2PModel)PretrainedModelInfo)
Exportable)LengthsTypeLossType
NeuralType
TokenIndex)logging)CTCLoss)WER)EncDecCTCModel)ASRBPEMixin)CTCBPEDecodingCTCBPEDecodingConfigTFCTCG2PModelc                   @   s>   e Zd ZU dZeeeef  ed< dZeeeef  ed< dS )CTCG2PConfigNtrain_dsvalidation_ds)	__name__
__module____qualname__r"   r   r   r   __annotations__r#    r(   r(   W/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/tts/g2p/models/ctc.pyr!   0   s   
 r!   c                       s  e Zd ZdZdHdedef fddZdd Zd	d
 Zdd Z	dd Z
 fddZdIddZdJddZdIddZdJddZdeddfddZe dedee fd d!Zded"efd#d$Zdefd%d&ZdHd'eeef f fd(d)ZdHd*eeef f fd+d,Zdee fd-d.Zdee fd/d0Ze dKd2d3Z!e"d4d5 Z#e#j$d6d5 Z#e"d7d8 Z%e%j$d9d8 Z% fd:d;Z&d<d= Z'e"d>d? Z(e"d@dA Z)dLdDdEZ*dFdG Z+  Z,S )Mr    z.
    CTC-based grapheme-to-phoneme model.
    Ncfgtrainerc                    s  d| _ |d ur|j|j | _ |j | _ddg| _| j| jvr*t| j d| j | |j	 | 
|| _| j	j	 }tt| |j_|jj| _dd t| jD | _dd t| jD | _t || |   t| jj| _t| jjd d| jd	d
d| _| jdd }|d u rt !t"}t#| j || j_$W d    n1 sw   Y  t%| jj$| j	d| _$t&| j$dddd| _'t&| j$dddd| _(d S )N   byt5conformer_bpez is not supported, choose from c                 S   s   i | ]\}}||qS r(   r(   .0ilr(   r(   r)   
<dictcomp>P       z(CTCG2PModel.__init__.<locals>.<dictcomp>c                 S   s   i | ]\}}||qS r(   r(   r/   r(   r(   r)   r3   Q   r4   Tctc_reduction
mean_batch)num_classeszero_infinity	reductiondecoding)	tokenizerF)r:   use_cerlog_predictiondist_sync_on_step))
world_size	num_nodesnum_devices
model_namelowermodesupported_modes
ValueError_setup_tokenizerr;   setup_grapheme_tokenizertokenizer_grapheme	get_vocabr   listkeysdecoder
vocabulary	enumeratelabels_tkn2idlabels_id2tknsuper__init___setup_encoderr   from_config_dict_cfgr   num_classes_with_blankgetlossr*   r   
structuredr   r   r:   r   r   werper)selfr*   r+   rN   decoding_cfg	__class__r(   r)   rS   ;   sR   




zCTCG2PModel.__init__c                    sL  | j dkrt|jj}|d|j| _|d|j| _|S | j dkr|jj	dur-|jj	nd}t
j| d d }|jjsA|t
j7 }|jjrXt
jd	dd
ddd}||7 }d}t|d  fdd|D   d W d   n1 sxw   Y  | d| t|jj|d}|dd| _|dd| _|S t| j  d| j )zInitialized grapheme tokenizerr-   max_source_lenmax_target_lenr.   N  '"\z/tmp/char_vocab.txtwc                    s   g | ]}  d | dqS )rf   z"
)write)r0   chfr(   r)   
<listcomp>   s    z8CTCG2PModel.setup_grapheme_tokenizer.<locals>.<listcomp>z"\""
ztokenizer_grapheme.vocab_file)
vocab_filei    is not supported. Choose from )rD   r   from_pretrainedrI   
pretrainedrX   model_max_lengthra   rb   	unk_tokenstringascii_lowercasedo_lowerascii_uppercaseadd_punctuationpunctuationreplaceopenri   register_artifactr   datasetrF   rE   )r]   r*   grapheme_tokenizergrapheme_unk_tokencharspunctuation_marksrn   r(   rk   r)   rH   u   s2   


z$CTCG2PModel.setup_grapheme_tokenizerc                 C   sN  | j dkrAt| jjj}| jjjd ur"| jjj|_t	d|j  t
j| jjj|dj| _| jjjd u r?| jjj| jj_d S d S | j dkrtjj| jjj| jjdd| _t| jj| _t| j0 d| jjvst| jjjs{t| jdr{| jj| jj_d| jjvs| jjjstd	W d    d S 1 sw   Y  d S t| j  d
| j )Nr-   z

DROPOUT: )configr.   r   )embedding_dimnum_embeddingspadding_idxfeat_in	_feat_outz1param feat_in of the decoder's config is not set!ro   )rD   r   rp   rV   rI   rq   encoderdropoutdropout_rateprintr   transformerr*   rM   r   r   d_modeltorchnn	Embedding	embeddingr;   
vocab_sizer   rU   r   hasattrr   rF   rE   )r]   r   r(   r(   r)   rT      s4   


"zCTCG2PModel._setup_encoderc           	      C   s   | j dkr| j||dd }|}|dd}n%| j dkr1| |}|dd}| j||d\}}nt| j  d| j | j|d	}|jd
dd}|||fS )Nr-   )	input_idsattention_maskr   r,      r.   audio_signallengthro   encoder_outputF)dimkeepdim)rD   r   	transposer   rF   rE   rM   argmax)	r]   r   r   	input_lenencoded_inputencoded_leninput_embedding	log_probsgreedy_predictionsr(   r(   r)   forward   s   



zCTCG2PModel.forwardc                 C   sF   |\}}}}}| j |||d\}}	}
| j|||
|d}| d| |S )Nr   r   r   r   targetsinput_lengthstarget_lengths
train_loss)r   rY   log)r]   batch	batch_idxr   r   r   r   r   r   predictionsr   rY   r(   r(   r)   training_step   s   zCTCG2PModel.training_stepc                    s
   t   S N)rR   on_train_epoch_endr]   r_   r(   r)   r      s   
zCTCG2PModel.on_train_epoch_endr   valc                 C   s  |\}}}}}	| j |||d\}
}}| j|
|||	d}| jj|
||	|d | j \}}}| j  | jj|
||	|d | j \}}}| j  | | d| | d|| d|| d|| d|| d|| d	|| d
|i}|dkrt| j	j
tkrt| j	j
dkr| j| | |S | j| |S |dkrt| j	jtkrt| j	jdkr| j| | |S | j| |S )Nr   r   )r   r   targets_lengthspredictions_lengths_loss_wer_num
_wer_denom_wer_per_num
_per_denom_perr   r,   test)r   rY   r[   updatecomputeresetr\   r   typer+   val_dataloadersrK   lenvalidation_step_outputsappendtest_dataloaderstest_step_outputs)r]   r   r   dataloader_idxsplitr   r   r   r   r   r   r   r   val_lossr[   wer_num	wer_denomr\   per_num	per_denomrY   r(   r(   r)   validation_step   sL   









 	 zCTCG2PModel.validation_stepc                 C   s   | j |||ddS )z
        Lightning calls this inside the test loop with the data from the test dataloader
        passed in as `batch`.
        r   r   )r   )r]   r   r   r   r(   r(   r)   	test_step  s   zCTCG2PModel.test_stepc                    s\  t  fdd|D  }| j  d|dd t  fdd|D  }t  fdd|D  }|| }t  fdd|D  }t  fd	d|D  }	||	 }
 d
kre| j|  }n| j|  }|   d| |   d|
 |   d|
 |   d| |
 t	d|
d  d|  t	d|d  d|  dS )zm
        Called at the end of validation to aggregate outputs (reduces across batches, not workers).
        c                       g | ]	}|  d  qS )r   r(   r0   xr   r(   r)   rm         z:CTCG2PModel.multi_validation_epoch_end.<locals>.<listcomp>r   T)prog_barc                    r   )r   r(   r   r   r(   r)   rm     r   c                    r   )r   r(   r   r   r(   r)   rm     r   c                    r   )r   r(   r   r   r(   r)   rm     r   c                    r   )r   r(   r   r   r(   r)   rm     r   r   r   r   _per_zPER: d   z% zWER: N)
r   stackmeanr   sum_test_namesupper_validation_namesr   info)r]   outputsr   r   avg_lossr   r   r[   r   r   r\   dataloader_namer(   r   r)   multi_validation_epoch_end  s"   z&CTCG2PModel.multi_validation_epoch_endc                 C   s   | j ||dd d S )Nr   r   )r   )r]   r   r   r(   r(   r)   multi_test_epoch_end*  s   z CTCG2PModel.multi_test_epoch_endreturnztorch.utils.data.DataLoaderc              
   C   sL   t |j|j| j| j| jjj| j| jjdd}t	j
jj||j|jd|jddS )zl
        Setup function for a infer data loader.
        Returns:
            A pytorch DataLoader.
        F)manifest_filepathgrapheme_fieldtokenizer_graphemestokenizer_phonemesrv   labelsra   with_labels)
collate_fn
batch_sizeshufflenum_workers	drop_last)r   r   r   rI   r;   rV   rv   rN   ra   r   utilsdata
DataLoaderr   r   r   )r]   r*   r}   r(   r(   r)   _setup_infer_dataloader-  s$   z#CTCG2PModel._setup_infer_dataloaderr   c              	   C   s   g }| j }z]tj rdnd}|   | | | |}|D ]:}|\}}}	| j|||du r2|n|||	|d\}
}}| jj	|
|dd}dd |D }|
| ~~
~~	qW | j|d	 |S | j|d	 w )
z
        Runs model inference.

        Args:
            Config: configuration file to set up DataLoader
        Returns:
            all_preds: model predictions
        cudacpuNr   F)decoder_lengthsreturn_hypothesesc                 S   s   g | ]}|j qS r(   )text)r0   hypr(   r(   r)   rm   j  s    z&CTCG2PModel._infer.<locals>.<listcomp>)rD   )trainingr   r   is_availableevaltor   r   r:   ctc_decoder_predictions_tensorextendtrain)r]   r   	all_predsrD   deviceinfer_datalayerr   r   r   r   r   r   r   
preds_hyps	preds_strr(   r(   r)   _inferG  s6   



zCTCG2PModel._infernamec                 C   s   d|vs
t |jtstd| tj|js!t|jj dt	|j|j|jj
|jj| j| jjj| j| j| jdd
}tjjj|fd|ji|jS )Ndataloader_paramszNo dataloader_params for z
 not foundT)	r   phoneme_fieldr   r   rv   r   r   ra   r   r   )
isinstancer  r
   rF   ospathexistsr   r}   r   r  r   rI   rV   rv   r;   rN   ra   r   r   r   r   r   )r]   r*   r  r}   r(   r(   r)   _setup_dataloader_from_configw  s"   z)CTCG2PModel._setup_dataloader_from_configc                 C   6   |r|j d u rtd d | _d S | j|dd| _d S )Nz`Dataloader config or file_path for the train is missing, so no data loader for train is created!r  r  )r   r   r   	_train_dlr  r]   r*   r(   r(   r)   setup_training_data     zCTCG2PModel.setup_training_dataval_data_configc                    (   |r|j d u rd | _d S t | d S r   )r   _validation_dlrR   setup_multiple_validation_data)r]   r  r_   r(   r)   r       z*CTCG2PModel.setup_multiple_validation_datatest_data_configc                    r  r   )r   _test_dlrR   setup_multiple_test_data)r]   r  r_   r(   r)   r     r  z$CTCG2PModel.setup_multiple_test_datac                 C   r  )NzjDataloader config or file_path for the validation is missing, so no data loader for validation is created!r   r  )r   r   r   r  r  r  r(   r(   r)   setup_validation_data  r  z!CTCG2PModel.setup_validation_datac                 C   r  )Nz^Dataloader config or file_path for the test is missing, so no data loader for test is created!r   r  )r   r   r   r  r  r  r(   r(   r)   setup_test_data  r  zCTCG2PModel.setup_test_dataList[PretrainedModelInfo]c                 C   s   g S r   r(   )clsr(   r(   r)   list_available_models  s   z!CTCG2PModel.list_available_modelsc                 C      | j S r   r   r   r(   r(   r)   r[        zCTCG2PModel.werc                 C   
   || _ d S r   r'  )r]   r[   r(   r(   r)   r[        
c                 C   r&  r   r   r   r(   r(   r)   r\     r(  zCTCG2PModel.perc                 C   r)  r   r+  )r]   r\   r(   r(   r)   r\     r*  c                    sR   t  jdi | tdt ttdt d| _tdt tdt d| _d S )N)BTr,  )r   r   )r   r   r(   )	rR   _prepare_for_exportr   r   tupler   _input_typesr   _output_types)r]   kwargsr_   r(   r)   r.    s   


zCTCG2PModel._prepare_for_exportc                 C   s   d  | _ | _d S r   )r0  r1  r   r(   r(   r)   _export_teardown  s   zCTCG2PModel._export_teardownc                 C   r&  r   )r0  r   r(   r(   r)   input_types  r(  zCTCG2PModel.input_typesc                 C   r&  r   )r1  r   r(   r(   r)   output_types  r(  zCTCG2PModel.output_typesr,   ,   c                    sh   d}| j |g}dd |D }t|  fddt||D }t|| jt|| jf}|S )zs
        Generates input examples for tracing etc.
        Returns:
            A tuple of input examples.
        z)Kupil sem si bicikel in mu zamenjal stol.c                 S   s   g | ]}t |qS r(   )r   )r0   entryr(   r(   r)   rm     s    z-CTCG2PModel.input_example.<locals>.<listcomp>c                    s"   g | ]\}}|d g |   qS r   r(   )r0   r7  	entry_lenmax_lenr(   r)   rm     s   " )rI   text_to_idsmaxzipr   tensorr  r  )r]   	max_batchmax_dimsentencer   r   inputsr(   r:  r)   input_example  s   $zCTCG2PModel.input_examplec                 C   s<   |  |}|dd}| j||d\}}| j|d}||fS )Nr,   r   r   r   )r   r   r   rM   )r]   r   r   r   r   r   r   r(   r(   r)   forward_for_export  s
   
zCTCG2PModel.forward_for_exportr   )r   r   r8  )r   r#  )r,   r6  )-r$   r%   r&   __doc__r
   r	   rS   rH   rT   r   r   r   r   r   r   r   r   r   no_gradr   intr
  strr  r  r   r   r  r   r   r!  r"  classmethodr%  propertyr[   setterr\   r.  r3  r4  r5  rD  rE  __classcell__r(   r(   r_   r)   r    6   sV    :$

.

/		







)8r  rt   dataclassesr   typingr   r   r   r   r   r   hydra.utilsr   lightning.pytorchr	   	omegaconfr
   r   r   r   transformersr   r   r   !nemo.collections.tts.g2p.data.ctcr    nemo.collections.tts.models.baser   nemo.core.classes.commonr   nemo.core.classes.exportabler   nemo.core.neural_typesr   r   r   r   
nemo.utilsr   nemo.collections.asr.losses.ctcr    nemo.collections.asr.metrics.werr   nemo.collections.asr.modelsr   !nemo.collections.asr.parts.mixinsr   2nemo.collections.asr.parts.submodules.ctc_decodingr   r   ASR_AVAILABLEModuleNotFoundErrorImportError__all__r!   r    r(   r(   r(   r)   <module>   s:   