o
    i&                     @   s^  d dl Z d dlmZmZmZmZmZmZ d dlZ	d dl
Z
d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl8m9Z9 d dl:m;Z; d dl<m=Z= d dl>m?Z? d dl@mAZA d dlBmCZC d dlDmEZEmFZFmGZG e9d eHeeed!ed"d#d$ZIe9d%eHe!d&edd#d$ZJe9d'eHe/e3d(e-d)d#d*ZKe9d+eHe1d,d+d-ZLe9d.eHeeed/ed0d1ZMe9d2eHe)d3e'd4d1ZNe9d5eHe%d6e#dd#d*ZOG d7d8 d8e5ZPdS )9    N)Callable
CollectionDictListOptionalTuple)check_argument_typescheck_return_type)
AbsEncoder)ConformerEncoder)
RNNEncoder)TransformerEncoder)AbsFrontend)DefaultFrontend)S3prlFrontend)SlidingWindow)
AbsSpecAug)SpecAug)AbsAttractor)RnnAttractor)
AbsDecoder)LinearDecoder)ESPnetDiarizationModel)AbsNormalize)	GlobalMVN)LabelAggregate)UtteranceMVN)AbsTask)
initialize)ClassChoices)CommonCollateFn)CommonPreprocessor)Trainer)get_default_kwargs)NestedDictAction)int_or_nonestr2boolstr_or_nonefrontend)defaultsliding_windows3prlr)   T)nameclasses
type_checkr)   optionalspecaug)r0   	normalize)
global_mvnutterance_mvnr3   )r-   r.   r)   r/   label_aggregator)r4   )r-   r)   encoder)	conformertransformerrnnr7   )r-   r.   r)   decoder)linearr:   	attractor)r8   c                   @   s:  e Zd ZU dZeed< eeee	e
eegZeZedejfddZedejdedeeeeeeejf f  geee eeejf f f fd	d
Z edejdede!eeeeej"f geeejf f  fddZ#e	ddededeedf fddZ$e	ddededeedf fddZ%edejde&fddZ'dS )DiarizationTask   num_optimizersparserc                 C   s   |j dd}|jdtd dd |jddd d d	g d
d |jdtd dd |jdtttdd |j dd}|jdtddd | jD ]}|| qCd S )NzTask related)descriptionz	--num_spkzCThe number fo speakers (for each recording) used in system training)typer)   helpz--initc                 S   s   t |  S )N)r'   lower)x rE   F/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/tasks/diar.py<lambda>   s    z4DiarizationTask.add_task_arguments.<locals>.<lambda>zThe initialization method)chainerxavier_uniformxavier_normalkaiming_uniformkaiming_normalN)rA   r)   rB   choicesz--input_sizez,The number of input dimension of the featurez--model_confz&The keyword arguments for model class.)actionr)   rB   zPreprocess relatedz--use_preprocessorTz"Apply preprocessing to data or not)	add_argument_groupadd_argumentr%   r$   r#   r   r&   class_choices_listadd_arguments)clsr?   groupclass_choicesrE   rE   rF   add_task_argumentsz   sH   
z"DiarizationTask.add_task_argumentsargstrainreturnc                 C   s   t  sJ tdddS )Ng        )float_pad_valueint_pad_value)r   r    )rS   rW   rX   rE   rE   rF   build_collate_fn   s   
z DiarizationTask.build_collate_fnc                 C   s0   t  sJ |jrt|d}nd }t|sJ |S )N)rX   )r   use_preprocessorr!   r	   )rS   rW   rX   retvalrE   rE   rF   build_preprocess_fn   s   
z#DiarizationTask.build_preprocess_fnTF	inference.c                 C   s   |sd}|S d}|S )N)speech
spk_labels)rb   rE   rS   rX   ra   r_   rE   rE   rF   required_data_names   s
   z#DiarizationTask.required_data_namesc                 C   s   d}t |sJ |S )NrE   )r	   rd   rE   rE   rF   optional_data_names   s   z#DiarizationTask.optional_data_namesc              
   C   s  t  sJ |jd u rt|j}|di |j}| }n+|jd ur=|jd ur=t|j}|di |j}|j|  }nd |_i |_d }|j}|jd ur\t|j}|di |j	}nd }|j
d urrt|j
}|di |j}nd }t|j}	|	di |j}
t|j}|dd|i|j}t|j}|d|j| d|j}t|dd d urt|j}|dd| i|j}nd }td||||
|||d|j}|jd urt||j t|sJ |S )N
input_size)num_spkencoder_output_sizer;   ri   )r(   r0   r1   r4   r5   r9   r;   rE   ) r   rg   frontend_choices	get_classr(   frontend_confoutput_sizer0   specaug_choicesspecaug_confr1   normalize_choicesnormalize_conflabel_aggregator_choicesr4   label_aggregator_confencoder_choicesr5   encoder_confdecoder_choicesr9   rh   decoder_confgetattrattractor_choicesr;   attractor_confr   
model_confinitr   r	   )rS   rW   frontend_classr(   rg   specaug_classr0   normalize_classr1   label_aggregator_classr4   encoder_classr5   decoder_classr9   attractor_classr;   modelrE   rE   rF   build_model   sr   





zDiarizationTask.build_modelN)TF)(__name__
__module____qualname__r>   int__annotations__rj   rn   rp   rt   rv   rr   ry   rQ   r"   trainerclassmethodargparseArgumentParserrV   	Namespaceboolr   r   r   strr   npndarrayr   torchTensorr]   r   arrayr`   re   rf   r   r   rE   rE   rE   rF   r<   a   sj   
 4
&


r<   )Qr   typingr   r   r   r   r   r   numpyr   r   	typeguardr   r	   espnet2.asr.encoder.abs_encoderr
   %espnet2.asr.encoder.conformer_encoderr   espnet2.asr.encoder.rnn_encoderr   'espnet2.asr.encoder.transformer_encoderr   !espnet2.asr.frontend.abs_frontendr   espnet2.asr.frontend.defaultr   espnet2.asr.frontend.s3prlr   espnet2.asr.frontend.windowingr   espnet2.asr.specaug.abs_specaugr   espnet2.asr.specaug.specaugr   $espnet2.diar.attractor.abs_attractorr   $espnet2.diar.attractor.rnn_attractorr    espnet2.diar.decoder.abs_decoderr   #espnet2.diar.decoder.linear_decoderr   espnet2.diar.espnet_modelr   espnet2.layers.abs_normalizer   espnet2.layers.global_mvnr    espnet2.layers.label_aggregationr   espnet2.layers.utterance_mvnr   espnet2.tasks.abs_taskr   espnet2.torch_utils.initializer   espnet2.train.class_choicesr   espnet2.train.collate_fnr    espnet2.train.preprocessorr!   espnet2.train.trainerr"    espnet2.utils.get_default_kwargsr#    espnet2.utils.nested_dict_actionr$   espnet2.utils.typesr%   r&   r'   dictrj   rn   rp   rr   rt   rv   ry   r<   rE   rE   rE   rF   <module>   s     

