o
    }oi                     @   s   d dl Z d dlm  mZ d dl mZ d dlmZmZ d dlm	Z	m
Z
mZmZmZmZ dgZG dd deZG dd deZdS )	    N)nn)Loss	typecheck)
LabelsTypeLengthsTypeLogprobsTypeLossType
NeuralTypeSpectrogramTypeMLMLossc                       sb   e Zd Zedd Zedd Zedd Z		dd	ed
ef fddZ	e
 	dddZ  ZS )r   c              
   C   sZ   t dt ddt dt t dt t tdt ddt tdt ddt dt dddS )z(Input types definitions for Contrastive.BDTToptional)r   r   r   r   r   r   )
spec_masksdecoder_outputstargetsdecoder_lengthstarget_lengthsmasks)r	   r
   r   r   tupler   self r   ^/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/asr/losses/ssl_losses/mlm.pyinput_types   s   

zMLMLoss.input_typesc                 C   s   dt t diS )z]Output types definitions for Contrastive.
        loss:
            NeuralType(None)
        loss)elements_type)r	   r   r   r   r   r   output_types&   s   zMLMLoss.output_typesc                 C   s   dS )NTr   r   r   r   r   needs_labels.   s   zMLMLoss.needs_labels   皙?combine_time_stepsmask_thresholdc                    s$   t    t | _|| _|| _d S N)super__init__r   NLLLossnll_lossr%   r&   )r   r%   r&   	__class__r   r   r)   2   s   


zMLMLoss.__init__Nc           
      C   s   |d u r|}| dd}||jd |jd | j d}|d| jk}|| }t|d|jd |jd  f}|| }| ||}	t	|	}	|	S )Nr#      r   )
	transposereshapeshaper%   meanr&   Fpadr+   torch)
r   r   r   r   r   r   r   out_masked_onlytargets_masked_onlyr   r   r   r   forward<   s     
zMLMLoss.forward)r#   r$   )NNNN)__name__
__module____qualname__propertyr   r!   r"   intfloatr)   r   r9   __classcell__r   r   r,   r   r      s"    



c                	       sX   e Zd ZdZedd Z				ddeded	ed
ef fddZ	e
 dddZ  ZS )MultiMLMLossa;  
    Masked language model loss for multiple decoders, where cross-entropy loss is applied separately on each decoder.
    This loss can be used with `nemo.collections.asr.modules.ssl_modules.MultiSoftmaxDecoder` to train a model with multiple targets per frame.
    Reference: https://arxiv.org/abs/2202.01855
    c              	   C   sz   | j r| jdkrtdt }tdt }ntdt }tdt }tdt ||ttdt dd	ttdt dd	d
S )Nr#   )r   r   Cr   )r   r   rB   H)r   r   rC   r   r   Tr   )r   r   r   r   r   )squeeze_singlenum_decodersr	   r   r   r
   r   r   )r   r   r   r   r   r   r   [   s   
zMultiMLMLoss.input_typesr#   r$   Fr%   r&   rE   rD   c                    s&   t    || _|| _t||| _d S r'   )r(   r)   rE   rD   r   mlm_loss)r   r%   r&   rE   rD   r,   r   r   r)   k   s   
zMultiMLMLoss.__init__Nc              	   C   s   | j r| jdkr| j|||||dS d}t| jD ]#}|| j||d d d d d d |f |d d d d |f ||d7 }q|| j S )Nr#   )r   r   r   r   r   g        )rD   rE   rF   range)r   r   r   r   r   r   r   ir   r   r   r9   w   s$   

zMultiMLMLoss.forward)r#   r$   r#   F)NN)r:   r;   r<   __doc__r=   r   r>   r?   boolr)   r   r9   r@   r   r   r,   r   rA   T   s&    
rA   )r6   torch.nn.functionalr   
functionalr4   	nemo.corer   r   nemo.core.neural_typesr   r   r   r   r	   r
   __all__r   rA   r   r   r   r   <module>   s    ;