o
    }oi.                     @   sv   d dl mZ d dlZd dlmZmZmZmZ d dlm	Z	m
Z
mZmZmZ ddgZG dd deZG dd deeZdS )	    )OptionalN)
ExportableLossNeuralModule	typecheck)
LabelsTypeLogprobsTypeLossTypeMaskType
NeuralTypeSmoothedCrossEntropyLossSmoothedNLLLossc                       sv   e Zd ZdZedd Zedd Z						
ddee dee	 dee de	de
f
 fddZe dddZ  ZS )r   a  
    Calculates Cross-entropy loss with label smoothing for a batch of sequences.

    SmoothedCrossEntropyLoss:
    1) excludes padding tokens from loss calculation
    2) allows to use label smoothing regularization
    3) allows to calculate loss for the desired number of last tokens
    4) per_token_reduction - if False disables reduction per token

    Args:
        label_smoothing (float): label smoothing regularization coefficient
        predict_last_k (int): parameter which sets the number of last tokens to calculate the loss for, for example
            0: (default) calculate loss on the entire sequence (e.g., NMT)
            1: calculate loss on the last token only (e.g., LM evaluation)
            Intermediate values allow to control the trade-off between eval
            time (proportional to the number of batches) and eval performance
            (proportional to the number of context tokens)
        pad_id (int): padding id
        eps (float): the small eps number to avoid division buy zero
    c                 C   s(   t dt t dt t dt dddS )3Returns definitions of module input ports.
        BTDr   r   Toptional)	log_probslabelsoutput_maskr   r   r   r
   self r   i/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/common/losses/smoothed_cross_entropy.pyinput_types/   s   

z$SmoothedCrossEntropyLoss.input_typesc                 C      dt t diS z4Returns definitions of module output ports.
        loss)elements_typer   r	   r   r   r   r   output_types9      z%SmoothedCrossEntropyLoss.output_typesN        r   ư>Tpad_idlabel_smoothingpredict_last_kepsper_token_reductionc                    s,   t    || _|| _|| _|| _|| _d S N)super__init___pad_id_eps_predict_last_k_label_smoothing_per_token_reduction)r   r(   r)   r*   r+   r,   	__class__r   r   r/   ?   s   

z!SmoothedCrossEntropyLoss.__init__c                 C   s  |du r| j du rtd|du r| j dur|| j k|j}|j|jur+||j}| \}}}|| j |d  }|d|dd}|j	dd}	d| | ||	  }
|
dd| j
 df }
|dd| j
 df }| jrt|
|  }
|
| | j  }
|
S |
|  }
|
S )aW  
        Args:
            log_probs: float tensor of shape batch_size x seq_len x vocab_size, values should be log probabilities
            labels: int tensor of shape batch_size x seq_len
            output_mask: binary tensor of shape batch_size x seq_len
            eps: epsilon param to avoid divide by zero in loss calculation
        Nz$Both output_mask and pad_id are None      dimg      ?)r0   
ValueErrortodtypesizer3   gather	unsqueezesqueezemeanr2   r4   torchsumr1   )r   r   r   r   
batch_sizeseq_len
vocab_size	smoothingtarget_log_probssmoothing_log_probsneg_log_likelihoodr   r   r   forwardN   s&   	
z SmoothedCrossEntropyLoss.forward)Nr&   r   r'   Tr-   )__name__
__module____qualname____doc__propertyr   r$   r   intfloatboolr/   r   rM   __classcell__r   r   r5   r   r      s0    
	
c                       sJ   e Zd ZdZedd Zedd Zd fd	d
	Ze dddZ	  Z
S )r   zf
    Calculate negative log likelihodd for sequence input, also applies label smoothing (if set).
    c                 C   s6   t dt t dt t dt ddt dt dddS )r   r   r   Tr   r   )r   r   r   lengthsr   r   r   r   r   r   w   s
   

zSmoothedNLLLoss.input_typesc                 C   r   r    r#   r   r   r   r   r$      r%   zSmoothedNLLLoss.output_typesrC   r&   :0yE>c                    s8   t    || _|| _tjjdddi|| _|| _d S )N	reductionnoner   )	r.   r/   rY   r)   rD   nnNLLLossnll_lossr+   )r   rY   r)   r+   kwargsr5   r   r   r/      s
   

zSmoothedNLLLoss.__init__Nc                 C   s  |du r|du rt | }n%|du r5|dur5t j|d|jddddf |dddf k }| }|dd}|| || }|d}| jdkrZ|	 t 	|| j
  }n&| jdkrf|	 | }n| jdkr||d		d||d		d| j
  }| jd
kr|S t j|dd| }| jdkrt 	|t 	| }n| jdkrt 	||jd  }n| jdkr|	d|	d }| j | d| j |  S )z
        Params:
        -   log_probs: BxTxC
        -   labels: B
        -   output_mask: BxT
        -   lengths: B
        Nr7   )devicer8   r   rC   	batchmeanbatchr9   r&   r:   )rD   	ones_likerT   aranger?   r_   	transposer]   rY   rE   r+   reshaper)   rC   shape)r   r   r   r   rW   r!   rF   loss_regr   r   r   rM      s0   
2



*



zSmoothedNLLLoss.forward)rC   r&   rX   )NN)rN   rO   rP   rQ   rR   r   r$   r/   r   rM   rV   r   r   r5   r   r   r   s    


)typingr   rD   nemo.core.classesr   r   r   r   nemo.core.neural_typesr   r   r	   r
   r   __all__r   r   r   r   r   r   <module>   s   Y