o
    }oi                     @   sT   d dl Z d dlmZ G dd de jjZG dd de jjZG dd de jjZ	dS )	    N)make_non_pad_maskc                   @   sF   e Zd ZdZe	ddejdejdefddZedejfd	d
Z	dS )GradExpNormalizezbFunction for fast gradient normalization.
    Typical use case is normalization for mle loss.
    mean	log_probsinput_lengths	reductionc                 C   sZ   t ||jd }| }t|}||  || 7  < |dkr&||jd  }| | |S )N   r   r   )r   shapeexptorch
zeros_likesave_for_backward)ctxr   r   r   maskprobs
norm_probs r   \/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/asr/parts/k2/grad_utils.pyforward   s   

zGradExpNormalize.forwardgrad_outputc                 C   s$   || dd| jd   d d fS )Nr   )sum	unsqueezesaved_tensors)r   r   r   r   r   backward&   s   $zGradExpNormalize.backwardN)r   )
__name__
__module____qualname____doc__staticmethodr   Tensorstrr   r   r   r   r   r   r      s    r   c                	   @   sJ   e Zd ZdZedejdejdejdejfddZedejfd	d
ZdS )
GradInsertzFunction to attach a pre-computed gradient to a tensor.
    Typical use case is gradient computation before calling loss.backward().
    input_tensoroutput_tensorgradr   c                 C   s*   |j sJ |j s|j rJ | || |S N)requires_gradr   )r   r#   r$   r%   r   r   r   r   r   0   s   
zGradInsert.forwardr   c                 C   s@   | j \}}tj|jd |j|jd}|||< ||j jd d d fS )Nr   )dtypedevice)r   r   zerosr	   r(   r)   T)r   r   
saved_gradr   padded_grad_outputr   r   r   r   :   s   
zGradInsert.backwardN)	r   r   r   r   r   r   r    r   r   r   r   r   r   r"   +   s    	r"   c                       sL   e Zd ZdZdejjf fddZdejdejdejdejfd	d
Z	  Z
S )PartialGradzlModule for partial gradient computation.
    Useful when computing loss on batch splits to save memory.
    funcc                    s   t    || _d S r&   )super__init__r/   )selfr/   	__class__r   r   r1   H   s   

zPartialGrad.__init__r#   targetsr   target_lengthsc           
      C   sT   |  }|d | ||||\}}|  }|   |j}	t|||	||fS )NT)detachrequires_grad_r/   r   r   r%   r"   apply)
r2   r#   r5   r   r6   
loc_tensor
new_tensorr   loc_new_tensorr%   r   r   r   r   L   s   
zPartialGrad.forward)r   r   r   r   r   nnModuler1   r    r   __classcell__r   r   r3   r   r.   C   s    r.   )
r   #nemo.collections.asr.parts.k2.utilsr   autogradFunctionr   r"   r=   r>   r.   r   r   r   r   <module>   s
   