o
    ©Ì³iÕ  ã                   @   s2   d dl Z d dl mZ dejde jddfdd„ZdS )é    N)ÚnnÚmodelÚscalerÚreturnc                 C   sB   d}|   ¡ D ]}|s|j}| |¡}|jdur| j|9  _qdS )aˆ  
    Utility to scale the gradients of a model.
    This is useful for gradient accumulation where we want to normalize
    the gradients by the total number of tokens seen.

    Inputs:
        model (nn.Module): model whose gradients should be scaled
        scaler (torch.Tensor): scaling factor to apply to the gradients

    Outputs:
        None (grad fields are modified in place)
    N)Ú
parametersÚdeviceÚtoÚgrad)r   r   r   Úp© r   úS/home/ubuntu/.local/lib/python3.10/site-packages/torchtune/training/_grad_scaler.pyÚscale_grads   s   

€úr   )Útorchr   ÚModuleÚTensorr   r   r   r   r   Ú<module>   s   