o
    i                     @   sJ   d dl mZmZmZmZmZ ddlmZmZ ddl	m
Z
 G dd dZdS )    )DictIterableListUnioncast   )has_torch_amptorch)is_torch_arrayc                   @   s   e Zd ZdZ					d$deded	ed
edef
ddZdd Z	d%de	de
d f de	ded f fddZddded defddZdd Zedd Zdd  Zd!d" Zd#S )&PyTorchGradScalera  
    Gradient scaler for the PyTorch shim.

    Gradients with small magnitudes are not representable in half-precision and
    will underflow to zero. A gradient scaler counters this issue by scaling
    up the loss before backpropagation, increasing the gradients by the same
    magnitude. A large enough scale will avoid that the gradients underflow.
    The gradients are unscaled in single precision after backpropagation, to
    provide the unscaled gradients to the optimizer.
    F      @      ?       @  enabled
init_scalebackoff_factorgrowth_factorgrowth_intervalc                 C   sD   || _ || _|| _|| _tjddtjd| _td|| _d| _	dS )a  
        Construct a gradient scaler for the PyTorch shim.

        enabled (bool):
            Sets whether the gradient scalar is enabled. If it is disabled, the
            methods of the grad scaler are no-ops.

        init_scale (float):
            The initial scale used to increase the gradient magnitude.

        backoff_factor (float):
            The scale will be multiplied by this factor if any of the gradients
            overflows.

        growth_factor (float):
            The scale will be multiplied by this factor when none of the gradients
            overflowed for "growth_interval" steps.

        growth_interval (int):
            When no overflows were found for this number of steps, the scale will
            be multiplied by "growth_factor".
           r   )dtypeFN)
_enabled_growth_factor_backoff_factor_growth_intervalr	   fullint_growth_tracker_scale
_found_inf)selfr   r   r   r   r    r"   S/home/ubuntu/.local/lib/python3.10/site-packages/thinc/shims/pytorch_grad_scaler.py__init__   s   
zPyTorchGradScaler.__init__c                 C   s    | j || _ | j|| _d S N)r   tor   )r!   devicer"   r"   r#   to_:   s   zPyTorchGradScaler.to_tensorstorch.Tensorreturnc                 C   s~   | j std|S td}t }t|rtd|}| |||S t|tr=g }|D ]}t|s0||| ||| q(|S |)z)Scale up the values in the given tensors.r*   z>Input to gradient scaling must be a Tensor or Iterable[Tensor])	r   r   
ValueErrordictr
   _scale_tensor
isinstancer   append)r!   r)   inplaceincorrect_typescale_per_devicetensorscaled_tensorsr"   r"   r#   scale>   s&   


zPyTorchGradScaler.scaler4   r3   )ztorch.devicer*   r1   c                 C   s\   t std|jsd}t||j}||vr| jj|d||< || }|r*||S || S )NzHGradient scaling is not supported, requires capable GPU and torch>=1.9.0zGradient scaling is only supported for CUDA tensors. If you are using PyTorch models, you can avoid this error by disabling mixed-precision support.r'   )r   r,   is_cudar'   r   r&   mul_)r!   r4   r3   r1   msgr'   r6   r"   r"   r#   r.   ^   s   
zPyTorchGradScaler._scale_tensorc                 C   s,   t  }|D ]}||jg }|| q|S r%   )r-   
setdefaultr'   r0   )r!   r)   tensors_per_devicer4   device_tensorsr"   r"   r#   _tensors_per_device|   s
   z%PyTorchGradScaler._tensors_per_devicec                 C   s   | j S r%   )r    )r!   r"   r"   r#   	found_inf   s   zPyTorchGradScaler.found_infc                 C   sz   | j sdS | j   }| |}| D ]"\}}tjdd|d}|j	|d}t
||| t|dkr9d| _q| jS )zNUnscale the given tensors. Returns True if any of the gradients were infinite.Fr           r7   r   T)r   r   double
reciprocalfloatr>   itemsr	   r   r&   *_amp_foreach_non_finite_check_and_unscale_boolr    )r!   r)   	inv_scaler<   r'   r=   found_inf_deviceinv_scale_devicer"   r"   r#   unscale   s   
zPyTorchGradScaler.unscalec                 C   sP   | j sdS tjd| jrdnd| jjd}t| j| j|| j| j	| j
 d| _dS )z
        Update the scale factor and clear information about infinities.

        This method should be called after each optimization step.
        Nr   g      ?r@   r7   F)r   r	   r   r    r   r'   _amp_update_scale_r   r   r   r   )r!   rH   r"   r"   r#   update   s   

zPyTorchGradScaler.updateN)Fr   r   r   r   )F)__name__
__module____qualname____doc__rF   rC   r   r$   r(   r   r   r   r6   r   r.   r>   propertyr?   rJ   rL   r"   r"   r"   r#   r      sJ    
'
 

r   N)typingr   r   r   r   r   compatr   r	   utilr
   r   r"   r"   r"   r#   <module>   s    