import logging
from contextlib import ExitStack
from typing import TYPE_CHECKING, Any, ContextManager, Literal, Mapping, Optional, Union

import torch
from lightning_utilities import apply_to_collection
from lightning_utilities.core.imports import RequirementCache
from torch import Tensor
from typing_extensions import override

from lightning_fabric.plugins.precision.precision import Precision
from lightning_fabric.plugins.precision.utils import (
    _ClassReplacementContextManager,
    _convert_fp_tensor,
    _DtypeContextManager,
)
from lightning_fabric.utilities.rank_zero import rank_zero_info, rank_zero_warn

if TYPE_CHECKING:
    from transformer_engine.common.recipe import DelayedScaling

_TRANSFORMER_ENGINE_AVAILABLE = RequirementCache("transformer_engine>=0.11.0")

log = logging.getLogger(__name__)


class TransformerEnginePrecision(Precision):
    """Plugin for training with fp8 precision via nvidia's
    `Transformer Engine <https://docs.nvidia.com/deeplearning/transformer-engine>`__.

    .. warning::  This is an :ref:`experimental <versioning:Experimental API>` feature.

    Args:
        weights_dtype: The weights dtype to use.
        recipe: Recipe for the DelayedScaling
            `configuration <https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/api/common.html#transformer_engine.common.recipe.DelayedScaling>`__.
            In dict format or the dataclass format.
        replace_layers: Whether to replace ``Linear`` and ``LayerNorm`` layers automatically with their Transformer
            Engine alternatives. Note that they don't subclass the torch equivalents so checks like
            ``isinstance(l, torch.nn.Linear)`` will not pass.
        fallback_compute_dtype: The compute dtype to use for operations that don't support fp8 autocast. Defaults to the
            same as ``weights_dtype``.

    .. note::

        Support for FP8 in the linear layers with this plugin is currently limited to tensors
        with shapes where the dimensions are divisible by 8 and 16 respectively. You might want to add padding to your
        inputs to conform to this restriction.
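
    Example (a minimal usage sketch, not part of the original docstring; it assumes
    an FP8-capable GPU and an installed ``transformer_engine`` package)::

        import torch
        from lightning_fabric import Fabric

        # the recipe mapping mirrors the ``DelayedScaling`` dataclass arguments;
        # ``fp8_format`` may be given as the name of a ``Format`` enum member
        precision = TransformerEnginePrecision(
            weights_dtype=torch.bfloat16,
            recipe={"fp8_format": "HYBRID", "amax_history_len": 16},
        )
        fabric = Fabric(devices=1, plugins=precision)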

    """

    precision: Literal["transformer-engine", "transformer-engine-float16"] = "transformer-engine"

    def __init__(
        self,
        *,
        weights_dtype: torch.dtype,
        recipe: Optional[Union[Mapping[str, Any], "DelayedScaling"]] = None,
        replace_layers: Optional[bool] = None,
        fallback_compute_dtype: Optional[torch.dtype] = None,
    ) -> None:
        if not _TRANSFORMER_ENGINE_AVAILABLE:
            raise ModuleNotFoundError(str(_TRANSFORMER_ENGINE_AVAILABLE))
        from transformer_engine.common.recipe import DelayedScaling

        if recipe is None:
            recipe = DelayedScaling()
        elif isinstance(recipe, Mapping):
            recipe = dict(recipe)  # copy
            if "fp8_format" in recipe:
                from transformer_engine.common.recipe import Format

                # allow passing the enum member by name, e.g. ``"HYBRID"``
                recipe["fp8_format"] = getattr(Format, recipe["fp8_format"])
            recipe = DelayedScaling(**recipe)

        self.weights_dtype = weights_dtype
        self.recipe = recipe
        self.replace_layers = replace_layers
        self.fallback_compute_dtype = fallback_compute_dtype or weights_dtype

    @override
    def convert_module(self, module: torch.nn.Module) -> torch.nn.Module:
        # skip conversion if TE layers are already present; assume the user took care of it
        if any("transformer_engine.pytorch" in m.__module__ for m in module.modules()):
            if self.replace_layers is True:
                rank_zero_info(
                    "`TransformerEnginePrecision(replace_layers=True)` is set but the model already contains"
                    " TransformerEngine layers. Skipping"
                )
        elif self.replace_layers in (None, True):
            _convert_layers(module)
        module = module.to(dtype=self.weights_dtype)
        return module

    @override
    def tensor_init_context(self) -> ContextManager:
        return _DtypeContextManager(self.weights_dtype)

    @override
    def module_init_context(self) -> ContextManager:
        dtype_ctx = self.tensor_init_context()
        stack = ExitStack()
        if self.replace_layers:
            import transformer_engine.pytorch as te

            # patch the torch classes so that layers created under this context are TE layers
            context_manager = _ClassReplacementContextManager({
                "torch.nn.Linear": te.Linear,
                "torch.nn.LayerNorm": te.LayerNorm,
            })
            stack.enter_context(context_manager)
        stack.enter_context(dtype_ctx)
        return stack

    @override
    def forward_context(self) -> ContextManager:
        dtype_ctx = _DtypeContextManager(self.weights_dtype)
        fallback_autocast_ctx = torch.autocast(device_type="cuda", dtype=self.fallback_compute_dtype)
        import transformer_engine.pytorch as te

        autocast_ctx = te.fp8_autocast(enabled=True, fp8_recipe=self.recipe)
        stack = ExitStack()
        stack.enter_context(dtype_ctx)
        # enable an outer fallback autocast for operations that do not support fp8
        stack.enter_context(fallback_autocast_ctx)
        stack.enter_context(autocast_ctx)
        return stack

    @override
    def convert_input(self, data: Any) -> Any:
        return apply_to_collection(data, function=_convert_fp_tensor, dtype=Tensor, dst_type=self.weights_dtype)

    @override
    def convert_output(self, data: Any) -> Any:
        return apply_to_collection(data, function=_convert_fp_tensor, dtype=Tensor, dst_type=torch.get_default_dtype())

def _convert_layers(module: torch.nn.Module) -> None:
    import transformer_engine.pytorch as te

    for name, child in module.named_children():
        if isinstance(child, torch.nn.Linear):
            if child.in_features % 8 != 0 or child.out_features % 16 != 0:
                rank_zero_warn(
                    "Support for FP8 in the linear layers with this plugin is currently limited to tensors with"
                    f" shapes where the dimensions are divisible by 8 and 16 respectively. The layer {name!r} does not"
                    " fit this criteria. You might want to add padding to your inputs."
                )
                continue
            has_bias = child.bias is not None
            replacement = te.Linear(child.in_features, child.out_features, bias=has_bias)
            replacement.weight.data = child.weight.data.clone()
            if has_bias:
                replacement.bias.data = child.bias.data.clone()
            log.debug(f"Replacing layer {name!r} with Transformer Engine equivalent")
            module.__setattr__(name, replacement)
        elif isinstance(child, torch.nn.LayerNorm):
            replacement = te.LayerNorm(child.normalized_shape[0], eps=child.eps)
            replacement.weight.data = child.weight.data.clone()
            replacement.bias.data = child.bias.data.clone()
            log.debug(f"Replacing layer {name!r} with Transformer Engine equivalent")
            module.__setattr__(name, replacement)
        else:
            # other Transformer Engine layers would require fusing several torch modules together
            _convert_layers(child)
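

if __name__ == "__main__":
    # A minimal smoke-test sketch, not part of the original module. It assumes an
    # FP8-capable CUDA GPU and an installed ``transformer_engine`` package.
    model = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.LayerNorm(16)).cuda()
    precision = TransformerEnginePrecision(weights_dtype=torch.bfloat16)
    model = precision.convert_module(model)  # swaps in TE layers and casts the weights
    with precision.forward_context():
        # randn picks up bfloat16 from the dtype context entered above
        out = model(torch.randn(8, 16, device="cuda"))
    print(type(model[0]).__module__, out.dtype)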