o
    i<                     @   s   U d dl Z d dlZd dlmZ d dlmZmZ d dlZd dlm	Z	 e
 Zejed< G dd de jZG dd	 d	e jZeG d
d dZe ZejZejZeddG dd dZeddG dd dZG dd de jZeddG dd dZdS )    N)	dataclass)OptionalUnion)is_MI300loggerc                   @       e Zd ZdZdZdZdd ZdS )ScalingTypez
    Defines the type of scaling to use for casting to float8.

    Values:

    * ``DYNAMIC``: Compute scaling factor dynamically based on the tensor's values.
    * ``DISABLED``: Skip scaling for this tensor, leave it in its original precision.
    dynamicdisabledc                 C       | t ju rdS | t ju sJ dS )Ndyndis)r   DYNAMICDISABLEDself r   I/home/ubuntu/.local/lib/python3.10/site-packages/torchao/float8/config.py	short_str"      
zScalingType.short_strN)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r      s
    	r   c                   @   r   )ScalingGranularityz
    Defines the granularity of scaling strategies for casting to float8.

    Values:

    * ``TENSORWISE``: A single scaling factor for the entire tensor.
    * ``AXISWISE``: Scaling factors computed along one axis of the tensor (rowwise scaling).
    
tensorwiseaxiswisec                 C   r   )Ntenaxs)r   
TENSORWISEAXISWISEr   r   r   r   r   :   r   zScalingGranularity.short_strN)r   r   r   r   r   r    r   r   r   r   r   r   *   s
    
r   c                   @   s$   e Zd ZdZejZejZdd Z	dS )Float8TypeConfigz
    Configuration for selecting the preferred float8 type pair, either e4m3fn/e5m2 or e4m3fnuz/e5m2fnuz.

    Currently, ROCm supports 1. fnuz variants in MI300. 2. OCP F8 variants in MI350/Navi4.
    c                 C   s8   t jjrt j rt rt j| _t j| _	d S d S d S d S )N)
torchversionhipcudais_availabler   float8_e4m3fnuz
e4m3_dtypefloat8_e5m2fnuz
e5m2_dtyper   r   r   r   __post_init__P   s   zFloat8TypeConfig.__post_init__N)
r   r   r   r   r"   float8_e4m3fnr(   float8_e5m2r*   r+   r   r   r   r   r!   B   s
    r!   T)frozenc                   @   sP   e Zd ZU dZejZeed< ej	Z
eed< dZeej ed< dd Zdd	 ZdS )

CastConfiga  
    Configuration for casting a single tensor to float8.

    Args:
        scaling_type: The type of scaling to use. See :class:`ScalingType`.
            Default: ``ScalingType.DYNAMIC``
        scaling_granularity: The granularity of scaling. See :class:`ScalingGranularity`.
            Default: ``ScalingGranularity.TENSORWISE``
        target_dtype: The target float8 dtype (e.g., ``torch.float8_e4m3fn``).
            Default: ``None`` (will be set based on the recipe)
    scaling_typescaling_granularityNtarget_dtypec                 C   s2   t dtdi| j }| j  d| j  d| S )Ne4m3e5m2_)r(   r*   r2   r0   r   r1   )r   dtyper   r   r   r   n   s    zCastConfig.short_strc                 C   sN   | j tju r| jtju sJ d| jd u s#| jjr| jjdks%J dd S d S )NzGonly dynamic scaling type is supported for axiswise scaling granularity   z)must specify a 8-bit floating-point dtype)	r1   r   r    r0   r   r   r2   is_floating_pointitemsizer   r   r   r   r+   r   s   
zCastConfig.__post_init__)r   r   r   r   r   r   r0   __annotations__r   r   r1   r2   r   r"   r6   r   r+   r   r   r   r   r/   \   s   
 r/   c                   @   s   e Zd ZU dZdZeed< dS )Float8GemmConfigz
    Configuration for a float8 gemm.

    Args:
        use_fast_accum: If True, use fast accumulation in lower precision.
            This can improve performance but may reduce numerical accuracy.
            Default: ``False``
    Fuse_fast_accumN)r   r   r   r   r<   boolr:   r   r   r   r   r;   |   s   
 r;   c                   @   s   e Zd ZdZdZdZdZdS )Float8LinearRecipeNameaV  
    Pre-made recipes for common float8 training configurations.

    Values:

    * ``TENSORWISE``: Default, dynamic per-tensor scaling with the cuBLAS tensorwise kernel.
      Fastest option.
    * ``ROWWISE``: Dynamic rowwise scaling with the CUTLASS rowwise kernel.
      Uses e4m3 for activations, weights, gradients. Scales are rounded (floor) to
      the nearest power of two for increased accuracy.
    * ``ROWWISE_WITH_GW_HP``: A modification on rowwise scaling with increased accuracy
      for grad_weight by keeping grad_weight computation in high precision. Most accurate option.
    r   rowwiserowwise_with_gw_hpN)r   r   r   r   r   ROWWISEROWWISE_WITH_GW_HPr   r   r   r   r>      s
    r>   c                   @   s   e Zd ZU dZe Zeed< dZee ed< e Z	eed< dZ
ee ed< e Zeed< dZee ed< ed	d
Zeed< e Zeed< e Zeed< dZeed< dZeed< dZeed< dZeed< dZeed< dd Zedeeef dd fddZdS )Float8LinearConfigz]
    Configuration for converting a `torch.nn.Linear` module to float8
    for training.
    cast_config_inputN!cast_config_input_for_grad_weightcast_config_weight!cast_config_weight_for_grad_inputcast_config_grad_output'cast_config_grad_output_for_grad_weightT)r<   gemm_config_outputgemm_config_grad_inputgemm_config_grad_weightFenable_fsdp_float8_all_gatherpad_inner_dimemulate!force_recompute_fp8_weight_in_bwdround_scales_to_power_of_2c                 C   s  | j d u rt| d| j | jd u rt| d| j | jd u r't| d| j | jjt	j
kr:| jr:J d| jj | j}| j}| j}| j }| j}| j}||df||df||dffD ]\}}}	|jtju }
|jtju }|
|ksvJ d|	 qZ||d	tf||d
tf||dtffD ]+\}}}}|jd u rt|d| |jd u rt|d| |j|jksJ | dq| jrtd d S d S )NrE   rG   rI   zPenable_fsdp_float8_all_gather only supports tensorwise scaling granularity, got output
grad_inputgrad_weightz#incompatible operand precision for inputweightgrad_outputr2   z< must be cast to the same dtype in both matmuls it's used inz`config.force_recompute_fp8_weight_in_bwd` is deprecated and will be removed in a future release. Please see https://github.com/pytorch/ao/issues/2251 for more details.)rE   object__setattr__rD   rG   rF   rI   rH   r1   r   r   rM   r0   r   r   r(   r*   r2   rP   r   warning)r   cc_icc_wcc_gocc_i_gwcc_w_gicc_go_gwcc1cc2	gemm_nameis_disabled_1is_disabled_2operand_namedefault_dtyper   r   r   r+      sb   








z Float8LinearConfig.__post_init__recipe_namereturnc              	   C   s  t | tkrdd tD }| |v sJ d|  d| t| } | tju r't S | tju rIttjt	d}ttjt	d}ttjt	d}t|||ddS | tj
u rttjd}ttjd}ttjt	d}ttjd}ttjd	}ttjt	d
}t||||||ddS td|  )z
        Input: `Float8LinearRecipeName` value, or a string representing a `Float8LinearRecipeName` value
        Output: a `Float8LinearConfig` configured to implement the specified recipe
        c                 S   s   g | ]}|j qS r   )value).0nr   r   r   
<listcomp>E  s    z7Float8LinearConfig.from_recipe_name.<locals>.<listcomp>zrecipe_name z not in valid names )r1   r2   T)rD   rF   rH   rQ   )r1   )r0   )r0   r2   )rD   rF   rH   rE   rG   rI   rQ   zunknown recipe_name )typestrr>   r   rC   rA   r/   r   r    r(   rB   r   r   AssertionError)rh   valid_namesr[   r\   r]   r_   r^   r`   r   r   r   from_recipe_name<  sZ   



z#Float8LinearConfig.from_recipe_name)r   r   r   r   r/   rD   r:   rE   r   rF   rG   rH   rI   r;   rJ   rK   rL   rM   r=   rN   rO   rP   rQ   r+   staticmethodr   r>   ro   rr   r   r   r   r   rC      s.   
 D
rC   )enumloggingdataclassesr   typingr   r   r"   torchao.utilsr   	getLoggerr   Loggerr:   Enumr   r   r!   type_configr(   r*   r/   r;   r>   rC   r   r   r   r   <module>   s*   
&