o
    i3                     @   s   d Z ddlmZ ddlZddlmZmZ ddlmZ ddl	m
Z
mZ ddlmZmZmZ ddlmZ ejjG d	d
 d
ejjZG dd dejjZdS )zD
A simple module swap UX for a float8 version of `torch.nn.Linear`.
    )OptionalN)Float8LinearConfigScalingType)tensor_already_casted_to_fp8)get_maybe_axiswise_dimhp_tensor_to_float8_dynamic)GemmInputRoleLinearMMConfigScaledMMConfig)!WeightWithDynamicFloat8CastTensorc                	   @   s>   e Zd ZdZedejdejdedefddZ	edd	 Z
d
S )matmul_with_hp_or_float8_argsa  
    Like torch.matmul, but with the arguments in either high precision or float8.
    * if the arguments are in high precision, they are cast to float8 according
      to the specified config
    * if the arguments are in float8, we assume the cast honored the config
    input_hpweight_hp_tlinear_mm_configconfigc              	   C   s   |  || || _|| _|}t|r|}n |jjtju r|}nt||jj	|t
j|jjtd|jj|jd}t|r<|}n |jjtju rF|}nt||jj	|t
j|jjtd|jj|jd}|j}|d|d }	t|	|}
|
jg |d d |
jd R  }
|
S )Ngemm_input_rolescaling_granularityaxiswise_dimround_scales_to_power_of_2r   )save_for_backwardr   r   r   cast_config_inputscaling_typer   DISABLEDr   target_dtyper   INPUTr   r   r   cast_config_weightWEIGHTshapereshapetorchmm)ctxr   r   r   r   cinput_maybe_fp8weight_maybe_fp8_t
orig_shapeinput_maybe_fp8_reshapedres_bits r*   P/home/ubuntu/.local/lib/python3.10/site-packages/torchao/float8/float8_linear.pyforward%   sN   "z%matmul_with_hp_or_float8_args.forwardc              	   C   s  | j \}}| j}|j}|d|d }t|r|}n!|jjtju r$|}nt	||jj
| jtj|jjtd|jj|jd}t|rB|}n!|jjtju rL|}nt	||jj
| jtj|jjtd|jj|jd}t|| }	|	jg |d d |	jd R  }	|j}
|d|
d }t|r|}n!|jjtju r|}nt	||jj
| jtj|jjtd|jj|jd}t|r|}n!|jjtju r|}nt	||jj
| jtj|jjtd|jj|jd}t| |}d}|	| g|R S )Nr   r   r   )NN)saved_tensorsr   r   r    r   cast_config_grad_outputr   r   r   r   r   r   r   GRAD_OUTPUTr   r   r   !cast_config_weight_for_grad_inputr   r!   r"   t'cast_config_grad_output_for_grad_weight!cast_config_input_for_grad_weightr   )r#   grad_outputr   r   r$   grad_output_orig_shapegrad_output_reshaped#grad_output_reshaped_maybe_fp8_dim0weight_t_maybe_fp8_dim0
grad_inputinput_hp_orig_shapeinput_hp_reshaped#grad_output_reshaped_maybe_fp8_dim1input_reshaped_maybe_fp8_dim1grad_weightempty_gradsr*   r*   r+   backward]   s   

z&matmul_with_hp_or_float8_args.backwardN)__name__
__module____qualname____doc__staticmethodr!   Tensorr	   r   r,   r@   r*   r*   r*   r+   r      s    7r   c                       s\   e Zd ZdZ fddZdejdejfddZ fdd	Ze		
dde
e fddZ  ZS )Float8Lineara   
    Note: this is **not** a public API and is only intended to be used
    inside of this repository. Please file an issue if you would benefit
    from this being a public API.

    A wrapper around a `torch.nn.Linear` module which does fp8 compute.
    c                    s   | d}t j|i | |jj| _|jj| _|jj| _	|| _
tt|j| j
jjd| j
jt|j| j
jjd| j
jt|j| j
jjd| j
j| _dS )zv
        Additional arguments on top of `torch.nn.Linear`'s arguments:
        * `config`: Float8LinearConfig
        r   FN)popsuper__init__r   r   scaling_type_inputr   scaling_type_weightr.   scaling_type_grad_outputr   r	   r
   emulategemm_config_outputuse_fast_accumpad_inner_dimgemm_config_grad_inputgemm_config_grad_weightr   )selfargskwargsr   	__class__r*   r+   rJ      s4   




zFloat8Linear.__init__inputreturnc                 C   sT   t  rt  }||}t|| j | j| j	}| j
d ur(|| j
|j }|S N)r!   is_autocast_enabledget_autocast_gpu_dtypetor   applyweightr1   r   r   biasdtype)rT   rY   autocast_dtypeoutputr*   r*   r+   r,      s   

zFloat8Linear.forwardc                    s   | j }d|j  }d|j  }d|j  }|||g}|j|jkr1|d|j   |j|jkrB|d|j   |j|jkrS|d|j   d	|}t
   d| d	}|S )
Nzi:zw:zgo:zi_gw:zw_gi:zgo_gw:,z, cast_configs=")r   r   	short_strr   r.   r3   appendr0   r2   joinrI   
extra_repr)rT   r$   cicwcgopartscast_config_strsrW   r*   r+   rj     s    

zFloat8Linear.extra_reprNr   c                 C   s   |du rt  }td | |j|jd|d}W d   n1 s!w   Y  |j|_|j|_|jrN|jj	t
ju s:J tjjt|j|j|jjj|jjd|_|S )z
        Create an nn.Linear with fp8 compute from a regular nn.Linear

        Args:
            mod (torch.nn.Linear): nn.Linear to convert
            config (Optional[Float8LinearConfig]): configuration for conversion to float8
        NmetaF)ra   r   )requires_grad)r   r!   devicein_featuresout_featuresr`   ra   enable_fsdp_float8_all_gatherr   r   r   DYNAMICnn	Parameterr   r   r   r   rr   )clsmodr   new_modr*   r*   r+   
from_float%  s.   	zFloat8Linear.from_floatr[   )rA   rB   rC   rD   rJ   r!   rF   r,   rj   classmethodr   r   r}   __classcell__r*   r*   rW   r+   rG      s    'rG   )rD   typingr   r!   torchao.float8.configr   r    torchao.float8.distributed_utilsr   #torchao.float8.float8_scaling_utilsr   r   %torchao.float8.float8_training_tensorr   r	   r
   torchao.float8.fsdp_utilsr   _dynamoallow_in_graphautogradFunctionr   rx   LinearrG   r*   r*   r*   r+   <module>   s    3