o
    i                     @   s   d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlmZ 		ddejdejjdejd	ejjd
ejde	dedefddZdejdejdejjdejd	ejjd
ejfddZdS )    )ListN)Float8LinearConfigScalingType)(precompute_float8_dynamic_scale_for_fsdpF	ref_model	ref_optim
fsdp_model
fsdp_optim	local_inpconfig
precomputecompile_transformer_blockc	                 C   s6  t | | D ]\}	}
| j|	j|
jd|	j d|
j d q	tdD ]t}g }||f||ffD ]N\}}|j|d dkd |||  |d   ||u ri| D ]}|jrht	
|j |jt	  qU|  ||u r~|r~|jjtju r~t| q0| j|d |d	 d
| d|d  d|d	  d q$d S )Nzref_param.requires_grad: z, fsdp_param.requires_grad: msg
      r   set_to_none   iter: , loss-ref: , loss-fp8: )zip
parametersassertEqualrequires_gradrange	zero_gradappendsumbackwarddist
all_reducegraddiv_get_world_sizestepcast_config_weightscaling_typer   DYNAMICr   )test_clsr   r   r   r	   r
   r   r   r   	ref_param
fsdp_paramiter_idxlossesmodeloptimparam r3   X/home/ubuntu/.local/lib/python3.10/site-packages/torchao/testing/training/fsdp2_utils.pycheck_parity_no_mp   s>   r5   ref_model_bf16c                 C   s  t dD ]}g }||f||ffD ]_\}	}
|
j|d dkd ||	|  |d   |	|u rVt| | D ]\}}t|j	 |j	
t  |j	 |_	d |_	q:|
  t| | D ]\}}| | qcq| j|d |d d| d|d  d	|d  d
 qd S )Nr   r   r   r   r   r   r   r   r   r   )r   r   r   r    r!   r   r   r"   r#   r$   r%   r&   floatr'   detachcopy_r   )r+   r   r6   r   r   r	   r
   r.   r/   r0   r1   
param_bf16
param_fp32r3   r3   r4   check_parity_bf16_mpB   s8   	r<   )FF)typingr   torchtorch.distributeddistributedr"   torch.nnnntorchao.float8.configr   r   torchao.float8.fsdp_utilsr   Moduler1   	OptimizerTensorboolr5   r<   r3   r3   r3   r4   <module>   sL   	
/