o
    i7                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZm	Z	 d dl
mZ d dlmZ ej o4ejjZej o=ejjZd dlmZmZmZ d dlmZmZmZ d dlmZ G dd	 d	eZee ejj e d
ddd Z!g dZ"g dZ#g dZ$ej%ej&gZ'e(e )e"e#e$e'Z*ej+fddZ,ejj dej-.dvddejj	de*e/dej	dej+ej0gdd Z1ejj dej-.dvddejj	de*e/dej	dej+ej2gdd Z3ejj dej-.dvpdej-.dvddejj ed d dej	d!d"d#gej	d$d%d&gej	dej+ej4ej5gej	d'd(d)gej	d*e e ed+gej	d,e e ed+gd-d. Z6e7d/krVe8ej9 dS dS )0    N)IS_LINUXTestCaseinstantiate_parametrized_testsparametrize)opcheck)torch_version_at_least)PerGroupPerRow	PerTensor)_choose_scale_float8_dequantize_affine_float8_quantize_affine_float8)get_block_sizec                   @   s  e Zd Z													d#ddZ								d$ddZejjed	 d
dejje	 ddejjde
jdvddede
je
jgedddgedddgedddgedddgedddged de
je
jgd!d" ZdS )%TestOpsNr   F      ?c                 C   s  | tj| | }| tj|
 |	 }| tj| | }dt|d }||dd }|| }|d ur@|| tj }|jdddj}|| }t	|}tj
|ddd}|| }tjt|| | ddd}|| | }|| }tjt|| | ddd}| tjS )	N   Tdimkeepdimr      minmax)totorchfloatmathsqrtsize	transposer   valuesexpsumclamprounduint8)selfqkv	attn_mask	dropout_p	is_causalq_scaleq_zpk_scalek_zpv_scalev_zpa_scalea_zpo_scaleo_zpscale_factorattnattn_maxattn_sumout r>   A/home/ubuntu/.local/lib/python3.10/site-packages/test/test_ops.py_scaled_dot_product_int8_op_ref'   s$   
z'TestOps._scaled_dot_product_int8_op_refc                 C   s  | tj| }| tj| }| tj|	 }dt|d }||dd }|| }|d ur:|| tj }|jdddj}|| }t	|}tj
|ddd}|| }tj||
 ddd}| tj tj}||
 }|| }tj|| ddd}| tjS )	Nr   r   r   Tr   @  r   )r   r   r   r   r   r    r!   r   r"   r#   r$   r%   float8_e4m3fn)r(   r)   r*   r+   r,   r-   r.   r/   r1   r3   r5   r7   r9   r:   r;   r<   r=   r>   r>   r?   _scaled_dot_product_fp8_op_refM   s&   
z&TestOps._scaled_dot_product_fp8_op_refz2.7.0z*quantized sdpa requires torch 2.7 or laterreasonzonly support on linuxCPUztorchao::qscaled_dot_productcpp kernels not builtinput_dtype
batch_size8   x   n_head      	q_seq_len   Y   
kv_seq_lend      head_dim    @   
mask_dtypec           "      C   sh  t d d}|t jkr9td}	td}
td}td}td}td}td	}td}td
}td}d\}}ntd}	td}
td}td}td}d\}}||||g}||||g}|dd|g}t j|t j|ddd}t j|t j|ddd}t j|t j|ddd}|t jkr|d9 }|d9 }|d9 }||}||}||}|d urt j|||dnd }| | | |d ur| nd f\}}}}|t jkr| j	||||dd|	||
|||||||d} t j
jj||||dd|	||
|||||||d}!n"| j||||dd|	|
|||d} t j
jj||||dd|	|
|||d}!| j|! |  ||d d S )Ni  cpug    Φ?g   ?g    l?g    p?g   `P?   }   rL      )r   h㈵>g     @g      @g     `?g     `@g     `	@)g      ?r^   r   )dtypedevicerN   rT   g        F)r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   )r,   r-   r.   r/   r1   r3   r5   r7   atolrtol)r   manual_seedr'   r   intrandnr!   r   cloner@   opstorchaoqscaled_dot_productrD   assertEqual)"r(   rI   rJ   rM   rP   rS   rV   rY   r`   r/   r1   r3   r5   r7   r0   r2   r4   r6   r8   rb   rc   q_shapekv_shape
mask_shaper)   r*   r+   r,   q2k2v2attn_mask_2math_refactualr>   r>   r?   $test_quantized_scaled_dot_product_opp   s   






z,TestOps.test_quantized_scaled_dot_product_op)Nr   Fr   r   r   r   r   r   r   r   r   r   )Nr   Fr   r   r   r   r   )__name__
__module____qualname__r@   rD   pytestmarkskipifr   r   r   _C_dispatch_dumpr   r'   rC   float32bfloat16ru   r>   r>   r>   r?   r   &   sR    
+
# r   zROCm not availablerE   c                  C   s^   g d} |  d tjddtjddd}tjddtjddd}ttjjj||d	d	f| d
 d S )N)test_schematest_autograd_registrationtest_faketensortest_aot_dispatch_dynamicr   rO   )rO   rW   cuda)r_   r    r`   )rW   rO   F)
test_utils)appendr   randintr   r   rh   ri   
swizzle_mm)r   mat1mat2r>   r>   r?   test_swizzle_mm   s   


r   )r   rN      
   )r   rN   r]   i   )r   r]   i   c                 C   s  |t jt jt jfv sJ d}d}|dkrd}n|dkrd}n|dkr$d}t d||  f|}	t d|d |  | |}
t jjd||t j|d	}|t jkrcd
|j	j
   }|j	j
| |}nt dg}|j	j
|}||j	j|j	_
d}|t jt jfv rd}t  a ||	|
| }|t jkrt || t j}t |dd|t j}n|t jkrt || dd|t j}t jj||	|
|||||t j}t jj||ddd W d    d S 1 sw   Y  d S )NTr$   r   meanr   r   rN   i  )moder_   include_last_offsetg     _@g       @r   ir[   rA   rB   gh㈵>ra   )r   r   int8rC   r   r   arangennEmbeddingBagweightdataabsr   tensorr_   no_gradforwardr&   int32r%   rh   ri   _scaled_embedding_bagtestingassert_close)	multi_hotrJ   vector_size
index_typeqtype	out_dtyper   r   	mode_enumindicesoffsetsmweight_scaleqweight	out_scalerefe_outtest_outr>   r>   r?   %_test_scaled_embedding_bag_cpu_helper!  sp   



	
"r   rG   ztorchao::_scaled_embedding_bagrH   z.multi_hot, batch_size, vector_size, index_type)idsr   c                 C      t | |||tj| d S N)r   r   r   r   rJ   r   r   r   r>   r>   r?   "test_scaled_embedding_bag_int8_cpug     r   c                 C   r   r   )r   r   rC   r   r>   r>   r?   !test_scaled_embedding_bag_fp8_cpu~  r   r   z"torchao::float8_linear_prepack_cpuztorchao::float8_linear_cpuz2.6.0zTest only enabled for 2.6+shape)rX   rX   )   r   bsr      biasTFx_granularityr]   w_granularityc                 C   s<  | \}}t |tr|j|krd S t |tsd S t |tr#|j|kr#d S tjj|||d }|j}	t||}
t	|
j
|}t|
tj|d}t|
|tj}|j }t	|j
|}t|tj|d}t||tj}t||}t||}tjj|||	|}tjj||\}}tjj|||||	|}tjj||ddd d S )N)r   )float8_dtype
block_sizeg{Gz?ra   )
isinstancer   
group_sizer   r   Linearevalr   rf   r   r   r   rC   r   r   detachr   
functionallinearr   rh   ri   float8_linear_prepack_cpufloat8_linear_cpur   r   )r   r   r   r   r   r   
in_featureout_featurer   bxx_block_sizex_scalex_fp8ww_block_sizew_scalew_fp8x_dqw_dqrefpacked_wpacked_scaleyr>   r>   r?   test_float8_linear_cpu  sF   







r   __main__):	itertoolsr   sysry   r   $torch.testing._internal.common_utilsr   r   r   r   torch.testing._internal.optestsr   torchao.utilsr   r   is_availableversionIS_CUDAhipIS_ROCMtorchao.quantizationr   r	   r
   %torchao.quantization.quant_primitivesr   r   r   torchao.quantization.utilsr   r   rz   r{   r   EMBEDINGBAG_MULTIHOT_SIZESEMBEDINGBAG_BAG_SIZESEMBEDINGBAG_VECTOR_SIZESint64r   EMBEDINGBAG_INDEX_DTYPESlistproductEMBEDINGBAG_TEST_PARAMSr   r   r|   r}   strr   r   rC   r   r   halfr   rv   mainargvr>   r>   r>   r?   <module>   s    V

F

*