o
    -i^                     @   s|   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZmZm	Z	 dd Z
dddZG d	d
 d
e jZdd Zdd ZdS )    N)MAX_FINITE_FLOAT8E4B8MAX_FINITE_FLOAT8E4NVMAX_FINITE_FLOAT8E5c                 C   s2   t | tjrt| |ksJ d S | |ksJ d S N)
isinstancetorchTensorall)reftri r   d/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/third_party/triton_kernels/testing.pyassert_equal
   s   r   --Tc                 C   sH  |j jdkr| |j }| j |j krt||ksJ d S |} |  dkr'd S |d u r-d}|d u r3d}	 | tj } |tj }| j|jksVJ d| jd|jt	| }t	|}t
||sjJ dt|d| }	t|d|}
d}d	tt|	|  }|	|9 }	|
|9 }
tt|	 | }t|	|
 t|t|	 }t| }tt|  }|rtd
|||f  td|||f  ||krt||k}|d}|d d }td|| t|j| f  |d}td| t|   td|t|   ||ksJ ||ks"J d S )N   r   g{Gz?gMbp?z&Tensors must have same size ref.shape=z tri.shape=z'Tensor must have same infinite elementsgKH9g      ?z/%s maximum relative error = %s (threshold = %s)z+%s RMS relative error = %s (threshold = %s)i  z5%d / %d mismatched elements (shape = %s) at coords %szref values: ztri values: )dtypeitemsizetor   r	   numelfloat32detachshapeisinfequalwheremaxabssqrtsquaremeanmaximumitemprintnonzerosizetupletolistunbindcpu)r
   r   maxtolrmstoldescriptionverboseref_as_typeinf_mask_refinf_mask_trirefntrineps
multiplierref_rmsrel_errmax_errrms_errbad_idxsnum_nonzeror   r   r   assert_close   sX   $

 


r;   c                   @   s   e Zd ZdZdZdZdZdS )ComputeSanitizerToolmemcheck	racecheck	synccheck	initcheckN)__name__
__module____qualname__MEMCHECK	RACECHECK	SYNCCHECK	INITCHECKr   r   r   r   r<   O   s
    r<   c                     s    fdd}|S )a  
    Decorator to run a test with compute sanitizer enabled and pytorch caching allocator disabled,
    to expose potential memory access errors.
    This decorator requires the `request` fixture to be present.
    If `run_sanitizer` argument is present and set to False, the sanitizer is not run.
    Running tests under compute sanitizer requires launching subprocess and is slow,
    so use sparingly
    c                    s   t   fdd}|S )Nc               
      sv  t jddkr| i | d S dd l} ddr tj   dtj	g}t
|ts4J d|tdd	 |D sIJ d
dd	 |D |t   }  | k}d|v rd||d M }|r2d|vr2|D ]}t jjd }t jd dddd}dt jv rt jd |d< d|v sJ d|d jjj}	| dj d|	 d}
dddd|j tjddd|
g	}
dD ]}|tjv r|
| qtj|
tjtj |d}dt!|j"v pd t!|j"v }|j"}t#|t$u r|% }d}|st&d! d"}n|j'dkrt&d# t&d$|j' d"}|r/t&d% t&d& t&d% t&| t&d% t&d' t&d% J qnd S | i | d S )(NSKIP_COMPUTE_SANITIZER1r   clear_torch_cacheFtools_to_checkztools_to_check=c                 s   s    | ]}|t v V  qd S r   r<   .0toolr   r   r   	<genexpr>p   s    zHcompute_sanitizer.<locals>.decorator.<locals>.wrapper.<locals>.<genexpr>zF(tool for tool in tools_to_check if tool not in ComputeSanitizerTool)=c                 s   s    | ]	}|t vr|V  qd S r   rL   rM   r   r   r   rP   q   s    run_sanitizerzcompute-sanitizer__file__PATH)rS   PYTORCH_NO_CUDA_MEMORY_CACHINGTORCH_SHOW_CPP_STACKTRACESCUDA_LAUNCH_BLOCKINGCUDA_VISIBLE_DEVICESrequest_fixturez@memcheck'ed test must have a (possibly unused) `request` fixturez::[]z#--target-processes=application-onlyz!--destroy-on-device-error=contextz--tool=z-mpytestz-vsx)z--update_checksumz--ignore_checksum_error)stdoutstderrenvzERROR SUMMARY: 0 errorsz&RACECHECK SUMMARY: 0 hazards displayedz#compute-sanitizer returned an errorTz_The test failed due to some other reason: consider running without compute-sanitizer to verify.zout.returncode=z5*****************************************************z5******************** TEST OUTPUT ********************z5****************** TEST OUTPUT END ******************)(osenvirongetpsutilpopr   cudaempty_cacher<   rD   r   listr	   Processgetppidexeitemspathrealpath__globals__nodecallspecidrA   valuesys
executableargvappend
subprocessrunPIPESTDOUTstrr\   typebytesdecoder#   
returncode)argskwargsrb   rK   	ppid_namerun_compute_sanitizerrO   rk   r^   test_idcmdoptoutsanitizer_oktest_outputfail)target_kwargstest_fnr   r   wrapperb   s   





>z5compute_sanitizer.<locals>.decorator.<locals>.wrapper)	functoolswraps)r   r   r   )r   r   	decorator`   s   Uz$compute_sanitizer.<locals>.decoratorr   )r   r   r   r   r   compute_sanitizerV   s   
Zr   c                 C   s*   t jtt jtt jti| }|   | S r   )	r   float8_e5m2r   float8_e4m3fnr   float8_e4m3fnuzr   r   r   )xr   
max_finiter   r   r   compute_actual_scale   s   r   )NNr   T)enumr   r_   rv   rr   r   triton_kernels.numericsr   r   r   r   r;   Enumr<   r   r   r   r   r   r   <module>   s    
>g