o
    پi                     @   s\   d dl Z dd ZdedefddZe jfdededee fd	d
Zde jde jfddZdS )    Nc                 C   sr  t d}t j}| d | }t jdd}|| t jdd}|| |d r-|d }	n|}	~|d  }
d urd}d	}t||\}}|d
ksIJ t j||| |	|||d}|
dkr`t| |}n#|
dkrlt| ||d}n|
dkrt j	| gd
g|d   t j
d}nttd|
d|d|j ||}||fS t j| |	|||d}|t j|j||ddk   d9  < |d fS )Ncudai'  cpu)devicefuse_silu_and_mul   masked_layout_modei   i   r   )r   dtype	generatorbalanced
imbalanced)gen_cpuextreme   r   zmasked_layout_mode=z
 masked_m=z	 x.shape=)r   r	   gMbP?
   )torchr   bfloat16	Generatormanual_seeddivmodrandn_compute_balanced_split_compute_imbalanced_splittensorintNotImplementedErrorprintshapeto)
num_tokens
hidden_dim	num_ranksflagsr   r   seedr   gen_cudaeffective_hidden_dimr    num_max_dispatch_tokens_per_ranknum_global_expertsnum_local_experts	remainderxmasked_m r,   I/home/ubuntu/.local/lib/python3.10/site-packages/sgl_kernel/test_utils.py&create_per_token_group_quant_test_data   s^   



	
"r.   totalarr_lenc                    sH   | |  | |  fddt |D }t|| ksJ tj|tjdS )Nc                    s    g | ]}|k r d  n qS r   r,   ).0ibaser)   r,   r-   
<listcomp>D   s     z+_compute_balanced_split.<locals>.<listcomp>r   )rangesumr   r   r   )r/   r0   ansr,   r4   r-   r   A   s
   r   returnc           	      C   s   t j||dd }||  }||   |}| |   }|dkrZt jd|d|d }|dkr@||  d7  < |d8 }n|dk rV|| dkrV||  d8  < |d7 }|dks$t|| ksbJ |S )N)r	      r   r1   r   )r   randr8   roundr   itemrandint)	r/   r0   r   r   	noise_rawnoiser9   diffidxr,   r,   r-   r   I   s   
	r   abc           
      C   s  | j |j kr| j|jks J d| j d|j d| jd|j|  }| jtjkra| tj}|tj}|tj|tj 	 }|dk|dk @ 
  }|dk
  }|dk
  }n3| jtjkr| tj| tj 	 }	| dk|dk @ 
  }|	dk
  }|	dk
  }nt|dkr|dkr|| dk s|| d	k r|d
ksJ d|d|d|d|d| d|d S d S )Nza.shape=z	 b.shape=z	 a.dtype=z	 b.dtype=r   r   r   g{Gzt?g{Gz?i   zcount_diff_sign=z count_tiny_diff=z count_large_diff=z numel=z a=z b=)r   r   numelr   float8_e4m3fnviewuint8r   int16absr8   r>   int8r   )
rD   rE   rF   a_u8b_u8diff_u8count_diff_signcount_tiny_diffcount_large_diffrB   r,   r,   r-   assert_all_close_or_tiny_diff`   s4   "&rS   )r   r.   r   r   listr   TensorrS   r,   r,   r,   r-   <module>   s    =	
