o
    پi                  
   @   sD  d dl Z d dlmZmZ d dlmZ d dlmZmZ d dl	Z	zd dl
mZ W n ey> Z zdd ZeZW Y dZ[ndZ[ww d dl	mZ d	ed
edefddZei fdee dee deeee f defddZdedee defddZ	d'dedededefddZdd Zdd Zd d! Zd"ejd#e	jd$efd%d&ZdS )(    N)IterableMapping)MappingProxyType)AnyOptional)dynamic_mxfp4_quantc                  O   s   t d)NzDFailed to import aiter. Make sure AITER is installed and accessible.)ImportError)argskwargs r   ^/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/quantization/quark/utils.pyraise_aiter_import_error   s   r   )nndict1dict2returnc                    sl   t  t ur
dS t tr%   krdS t fdd D S t tr2t tkS  kS )NFc                 3   s"    | ]}t  | | V  qd S )N)deep_compare).0kr   r   r   r   	<genexpr>   s     zdeep_compare.<locals>.<genexpr>)type
isinstancedictkeysalllistsetr   r   r   r   r      s   

r   
layer_nameignorefused_mappingc                    s    d u rdS   dd |v rC| } fdd|D }d }|D ]}t||d}|d u r2|}q#||krAtd| d  d	q#nt |d}|d usOJ |S )
NF.c                    s   g | ]}  |qS r   )replace)r   shard_proj_namer   	proj_namer   r   
<listcomp>8   s    
z'should_ignore_layer.<locals>.<listcomp>)r   targetsz+Found a different quantization schemes for z in z+. vLLM requires all to use the same scheme.)splitcheck_equal_or_regex_match
ValueError)r   r   r    shard_proj_namesshard_namesshould_ignore_layer
shard_nameshould_ignore_shardr   r%   r   r.   $   s<   r.   r(   c                 C   s   |D ]
}t | |r dS qdS )z
    Checks whether a layer_name is exactly equal or a regex match for
    if target starts with 're:' to any target in list.
    TF)_is_equal_or_regex_match)r   r(   targetr   r   r   r*   \   s
   
r*   Fvaluer2   check_containsc                 C   sV   | dr|dd }t|| rdS dS |r#| |  v r!dS dS || kr)dS dS )z
    Checks whether a value is exactly equal or a regex match for target
    if target starts with 're:'. If check_contains is set to True,
    additionally checks if the target string is contained within the value.
    zre:   NTF)
startswithrematchlower)r3   r2   r4   patternr   r   r   r1   g   s   
	r1   c                 C   sD   | j \}}}t| d|\} }| |||d ||||d fS )Nr"          )shaper   reshapeview)xhbdx_scalesr   r   r   b_dynamic_mxfp4_quant}   s   $rE   c                 C   s   | j ddd} |r.| dd d df d@ | dd d df< | ddd df d? | ddd df< n,| d d d d df d@ | d d d d df< | d d dd df d? | d d dd df< g d}tj|tjd	d
}||   S )Nr;   r"   dim.         )g        g      ?g      ?g      ?g       @g      @g      @g      @g       g      g      g      g       g      g      g      cuda)dtypedevice)repeat_interleavetorchtensorfloat32long)r@   is_3d
mxfp4_listmxfp4_in_f32r   r   r   mxfp4_to_f32   s   $&,,rV   c                 C   s(   d|  tjd  }td||dk< |S )Nr;      nan   )torO   rQ   float)r@   x_f32r   r   r   e8m0_to_f32   s   r]   	self_attnwquant_formatc                 C   s  d|v r|j tjkrR|dd| j| j fj| j| jgdd\}}t|dd\}}|dd}|dd}t|\}}|dd	 dd}|	 dd}nk|j tj
krt|dtj}| jjjd	dd}t|tj}|| }|dd| j| j fj| j| jgdd\}}t|dd\}}|dd}|dd}t|\}}|dd	 dd}|	 dd}||||fS d S )
Nmxfp4r   r"   rI   rF   r;   Tr<   )rL   rO   bfloat16	unflattenqk_nope_head_dim
v_head_dimr)   rE   	transpose
contiguousuint8rV   rZ   	kv_b_projweight_scalerN   r]   )r^   r_   r`   w_kcw_vcw_s_kcw_s_vcw_scalesr   r   r   quark_post_load_weights   s>   

rq   )F)r7   collections.abcr   r   typesr   typingr   r   rO   aiter.ops.triton.quantr   r   errr   r   boolr   strr   r.   r*   r1   rE   rV   r]   ModuleTensorrq   r   r   r   r   <module>   sN   
8
 