o
    پi                     @   s   d dl Z d dlZd dlmZ e eZd dlmZ G dd deZ	dddZ
d	d
 Zdd Zdd Zdd Z	d	dddZG dd dZdS )    N)cpu_has_amx_support)IntEnumc                   @   s   e Zd ZdZdZdZdZdS )CPUQuantMethodr            N)__name__
__module____qualname__UNQUANT	INT8_W8A8	FP8_W8A16	INT4_W4A8 r   r   O/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/amx_utils.pyr      s
    r   Fc                 C   sL   | j t dkr
| S t s| S |rtjj| d| dS tjj| S )Ncpu)	devicetorchr   ops
sgl_kernelcausal_conv1d_weight_packviewsizeconvert_weight_packed)weightis_convr   r   r    amx_process_weight_after_loading   s   r   c                 C   sn   d}d}| j }|dkr| dn| d}|dkr| dn| d}||k p,|| dk}|| dk}|o6|S )N       r   r   r   r   )ndimr   )r   TILE_NTILE_Kr    OCICis_oc_supportis_ic_supportr   r   r   dim_is_supported#   s   r'   c                 C   s   | j tjtjtjtjfv S N)dtyper   float16bfloat16int8float8_e4m3fnr   r   r   r   dtype_is_supported.   s   r/   c                 C   s   |   dko| ddkS )Nr   r   )dimr   r.   r   r   r   is_dim_conv_weight7   s   r1   c              
   C   sF   g }| D ]}| | |d|dd|df}|| q|S )Nr   r   r   )as_strided_r   strideappend)
conv_stateconv_state_cpuconv_shape_tconv_shape_newr   r   r   _init_amx_conv_state;   s   	r9   returnc           
         st   fdd|D }t |dksJ d| }|r%t |t |ks%J dt|D ]c\}}t |}|r?|| r?|j||  }t|}t|rKt|sg|sgt	d| d|j
 d|j d	  d
	 d _ d S tjjt||dd}	|j|	_t ||	 |r|d|d}||	 q)|tdkot  _ jrt dr jd urtjj jj dd _d S d S d S d S )Nc                    s   h | ]}t  |jqS r   )getattrr   ).0weight_namemoduler   r   	<setcomp>P   s    z4_amx_process_weight_after_loading.<locals>.<setcomp>r   z,Expects all weights to be on the same devicez8len(weight_names) should be equal to len(transpose_dims)z:Unsupported dimension or dtype for prepacking for weight 'z' with shape z and dtype z in zA. The derived (OC, IC) dimensions must be divisible by (16, 32). F)requires_gradr   r   bias)lenpop	enumerater;   	transposer1   r'   r/   loggerwarningshaper)   use_intel_amx_backendr   nn	Parameterr   __dict__setattrr   r   copy_r   r   hasattrrB   datafloat)
r?   weight_namestranspose_dimsdevicesr   ir=   weight_tensoris_conv_weightpacked_weightr   r>   r   !_amx_process_weight_after_loadingL   sZ   
 

rZ   c                   @   s    e Zd ZdddZdddZdS )	PackWeightMethodNc                 C   s   || _ || _d S r(   )rS   rT   )selfrS   rT   r   r   r   __init__   s   
zPackWeightMethod.__init__r:   c                 C   s   t || j| j d S r(   )rZ   rS   rT   )r\   r?   r   r   r   process_weights_after_loading   s   
z.PackWeightMethod.process_weights_after_loadingr(   r:   N)r   r	   r
   r]   r^   r   r   r   r   r[      s    
r[   )Fr(   r_   )loggingr   sglang.srt.utilsr   	getLoggerr   rG   enumr   r   r   r'   r/   r1   r9   rZ   r[   r   r   r   r   <module>   s     

	
7