o
    
۾i                     @   s$  d dl mZ d dlZd dlmZ d dlmZ eeZer!dd Z	nzd dl
m	Z	 W n ey8   d dl
mZ	 Y nw eejjd	r\e	d
	ddejdejdejdejdB dejf
ddZeejjdre	d	ddejdejdejdB dejdejdedejdB dejfddZG dd dZdS )    )TYPE_CHECKINGN)flash_attn_varlen_func)init_loggerc                    s    fddS )Nc                    s    S N )namefnr   A/home/ubuntu/.local/lib/python3.10/site-packages/vllm/_xpu_ops.py<lambda>   s    zregister_fake.<locals>.<lambda>r   r   r   r   r
   register_fake   s   r   )r   )impl_abstractfp8_gemm_w8a16z_xpu_C::fp8_gemm_w8a16inputq_weightweight_scalebiasreturnc                 C   s>   |  d| jd }|d}|d}tj||f| j| jdS Nr      dtypedeviceviewshapesizetorchemptyr   r   )r   r   r   r   input_2dMNr   r   r
   _fp8_gemm_w8a16_fake   s   

r#   int4_gemm_w4a16z_xpu_C::int4_gemm_w4a16qzeros
group_size	group_idxc           
      C   s>   |  d| jd }|d}|d}	tj||	f| j| jdS r   r   )
r   r   r   r   r%   r&   r'   r    r!   r"   r   r   r
   _int4_gemm_w4a16_fake(   s   


r(   c                )   @   s(  e Zd Ze																		d#dejdejdejd	ejd
edededB dedejdB dejdB dejdB de	e dB dedB dejdB dejdB dedededB dejdB f&ddZ
eejddddddddddddfdejd	ejdB dejdB dejdB dedB d dfd!d"ZdS )$xpu_opsNF           r   qkvcu_seqlens_qmax_seqlen_qmax_seqlen_ksoftmax_scalecausaloutblock_tablealibi_slopeswindow_sizesoftcap	seqused_kcu_seqlens_k	dropout_p
fa_versionreturn_softmax_lses_auxc                 C   s   |d us|d usJ d|d u s|d u sJ d|	d u s$|d us$J d|	d us0|d us0J d|d u r?t j| j| j| jd}|d u rFd}nt|dksNJ |d |d	 f}|	d u r^| }t||  ||||||||||	|||d
S )Nz*cu_seqlens_k or seqused_k must be providedz>cu_seqlens_k and seqused_k cannot be provided at the same timez,when enable block_table, seqused_k is neededz4when block_table is disabled, cu_seqlens_k is neededr   r   r   r+   r   r   )r4   r,   r-   r.   r/   r:   r9   r0   r1   r2   r3   r5   r>   r7   r=   )r   r   r   r   r   len
contiguousr   )r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   scheduler_metadatar<   	q_descale	k_descale	v_descale
num_splitsr=   r>   real_window_sizer   r   r
   r   9   sJ   zxpu_ops.flash_attn_varlen_funcr?   cache_seqlenscu_seqlens_k_newcache_leftpad	page_sizer   c                 C   s   t d d S )NzFget_scheduler_metadata is not implemented for xpu_ops, returning None.)loggerwarning_once)
batch_sizer0   r1   num_heads_qnum_heads_kvheaddimrH   	qkv_dtype	headdim_vr/   rI   rJ   rK   max_seqlen_k_newr3   r7   has_softcaprF   pack_gqa	sm_marginr   r   r
   get_scheduler_metadata   s   zxpu_ops.get_scheduler_metadata)NFNNNNr*   NNr*   Nr+   NNNr   FN)__name__
__module____qualname__staticmethodr   Tensorintfloatboollistr   bfloat16rX   r   r   r   r
   r)   8   s    	

J	
r)   r   )typingr   r   %vllm_xpu_kernels.flash_attn_interfacer   vllm.loggerr   rY   rL   r   torch.libraryImportErrorr   hasattrops_xpu_Cr]   r#   r^   r(   r)   r   r   r   r
   <module>   s^   
