o
    -i                     @   s~  U d dl mZmZ d dlmZ d dlZd dlmZ d dlm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZ ddlmZmZmZmZ ddlmZ ddlm Z m!Z! ddl"m#Z#m$Z$ ee%Z&e' Z(ej)Z*ej+j,j-j.Z/eej+j,j0j.iZ1e2eef e3d< e4 oe5ej+j,dZ6e6rej+j,j7j.e1e< G dd deZ8G dd de8Z9G dd de8Z:G dd de$Z;dS )    )ABCabstractmethod)AnyN)auto_functionalized)PatternMatcherPassfwd_onlyregister_replacement)
OpOverload)
VllmConfig)init_logger)QuantKeykFp8StaticTensorSymkNvfp4Dynamic)current_platform   )	QUANT_OPS
empty_bf16
empty_fp32	empty_i32)enable_fake_mode)MatcherQuantFP8MatcherSiluAndMul)VllmInductorPassVllmPatternMatcherPass	FUSED_OPSsilu_and_mul_nvfp4_quantc                   @   sP   e Zd ZdZdeddfddZdededejfd	d
Z	e
deddfddZdS )ActivationQuantPatternzW
    The base class for Activation+Quant fusions.
    Should not be used directly.
    	quant_keyreturnNc                 C   sf   || _ |j| _| j tv sJ d| j  t| j  | _| j tv s'J d| j  t| j  | _t | _d S )Nz unsupported quantization scheme zunsupported fusion scheme )	r   dtypequant_dtyper   QUANT_OPr   FUSED_OPr   silu_and_mul_matcher)selfr    r%   e/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/compilation/activation_quant_fusion.py__init__5   s   

zActivationQuantPattern.__init__argskwargsc                 O   s    | j dd|}tj|i |S )Ncuda)r   device)r    torchempty)r$   r(   r)   r%   r%   r&   empty_quantH   s   z"ActivationQuantPattern.empty_quantpm_passc                 C   s   t N)NotImplementedError)r$   r/   r%   r%   r&   registerL   s   zActivationQuantPattern.register)__name__
__module____qualname____doc__r   r'   r   r,   Tensorr.   r   r   r2   r%   r%   r%   r&   r   /   s    
r   c                       H   e Zd ZdZd fddZdeej fddZde	ddfd	d
Z
  ZS )SiluMulFp8StaticQuantPatternz3
    Fusion for SiluMul+Fp8StaticQuant Pattern
    r   Nc                    s   t  t tt| _d S r0   )superr'   r   r   quant_matcherr$   	__class__r%   r&   r'   V   s   z%SiluMulFp8StaticQuantPattern.__init__c                 C   s    | j  d }g | j |S )Nr   )r;   inputsr#   )r$   scaler%   r%   r&   
get_inputsZ   s   z'SiluMulFp8StaticQuantPattern.get_inputsr/   c                    sd   dt jdt jdt jf fdd}dt jdt jdt jf fdd}  }||  t|||t| d S )Ninputr@   r   c                    s     | } ||}|d S )Nr   )r#   r;   )rB   r@   result_silu_mulresult_quantr<   r%   r&   patternb   s   
z6SiluMulFp8StaticQuantPattern.register.<locals>.patternc                    sP   | j d d }| j d d |f }tj|| j jd}t j|| |d}|d S )N   )r+   r   )resultrB   r@   r   )shaper,   r-   r+   r    r   r"   )rB   r@   doutput_shaperH   atr<   r%   r&   replacementj   s   

z:SiluMulFp8StaticQuantPattern.register.<locals>.replacement)r,   r7   rA   r   r   )r$   r/   rE   rM   inpsr%   r<   r&   r2   a   s"   z%SiluMulFp8StaticQuantPattern.registerr   Nr3   r4   r5   r6   r'   listr,   r7   rA   r   r2   __classcell__r%   r%   r=   r&   r9   Q   s
    r9   c                       r8   )SiluMulNvfp4QuantPatternz/
    Fusion for SiluMul+Nvfp4Quant Pattern
    r   Nc                    s   t  t d S r0   )r:   r'   r   r<   r=   r%   r&   r'      s   z!SiluMulNvfp4QuantPattern.__init__c                 C   s6   |  dd}tdd}tdd}tdd}||||gS )N             @   r   )r.   r   r   r   )r$   rH   output_scaleinput_r@   r%   r%   r&   rA      s
   


z#SiluMulNvfp4QuantPattern.get_inputsr/   c                    s   dt jdt jdt jdt jdtt jt jf f
 fdd}dt jdt jdt jdt jdtt jt jf f
 fdd	}t||  t| d S )
NrH   rY   rB   r@   r   c                    s0     |}t j| |||dd}|d |d fS )NT)outputrB   rY   input_scaleis_sf_swizzled_layoutr   rG   )r#   r   r!   )rH   rY   rB   r@   rC   rL   r<   r%   r&   rE      s   
z2SiluMulNvfp4QuantPattern.register.<locals>.patternc                    s$   t  j| |||d}|d |d fS )N)rH   result_block_scalerB   input_global_scaler   rG   )r   r"   )rH   rY   rB   r@   rL   r<   r%   r&   rM      s   z6SiluMulNvfp4QuantPattern.register.<locals>.replacement)r,   r7   tupler   rA   r   )r$   r/   rE   rM   r%   r<   r&   r2      s.   z!SiluMulNvfp4QuantPattern.registerrO   rP   r%   r%   r=   r&   rS   ~   s
    rS   c                       sX   e Zd ZdZededdf fddZejde	j
jddfdd	Zdefd
dZ  ZS )ActivationQuantFusionPassah  
    This pass fuses a pre-defined set of custom ops into fused ops.
    It uses the torch pattern matcher to find the patterns and replace them.

    Because patterns can only be registered once, the pass is a singleton.
    This will be addressed in a future version of PyTorch:
    https://github.com/pytorch/pytorch/pull/139321#issuecomment-2452354980
    configr   Nc                    sR   t  | tdd| _t }|| j tr t }|| j | || j d S )Nactivation_quant_fusion_pass)	pass_name)	r:   r'   r   patternsr9   r2   "silu_and_mul_nvfp4_quant_supportedrS   dump_patterns)r$   rb   pattern_silu_mul_fp8pattern_silu_mul_nvfp4r=   r%   r&   r'      s   z"ActivationQuantFusionPass.__init__graphc                 C   s    | j || _td| j d S )NzReplaced %s patterns)re   applymatched_countloggerdebug)r$   rj   r%   r%   r&   __call__   s   z"ActivationQuantFusionPass.__call__c                 C   s   t | tttS r0   )r   hash_sourcer   r9   rS   r<   r%   r%   r&   uuid   s   zActivationQuantFusionPass.uuid)r3   r4   r5   r6   r   r
   r'   r   time_and_logr,   fxGraphro   strrq   rR   r%   r%   r=   r&   ra      s    	ra   )<abcr   r   typingr   r,   *torch._higher_order_ops.auto_functionalizer   torch._inductor.pattern_matcherr   r   r   
torch._opsr	   vllm.configr
   vllm.loggerr   9vllm.model_executor.layers.quantization.utils.quant_utilsr   r   r   vllm.platformsr   fusionr   r   r   r   inductor_passr   matcher_utilsr   r   vllm_inductor_passr   r   r3   rm   	fp8_dtype	FP8_DTYPEuint8	FP4_DTYPEops_Csilu_and_muldefaultSILU_MUL_OPsilu_and_mul_quantr   dict__annotations__is_cudahasattrrf   r   r   r9   rS   ra   r%   r%   r%   r&   <module>   s:   
"-3