o
    i                     @   s  d dl Z d dlmZ d dlmZmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ ddlmZ e rZddlmZmZmZ e rddl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ e, rddl-m.Z. ddl/m0Z0 ddl1m2Z2m3Z3m4Z4 ddl5m6Z6 ddl7m8Z8 ee9Z:edZ;edZ<dee;e<f dee;e<f fddZ=G d d! d!e2Z>dS )"    N)Callable)Any	ParamSpecTypeVar)fx)envs)rocm_aiter_ops)PostCleanupPass)
VllmConfigset_current_vllm_config)init_logger)current_platform)set_env_var   )VllmInductorPass)RocmAiterRMSNormQuantFusionPass'RocmAiterSiluMulFp8GroupQuantFusionPass&RocmAiterTritonAddRMSNormPadFusionPass)ActivationQuantFusionPass)AttnFusionPass)QKNormRoPEFusionPass)RMSNormQuantFusionPass)SequenceParallelismPass)SplitCoalescingPass)AllReduceFusionPass)AsyncTPPass)CustomGraphPassInductorPassget_pass_context)FixFunctionalizationPass)NoOpEliminationPassPRfnreturnc                    s,   t  dtjdtjdtf fdd}|S )z
    Function decorator that turns on inductor pattern match debug
    for the duration of the call.
    Used to avoid logging builtin Inductor pattern matching.
    argskwargsr$   c                     sT   t j }d ur#td|  | i |W  d    S 1 sw   Y   | i |S )N!TORCHINDUCTOR_PATTERN_MATCH_DEBUG)r   VLLM_PATTERN_MATCH_DEBUGr   )r%   r&   	debug_valr#    Z/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/compilation/passes/pass_manager.pywrapper;   s
    z)with_pattern_match_debug.<locals>.wrapper)	functoolswrapsr!   r%   r&   r"   )r#   r-   r+   r*   r,   with_pattern_match_debug4   s    r0   c                   @   sd   e Zd ZdZdddZedejddfddZd	e	ddfd
dZ
deddfddZdefddZdS )PostGradPassManagera$  
    The pass manager for post-grad passes.
    It handles configuration, adding custom passes, and running passes.
    It supports uuid for the Inductor code cache. That includes torch<2.6
    support using pickling (in .inductor_pass.CustomGraphPass).

    The order of the post-grad post-passes is:
    1. passes (constructor parameter)
    2. default passes (NoopEliminationPass, FusionPass)
    3. config["post_grad_custom_post_pass"] (if it exists)
    4. fix_functionalization
    This way, all passes operate on a functionalized graph.
    r$   Nc                 C   s
   g | _ d S N)passes)selfr+   r+   r,   __init__U   s   
zPostGradPassManager.__init__graphc                 C   sv   dt _t j}| jD ]}||r|| t  jd7  _q
td|| q
| | t  jd7  _| 	| d t _d S )Nr   r   z!Skipping %s with compile range %s)
r   dump_prefixr   compile_ranger3   is_applicable_for_rangeloggerdebugpost_cleanupfix_functionalization)r4   r6   r8   pass_r+   r+   r,   __call__X   s   




zPostGradPassManager.__call__configc                 C   s  |j j| _t|dd | jjr|  jt|g7  _| jjr6|  jt|g7  _| jjr6|  jt	|g7  _| jj
rD|  jt|g7  _| jjr`|  jt|g7  _t r`|  jt|g7  _| jjr||  jt|g7  _t r||  jt|g7  _| jjrt r|  jt|g7  _| jjr|  jt|g7  _| jjr|  jt|g7  _|  jt|g7  _t|| _t|| _W d    d S 1 sw   Y  d S )NF)check_compile)compilation_configpass_configr   eliminate_noopsr3   r    	enable_spr   fuse_gemm_commsr   fuse_allreduce_rmsr   fuse_norm_quantr   r   
is_enabledr   fuse_act_quantr   r   fuse_act_paddingr   fuse_attn_quantr   enable_qk_norm_rope_fusionr   r   r	   r<   r   r=   )r4   r@   r+   r+   r,   	configurem   s<   

"zPostGradPassManager.configurer>   c                 C   s   t |tsJ | j| d S r2   )
isinstancer   r3   append)r4   r>   r+   r+   r,   add   s   zPostGradPassManager.addc                 C   s^   g }d| j  i}| jD ]	}||  q|| j  tt j|d< ||d< t	
|S )z
        The PostGradPassManager is set as a custom pass in the Inductor and
        affects compilation caching. Its uuid depends on the UUIDs of all
        dependent passes and the pass config. See InductorPass for more info.
        rC   r8   r3   )rC   compute_hashr3   rP   uuidr=   strr   r8   r   	hash_dict)r4   r3   stater>   r+   r+   r,   rS      s   

zPostGradPassManager.uuid)r$   N)__name__
__module____qualname____doc__r5   r0   r   Graphr?   r
   rN   r   rQ   rT   rS   r+   r+   r+   r,   r1   F   s    
)r1   )?r.   collections.abcr   typingr   r   r   torchr   vllmr   vllm._aiter_opsr   ,vllm.compilation.passes.utility.post_cleanupr	   vllm.configr
   r   vllm.loggerr   vllm.platformsr   vllm.utils.system_utilsr   vllm_inductor_passr   rI   fusion.rocm_aiter_fusionr   r   r   is_cuda_alikefusion.act_quant_fusionr   fusion.attn_quant_fusionr   fusion.qk_norm_rope_fusionr   fusion.rms_quant_fusionr   fusion.sequence_parallelismr   utility.split_coalescingr   is_cudafusion.allreduce_rms_fusionr   fusion.collective_fusionr   inductor_passr   r   r   utility.fix_functionalizationr   utility.noop_eliminationr    rW   r:   r!   r"   r0   r1   r+   r+   r+   r,   <module>   s@   "