o
    -i                     @   s  d dl Z d dlmZ d dlmZmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ ddlmZ ddlmZ e rXd dlmZmZ e rzddlm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( e) rddl*m+Z+m,Z, ddl-m.Z. ddl/m0Z0m1Z1m2Z2 ddl3m4Z4 ee5Z6edZ7edZ8dee7e8f dee7e8f fddZ9G dd de0Z:dS )     N)Callable)Any	ParamSpecTypeVar)fx)envs)rocm_aiter_ops)
VllmConfigset_current_vllm_config)init_logger)current_platform)set_env_var   )PostCleanupPass)VllmInductorPass)RocmAiterRMSNormFusionPass'RocmAiterSiluMulFp8GroupQuantFusionPass)ActivationQuantFusionPass)RMSNormQuantFusionPass)AttnFusionPass)QKNormRoPEFusionPass)SequenceParallelismPass)AllReduceFusionPassAsyncTPPass)FixFunctionalizationPass)CustomGraphPassInductorPassget_pass_context)NoOpEliminationPassPRfnreturnc                    s,   t  dtjdtjdtf fdd}|S )z
    Function decorator that turns on inductor pattern match debug
    for the duration of the call.
    Used to avoid logging builtin Inductor pattern matching.
    argskwargsr"   c                     sT   t j }d ur#td|  | i |W  d    S 1 sw   Y   | i |S )N!TORCHINDUCTOR_PATTERN_MATCH_DEBUG)r   VLLM_PATTERN_MATCH_DEBUGr   )r#   r$   	debug_valr!    Z/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/compilation/pass_manager.pywrapper8   s
    z)with_pattern_match_debug.<locals>.wrapper)	functoolswrapsr   r#   r$   r    )r!   r+   r)   r(   r*   with_pattern_match_debug1   s    r.   c                   @   sd   e Zd ZdZdddZedejddfddZd	e	ddfd
dZ
deddfddZdefddZdS )PostGradPassManagera$  
    The pass manager for post-grad passes.
    It handles configuration, adding custom passes, and running passes.
    It supports uuid for the Inductor code cache. That includes torch<2.6
    support using pickling (in .inductor_pass.CustomGraphPass).

    The order of the post-grad post-passes is:
    1. passes (constructor parameter)
    2. default passes (NoopEliminationPass, FusionPass)
    3. config["post_grad_custom_post_pass"] (if it exists)
    4. fix_functionalization
    This way, all passes operate on a functionalized graph.
    r"   Nc                 C   s
   g | _ d S N)passes)selfr)   r)   r*   __init__R   s   
zPostGradPassManager.__init__graphc                 C   sv   dt _t j}| jD ]}||r|| t  jd7  _q
td|| q
| | t  jd7  _| 	| d t _d S )Nr   r   z!Skipping %s with compile range %s)
r   dump_prefixr   compile_ranger1   is_applicable_for_rangeloggerdebugpost_cleanupfix_functionalization)r2   r4   r6   pass_r)   r)   r*   __call__U   s   




zPostGradPassManager.__call__configc                 C   sh  |j j| _t|dd | jjr|  jt|g7  _| jjr6|  jt|g7  _| jjr6|  jt	|g7  _| jj
rD|  jt|g7  _| jjr`|  jt|g7  _t r`|  jt|g7  _| jjr||  jt|g7  _t r||  jt|g7  _| jjr|  jt|g7  _| jjr|  jt|g7  _t|| _t|| _W d    d S 1 sw   Y  d S )NF)check_compile)compilation_configpass_configr
   eliminate_noopsr1   r   	enable_spr   fuse_gemm_commsr   fuse_allreduce_rmsr   fuse_norm_quantr   r   
is_enabledr   fuse_act_quantr   r   fuse_attn_quantr   enable_qk_norm_rope_fusionr   r   r:   r   r;   )r2   r>   r)   r)   r*   	configurej   s6   

"zPostGradPassManager.configurer<   c                 C   s   t |tsJ | j| d S r0   )
isinstancer   r1   append)r2   r<   r)   r)   r*   add   s   zPostGradPassManager.addc                 C   s^   g }d| j  i}| jD ]	}||  q|| j  tt j|d< ||d< t	
|S )z
        The PostGradPassManager is set as a custom pass in the Inductor and
        affects compilation caching. Its uuid depends on the UUIDs of all
        dependent passes and the pass config. See InductorPass for more info.
        rA   r6   r1   )rA   compute_hashr1   rM   uuidr;   strr   r6   r   	hash_dict)r2   r1   stater<   r)   r)   r*   rP      s   

zPostGradPassManager.uuid)r"   N)__name__
__module____qualname____doc__r3   r.   r   Graphr=   r	   rK   r   rN   rQ   rP   r)   r)   r)   r*   r/   C   s    
%r/   );r,   collections.abcr   typingr   r   r   torchr   vllmr   vllm._aiter_opsr   vllm.configr	   r
   vllm.loggerr   vllm.platformsr   vllm.utils.system_utilsr   r:   r   vllm_inductor_passr   rG   "vllm.compilation.rocm_aiter_fusionr   r   is_cuda_alikeactivation_quant_fusionr   fusionr   fusion_attnr   qk_norm_rope_fusionr   sequence_parallelismr   is_cudacollective_fusionr   r   r;   r   inductor_passr   r   r   noop_eliminationr   rT   r8   r   r    r.   r/   r)   r)   r)   r*   <module>   s<   "