o
    -id+                     @   st   d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	 ddl
mZ ddlmZ eeZG d	d
 d
eZdS )    N)Iterable)auto_functionalized)init_logger)current_platform   )is_func)VllmInductorPassc                   @   s"  e Zd ZdZejdejjddfddZ	dejj
eejj
 B ddfdd	Z	ddejjd
ejj
deeejj
eB f deejj
eB df dB ddf
ddZd
ejj
deeejj
eB f ddfddZd
ejj
deeejj
f fddZ	ddejjd
ejj
deejj
eB df dB ddfddZdS )FixFunctionalizationPassa"  
    This pass defunctionalizes certain nodes to avoid redundant tensor copies.
    After this pass, DCE (dead-code elimination) should never be run,
    as de-functionalized nodes may appear as dead code.

    To add new nodes to defunctionalize, add to the if-elif chain in __call__.
    graphreturnNc                 C   st  t  rtd d S g | _d}|jD ]}t|tsq|j}|j	d }|t
jjjjkr|d }|d }| |}t|tjrt|tjr|j	d |j	d krt|j	d t
jjjjrtdd | D r|j	d j	d }	| D ] }
|
jD ]}t|t
jjjjr||	 | | qw| |
 qr| || | | nddd}| ||| n|t
jjjjkrdd	d}| ||| n|t
jjjjkrd
d	d}| ||| n|t
jjjjkrd
dd	d}| ||| n|t
jjjjt
jjj jfv rdd
i}| ||| nt!t
jj"dr(|t
jj"j#jkr(dd	dddd}| ||| nh|t
jjj$jkr?dd
i}| j|||dd nQ|t
jjj%jkrVdd
i}| j|||dd n:t!t
jjdrv|t
jjj&jkrvd
dd}| j|||dd n|t
jjj'jkrddi}d}| j||||d nq|d7 }q| (|d t)| j}| jD ]}|*| qtd|| | j+  d S )NzBXPU platform does not support fix functionalizationpass currently.r   querykeyc                 s   s.    | ]}|j D ]}t|tjjjjV  qqd S N)usersr   torchopsatenslice_scatterdefault).0getitem_nodeuser r   c/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/compilation/fix_functionalization.py	<genexpr>9   s    z4FixFunctionalizationPass.__call__.<locals>.<genexpr>)r      inputresidualresultscale)r   r      r   &flashinfer_trtllm_fused_allreduce_normallreduce_innorm_out	quant_out	scale_out)r   r   r          )r   r   args)r   r   r   silu_and_mul_nvfp4_quantresult_block_scale)r   r+   r   input_global_scaleqkv)r-   num_heads_qnum_heads_knum_heads_vhead_dimepsq_weightk_weightcos_sin_cacheis_neoxposition_ids)mutated_argsr)   before_cleanupz,De-functionalized %s nodes, removed %s nodes),r   is_xpuloggerdebugnodes_to_removenodesr   r   kwargsr)   r   r   _Crotary_embeddingr   getitem_usersoperatorgetitemr   split_with_sizesallvaluesr   r   replace_all_uses_with_removeinsert_defunctionalizeddefunctionalizefused_add_rms_norm#fused_add_rms_norm_static_fp8_quant rms_norm_dynamic_per_token_quantrms_normrms_norm_static_fp8_quanthasattrvllmr!   silu_and_mulsilu_and_mul_quantr*   fused_qk_norm_rope
dump_graphlen
erase_nodeclear)selfr
   countnoder?   	at_targetr   r   getitem_nodesmm_noder   user_of_getitemr8   r)   count_removedr   r   r   __call__   s   

















z!FixFunctionalizationPass.__call__node_or_nodesc                 C   s.   t |tjjr| j| dS | j| dS )zM
        Stage a node (or nodes) for removal at the end of the pass.
        N)
isinstancer   fxNoder=   appendextend)rZ   rc   r   r   r   rI      s   z FixFunctionalizationPass._remover\   r8   r)   .c                 C   s*   |  || | j|||d | | dS )z
        De-functionalize a node by replacing it with a call to the original.
        It also replaces the getitem users with the mutated arguments.
        See replace_users_with_mutated_args and insert_defunctionalized.
        r(   N)replace_users_with_mutated_argsrJ   rI   )rZ   r
   r\   r8   r)   r   r   r   rK      s   z(FixFunctionalizationPass.defunctionalizec                 C   sP   |  | D ]\}}|| }t|tr|j| n|}|| | | qdS )a7  
        Replace all getitem users of the auto-functionalized node with the
        mutated arguments.
        :param node: The auto-functionalized node
        :param mutated_args: The mutated arguments, indexed by getitem index.
        If the value of an arg is a string, `node.kwargs[arg]` is used.
        N)rB   itemsrd   strr?   rH   rI   )rZ   r\   r8   idxr   argr   r   r   ri      s   

z8FixFunctionalizationPass.replace_users_with_mutated_argsc                 C   s2   i }|j D ]}t|tjr|jd }|||< q|S )z
        Returns the operator.getitem users of the auto-functionalized node,
        indexed by the index they are getting.
        r   )r   r   rC   rD   r)   )rZ   r\   r   r   rl   r   r   r   rB      s   

z&FixFunctionalizationPass.getitem_usersc                    s   t  tsJ d  d| 5  jd }|du r%|j| jd nt fdd|D }|j||d W d   dS W d   dS 1 sJw   Y  dS )	a>  
        Insert a new defunctionalized node into the graph before node.
        If one of the kwargs is 'out', provide args directly,
        as node.kwargs cannot be used.
        See https://github.com/pytorch/pytorch/blob/a00faf440888ffb724bad413f329a49e2b6388e7/torch/_inductor/lowering.py#L351

        :param graph: Graph to insert the defunctionalized node into
        :param node: The auto-functionalized node to defunctionalize
        :param args: If we cannot use kwargs, specify args directly.
        If an arg is a string, `node.kwargs[arg]` is used.
        z%node must be auto-functionalized, is z insteadr   N)r?   c                 3   s(    | ]}t |tr j| n|V  qd S r   )rd   rk   r?   )r   rm   r\   r   r   r     s    
zCFixFunctionalizationPass.insert_defunctionalized.<locals>.<genexpr>r(   )r   r   inserting_beforer)   call_functionr?   tuple)rZ   r
   r\   r)   functionr   rn   r   rJ      s   

"z0FixFunctionalizationPass.insert_defunctionalizedr   )__name__
__module____qualname____doc__r   time_and_logr   re   Graphrb   rf   r   rI   dictintrk   rq   rK   ri   rB   rJ   r   r   r   r   r	      sH     "

"r	   )rC   collections.abcr   r   *torch._higher_order_ops.auto_functionalizer   vllm.loggerr   vllm.platformsr   fx_utilsr   vllm_inductor_passr   rs   r;   r	   r   r   r   r   <module>   s   