o
    i                     @   sl   d dl mZ d dlZd dlmZ d dlmZ d dlmZ ddl	m
Z
 ddlmZ eeZG d	d
 d
eZdS )    )IterableN)SymIntstatically_known_true)init_logger   )is_func)VllmInductorPassc                   @   sp   e Zd ZdZejdejjddfddZ	de
eB de
eB defd	d
Zdee
eB  dee
eB  defddZdS )NoOpEliminationPassa_  
    This is an inductor pass that removes redundant reshape/slice operations.
    It is required for RMSNorm-quant fusion to work properly.
    That's because apply_fp8_linear adds a reshape, which is redundant
    in the 2D-case. Additionally, torch internal no-op elimination pass does
    not handle certain slice variants.

    Cases handled:
      1. A chain of reshapes is equivalent to the last reshape called on the
      base tensor (input of the first reshape).
      2. A reshape that produces the shape of the input is redundant
      3. A slice that produces the shape of the input is redundant

    Example graph 1:
    mul_1: "f16[s0, 4096]" = ...
    view_1: "f16[s0, 128, 32]" = torch.reshape(mul_1, [-1, 128, 32])
    view_2: "f16[s0, 4096]" = torch.reshape(view_2, [-1, 4096])
    view_3: "f16[s0, 128, 32]" = torch.reshape(view_3, [-1, 128, 32])

    Can be replaced with:
    mul_1: "f16[s0, 4096]" = ...
    view_3: "f16[s0, 128, 32]" = ...

    Example graph 2:
    getitem_1: "f16[s0, 4096]" = ...
    view_1: "f16[s0, 4096]" = torch.reshape(getitem_1, [-1, 4096])
    at = auto_functionalized(static_scaled_fp8_quant, input = view_1, ...)
    out: "f8e4m3fn[s0, 4096]" = at[1]

    Can be replaced with:
    getitem_1: "f16[s0, 4096]" = ...
    at = auto_functionalized(static_scaled_fp8_quant, input = getitem_1, ...)
    out: "f8e4m3fn[s0, 4096]" = at[1]

    Example graph 3:
    arg0: "s0" = SymInt(s0)
    scaled_mm: "f16[s0, 4096]" = ...
    slice_1: "f16[s0, 4096]" = torch.slice(scaled_mm, -1, 0, arg0)
    at = auto_functionalized(fused_add_rms_norm, input = slice_1, ...)
    out: "f16[s0, 4096]" = torch.slice_scatter(scaled_mm, at[1], 0, 0, arg0)

    Can be replaced with:
    arg0: "s0" = SymInt(s0)
    scaled_mm: "f16[s0, 4096]" = ...
    at = auto_functionalized(fused_add_rms_norm, input = scaled_mm, ...)
    out: "f16[s0, 4096]" = at[1]
    graphreturnNc                 C   sZ  d}|j D ]}t|tjjjjr7|jd }t|tjjjjr7|d|jd  t	|j
dkr7|| |d7 }t|tjjjjsIt|tjjjjro|jd }|jd j}|jd j}| ||rn|| || |d7 }qt|tjjjjr|jd d \}}}	}
}|jd j}|jd j}| ||r|| || |d7 }qtd| d S )Nr      val   z$Removed %s no-op reshapes and slices)nodesr   torchopsatenreshapedefaultargs
update_arglenusers
erase_nodesliceTensormetashapeall_dims_equivalentreplace_all_uses_withslice_scatterloggerdebug)selfr   countnodeinputinput_shapeoutput_shapebaseview	dim_indexstartend
base_shape
view_shape r1   f/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/compilation/passes/utility/noop_elimination.py__call__C   s<   







zNoOpEliminationPass.__call__dimi_dimc                 C   s   t ||kS )a  
        This function checks if two dimensions are equivalent.
        :param dim: The dimension arg to reshape/slice
        :param i_dim: The corresponding dimension in the input tensor
        :return: Are the dimensions equivalent?

        There are two cases in which the dimensions are equivalent:
        1. The dimensions are equal (both integers)
        2. The dimensions both correspond to the same SymInt
        r   )r$   r4   r5   r1   r1   r2   dims_equivalentl   s   z#NoOpEliminationPass.dims_equivalentdimsi_dimsc                    s@   t |}t |}t|t|krdS t fddt||D S )NFc                 3   s     | ]\}}  ||V  qd S )N)r6   ).0si_sr$   r1   r2   	<genexpr>   s    z:NoOpEliminationPass.all_dims_equivalent.<locals>.<genexpr>)listr   allzip)r$   r7   r8   dims_i_dims_r1   r<   r2   r   z   s
   z'NoOpEliminationPass.all_dims_equivalent)__name__
__module____qualname____doc__r	   time_and_logr   fxGraphr3   intr   boolr6   r   r   r1   r1   r1   r2   r
      s    0(

r
   )collections.abcr   torch.fxr   r   %torch.fx.experimental.symbolic_shapesr   vllm.loggerr   fx_utilsr   vllm_inductor_passr	   rC   r"   r
   r1   r1   r1   r2   <module>   s   