o
    -i34                     @   sh  d dl Z d dlm  mZ d dl mZ d dlmZ d dlmZ d dl	Z
d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZ ddlmZ ddlmZ ddlmZmZm Z m!Z! ddl"m#Z#m$Z$ ee%Z&e' Z(G dd dZ)G dd de)Z*G dd de)Z+G dd de)Z,G dd de)Z-G dd de$Z.G dd deZ/G dd de$Z0dS )     N)fx)PatternMatcherPass)
OpOverload)rocm_aiter_ops)ActivationQuantPattern)
VllmConfig)init_logger)
GroupShapeQuantKey	ScaleDesc)current_platform   )FusedRMSQuantKey)enable_fake_mode)MatcherFusedAddRMSNormMatcherQuantFP8MatcherRMSNormMatcherSiluAndMul)VllmInductorPassVllmPatternMatcherPassc                   @   s&   e Zd Z	ddededefddZdS )	AiterRMSNormQuantPatternTepsilonkeymatch_aiter_quantc                 C   sD   || _ |jj| _|jst|ddnt|dd| _t|j|d| _	d S )NT)match_rocm_aiter)
r   quantdtypequant_dtype	fused_addr   r   rmsnorm_matcherr   quant_matcher)selfr   r   r    r"   _/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/compilation/rocm_aiter_fusion.py__init__'   s   

z!AiterRMSNormQuantPattern.__init__N)T)__name__
__module____qualname__floatr   boolr$   r"   r"   r"   r#   r   &   s    r   c                       `   e Zd ZdZe Zdejdfde	de
jdedededd	f fd
dZdedd	fddZ  ZS )AiterRMSNormDynamicQuantPatternz-AITER RMSNorm + Dynamic Quantization pattern.Tr   r   r   group_shape	symmetricreturnNc                    s8   t tjd|}tdt|||dd}t ||| d S NFr   scaler-   r   r   r   torchfloat32r   r
   superr$   r!   r   r   r   r,   r-   r1   r   	__class__r"   r#   r$   =      z(AiterRMSNormDynamicQuantPattern.__init__pm_passc                    r   dt jdt jdtt jt jf f fdd}dt jdt jdtt jt jf f fdd}t|| j tj| d S )Ninputweightr.   c                    "     | |} |\}}||fS Nr   r    r=   r>   
result_rmsresultr1   r!   r"   r#   patternN      z9AiterRMSNormDynamicQuantPattern.register.<locals>.patternc                    s&    j | | j jd}|d |d fS )N)xr>   r   r   r   r   FUSED_OPr   r   )r=   r>   rD   rE   r"   r#   replacementV   s   z=AiterRMSNormDynamicQuantPattern.register.<locals>.replacementr4   Tensortuplepmregister_replacementr   inputsfwd_onlyr!   r;   rF   rK   r"   rE   r#   registerM   s*   z(AiterRMSNormDynamicQuantPattern.register)r%   r&   r'   __doc__r   "get_rmsnorm_fused_dynamic_quant_oprJ   r	   	PER_TOKENr(   r4   r   r)   r$   r   rT   __classcell__r"   r"   r8   r#   r+   8   (    r+   c                       r*   )'AiterFusedAddRMSNormDynamicQuantPatternz7AITER RMSNorm Fused Add + Dynamic Quantization pattern.Tr   r   r   r,   r-   r.   Nc                    s8   t tjd|}tdt|||dd}t ||| d S NFTr0   r2   r3   r7   r8   r"   r#   r$   q   r:   z0AiterFusedAddRMSNormDynamicQuantPattern.__init__r;   c                       dt jdt jdt jdtt jt jt jf f fdd}dt jdt jdt jdtt jt jt jf f fdd}t|| j tj| d S )	Nr=   r>   residualr.   c                    *     | ||\}} |\}}|||fS r@   rA   r=   r>   r]   rC   residual_outrD   r1   rE   r"   r#   rF         
zAAiterFusedAddRMSNormDynamicQuantPattern.register.<locals>.patternc                    s.    j | || j jd}|d |d |d fS )N)rH   r]   r>   r   r   r   r      rI   )r=   r>   r]   rD   rE   r"   r#   rK      s   zEAiterFusedAddRMSNormDynamicQuantPattern.register.<locals>.replacementrL   rS   r"   rE   r#   rT      s2   
z0AiterFusedAddRMSNormDynamicQuantPattern.register)r%   r&   r'   rU   r   &get_rmsnorm_fused_add_dynamic_quant_oprJ   r	   rW   r(   r4   r   r)   r$   r   rT   rX   r"   r"   r8   r#   rZ   l   rY   rZ   c                       \   e Zd ZdZe Z		ddedej	de
dededd	f fd
dZdedd	fddZ  ZS )AiterRMSFp8GroupQuantPatternzw
    This pattern fuses aiter rms_norm & group fp8 quant custom
    ops into an aiter rms_norm_group_fp8_quant op.
    Tr   r   r,   r   r-   r.   Nc                    s8   t tjd|}tdt|||dd}t ||| d S r/   r3   r!   r   r   r,   r   r-   r1   r   r8   r"   r#   r$      r:   z%AiterRMSFp8GroupQuantPattern.__init__r;   c                    r<   )Nr=   r>   r.   c                    r?   r@   rA   rB   rE   r"   r#   rF      rG   z6AiterRMSFp8GroupQuantPattern.register.<locals>.patternc                    s$    j | | jdd}|d |d fS )N   )rH   r>   variance_epsilon
group_sizer   r   rJ   r   )r=   r>   atrE   r"   r#   rK      s   z:AiterRMSFp8GroupQuantPattern.register.<locals>.replacementrL   rS   r"   rE   r#   rT      s"   z%AiterRMSFp8GroupQuantPattern.registerTT)r%   r&   r'   rU   r    get_rmsnorm_group_fused_quant_oprJ   r(   r4   r   r	   r)   r$   r   rT   rX   r"   r"   r8   r#   re      &    re   c                       rd   )$AiterFusedAddRMSFp8GroupQuantPatternz
    This pattern fuses aiter rms_norm_with_add & group fp8 quant custom ops
    into a aiter rms_norm_with_add_group_fp8_quant op.
    Tr   r   r,   r   r-   r.   Nc                    s8   t tjd|}tdt|||dd}t ||| d S r[   r3   rf   r8   r"   r#   r$      r:   z-AiterFusedAddRMSFp8GroupQuantPattern.__init__r;   c                    r\   )	Nr=   r>   r]   r.   c                    r^   r@   rA   r_   rE   r"   r#   rF      ra   z>AiterFusedAddRMSFp8GroupQuantPattern.register.<locals>.patternc                    s,    j | || jdd}|d |d |d fS )Nrg   )rH   r]   r>   rh   ri   r   r   rb   rj   )r=   r>   r]   rk   rE   r"   r#   rK      s   	zBAiterFusedAddRMSFp8GroupQuantPattern.register.<locals>.replacementrL   rS   r"   rE   r#   rT      s*   
z-AiterFusedAddRMSFp8GroupQuantPattern.registerrl   )r%   r&   r'   rU   r   $get_rmsnorm_group_add_fused_quant_oprJ   r(   r4   r   r	   r)   r$   r   rT   rX   r"   r"   r8   r#   ro      rn   ro   c                       sV   e Zd ZdZededdf fddZejde	j
ddfdd	Zdefd
dZ  ZS )RocmAiterRMSNormFusionPassz
    This pass fuses aiter rms_norm & vllm/aiter quant custom ops
    into a fused rms_norm_quant op.
    It also supports fused_add_rms_norm.
    configr.   Nc                    s   t  | tdd| _dD ]7}t|ttdd| j t|ttdd| j dD ]}t	|t|d| j t
|t|d| j q,q| || j d S )N%rocm_aiter_rms_norm_quant_fusion_pass	pass_name)gh㈵>gư>r   rg   )TF)r   )r6   r$   r   patternsre   	FP8_DTYPEr	   rT   ro   r+   rZ   dump_patterns)r!   rr   r   r   r8   r"   r#   r$     s0   


z#RocmAiterRMSNormFusionPass.__init__graphc                 C       | j || _td| j d S NzReplaced %s patternsrv   applymatched_countloggerdebugr!   ry   r"   r"   r#   __call__8     z#RocmAiterRMSNormFusionPass.__call__c                 C   s   t tttg}| j| g|R  S r@   )r+   rZ   re   ro   hash_sourcer!   fusion_patternsr"   r"   r#   uuid=  s   zRocmAiterRMSNormFusionPass.uuid)r%   r&   r'   rU   r   r   r$   r   time_and_logr   Graphr   strr   rX   r"   r"   r8   r#   rq     s    #rq   c                   @   sP   e Zd ZdZe ZdeddfddZde	e
j fddZd	eddfd
dZdS ) AiterSiluMulFp8GroupQuantPatternz
    This pattern fuses aiter silu_and_mul & group fp8 quant custom
    ops into an aiter silu_and_mul_group_fp8_quant op.
    quant_opr.   Nc                 C   s   t  | _|| _d S r@   )r   silu_and_mul_matcherr   )r!   r   r"   r"   r#   r$   O  s   
z)AiterSiluMulFp8GroupQuantPattern.__init__c                 C   s   | j  d gS )Nr   )r   rQ   rE   r"   r"   r#   
get_inputsS  s   z+AiterSiluMulFp8GroupQuantPattern.get_inputsr;   c                    sd   dt jdtt jt jf f fdd}dt jdtt jt jf f fdd}t||  tj| d S )Nr=   r.   c                    s&     | } |d}|d |d fS )Nrg   r   r   )r   r   )r=   at1at2rE   r"   r#   rF   Y  s   
z:AiterSiluMulFp8GroupQuantPattern.register.<locals>.patternc                    s    j | dd}|d |d fS )Nrg   )rH   ri   r   r   )FUSED_SILU_MUL_QUANT_OP)r=   rk   rE   r"   r#   rK   `  s   z>AiterSiluMulFp8GroupQuantPattern.register.<locals>.replacement)r4   rM   rN   rO   rP   r   rR   rS   r"   rE   r#   rT   X  s   z)AiterSiluMulFp8GroupQuantPattern.register)r%   r&   r'   rU   r   $get_act_mul_fused_fp8_group_quant_opr   r   r$   listr4   rM   r   r   rT   r"   r"   r"   r#   r   G  s    r   c                       st   e Zd ZdZe Zejj	j
jZeegZededdf fddZejdejjddfdd	Zdefd
dZ  ZS )'RocmAiterSiluMulFp8GroupQuantFusionPassah  
    This pass fuses a pre-defined set of custom ops into fused ops.
    It uses the torch pattern matcher to find the patterns and replace them.

    Because patterns can only be registered once, the pass is a singleton.
    This will be addressed in a future version of PyTorch:
    https://github.com/pytorch/pytorch/pull/139321#issuecomment-2452354980
    rr   r.   Nc                    sF   t  | tdd| _| jD ]
}t|| j q| || j d S )N/rocm_aiter_silu_mul_fp8_group_quant_fusion_passrt   )r6   r$   r   rv   	QUANT_OPSr   rT   rx   )r!   rr   r   r8   r"   r#   r$   z  s   
z0RocmAiterSiluMulFp8GroupQuantFusionPass.__init__ry   c                 C   rz   r{   r|   r   r"   r"   r#   r     r   z0RocmAiterSiluMulFp8GroupQuantFusionPass.__call__c                 C   s   t tg}tj| g|R  S r@   )r   r   r   r   r   r"   r"   r#   r     s   z,RocmAiterSiluMulFp8GroupQuantFusionPass.uuid)r%   r&   r'   rU   r   get_group_quant_opAITER_GROUP_FP8_QUANT_OPr4   opsvllm triton_per_token_group_quant_fp8defaultTRITON_GROUP_FP8_QUANT_OPr   r   r   r$   r   r   r   r   r   r   r   rX   r"   r"   r8   r#   r   k  s    	r   )1r4   torch._inductor.pattern_matcher	_inductorpattern_matcherrO   r   r   
torch._opsr   7vllm.model_executor.layers.quantization.utils.fp8_utilsr   vllm._aiter_opsr   (vllm.compilation.activation_quant_fusionr   vllm.configr   vllm.loggerr   9vllm.model_executor.layers.quantization.utils.quant_utilsr	   r
   r   vllm.platformsr   fusionr   inductor_passr   matcher_utilsr   r   r   r   vllm_inductor_passr   r   r%   r   	fp8_dtyperw   r   r+   rZ   re   ro   rq   r   r   r"   r"   r"   r#   <module>   s4   4638:$