o
    )wiv4                     @   s\   d dl mZ d dlmZmZ d dlmZ d dlmZ d dl	m
Z
 eeZG dd deZdS )	    )	getLogger)AttentionMaskFusionAttention)AttentionMaskFormat)	NodeProto)	OnnxModelc                       sP   e Zd ZdZdededef fddZdedeeef fd	d
Z	dd Z
  ZS )FusionAttentionClipzB
    Fuse Attention subgraph of Clip into one Attention node.
    modelhidden_size	num_headsc                    s.   t |}tj|_t j||||ddgd d S )NFSkipLayerNormalization)use_multi_head_attentionsearch_op_types)r   r   NoMaskmask_formatsuper__init__)selfr	   r
   r   attention_mask	__class__ k/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_attention_clip.pyr      s   
zFusionAttentionClip.__init__	reshape_qreturnc                 C   sZ  | j |dd}|du st|jdkr| j| jfS | j |jd }|du r,| j| jfS t|dks8|d dkr>| j| jfS |d }| j |jd }|du rU| j| jfS t|dksa|d dkrg| j| jfS |d }|| }| jdkr|| jkr| jrt	d| j d	| d
 d| _| jdkr|| jkr| j
rt	d| j d	| d
 d| _
||fS )zDetect num_heads and hidden_size for ONNX model from MiDaS
        Args:
            reshape_q (NodeProto): reshape node for q
        Returns:
            Tuple[int, int]: num_heads and hidden_size
        Concat   N      r      z--num_heads is z. Detected value is z. Using detected value.Fz--hidden_size is )r	   match_parentleninputr   r
   get_constant_valuenum_heads_warningloggerwarninghidden_size_warning)r   r   concatnum_head_valuer   head_size_value	head_sizer
   r   r   r   get_num_heads_and_hidden_size&   s6   z1FusionAttentionClip.get_num_heads_and_hidden_sizec           '      C   sF  d }d }dD ]}| j |d|}|d ur|}|}qd }|d ur%|jd }nDdD ];}d }| j |d|}	| j |d|}
|	d urB|	}n|
d urH|
}|d u rMq'| j |d|d}|d u r[q'|jd }|} |d u rid S | j |g dd	| d d dddg}|d u r| j |g d
g d}|d u rtd d S |d |d |d }}}| j |g dg d}|d u r| j |g dg d}|d u rtd d S |d |d }}d }d }g }| j j|g dg d|d}|d u rT| j |ddgddg}|d u rS| j |g dg d}|d ur|d	 }nS| j |g dg d}|d u rS| j |g dg d }|d ur;|d }n,| j |g d!g d"}|d u rStd# d S nt|d	ks]J d	|d  }|d }|d }| j |g d$g d%}|d u r| j |g dg d&}|d u rtd' d S |d	 }n|d }|d |d }}| j |g d(g d)}|d u r| j |g dg d}|d u rtd* d S |d |d }}|jd |ks|jd |ks|jd |krtd+ d S | 	|\}} |dks| dkrtd, d S |}!d-}"|d ur_| j |g d.g d/}#|#d ur/|jd	 }"n0| j |g d0|dddddg}$| j |g d1|ddddg}%|$d u r_|%d u r_td2 d S | j
d |||||||| ||!jd |"d |d ud3}&|&d u rtd4 d S | j|& | j| j|&j< | j|!|g | |&j d5| _d S )6N)r   r   r   r   )r   r   AddLayerNormalizationF)r-   MatMulReshape	Transposer0   r/   r   )r-   r/   r0   r1   r/   )r   Nr   r   r   z(fuse_attention: failed to match qkv pathr   r   )r0   r1   r0   r-   r/   )r   r   r   r   N)r1   r0   r-   r/   )r   r   r   Nz&fuse_attention: failed to match v path)Softmaxr0   r-   r0   r/   )r   r   r   Nr   )return_indicer4   r/   )r4   r-   Mulr/   )r   r   r   r   )r4   r6   r/   )r   r   r   )Castr7   r4   r-   r6   r/   )r   r   r   r   r   r   )r7   r7   r4   r6   r/   )r   r   r   r   r   z'fuse_attention: failed to match qk path)r0   r1   r0   r6   r-   r/   )r   r   r   r   NN)r   r   r   Nz&fuse_attention: failed to match q path)r1   r0   r1   r0   r-   r/   )r   r   r   r   r   Nz&fuse_attention: failed to match k pathz>fuse_attention: expect to have same input to q, k and v matmulz9fuse_attention: failed to detect num_heads or hidden_size )	WhereSubr7   Expand	Unsqueezer<   r0   r0   r7   )	r   r   r   r   r   r   r   r   r   )r   r;   r<   r<   r9   Less)r;   r<   r<   r9   r=   z4fuse_attention: failed to match causal mask subgraph)
mask_indexq_matmulk_matmulv_matmulq_addk_addv_addr   r
   first_inputoutput
add_qk_strscalecausalz+fuse_attention: failed to create fused nodeT)r	   r    rF   find_first_child_by_typematch_parent_pathr%   debugr!   r"   r,   create_attention_nodenodes_to_addappendthis_graph_namenode_name_to_graph_namenamenodes_to_removeextendincrease_counterop_typeprune_graph)'r   normalize_nodeinput_name_to_nodesoutput_name_to_nodeskip_input_indexnode_before_layer_normiparent
root_inputnode_before_layer_norm_1node_before_layer_norm_2child	qkv_nodesreshape_qkvtranspose_qkv
matmul_qkvv_nodesadd_vmatmul_vcausal_mask_input_indexadd_maskadd_mask_indicesqk_nodes	matmul_qkq_nodesr   add_qmatmul_qk_nodesadd_kmatmul_kr   r
   attention_last_nodeadd_qkadd_qk_nodescausal_mask_nodes_1causal_mask_nodes_2new_noder   r   r   fuseT   sx  





















0







zFusionAttentionClip.fuse)__name__
__module____qualname____doc__r   intr   r   tupler,   r{   __classcell__r   r   r   r   r      s    .r   N)loggingr   fusion_attentionr   r   fusion_optionsr   onnxr   
onnx_modelr   r|   r%   r   r   r   r   r   <module>   s   