o
    
۾iO$                     @   sX   d dl Z d dlmZ d dlZG dd dejZG dd dejZ						
dddZdS )    Nc                       s   e Zd ZdZ										
				d&dededededededededededededef fddZdd Z	 d'd!e	j
d"e	j
d#e	j
fd$d%Z  ZS )(CustomQwen2Decoderuw   
    Qwen2 visual encoder
    non-causal attention + causal attention
    token_type_ids ：0=non-causal, 1=causal
                     Q sdpaư>    .A        silu{Gz?decoder_layermax_position_embeddingshidden_dimensionnum_attention_headsnum_key_value_headsintermediate_size
vocab_sizeattn_implementationrms_norm_eps
rope_thetaattention_dropout
hidden_actinitializer_rangec                    sV   t    tjjjj}tj}|||||||||	|
||||d}| ||| _	| j	`
d S )N)hidden_sizenum_hidden_layersr   r   r   r   r   r   r   r   r   r   _attn_implementation)super__init__transformersmodelsqwen2modeling_qwen2
Qwen2ModelQwen2Config_create_custom_modelmodelembed_tokens)selfr   r   r   r   r   r   r   r   r   r   r   r   r   r&   r'   config	__class__ [/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/deepencoder2.pyr!      s(   

zCustomQwen2Decoder.__init__c                 C   s   G dd d|}||S )r&   c                       sD   e Zd Z											d fdd	Zdd Zdd Z  ZS )	zFCustomQwen2Decoder._create_custom_model.<locals>.CustomQwen2ModelInnerNc                    s@   || _ d| |||||i}t j||||||||	|
|d
}|S )Nfull_attention)
	input_idsattention_maskposition_idspast_key_valuesinputs_embeds	use_cacheoutput_attentionsoutput_hidden_statesreturn_dictcache_position)_current_token_type_ids_update_causal_maskr    forward)r+   r2   r3   r4   r5   r6   token_type_idsr7   r8   r9   r:   r;   causal_mask_mappingoutputsr-   r/   r0   r>   I   s,   	zNCustomQwen2Decoder._create_custom_model.<locals>.CustomQwen2ModelInner.forwardc                 S   s   |j |j}}t|j}|jd |jd }	}
| j}| j|
|||	|d}|d urI| dkrI|d d d d d d f j	|d}d| | }|| }|S )Nr      )sequence_lengthdtypedevice
batch_sizer?   r   rD   g      ?)
rD   rE   torchfinfominshaper<   _create_custom_4d_maskdimto)r+   r3   input_tensorr;   r5   r8   rD   rE   	min_dtyperF   rC   r?   causal_maskpadding_maskr/   r/   r0   r=   q   s$   	 zZCustomQwen2Decoder._create_custom_model.<locals>.CustomQwen2ModelInner._update_causal_maskc              	   S   s   t |j}g }t|D ]]}t j||f|||d}	|| }
|
dkjddd }|
dkjddd }t|dkrCd|	|d d d f |f< t|D ]\}}t|dkrWd|	||f< d|	||d |d  f< qG||	 qt j	|dd
d}	|	S )N)
fill_valuerD   rE   r   T)as_tuplerB   r   rM   )rH   rI   rJ   rangefullnonzerolen	enumerateappendstack	unsqueeze)r+   rC   rD   rE   rF   r?   rP   masksbmasktype_idsimage_positionstext_positionsitext_posr/   r/   r0   rL      s*   z]CustomQwen2Decoder._create_custom_model.<locals>.CustomQwen2ModelInner._create_custom_4d_mask)NNNNNNNNNNN)__name__
__module____qualname__r>   r=   rL   __classcell__r/   r/   r-   r0   CustomQwen2ModelInnerH   s    ("rj   r/   )r+   r&   r,   rj   r/   r/   r0   r(   E   s   rz'CustomQwen2Decoder._create_custom_modelNr6   r?   r3   c                 K   s   | j d|||d|S )z
        Args:
            inputs_embeds: [batch_size, seq_len, hidden_dim]
            token_type_ids: [batch_size, seq_len], 0=non-causal, 1=causal
            attention_mask: [batch_size, seq_len], optional
        )r6   r?   r3   Nr/   )r)   )r+   r6   r?   r3   kwargsr/   r/   r0   r>      s   zCustomQwen2Decoder.forward)r   r   r   r   r   r   r	   r
   r   r   r   r   r   )N)rf   rg   rh   __doc__intstrfloatr!   r(   rH   Tensorr>   ri   r/   r/   r-   r0   r      sf    	
,{r   c                
       sL   e Zd ZdZdededededef
 fddZd	ejd
ejfddZ  Z	S )Qwen2Decoder2Encoderz
    Decoder based on Multilingual BART
    Set the initial weights and configuration with a pretrained multilingual BART model,
    and modify the detailed configurations as a Nougat decoder
    r   r   r   r   r   c                    s@   t    t|||||dd| _td|| _td|| _d S )Nr
   )r   r   r   r   r   r         )r    r!   r   r)   nn	Embedding	query_768
query_1024)r+   r   r   r   r   r   r-   r/   r0   r!      s   
zQwen2Decoder2Encoder.__init__xreturnc           
      C   s   | ddd}|j\}}}|dkr| jj}n|dkr | jj}|d|dd}tj	||gdd}tj	tj
||tjdtj||tjdgdd}| ||d }	|	d d |d d d f }	|	S )	Nr   rB   rr   rs   r   rU   rG   )flatten	transposerK   rv   weightrw   r]   expandrH   catzeroslongonesr)   )
r+   rx   bsn_query_	param_imgbatch_query_imgs
x_combinedr?   yr/   r/   r0   r>      s&   

zQwen2Decoder2Encoder.forward)
rf   rg   rh   rl   rm   r!   rH   rp   r>   ri   r/   r/   r-   r0   rq      s    rq   r   r   r   r   r   c                 C   s   t | ||||d}|S )N)r   r   r   r   r   )rq   )r   r   r   r   r   decoder_as_encoderr/   r/   r0   build_qwen2_decoder_as_encoder  s   r   )r   r   r   r   r   )rH   torch.nnrt   r"   Moduler   rq   r   r/   r/   r/   r0   <module>   s    @<