o
    ei$                      @   s  d dl mZ d dlZd dlmZ ddlmZmZ ddlmZm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& ddl'm(Z( ddl)m*Z* e+e,Z-G dd de#Z.G dd deZ/G dd deZ0G dd deZ1G dd de$Z2G dd de(Z3G d d! d!eZ4G d"d# d#e!Z5G d$d% d%e"Z6G d&d' d'e Z7g d(Z8dS ))    )CallableN)nn   )CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging)merge_with_config_defaults)capture_outputs   )Gemma2RotaryEmbedding)
LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForQuestionAnsweringLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLPLlamaPreTrainedModelapply_rotary_pos_embeager_attention_forward)MistralModel   )Qwen2Configc                       s   e Zd Z fddZ  ZS )Qwen2MLPc                    sR   t  | tj| j| jdd| _tj| j| jdd| _tj| j| jdd| _d S )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_projselfconfig	__class__ e/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/qwen2/modular_qwen2.pyr%   &   s   zQwen2MLP.__init__)__name__
__module____qualname__r%   __classcell__r1   r1   r/   r2   r!   %   s    r!   c                   @      e Zd ZdS )Qwen2RotaryEmbeddingNr3   r4   r5   r1   r1   r1   r2   r8   -       r8   c                       s   e Zd Zdedef fddZ		ddejdeejejf dejdB d	e	dB d
ej
dB dee deejejdB f fddZ  ZS )Qwen2Attentionr.   	layer_idxc                    s   t |dr
|j| nd | _t || tj|j|j| j	 dd| _
tj|j|j| j	 dd| _tj|j|j| j	 dd| _tj|j| j	 |jdd| _| jdkrW|j| _d S d | _d S )Nlayer_typesTr"   Fsliding_attention)hasattrr=   
layer_typer$   r%   r   r&   r'   num_attention_headshead_dimq_projnum_key_value_headsk_projv_projo_projsliding_windowr-   r.   r<   r/   r1   r2   r%   2   s    zQwen2Attention.__init__Nhidden_statesposition_embeddingsattention_maskpast_key_valuescache_positionkwargsreturnc                 K   s  |j d d }g |d| jR }| ||dd}	| ||dd}
| ||dd}|\}}t|	|
||\}	}
|d urW|||d}||
|| j	|\}
}t
| jjt}|| |	|
||f| jskdn| j| j| jd|\}}|jg |dR   }| |}||fS )Nr   r   )sincosrN   g        )dropoutscalingrH   )shaperB   rC   view	transposerE   rF   r   updater<   r   get_interfacer.   _attn_implementationr   trainingattention_dropoutrU   rH   reshape
contiguousrG   )r-   rJ   rK   rL   rM   rN   rO   input_shapehidden_shapequery_states
key_statesvalue_statesrS   rR   cache_kwargsattention_interfaceattn_outputattn_weightsr1   r1   r2   forward;   s:   		

zQwen2Attention.forward)NN)r3   r4   r5   r    intr%   torchTensortupler   
LongTensorr   r	   ri   r6   r1   r1   r/   r2   r;   1   s&    r;   c                       s&   e Zd Zdedef fddZ  ZS )Qwen2DecoderLayerr.   r<   c                    s    t  j||d |j| | _d S )N)r.   r<   )r$   r%   r=   attention_typerI   r/   r1   r2   r%   i   s   zQwen2DecoderLayer.__init__)r3   r4   r5   r    rj   r%   r6   r1   r1   r/   r2   ro   h   s    ro   c                   @   r7   )Qwen2PreTrainedModelNr9   r1   r1   r1   r2   rq   n   r:   rq   c                       s   e Zd Zdef fddZeee							ddej	dB dej
dB dej	dB dedB d	ejdB d
edB dej	dB dee defddZ  ZS )
Qwen2Modelr.   c                    s   t  | d| jjv | _d S )Nr>   )r$   r%   r.   r=   has_sliding_layersr,   r/   r1   r2   r%   s   s   zQwen2Model.__init__N	input_idsrL   position_idsrM   inputs_embeds	use_cacherN   rO   rP   c              
   K   sF  |d u |d uA rt d|d u r| |}|r!|d u r!t| jd}|d u r=|d ur-| nd}	tj|	|	|jd  |jd}|d u rF|	d}t
| }
tsl| j|||||d}dtdi |i}
| jrltdi ||
d< |}| ||}| jd | jj D ]}||f|
|j |||||d	|}q}| |}t||r|d
S d d
S )Nz:You must specify exactly one of input_ids or inputs_embeds)r.   r   r   )device)r.   rv   rL   rN   rM   ru   full_attentionr>   )rL   rK   ru   rM   rw   rN   )last_hidden_staterM   r1   )
ValueErrorembed_tokensr   r.   get_seq_lengthrk   arangerV   rx   	unsqueeze
isinstancedictr   rs   r   
rotary_emblayersnum_hidden_layersrp   normr
   )r-   rt   rL   ru   rM   rv   rw   rN   rO   past_seen_tokenscausal_mask_mappingmask_kwargsrJ   rK   decoder_layerr1   r1   r2   ri   w   s^   



zQwen2Model.forward)NNNNNNN)r3   r4   r5   r    r%   r   r   r   rk   rn   rl   r   FloatTensorboolr   r   r
   ri   r6   r1   r1   r/   r2   rr   r   s>    	
rr   c                   @   r7   )Qwen2ForCausalLMNr9   r1   r1   r1   r2   r      r:   r   c                   @   r7   )Qwen2ForSequenceClassificationNr9   r1   r1   r1   r2   r      r:   r   c                   @   r7   )Qwen2ForTokenClassificationNr9   r1   r1   r1   r2   r      r:   r   c                   @   r7   )Qwen2ForQuestionAnsweringNr9   r1   r1   r1   r2   r      r:   r   )rq   rr   r   Qwen2RMSNormr   r   r   )9collections.abcr   rk   r   cache_utilsr   r   masking_utilsr   r   modeling_flash_attention_utilsr	   modeling_outputsr
   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   utils.output_capturingr   gemma2.modeling_gemma2r   llama.modeling_llamar   r   r   r   r   r   r   r   r   r   mistral.modeling_mistralr   configuration_qwen2r    
get_loggerr3   loggerr!   r8   r;   ro   rq   rr   r   r   r   r   __all__r1   r1   r1   r2   <module>   s8    0
7N