o
    ei3%                     @   sd  d Z ddlmZ ddlZddlmZ ddlmZ ddlmZm	Z	 ddl
mZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddlmZmZmZm Z m!Z!m"Z"m#Z#m$Z$ ddl%m&Z& e'e(Z)G dd dej*Z+G dd deZ,G dd deZ-G dd de"Z.G dd deZ/G dd de Z0G d d! d!e!Z1g d"Z2dS )#zPyTorch Starcoder2 model.    )CallableN)nn   )ACT2FN)CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging)merge_with_config_defaults)capture_outputs   )MistralAttentionMistralDecoderLayerMistralForCausalLM MistralForSequenceClassificationMistralForTokenClassificationMistralModelapply_rotary_pos_embeager_attention_forward   )Starcoder2Configc                       s@   e Zd Zdef fddZdeej dB dejfddZ  Z	S )	Starcoder2MLPconfigc                    sT   t    |j}tj||j|jd| _tj|j||jd| _t	|j
 | _|j| _d S )Nbias)super__init__hidden_sizer   Linearintermediate_sizeuse_biasc_fcc_projr   
hidden_actactresidual_dropout)selfr   	embed_dim	__class__ o/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/starcoder2/modular_starcoder2.pyr"   5   s   
zStarcoder2MLP.__init__hidden_statesNreturnc                 C   s8   |  |}| |}| |}tjj|| j| jd}|S )Nptraining)r'   r*   r(   r   
functionaldropoutr+   r6   )r,   r2   r0   r0   r1   forward=   s
   


zStarcoder2MLP.forward)
__name__
__module____qualname__r   r"   tupletorchFloatTensorr9   __classcell__r0   r0   r.   r1   r   4   s    &r   c                       s   e Zd ZddededB f fddZ		ddejdeejejf dejdB d	e	dB d
ej
dB dee deejejdB eej dB f fddZ  ZS )Starcoder2AttentionNr   	layer_idxc                    s   t  j||d |j| _tj|j|j| j |jd| _	tj|j|j
| j |jd| _tj|j|j
| j |jd| _tj|j| j |j|jd| _d S )Nr   rB   r   )r!   r"   r+   r   r$   r#   num_attention_headshead_dimr&   q_projnum_key_value_headsk_projv_projo_projr,   r   rB   r.   r0   r1   r"   F   s   "zStarcoder2Attention.__init__r2   position_embeddingsattention_maskpast_key_valuescache_positionkwargsr3   c                 K   s:  |j d d }g |d| jR }| ||dd}	| ||dd}
| ||dd}|\}}t|	|
||\}	}
|d urW|||d}||
|| j	|\}
}t
| jjt}|| |	|
||f| jskdn| j| jt| jdd d|\}}|jg |dR   }| |}tjj|| j| jd}||fS )	Nr   r   )sincosrO   g        sliding_window)r8   scalingrT   r4   )shaperE   rF   view	transposerH   rI   r   updaterB   r   get_interfacer   _attn_implementationr   r6   attention_dropoutrU   getattrreshape
contiguousrJ   r   r7   r8   r+   )r,   r2   rL   rM   rN   rO   rP   input_shapehidden_shapequery_states
key_statesvalue_statesrS   rR   cache_kwargsattention_interfaceattn_outputattn_weightsr0   r0   r1   r9   N   s@   		


zStarcoder2Attention.forward)N)NN)r:   r;   r<   r   intr"   r>   Tensorr=   r   
LongTensorr   r
   r9   r@   r0   r0   r.   r1   rA   E   s&    rA   c                       s&   e Zd Zdedef fddZ  ZS )Starcoder2DecoderLayerr   rB   c                    sR   t  || t||d| _t|| _tj|j|j	d| _
tj|j|j	d| _d S )NrC   eps)r!   r"   rA   	self_attnr   mlpr   	LayerNormr#   norm_epsiloninput_layernormpost_attention_layernormrK   r.   r0   r1   r"      s
   
zStarcoder2DecoderLayer.__init__)r:   r;   r<   r   ri   r"   r@   r0   r0   r.   r1   rl      s    rl   c                       s   e Zd Zdef fddZee							ddejdB dej	dB dejdB de
dB d	ejdB d
edB dejdB dee deeB fddZ  ZS )Starcoder2Modelr   c                    sL   t    t fddt jD | _tj j j	d| _
 j| _d S )Nc                    s   g | ]}t  |qS r0   )rl   ).0rB   r   r0   r1   
<listcomp>   s    z,Starcoder2Model.__init__.<locals>.<listcomp>rm   )r!   r"   r   
ModuleListrangenum_hidden_layerslayersrq   r#   rr   normembedding_dropout)r,   r   r.   rw   r1   r"      s   zStarcoder2Model.__init__N	input_idsrM   position_idsrN   inputs_embeds	use_cacherO   rP   r3   c              
   K   s6  |d u |d uA rt d|d u r| |}|r!|d u r!t| jd}|d u r=|d ur-| nd}	tj|	|	|jd  |jd}|d u rF|	d}| jj
d u rNtnt}
|
| j|||||d}|}tjj|| j| jd}| j||d}| jd | jj D ]}||f||||||d	|}qx| |}t||r|d
S d d
S )Nz:You must specify exactly one of input_ids or inputs_embedsrw   r   r   )device)r   r   rM   rO   rN   r   r4   )r   )rM   r   rN   r   rO   rL   )last_hidden_staterN   )
ValueErrorembed_tokensr   r   get_seq_lengthr>   arangerV   r   	unsqueezerT   r   r	   r   r7   r8   r~   r6   
rotary_embr|   r{   r}   r   )r,   r   rM   r   rN   r   r   rO   rP   past_seen_tokensmask_functioncausal_maskr2   rL   decoder_layerr0   r0   r1   r9      s^   

	

zStarcoder2Model.forward)NNNNNNN)r:   r;   r<   r   r"   r   r   r>   rk   rj   r   r?   boolr   r   r=   r   r9   r@   r0   r0   r.   r1   ru      s<    	
ru   c                   @      e Zd ZdS )Starcoder2ForCausalLMNr:   r;   r<   r0   r0   r0   r1   r          r   c                   @   r   )#Starcoder2ForSequenceClassificationNr   r0   r0   r0   r1   r      r   r   c                   @   r   ) Starcoder2ForTokenClassificationNr   r0   r0   r0   r1   r      r   r   )r   ru   Starcoder2PreTrainedModelr   r   )3__doc__collections.abcr   r>   r   activationsr   cache_utilsr   r   masking_utilsr   r	   modeling_flash_attention_utilsr
   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   utils.genericr   utils.output_capturingr   mistral.modeling_mistralr   r   r   r   r   r   r   r   configuration_starcoder2r   
get_loggerr:   loggerModuler   rA   rl   ru   r   r   r   __all__r0   r0   r0   r1   <module>   s2   (

:	L