o
    ߥiz                     @   s
  d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z	d dl
m  mZ d dlZ	d dl	mZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dl m!Z! d dl"m#Z#m$Z$m%Z%m&Z&m'Z' d dl(m)Z)m*Z* d dl+m,Z,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9m:Z:m;Z;m<Z<m=Z= erd dl>m?Z? zd dl@mAZA W n eBy   dZAY nw zd dlCmDZD d dl@mAZA dZEW n eBy   dZEeFd Y nw zd dlGmHZH W n eBy   dZHeFd Y nw e3 ZIdZJd ZKdgZLzd d!lMmNZN W n eBy&   dZNeFd" Y nw G d#d$ d$e	jjOZPG d%d& d&ejOZQG d'd( d(ejOZRG d)d* d*ejOZSG d+d, d,e-eZTe5jUe1jVe/jWd-G d.d/ d/eTZXG d0d1 d1e	jjOZYd2d3 ZZd8d4d5Z[G d6d7 d7e	jjOZ\dS )9    N)TYPE_CHECKINGCallableListOptionalTupleUnion)nn)autocast)CrossEntropyLoss)GenerationConfigPreTrainedTokenizerStoppingCriteriaList)LogitsProcessorList)GenerateOutput)BaseModelOutputWithPastCausalLMOutputWithPast)PreTrainedModel)set_seed)ModelOutputadd_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardlogging)assert_device_mapget_device_map)Model
TorchModel)Models)Tasks)
get_logger   )MODELS   )
QWenConfig)HistoryTypeStopWordsLogitsProcessordecode_tokensget_stop_words_idsmake_context)BaseStreamer	rearrange)apply_rotary_emb_funcTFzWarning: import flash_attn rotary fail, please install FlashAttention rotary to get better performance https://github.com/Dao-AILab/flash-attention/tree/main/csrc/rotary)rms_normzWarning: import flash_attn rms_norm fail, please install FlashAttention layer_norm to get better performance https://github.com/Dao-AILab/flash-attention/tree/main/csrc/layer_normzqwen-7br#   )flash_attn_unpadded_funczkWarning: import flash_attn fail, please install FlashAttention https://github.com/Dao-AILab/flash-attentionc                       s,   e Zd Z			d fdd	Zdd Z  ZS )	FlashSelfAttentionFN        c                    s@   t    td usJ dtd usJ d|| _|| _|| _d S )NzFPlease install FlashAttention first, e.g., with pip install flash-attnz:Please install einops first, e.g., with pip install einops)super__init__r.   r+   causalsoftmax_scale	dropout_p)selfr3   r4   attention_dropout	__class__ W/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/nlp/qwen/backbone.pyr2   U   s   



zFlashSelfAttention.__init__c                 C   s  t dd |||fD sJ t dd |||fD sJ |jd |jd }}|jd }dd |||fD \}}}tjd|d | |tj|jd}| jrX||ksRJ | j}|}	n||k}tjd|d | |tj|jd}	d| _t	|||||	||| j| j
|d	
}
t|
d
|d}
|
S )Nc                 s   s"    | ]}|j tjtjfv V  qd S N)dtypetorchfloat16bfloat16.0ir:   r:   r;   	<genexpr>g   s     z-FlashSelfAttention.forward.<locals>.<genexpr>c                 s   s    | ]}|j V  qd S r<   )is_cudarA   r:   r:   r;   rD   h   s    r   r"   c                 S   s   g | ]}t |d qS )zb s ... -> (b s) ...r*   )rB   xr:   r:   r;   
<listcomp>k   s    z.FlashSelfAttention.forward.<locals>.<listcomp>)stepr=   device)r4   r3   z(b s) ... -> b s ...)b)allshaper>   arangeint32rI   trainingr3   r5   r.   r4   r+   )r6   qkv
batch_sizeseqlen_qseqlen_kcu_seqlens_q	is_causalcu_seqlens_koutputr:   r:   r;   forwarde   sR   


zFlashSelfAttention.forward)FNr0   __name__
__module____qualname__r2   rZ   __classcell__r:   r:   r8   r;   r/   S   s    r/   c                       s   e Zd Zd fdd	ZdddZ		dddZdd	 Zd
d Z							ddee	e
j  dee	e
j  dee
j dee
j dee
j dee
j dee dee fddZ  ZS )QWenAttentionNc                    s  t    |j} jdttj||ftjddd||dd  jdt	ddd t
d| _|j _|j _|j _|j _|j _ j j  _|j _d _d  _|j|j  _ j|j d	kshJ  j|j  _t|jd
 j  _tj|j j|j d _|jp|j  _  jrt!d ur j st"d|j#d _$|j _|j%dkrd  _&n|j%dk sJ t' j|j%  _& j&d urƈ j&n j}t(||j)d _*|j+ _+|j, _, fddt-ddD }t.|d d d d d f  _/d _0t1|j# _2d S )Nbiasr=   r"   F)
persistentmasked_biasg     Tr   r    ra   )r3   r7         ?)basec                    s(   g | ]}| j krt| j nd qS )r"   )
seq_lengthmathlogrA   r6   r:   r;   rG      s    z*QWenAttention.__init__.<locals>.<listcomp>i   )3r1   r2   max_position_embeddingsregister_bufferr>   trilonesboolviewtensormaxlayer_numberparams_dtyperh   hidden_size
split_sizenum_attention_heads	num_headshead_dimuse_flash_attnscale_attn_weights	layer_idxkv_channelsprojection_sizehidden_size_per_attention_headr   Linearc_attnno_biasc_projbf16fp16is_fp32r.   r/   
attn_pdropcore_attention_flash
rotary_pctrotary_ndimsintRotaryEmbeddingrotary_emb_base
rotary_embuse_dynamic_ntkuse_logn_attnrangeTensorlogn_tensor_ntk_cachedDropoutattn_dropout)r6   configrt   max_positionsdim	logn_listr8   rk   r;   r2      sv   




zQWenAttention.__init__c                 C   s  t ||dd}| jr|t jg |dd |j|jd }|d|d}}| jd d d d || |d |f }	t 	|jj
}
t jg |
|jd|j}
t |	||j|
}tjj|dd}||j}| |}|d urw|| }t ||}|dd}||fS )	N      ?r=   rI   rb   r   r"      )r>   matmul	transposer|   fullsizer=   rI   ra   finfomintowherer   
functionalsoftmaxtyper   )r6   querykeyvalueattention_mask	head_maskattn_weightsquery_length
key_lengthcausal_mask
mask_valueattn_outputr:   r:   r;   _attn   s@   

zQWenAttention._attnc                 C   s  |  \}}}}	|  \}
}
}}
tj|| ||tj|jd}d}| jr.|t| dd  }tdd1 |d||	|	ddd|	|}}tj
|| | d|d	}|||||}W d    n1 sgw   Y  | d| d}}| jd d d d || |d |f }t|jj}tj||jd
|j}t|||}|d ur|| }tjj|dd}|jtjkrtd||j}| |}|d ur|| }t||}||fS )Nr   rf   r   r   F)enabledr   r   )betaalpharb   r   zDError with upcasting, attn_weights does not have dtype torch.float32)r   r>   emptyfloat32rI   r|   floatr	   reshaper   baddbmmra   r   r=   r   rr   r   r   r   r   r   RuntimeErrorr   r   r   )r6   r   r   r   r   r   bszry   	q_seq_lendk_	k_seq_lenr   scale_factorrP   rQ   r   r   r   r   r   r:   r:   r;   _upcast_and_reordered_attn   sn   
	
z(QWenAttention._upcast_and_reordered_attnc                 C   s&   |  d d ||f }||}|S )Nr   )r   rq   r6   rr   ry   attn_head_size	new_shaper:   r:   r;   _split_heads6  s   
zQWenAttention._split_headsc                 C   s,   |  }| d d || f }||S )Nr   )
contiguousr   rq   r   r:   r:   r;   _merge_heads;  s   
zQWenAttention._merge_headsFhidden_states
layer_pastr   r   encoder_hidden_statesencoder_attention_maskoutput_attentions	use_cachec	           !      C   sB  |  |}	|	j| jdd\}
}}| |
| j| j}
| || j| j}| || j| j}| d }|r<||d jd 7 }| jrg|| d krg| j	sgt
|| j dd }dt
| d }t|d}|| _n| j}| j||d|j}|d urt|tr|}n|fd }|d ur|\}}|
jd }|d d | d d d d d f }|d d | d d d d d f }t|
|}
t||}|d ur|d |d }}tj||fdd}tj||fdd}|r||f}nd }| jr)| j	s)| jj|
jkr| j|
j|
| _|d|
d }|d}| jd d ||d d d d f }|
||
 }
| jrQtd urQ| jsQ|
jrQ|
||}}}|  |||}t!|d" }n,|
#dddd}
|#dddd}|#dddd}| $|
||||\}}| %|| j| j}| &|}||f} |r| jrtd ur| jst'd| |f7 } | S )	Nr   r   r"   r   )	ntk_alphazb s h d -> b s (h d)r    z/Cannot output attentions while using flash-attn)(r   splitrw   r   ry   rz   r   rL   r   rO   ri   rj   rh   ceilrs   r   r   r   rI   
isinstancetupleapply_rotary_pos_embr>   catr   r   type_as	expand_asr{   r.   r   rE   r   r+   r   permuter   r   r   
ValueError)!r6   r   r   r   r   r   r   r   r   mixed_x_layerr   r   r   
kv_seq_lencontext_valuer   rotary_pos_emb	q_pos_emb	k_pos_embcur_lenpast_key
past_valuepresent	seq_startseq_endr   rP   rQ   rR   context_layerr   attn_weightoutputsr:   r:   r;   rZ   @  s   




""



""



zQWenAttention.forwardr<   )NNNNNNNFF)r\   r]   r^   r2   r   r   r   r   r   r   r>   FloatTensorr   rp   rZ   r_   r:   r:   r8   r;   r`      s@    
E&
9	r`   c                       s$   e Zd Z fddZdd Z  ZS )QWenMLPc                    sl   t    tj|j|jd |j d| _tj|j|jd |j d| _|jd }tj||j|j d| _	d S )Nr   re   )
r1   r2   r   r   rv   ffn_hidden_sizer   w1w2r   )r6   r   	ff_dim_inr8   r:   r;   r2     s   

zQWenMLP.__init__c                 C   s0   |  |}| |}|t| }| |}|S r<   )r   r   Fsilur   )r6   r   a1a2intermediate_parallelrY   r:   r:   r;   rZ     s
   


zQWenMLP.forwardr[   r:   r:   r8   r;   r     s    r   c                       s   e Zd Zd fdd	Z							ddeeej  deeej  deej d	eej d
eej deej dee	 dee	 fddZ
  ZS )	QWenBlockNr"   c                    sp   t    || _|| _|j| _|j}|j| _|j| _t||jd| _	t
||d| _t||jd| _t|| _d S )Neps)rt   )r1   r2   
num_expertrt   (apply_residual_connection_post_layernormrv   r   RMSNormlayer_norm_epsilonln_1r`   attnln_2r   mlp)r6   r   r}   r   rv   r8   r:   r;   r2     s&   
zQWenBlock.__init__Fr   r   r   r   r   r   r   r   c	                 C   s   |  |}	| j|	|||||d}
|
d }|
dd  }| jr |	}n|}|| }| |}	| jr1|	}n|}| |	}|| }|rE|f| }|S |f|dd   }|S )N)r   r   r   r   r   r   r"   )r  r  r   r  r  )r6   r   r   r   r   r   r   r   r   layernorm_outputattn_outputsr   r   residuallayernorm_input
mlp_outputr:   r:   r;   rZ     s4   



zQWenBlock.forward)Nr"   r   )r\   r]   r^   r2   r   r   r>   r   r   rp   rZ   r_   r:   r:   r8   r;   r     s4    	r   c                       sT   e Zd ZeZdZdZdZdgZ fddZ	dd Z
dd	d
Ze fddZ  ZS )QWenPreTrainedModeltransformerFTr   c                    s*   t  j|jfi | t t| | d S r<   )r1   r2   name_or_pathr   )r6   r   kwargsr8   r:   r;   r2     s   zQWenPreTrainedModel.__init__c                 C   s   t |tjr|jjjd| jjd |jdur|jj	  n,t |tj
r=|jjjd| jjd |jdur<|jj|j 	  nt |trI|jjd | D ]\}}|dkrh|jjd| jjtd| jj  d qMdS )zInitialize the weights.r0   )meanstdNrf   zc_proj.weightr   )r   r   r   weightdatanormal_r   initializer_rangera   zero_	Embeddingpadding_idxr   fill_named_parametersri   sqrtn_layer)r6   modulenamepr:   r:   r;   _init_weights
  s4   


z!QWenPreTrainedModel._init_weightsc                 C   s   t |tr
||_d S d S r<   )r   	QWenModelgradient_checkpointing)r6   r  r   r:   r:   r;   _set_gradient_checkpointing!  s   

z/QWenPreTrainedModel._set_gradient_checkpointingc                    sP   | dd }|d u rtdi |}| |}ntt| jdd|i|}||_|S )N	model_dirpretrained_model_name_or_pathr:   )popr#   r1   r   from_pretrainedr#  )clsr  r#  r   modelr8   r:   r;   _instantiate%  s   
z QWenPreTrainedModel._instantiateF)r\   r]   r^   r#   config_classbase_model_prefixis_parallelizablesupports_gradient_checkpointing_no_split_modulesr2   r  r"  classmethodr)  r_   r:   r:   r8   r;   r    s    
r  )module_namec                       s   e Zd ZdgZ fddZdd Zdd Z													dd	eej	 d
ee
e
ej   deej deej	 deej	 deej deej deej deej dee dee dee dee fddZ  ZS )r   zattn.masked_biasc                    s   t     j| _ j| _ j| _ j} j| _	d| _
| j	dkr;t|| j| _| | jj d| _| | jj nd | _d| _t| j| j| _t j| _t fddt jD | _t| j jd| _|   d S )NFlearnedposition_embeddings c                    s   g | ]}t  |d qS ))r}   )r   rA   r   r:   r;   rG   L  s    z&QWenModel.__init__.<locals>.<listcomp>r   )r1   r2   padded_vocab_size
vocab_sizenum_hidden_layersrv   	embed_dimrl   pos_embposition_embedding_typer!  r   r  wpeinit_methodr3  r  _position_embeddings_keywter   
embd_pdropdrop
ModuleListr   hr   r  ln_f	post_init)r6   r   max_sequence_lengthr8   r5  r;   r2   6  s0   

zQWenModel.__init__c                 C   s   | j S r<   r?  rk   r:   r:   r;   get_input_embeddingsY  s   zQWenModel.get_input_embeddingsc                 C   s
   || _ d S r<   rG  )r6   new_embeddingsr:   r:   r;   set_input_embeddings\  s   
zQWenModel.set_input_embeddingsN	input_idspast_key_valuesr   token_type_idsposition_idsr   inputs_embedsr   r   r   r   output_hidden_statesreturn_dictc                    s   d ur n| j j |d ur|n| j j}d urn| j j|d ur$|n| j j}|d ur4|d ur4td|d urJ| }|d|d }|jd }n|d ur\| d d }|jd }ntd|d urg|j	n|j	}|d urv|d|d }|d ur|d|d }|d u rd}t
d gt| j }n	|d d d}|d u rtj||d | tj|d}|dd|d }|d ur|dkrtd||d}|d d d d d d f }|j| jd}d	| t| jj }d }	| || j j}|d u r| |}|}| jd ur| |}|| }| |}||df }| jr,| jr,r,td
 dr1dnd } r8dnd }|r?dnd }tt| j|D ]]\}\}}|rW||f }| jrx| jrx fdd}tj j!!|||d ||| ||	}n|||||| ||	 d}|d }du r|| rdnd f } r||d f }qI| "|}||}|st
dd |||fD S t#||||dS )NzDYou cannot specify both input_ids and inputs_embeds at the same timer   r   z5You have to specify either input_ids or inputs_embedsr   r   z$batch_size has to be defined and > 0rb   rf   zZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...Fr:   c                    s    fdd}|S )Nc                     s    g | R  S r<   r:   )inputs)r  r   r   r:   r;   custom_forward  s   zHQWenModel.forward.<locals>.create_custom_forward.<locals>.custom_forwardr:   )r  rS  r   r   )r  r;   create_custom_forward  s   z0QWenModel.forward.<locals>.create_custom_forward)r   r   r   r   r   r   r   Tr   r"   c                 s   s    | ]	}|d ur|V  qd S r<   r:   )rB   rR   r:   r:   r;   rD     s    z$QWenModel.forward.<locals>.<genexpr>)last_hidden_staterL  r   
attentions)$r   r   rP  r   use_return_dictr   r   rq   rL   rI   r   lenrC  r>   rM   long	unsqueezer   r=   r   r   get_head_maskr  r?  r<  rA  r!  rO   loggerwarning_once	enumerateziputils
checkpointrD  r   )r6   rK  rL  r   rM  rN  r   rO  r   r   r   r   rP  rQ  input_shaperS   rI   past_lengthr   position_embedsoutput_shapepresentsall_self_attentionsall_hidden_statesrC   blockr   rU  r   r:   rT  r;   rZ   _  s   









zQWenModel.forward)NNNNNNNNNNNNN)r\   r]   r^   _keys_to_ignore_on_load_missingr2   rH  rJ  r   r>   
LongTensorr   r   r   rp   rZ   r_   r:   r:   r8   r;   r   2  sZ    #	
r   c                       s6   e Zd Zd
 fdd	Z		dddZddd	Z  ZS )r   '  c                    sd   t    || _|| _d|td|d |   | _tj	
dd u r'tdd | _d| _d| _d S )Nrf   r   r   einopsz'einops is required for Rotary Embedding)r1   r2   r   rg   r>   rM   r   inv_freq	importlibutil	find_specr   _rotary_pos_emb_cache_seq_len_cached_ntk_alpha_cached)r6   r   rg   r8   r:   r;   r2     s   
 
zRotaryEmbedding.__init__r   rf   c           
      C   s   || }|| j ks|| jkrg| j|| j| jd    }	 tjd| jd| jjd | j | _d|| j  | _|| _ || _tj|| jjd}t	|
| j| j}tj||fdd}ddlm}	 |	|d| _d S d S )	Nr   r   )rI   rf   r   r   r*   zn d -> 1 n 1 d)rt  ru  rg   r   r>   rM   ro  rI   r   outerr   r   rn  r+   rs  )
r6   max_seq_lenoffsetr   seqlenrg   seqfreqsembr+   r:   r:   r;   update_rotary_pos_emb_cache  s$   z+RotaryEmbedding.update_rotary_pos_emb_cachec                 C   s(   |  ||| | jd d ||| f S r<   )r}  rs  )r6   rw  rx  r   r:   r:   r;   rZ   !  s   zRotaryEmbedding.forward)rm  )r   rf   )r\   r]   r^   r2   r}  rZ   r_   r:   r:   r8   r;   r     s    
r   c                 C   s>   ddl m} || ddd} | jdd\}}tj| |fddS )	Nr   r*   z... (j d) -> ... j dr   )jr   r   r   )rn  r+   unbindr>   r   )rF   r+   x1x2r:   r:   r;   _rotate_half&  s   r  c           	      C   s   |r;|   }|dd}|d d d |jd d f  }|d d d |jd d f  }t|||| }|S |jd }| dd |f | d|d f }}|  }|  }||  t||   }tj	||fdd| S )Nr   r"   r   r   .r   )
r   squeezerL   cossinr,   r   r  r>   r   )	tr{  use_flash_rotaryt_r  r  rY   rot_dimt_pass_r:   r:   r;   r   .  s   ""
"r   c                       s8   e Zd Zd
dedef fddZdd Zdd	 Z  ZS )r   ư>r   r   c                    s&   t    || _tt|| _d S r<   )r1   r2   r   r   	Parameterr>   ro   r  )r6   r   r   r8   r:   r;   r2   A  s   
zRMSNorm.__init__c                 C   s$   |t |djddd| j  S )Nr   r   T)keepdim)r>   rsqrtpowr  r   )r6   rF   r:   r:   r;   _normF  s   $zRMSNorm._normc                 C   s<   t d ur|jrt || j| jS | | |}|| j S r<   )r-   rE   r  r   r  r   r   )r6   rF   rY   r:   r:   r;   rZ   I  s   
zRMSNorm.forward)r  )	r\   r]   r^   r   r   r2   r  rZ   r_   r:   r:   r8   r;   r   ?  s    r   r*  )]rp  ri   typingr   r   r   r   r   r   r>   torch.nn.functionalr   r   r   torch.utils.checkpointtorch.cuda.ampr	   torch.nnr
   transformersr   r   r   &transformers.generation.logits_processr   transformers.generation.utilsr   transformers.modeling_outputsr   r   transformers.modeling_utilsr   transformers.trainer_utilsr   transformers.utilsr   r   r   r   r   'transformers.utils.model_parallel_utilsr   r   
modelscoper   r   modelscope.metainfor   modelscope.utils.constantr   modelscope.utils.loggerr   r4  r!   configurationr#   qwen_generation_utilsr$   r%   r&   r'   r(   !transformers.generation.streamersr)   rn  r+   ImportErrorflash_attn.layers.rotaryr,   r  printflash_attn.ops.rms_normr-   r]  _CHECKPOINT_FOR_DOC_CONFIG_FOR_DOC"QWen_PRETRAINED_MODEL_ARCHIVE_LISTflash_attn.flash_attn_interfacer.   Moduler/   r`   r   r   r  register_modulebackboneqwen_7br   r   r  r   r   r:   r:   r:   r;   <module>   s    A  J3 H,
