o
    پizf                     @   s  d Z ddlZddlmZ ddlmZ ddlZddlm  m	Z
 ddlmZ ddlmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$ G dd dZ%eG dd dZ&G dd dej'Z(G dd dej'Z)G dd dej'Z*G dd dej'Z+G dd dej'Z,G dd  d ej'Z-G d!d" d"ej'Z.G d#d$ d$ej'Z/G d%d& d&ej'Z0G d'd( d(e"Z1G d)d* d*e"Z2e1e2gZ3dS )+zPyTorch T5 & UMT5 model.    N)Iterable)	dataclass)nn)BaseEncoderOutputT5Config)_get_folding_tp_group)
get_act_fn)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)QuantizationConfig)get_group_rankget_group_size)VocabParallelEmbedding)default_weight_loader)TextEncoder)current_platformc                   @   s    e Zd ZdZdZdZdZdZdS )AttentionTypezO
    Attention type.
    Use string to be compatible with `torch.compile`.
    decoderencoderencoder_onlyencoder_decoderN)__name__
__module____qualname____doc__DECODERENCODERENCODER_ONLYENCODER_DECODER r!   r!   d/home/ubuntu/.local/lib/python3.10/site-packages/sglang/multimodal_gen/runtime/models/encoders/t5.pyr   3   s    r   c                   @   s   e Zd ZU ejed< dS )AttentionMetadata	attn_biasN)r   r   r   torchTensor__annotations__r!   r!   r!   r"   r#   C   s   
 r#   c                       >   e Zd Z	d	dededB f fddZdejfddZ  Z	S )
T5DenseActDenseNconfigquant_configc                    sR   t    t|}t|j|jgd|d| _t|j|jd||d| _t	|j
| _d S )NF)biastp_groupr,   r+   r-   )super__init__r   r
   d_modeld_ffwir   wor   dense_act_fnactselfr*   r+   r-   	__class__r!   r"   r0   J   s   
zT5DenseActDense.__init__returnc                 C   s*   |  |\}}| |}| |\}}|S N)r3   r6   r4   )r8   hidden_states_r!   r!   r"   forward[   s   
zT5DenseActDense.forwardr<   
r   r   r   r   r   r0   r%   r&   r?   __classcell__r!   r!   r9   r"   r)   H   s    r)   c                       r(   )
T5DenseGatedActDenseNr*   r+   c                    sn   t    t|}t|j|jgd||d| _t|j|jgd||d| _t|j|jd||d| _	t
|j| _d S )NFr.   )r/   r0   r   r
   r1   r2   wi_0wi_1r   r4   r   r5   r6   r7   r9   r!   r"   r0   d   s0   
	zT5DenseGatedActDense.__init__r;   c                 C   s<   |  | |d }| |\}}|| }| |\}}|S )Nr   )r6   rC   rD   r4   )r8   r=   hidden_geluhidden_linearr>   r!   r!   r"   r?      s
   zT5DenseGatedActDense.forwardr<   r@   r!   r!   r9   r"   rB   b   s    rB   c                       r(   )
	T5LayerFFNr*   r+   c                    sD   t    |jrt||d| _nt||d| _t|j|jd| _	d S )Nr+   eps)
r/   r0   is_gated_actrB   DenseReluDenser)   r	   r1   layer_norm_epsilon
layer_norm)r8   r*   r+   r9   r!   r"   r0      s   

zT5LayerFF.__init__r;   c                 C   s    |  |}| |}|| }|S r<   )rN   rL   )r8   r=   forwarded_statesr!   r!   r"   r?      s   

zT5LayerFF.forwardr<   r@   r!   r!   r9   r"   rG      s    rG   c                       s(   e Zd Zd fddZdddZ  ZS )	T5MultiHeadAttentionr;   Nc                    s   t    d S r<   )r/   r0   r8   r9   r!   r"   r0      s   zT5MultiHeadAttention.__init__c                 C   sh   |j \}}}}td||}	|d ur|	|7 }	tj|	 dd|	}	td|	|}
|
|d|| }
|
S )Nzbinc,bjnc->bnijdimzbnij,bjnc->binc)shaper%   einsumFsoftmaxfloattype_asreshape)r8   qkvr$   br>   ncattnxr!   r!   r"   r?      s   zT5MultiHeadAttention.forward)r;   Nr<   )r   r   r   r0   r?   rA   r!   r!   r9   r"   rP      s    rP   c                	       s   e Zd Z			ddedededB def fdd	Ze	ddej	fddZ
ddej	fddZ	ddej	dej	dedB dej	fddZ  ZS )T5AttentionFN r*   	attn_typer+   prefixc              
      s  t    || _|tjk| _|| _|j| _|j| _|j	| _	|j
| _|j | _| _t|| _t| j| _|j| j dks=J |j| j | _| j| j | _t| j	| j| j| jd|| d| jd| _t | _| jrwt| j| j| j| j|| jd| _t| j| j | j	d|| d| jd| _d S )Nr   F	.qkv_proj)r,   r+   rg   r-   )org_num_embeddingspadding_sizer+   r-   z.o_proj)r/   r0   rf   r   r   
is_decoderhas_relative_attention_biasrelative_attention_num_bucketsrelative_attention_max_distancer1   d_kvkey_value_proj_dim	num_headstotal_num_headstotal_num_kv_headsr   r-   r   tp_world_sizen_heads	inner_dimr   qkv_projrP   rb   r   relative_attention_biasr   o)r8   r*   rf   rl   r+   rg   r9   r!   r"   r0      sT   


zT5Attention.__init__T       r;   c                 C   s   d}|r|d }|| dk tj| 7 }t| } n
t| t|  } |d }| |k }|t|  | t||  ||   tj }t|t	||d }|t
|| |7 }|S )a  
        Adapted from Mesh Tensorflow:
        https://github.com/tensorflow/mesh/blob/0cb87fe07da627bf0b7e60475d59f95ed6b5be3d/mesh_tensorflow/transformer/transformer_layers.py#L593
        Translate relative position to a bucket number for relative attention.
        The relative position is defined as memory_position - query_position,
        i.e. the distance in tokens from the attending position to the
        attended-to position. If bidirectional=False, then positive relative
        positions are invalid. We use smaller buckets for small absolute
        relative_position and larger buckets for larger absolute
        relative_positions. All relative positions >=max_distance map to the
        same bucket. All relative positions <=-max_distance map to the same
        bucket. This should allow for more graceful generalization to longer
        sequences than the model has been trained on
        Args:
            relative_position: an int32 Tensor
            bidirectional: a boolean - whether the attention is bidirectional
            num_buckets: an integer
            max_distance: an integer
        Returns:
            a Tensor with the same shape as relative_position, containing int32
            values in the range [0, num_buckets)
        r         )tor%   longabsmin
zeros_likelogrY   math	full_likewhere)relative_positionbidirectionalnum_bucketsmax_distancerelative_buckets	max_exactis_smallrelative_position_if_larger!   r!   r"   _relative_position_bucket   s4   
z%T5Attention._relative_position_bucketc           
      C   s   |du r	| j jj}tj|tj|ddddf }tj|tj|ddddf }|| }| j|| j | j| j	d}|  |}|
g dd}	|	S )z%Compute binned relative position biasN)dtypedevice)r   r   r   )r|   r   r}   r   )rx   weightr   r%   aranger   r   rk   rm   rn   permute	unsqueeze)
r8   query_length
key_lengthr   context_positionmemory_positionr   relative_position_bucketvaluesrc   r!   r!   r"   compute_bias*  s,   


zT5Attention.compute_biasr=   attention_maskattn_metadatac                 C   s  |j \}}}|}| j| j}}	| |\}
}|
j| jdd\}}}|||||	}|||||	}|||||	}|d us?J |j}| jr]| j	t
jksMJ | |||ddd}||_n|d uscJ |d ur|jdkrt||dddn|d}t rdnt|jj}||dk| | jdkrt| j}|d d || j |d | j d d d d f }| ||||}| |\}}|S )NrR   rS   r}   r|   g     r   )rU   ru   rp   rw   splitrv   r[   r$   rl   rf   r   r   r   repeatndimviewr   r   is_mpsr%   finfor   r   masked_fill_rt   r   r-   rb   ry   )r8   r=   r   r   bsseq_lenr>   num_seqsr`   ra   qkvr\   r]   r^   r$   mask_valrankattn_outputoutputr!   r!   r"   r?   D  s@   


*zT5Attention.forwardFNre   )Trz   r{   r<   )r   r   r   r   strr   r0   staticmethodr%   r&   r   r   r#   r?   rA   r!   r!   r9   r"   rd      s:    <9rd   c                	       sZ   e Zd Z			ddedB def fddZ	ddejd	ejd
edB dejfddZ	  Z
S )T5LayerSelfAttentionFNre   r+   rg   c                    sJ   t    t|d|v rtjntj||| dd| _t|j|j	d| _
d S )Nr   z.SelfAttentionrl   r+   rg   rI   )r/   r0   rd   r   r   r   SelfAttentionr	   r1   rM   rN   )r8   r*   rl   r+   rg   r9   r!   r"   r0   |  s   
zT5LayerSelfAttention.__init__r=   r   r   r;   c                 C   s&   |  |}| j|||d}|| }|S Nr=   r   r   )rN   r   )r8   r=   r   r   normed_hidden_statesattention_outputr!   r!   r"   r?     s   
zT5LayerSelfAttention.forwardr   r<   r   r   r   r   r   r0   r%   r&   r#   r?   rA   r!   r!   r9   r"   r   z  s&    r   c                       sP   e Zd Z	ddedB def fddZ	ddejdedB d	ejfd
dZ	  Z
S )T5LayerCrossAttentionNre   r+   rg   c                    s<   t    t|tjd|| dd| _t|j|jd| _	d S )NFz.EncDecAttentionr   rI   )
r/   r0   rd   r   r    EncDecAttentionr	   r1   rM   rN   )r8   r*   r+   rg   r9   r!   r"   r0     s   
zT5LayerCrossAttention.__init__r=   r   r;   c                 C   s$   |  |}| j||d}|| }|S )Nr=   r   )rN   r   )r8   r=   r   r   r   r!   r!   r"   r?     s   
zT5LayerCrossAttention.forward)Nre   r<   r   r!   r!   r9   r"   r     s    r   c                	       sb   e Zd Z			ddedededB def fdd	Z	dd
ej	dej	de
dB dej	fddZ  ZS )T5BlockFNre   r*   rk   r+   rg   c                    sr   t    || _t | _| jt|||| dd | jr-| jt||| dd | jt	||d d S )Nz
.self_attnr   z.cross_attn)r+   rg   rH   )
r/   r0   rk   r   
ModuleListlayerappendr   r   rG   )r8   r*   rk   rl   r+   rg   r9   r!   r"   r0     s$   

	zT5Block.__init__r=   r   r   r;   c                 C   s`   |d u rt j|jd d |jd}| jd |||d}| jr'| jd ||d}| jd |}|S )Nr|   )r   r   r   r}   r   rR   )r%   onesrU   r   r   rk   )r8   r=   r   r   r!   r!   r"   r?     s   zT5Block.forwardr   r<   )r   r   r   r   boolr   r   r0   r%   r&   r#   r?   rA   r!   r!   r9   r"   r     s.    !r   c                       sd   e Zd Z				ddededededB ded	ef fd
dZde	j
de	j
dede	j
fddZ  ZS )T5StackNre   Fr*   rk   n_layersr+   rg   is_umt5c                    sz   t    || _|| _|r t fddt|D | _nt fddt|D | _t j	 j
d| _d S )Nc              
      s(   g | ]}t  d  d| dqS )T.blocks.rk   rl   r+   rg   r   .0ir*   rk   rg   r+   r!   r"   
<listcomp>
  s    z$T5Stack.__init__.<locals>.<listcomp>c              
      s,   g | ]}t  |d k d| dqS )r   r   r   r   r   r   r!   r"   r     s    rI   )r/   r0   embed_tokensr   r   r   rangeblockr	   r1   rM   final_layer_norm)r8   r*   rk   r   r   r+   rg   r   r9   r   r"   r0     s   

zT5Stack.__init__	input_idsr   r   r;   c                 C   s:   |  |}t| jD ]\}}||||d}q
| |}|S r   )r   	enumerater   r   )r8   r   r   r   r=   idxr   r!   r!   r"   r?   %  s   

zT5Stack.forward)NNre   F)r   r   r   r   r   intr   r   r0   r%   r&   r#   r?   rA   r!   r!   r9   r"   r     s6    *r   c                          e Zd Zddedef fddZdd Z				dd	ejdB d
ejdB dejdB dejdB de	dB de
fddZdeeeejf  dee fddZ  ZS )T5EncoderModelre   r*   rg   c              	      sV   t  | d }t|}t|j|j|j|d| _t|d|j| j|| ddd| _	d S )Nri   r-   F.encoderr+   rg   r   
r/   r0   r   r   
vocab_sizer1   sharedr   
num_layersr   r8   r*   rg   r+   r-   r9   r!   r"   r0   :  $   zT5EncoderModel.__init__c                 C      | j S r<   r   rQ   r!   r!   r"   get_input_embeddingsP     z#T5EncoderModel.get_input_embeddingsNr   position_idsr   inputs_embedsoutput_hidden_statesr;   c           	      K   s"   t d }| j|||d}t|dS )Nr   r   r   )last_hidden_stater#   r   r   	r8   r   r   r   r   r   kwargsr   r=   r!   r!   r"   r?   S  s   	
zT5EncoderModel.forwardweightsc                 C   s   g d}t |  }t }|D ]e\}}d}d|v sd|v rq|D ].\}}	}
|	|vr*q ||	|}|dr:||vr:q ||vr?q || }|j}||||
 d} |so|dr[||vr[q||vr`q|| }t|dt}||| || q|S )N))rh   z.qr\   )rh   z.kr]   )rh   z.vr^   Fr   lm_head.biasTweight_loader)	dictnamed_parameterssetreplaceendswithr   getattrr   add)r8   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weightloaded
param_nameweight_nameshard_idparamr   r!   r!   r"   load_weightse  s<   
zT5EncoderModel.load_weightsre   NNNNr   r   r   r   r   r0   r   r%   r&   r   r   r?   r   tupler   r  rA   r!   r!   r9   r"   r   8  s*    
,r   c                       r   )UMT5EncoderModelre   r*   rg   c              	      sV   t  | d }t|}t|j|j|j|d| _t|d|j| j|| ddd| _	d S )Nr   Fr   Tr   r   r   r9   r!   r"   r0     r   zUMT5EncoderModel.__init__c                 C   r   r<   r   rQ   r!   r!   r"   r     r   z%UMT5EncoderModel.get_input_embeddingsNr   r   r   r   r   r;   c           	      K   s$   t d }| j|||d}t||dS )Nr   )r   r   r   r   r!   r!   r"   r?     s   	zUMT5EncoderModel.forwardr   c                 C   s   t |  }t }|D ]h\}}d}d|v sd|v rq| jjjD ].\}}}	||vr)q|||}|dr9||vr9q||vr>q|| }
|
j}||
||	 d} |sn|drZ||vrZq||vr_q|| }
t	|
dt
}||
| || q|S )NFr   r   r   Tr   )r   r   r   r*   arch_configr   r   r   r   r   r   r   )r8   r   r   r   r   r   r   r   r   r   r   r   r!   r!   r"   r    sB   
zUMT5EncoderModel.load_weightsr  r  r  r!   r!   r9   r"   r    s*    
,r  )4r   r   collections.abcr   dataclassesr   r%   torch.nn.functionalr   
functionalrW   -sglang.multimodal_gen.configs.models.encodersr   r   )sglang.multimodal_gen.runtime.distributedr   /sglang.multimodal_gen.runtime.layers.activationr   .sglang.multimodal_gen.runtime.layers.layernormr	   +sglang.multimodal_gen.runtime.layers.linearr
   r   r   1sglang.multimodal_gen.runtime.layers.quantizationr   *sglang.multimodal_gen.runtime.layers.utilsr   r   =sglang.multimodal_gen.runtime.layers.vocab_parallel_embeddingr   1sglang.multimodal_gen.runtime.loader.weight_utilsr   2sglang.multimodal_gen.runtime.models.encoders.baser   'sglang.multimodal_gen.runtime.platformsr   r   r#   Moduler)   rB   rG   rP   rd   r   r   r   r   r   r  
EntryClassr!   r!   r!   r"   <module>   sD   ( I&<?YZ