o
    eiF                     @   s  d dl mZ d dlZd dlmZ ddlmZ ddlmZm	Z	 ddl
mZ ddlmZmZ dd	lmZmZmZmZ dd
lmZ ddlmZmZmZ ddlmZ ddlmZmZ ddlm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/ ddl0m1Z1m2Z2 e3e4Z5G dd de!Z6dd Z7dd Z8G dd dej9Z:G dd de'Z;G dd de(Z<G dd  d e Z=G d!d" d"e#Z>G d#d$ d$e%Z?G d%d& d&e$Z@G d'd( d(e"ZAG d)d* d*e&ZBeG d+d, d,eZCG d-d. d.eCZDG d/d0 d0ej9ZEG d1d2 d2ej9ZFG d3d4 d4ej9ZGeeG d5d6 d6eZHG d7d8 d8ej9ZIG d9d: d:ej9ZJG d;d< d<e.ZKG d=d> d>e/ZLG d?d@ d@e,ZMG dAdB dBe*ZNG dCdD dDe+ZOG dEdF dFe-ZPG dGdH dHePZQG dIdJ dJePeZRg dKZSdS )L    )	dataclassN)nn   )initialization)CacheDynamicCache)GenerationMixin)create_bidirectional_maskcreate_causal_mask)BaseModelOutputWithPast,BaseModelOutputWithPoolingAndCrossAttentionsCausalLMOutputWithPastModelOutput)PreTrainedModel)auto_docstringcan_return_tuplelogging)merge_with_config_defaults)OutputRecordercapture_outputs   )	EsmAttentionEsmEmbeddings
EsmEncoderEsmIntermediateEsmLayer	EsmOutput	EsmPoolerEsmSelfAttentionEsmSelfOutput)LlamaAttentionLlamaDecoderLayerLlamaMLPLlamaPreTrainedModelLlamaRMSNormLlamaRotaryEmbedding   )EvollaConfigSaProtConfigc                       s   e Zd Z fddZ  ZS )EvollaSaProtEmbeddingsc                    s   t  | d | _d S N)super__init__position_idsselfconfig	__class__ g/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/evolla/modular_evolla.pyr,   @   s   
zEvollaSaProtEmbeddings.__init__)__name__
__module____qualname__r,   __classcell__r3   r3   r1   r4   r)   ?   s    r)   c                 C   s&   | j ddd\}}tj| |fddS )Nr   dim)chunktorchcat)xx1x2r3   r3   r4   rotate_half_esmF   s   rB   c                 C   s`   |d d d d d | j d d d f }|d d d d d | j d d d f }| | t| |  S )N)shaperB   )r?   cossinr3   r3   r4   apply_rotary_pos_emb_esmK   s   &&rG   c                       sb   e Zd ZU dZejed< def fddZdddZ	d	ejd
ejde
ejejf fddZ  ZS )EvollaSaProtRotaryEmbeddingz
    Rotary position embeddings based on those in
    [RoFormer](https://huggingface.co/docs/transformers/model_doc/roformer). Query and keys are transformed by rotation
    matrices which depend on their relative positions.
    inv_freqr;   c                    sV   t    || _ddtjd|dtjd |   }| d| d | _d | _	d | _
d S )N      ?'  r   r   dtyperI   )r+   r,   r;   r=   arangeint64floatregister_buffer_seq_len_cached_cos_cached_sin_cached)r/   r;   rI   r1   r3   r4   r,   [   s   
$
z$EvollaSaProtRotaryEmbedding.__init__r   c                 C   s   |j | }|| jks| jj|jkrU|| _tj|j | |jd| j}t|| j}tj	||fdd
|j}| d d d d d d f | _| d d d d d d f | _| j| jfS )Ndevicer9   r:   )rD   rR   rS   rV   r=   rN   type_asrI   outerr>   torE   rF   rT   )r/   r?   seq_dimensionseq_lentfreqsembr3   r3   r4   _update_cos_sin_tablesf   s   
z2EvollaSaProtRotaryEmbedding._update_cos_sin_tablesqkreturnc                 C   sJ   | j |dd\| _| _t|| j| jj|jdt|| j| jj|jdfS )NrC   )rZ   rL   )r_   rS   rT   rG   rY   rM   )r/   r`   ra   r3   r3   r4   forwardv   s   z#EvollaSaProtRotaryEmbedding.forward)r   )r5   r6   r7   __doc__r=   Tensor__annotations__intr,   r_   tuplerc   r8   r3   r3   r1   r4   rH   R   s   
 

.rH   c                   @   s   e Zd ZdddZdS )EvollaSaProtSelfAttentionNFc                 C   s  t j|  || _|j|j dkr#t|ds#td|j d|j d|j| _t|j|j | _	| j| j	 | _
t |j| j
| _t |j| j
| _t |j| j
| _|j| _d | _|p`t|dd| _| jdkrnt| j	d	| _|j| _|| _d
| _| jo}| | _d S )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()position_embedding_typeabsoluterotaryr:   rJ   )r   Moduler,   r0   hidden_sizenum_attention_headshasattr
ValueErrorrg   attention_head_sizeall_head_sizeLinearquerykeyvalueattention_probs_dropout_probdropoutrotary_embeddingsgetattrrl   rH   
is_decoder	layer_idxscaling	is_causal)r/   r0   rl   r   is_cross_attentionr3   r3   r4   r,      s2   

z"EvollaSaProtSelfAttention.__init__)NNF)r5   r6   r7   r,   r3   r3   r3   r4   ri      s    ri   c                   @      e Zd ZdS )EvollaSaProtSelfOutputNr5   r6   r7   r3   r3   r3   r4   r          r   c                   @   r   )EvollaSaProtAttentionNr   r3   r3   r3   r4   r      r   r   c                   @   r   )EvollaSaProtIntermediateNr   r3   r3   r3   r4   r      r   r   c                   @   r   )EvollaSaProtOutputNr   r3   r3   r3   r4   r      r   r   c                   @   r   )EvollaSaProtLayerNr   r3   r3   r3   r4   r      r   r   c                   @   r   )EvollaSaProtEncoderNr   r3   r3   r3   r4   r      r   r   c                   @   r   )EvollaSaProtPoolerNr   r3   r3   r3   r4   r      r   r   c                       s`   e Zd ZU eed< dgZdZdZdZdZ	e
eedddgeedddgdZ fd	d
Z  ZS )EvollaSaProtPreTrainedModelr0   r   Tr&   	attention)index
layer_namecrossattention)hidden_states
attentionscross_attentionsc                    sT   t  | t|tr(ddtjd|jdtjd |j   }t	
|j| d S d S )NrJ   rK   r   r   rL   )r+   _init_weights
isinstancerH   r=   rN   r;   rO   rP   initcopy_rI   )r/   modulerI   r1   r3   r4   r      s
   
(z)EvollaSaProtPreTrainedModel._init_weights)r5   r6   r7   r(   rf   _no_split_modules_supports_flash_attn_supports_sdpa_supports_flex_attn_supports_attention_backendr   r   ri   _can_record_outputsr   r8   r3   r3   r1   r4   r      s   
 r   c                
       sj   e Zd Zdef fddZdd Zdd Zee	dd	e	j
dB d
e	j
dB dee	j
 eB fddZ  ZS )EvollaSaProtProteinEncoderr0   c                    s,   t  | t|| _t|| _|   d S r*   )r+   r,   r)   
embeddingsr   encoder	post_initr.   r1   r3   r4   r,      s   

z#EvollaSaProtProteinEncoder.__init__c                 C   s   | j jS r*   r   word_embeddingsr/   r3   r3   r4   get_input_embeddings   s   z/EvollaSaProtProteinEncoder.get_input_embeddingsc                 C   s   || j _d S r*   r   r/   ry   r3   r3   r4   set_input_embeddings      z/EvollaSaProtProteinEncoder.set_input_embeddingsN	input_idsattention_maskrb   c                 K   s   |  }|\}}|j}|d u rtj||f|d}| j||d}t| j||d}| j|fd|i|}	|	d }
t|
|	j	|	j
|	jdS )NrU   r   r   )r0   inputs_embedsr   r   r   )last_hidden_stater   r   r   )sizerV   r=   onesr   r	   r0   r   r   r   r   r   )r/   r   r   kwargsinput_shape
batch_size
seq_lengthrV   r   encoder_outputssequence_outputr3   r3   r4   rc      s&   z"EvollaSaProtProteinEncoder.forwardr*   )r5   r6   r7   r(   r,   r   r   r   r   r=   re   rh   r   rc   r8   r3   r3   r1   r4   r      s    r   c                       s&   e Zd Zd fdd	Zdd Z  ZS )!EvollaSequenceCompressorAttention@      c                    sx   t    |d | _|| _|| }t|| _t|| _tj||dd| _	tj||d dd| _
tj||dd| _d S )N      Fbiasr   )r+   r,   scaleheadsr   	LayerNorm
norm_medianorm_latentsrv   to_qto_kvto_out)r/   r;   dim_headr   	inner_dimr1   r3   r4   r,     s   

z*EvollaSequenceCompressorAttention.__init__c                 C   s  |  |}| |}| j}| |}tj||fdd}| |jddd\}}||	d|	d|d
dddd}||	d|	d|d
dddd}||	d|	d|d
dddd}|| j }t||dd}	|	|	jddd	  }	|	j\}
}}}t|||j}|d
d
d
d
d
d
f }|d
d
d
d
d
d
f }|| }|	d|  d}	|	jdd}t||}|
dddd}||	d|	dd}| |S )z
        Args:
            x (torch.Tensor): image features
                shape (b, n1, D)
            latent (torch.Tensor): latent features
                shape (b, n2, D);  n2: num of latent tokens
        rC   r:   r   r9   r   r&   r   Tr;   keepdimNg     )r   r   r   r   r=   r>   r   r<   viewr   permuter   matmul	transposeamaxdetachrD   r   rY   rV   masked_fillboolsoftmaxreshaper   )r/   r?   latentsmaskhr`   kv_inputra   vsimbsnhskdokdr   mask_expones_expattnoutr3   r3   r4   rc     s2   




(((

z)EvollaSequenceCompressorAttention.forward)r   r   r5   r6   r7   r,   rc   r8   r3   r3   r1   r4   r     s    r   c                       s&   e Zd Zd fdd	Zdd Z  ZS )EvollaFeedForward   c                    sT   t    t|| }t|| _tj||dd| _t | _	tj||dd| _
d S NFr   )r+   r,   rg   r   r   normrv   fc1GELU
activationfc2)r/   r;   multr   r1   r3   r4   r,   >  s   

zEvollaFeedForward.__init__c              	   C   s   |  | | | |S r*   )r   r   r   r   )r/   r?   r3   r3   r4   rc   G  s   zEvollaFeedForward.forward)r   r   r3   r3   r1   r4   r   =  s    	r   c                       s*   e Zd Zdef fddZdd Z  ZS )!EvollaSequenceCompressorResamplerr0   c              
      s   t    |jj}|j| _tjt	| j|dd| _
tg | _t|jD ]}| jtt||j|jdt||jdg q%t|j| _t||j| _d S )NT)requires_grad)r;   r   r   )r;   r   )r+   r,   protein_encoder_configrp   resampler_num_latentsnum_latentsr   	Parameterr=   randnr   
ModuleListlayersrangeresampler_depthappendr   resampler_dim_headresampler_headsr   resampler_ff_multr   r   rv   protein_projector)r/   r0   protein_repr_dim_r1   r3   r4   r,   L  s"   

z*EvollaSequenceCompressorResampler.__init__c                 C   s   |j d }|j \}}t|| j|j}tj||fdd}t|| jj}| jd  |ddd }||j	}| j
D ]\}	}
|	|||| }|
|| }q=| |}| |S )Nr   r&   r:   r9   )rD   r=   r   r   rY   rV   r>   r   r   rM   r   r   r   )r/   embedsr   br   r   latent_maskr   r   r   fftransformed_featurer3   r3   r4   rc   a  s   



z)EvollaSequenceCompressorResampler.forward)r5   r6   r7   r'   r,   rc   r8   r3   r3   r1   r4   r   K  s    r   c                   @   sf   e Zd ZU dZejdB ed< dZejdB ed< dZe	ejdf dB ed< dZ
e	ejdf dB ed< dS )EvollaProteinEncoderModelOutputNsequence_compressor_outputr   .r   r   )r5   r6   r7   r  r=   FloatTensorrf   r   r   rh   r   r3   r3   r3   r4   r  u  s
   
 r  c                       s<   e Zd Zdef fddZedejdejfddZ	  Z
S )EvollaProteinEncoderr0   c                    s(   t    t|jd| _t|d| _d S )Nr0   )r+   r,   r   r   modelr   sequence_compressor_resamplerr.   r1   r3   r4   r,     s   
zEvollaProteinEncoder.__init__r   r   c                 K   s.   | j ||d}|j}| ||}t||jdS )Nr   )r  r   )r  r   r	  r  )r/   r   r   r   protein_outputprotein_embedssequence_reprr3   r3   r4   rc     s   zEvollaProteinEncoder.forward)r5   r6   r7   r'   r,   r   r=   
LongTensorr  rc   r8   r3   r3   r1   r4   r  ~  s     r  c                       s^   e Zd Z			ddedB dedB dedB f fddZdd Z							dd	d
Z  ZS )#EvollaSequenceAlignerCrossAttentionNprotein_encoder_dimstructure_encoder_dimmsa_encoder_dimc                    st  t    |j| _|j| _| jd | _t| j| j | _| j| j | _|j}|j	}|j
}t| j| j| _|d urJt|| j| _t|| j| _nd | _d | _|d uret|| j| _t|| j| _nd | _d | _|d urt|| j| _t|| j| _nd | _d | _t| j| _t|| _tj| j| j|d| _t| j|| _ttdg| _ttdg| _d S )Nr   r           ) r+   r,   rp   rq   r   rg   rt   ru   $aligner_attention_probs_dropout_probaligner_enable_biasaligner_ffn_multr   rv   rw   key_proteinvalue_proteinkey_structurevalue_structurekey_msa	value_msaEvollaRMSNormattention_normDropoutr{   out_projr   r  r   r=   tensorgate_attentiongate_ffw)r/   r0   r  r  r  rz   enable_biasffn_multr1   r3   r4   r,     s>   
z,EvollaSequenceAlignerCrossAttention.__init__c	                 C   s  |||g}	dd |	D }	|	st dtj|	dd}	| |}
| |
}
| jdur=| jdur=||}| |}| |}nd}d}| jdur[| j	dur[||}| |}| 	|}nd}d}| j
dury| jdury||}| 
|}| |}nd}d}|||g}dd |D }tj|dd}|||g}dd |D }tj|dd}|
 dd	 | j| jf }|
j| d
ddd}
| dd	 | j| jf }|j| d
ddd}| dd	 | j| jf }|j| d
ddd}|
| j }
|du rt|d
|d|j}|ddddddf |	ddddddf  }t|
|d	d}||jd	dd  }|d|  t|jj}tjd	d|}t||}|d
ddd }| dd | j f }|j| }| !|}|S )z
        query_states: text
        key_value_states: protein
        query_states: [bs, query_seq_len, dim]
        key_value_states: [bs, kv_seq_len, dim]
        query_attn_mask: [bs, query_seq_len]
        kv_attn_mask: [bs, kv_seq_len]
        c                 S      g | ]}|d ur|qS r*   r3   .0r   r3   r3   r4   
<listcomp>      zGEvollaSequenceAlignerCrossAttention.cross_attention.<locals>.<listcomp>z=At least one modality should be provided for cross attention.r&   r:   Nc                 S   r%  r*   r3   r&  r3   r3   r4   r(     r)  c                 S   r%  r*   r3   r&  r3   r3   r4   r(    r)  r9   r   r   r   rC   Tr   )"rs   r=   r>   r  rw   r  r  rY   r  r  r  r  r   rq   rt   r   r   r   r   rV   r   r   r   r   r   r   finforM   minr   Softmax
contiguousru   r  )r/   query_statesprotein_key_value_statesstructure_key_value_statesmsa_key_value_statesquery_attn_maskprotein_kv_attn_maskstructure_kv_attn_maskmsa_kv_attn_maskkv_attn_maskquery_layerkey_layer_proteinvalue_layer_proteinkey_layer_structurevalue_layer_structurekey_layer_msavalue_layer_msa	key_layervalue_layernew_query_layer_shapenew_key_layer_shapenew_value_layer_shaper   attn_weightsattention_scoresattention_probscontext_layernew_context_layer_shaper3   r3   r4   cross_attention  s|   












 0

z3EvollaSequenceAlignerCrossAttention.cross_attentionc              
   C   s  |d ur&|j \}}}|d u r%t|||	j|	j||fdj |j}nd }|d urN|j \}}}|d u rMt|||	j|
j||fdj |j}nd }|d urv|j \}}}|d u rut|||	j|j||fdj |j}nd }|}|d ur| s|d ur| s|d ur| r|}| j||||||||d}t	| j
| }|| }|}| |t	| j }|| }|S )N)r   )r.  r/  r0  r1  r2  r3  r4  r5  )rD   r=   r   rY   rV   expandTanyrH  tanhr!  r  r"  )r/   r.  protein_kv_statesstructure_kv_statesmsa_kv_statesr2  r3  r4  r5  protein_batch_maskstructure_batch_maskmsa_batch_maskpast_key_valuesr   protein_kv_seq_lenr;   structure_kv_seq_lenmsa_kv_seq_lenr   residualr3   r3   r4   rc   4  sf   z+EvollaSequenceAlignerCrossAttention.forward)NNN)NNNNNNN)r5   r6   r7   rg   r,   rH  rc   r8   r3   r3   r1   r4   r    s(    3wr  c                   @   r   )r  Nr   r3   r3   r3   r4   r  ~  r   r  c                   @   r   )EvollaRotaryEmbeddingNr   r3   r3   r3   r4   rX    r   rX  c                   @   r   )	EvollaMLPNr   r3   r3   r3   r4   rY    r   rY  c                   @   r   )EvollaAttentionNr   r3   r3   r3   r4   rZ    r   rZ  c                       s   e Zd Zdedef fddZ													ddejdeejejf dB d	ejdB d
ej	dB de
dB dedB dej	dB dejdB dejdB dejdB dejdB dejdB dejdB dejdB fddZ  ZS )EvollaDecoderLayerr0   r   c                    sD   t  || |d t|j|j d dkr t||jd| _d S d S )Nr&   r   )r  )r+   r,   maxnum_hidden_layersaligner_num_add_layersr  rp   adapter)r/   r0   r   r1   r3   r4   r,     s   zEvollaDecoderLayer.__init__NFr   position_embeddingsr   r-   rS  	use_cachecache_positionrM  rN  rO  rP  rQ  rR  r2  c              
   K   s   |}|  |}| jd|||||||d|\}}|| }|}| |}| |}|| }t| dr?| j|||	|
||||d}|S )N)r   r   r-   rS  ra  rb  r`  r_  )r.  rM  rN  rO  r2  rP  rQ  rR  r3   )input_layernorm	self_attnpost_attention_layernormmlprr   r_  )r/   r   r`  r   r-   rS  ra  rb  rM  rN  rO  rP  rQ  rR  r2  r   rW  r   r3   r3   r4   rc     s<   





zEvollaDecoderLayer.forward)NNNNFNNNNNNNN)r5   r6   r7   r'   rg   r,   r=   re   rh   r  r   r   rc   r8   r3   r3   r1   r4   r[    sX    	
r[  c                   @   s0   e Zd ZdZdZdZg dZe dd Z	dS )EvollaPreTrainedModelF)r[  r   r  c                 C   sl   | j j}t| | t|tr$t|j t|j	 t
|jj d S t|tr4tj|jd|d d S d S )Nr  )meanstd)r0   initializer_ranger   r   r   r  r   zeros_r!  r"  ones_r  weightr   normal_r   )r/   r   ri  r3   r3   r4   r     s   

z#EvollaPreTrainedModel._init_weightsN)
r5   r6   r7   r   r   r   r   r=   no_gradr   r3   r3   r3   r4   rg    s    rg  c                !       s   e Zd Zdef fddZdd Zdd Zeee														dd	e
jdB d
e
jdB de
jdB dedB de
jdB dedB de
jdB de
jdB de
jdB de
jdB de
jdB de
jdB de
jdB deeB fddZ  ZS )EvollaModelr0   c                    s   t     j| _ j| _t| j j| j| _t	 d| _
t fddt jD | _t j jd| _t dd| _t d| _|   d S )Nr  c                    s   g | ]}t  |d qS ))r0   r   )r[  )r'  r   r  r3   r4   r(    s    z(EvollaModel.__init__.<locals>.<listcomp>)epsgradient_checkpointingF)r+   r,   pad_token_idpadding_idx
vocab_sizer   	Embeddingrp   embed_tokensr  protein_encoderr   r   r]  r   r  rms_norm_epsr   r}   rr  rX  
rotary_embr   r.   r1   r  r4   r,     s   

zEvollaModel.__init__c                 C   s   | j S r*   rw  r   r3   r3   r4   r     s   z EvollaModel.get_input_embeddingsc                 C   s
   || _ d S r*   r{  r   r3   r3   r4   r        
z EvollaModel.set_input_embeddingsNr   r   r-   rS  r   ra  rb  protein_input_idsprotein_attention_maskstructure_feats	msa_featsrQ  rR  rb   c                 K   sJ  |du |duA rt d|du r| |}|r!|du r!t| jd}|du r=|dur-| nd}tj|||jd  |jd}|du rF|	d}d}d}|duri|	duri| j
||	d}|j}tj|jd |jtjd}t| j||||d	}|}| j||d
}| jD ]}||f|||||||
||||||d|}q| |}t||d}|S )a;  
        protein_input_ids (torch.LongTensor):
            The input IDs for the protein sequence in structure-aware tokens. Should be of shape `(batch_size, protein_seq_length)` and type `torch.LongTensor`.
        protein_attention_mask (torch.Tensor):
            The attention mask for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.Tensor`.
        structure_feats (torch.FloatTensor):
            The input IDs for purely structure-based features. Should be of shape `(batch_size, structure_seq_length, structure_feat_dim)` and type `torch.FloatTensor`. Dummy input for now.
        msa_feats (torch.FloatTensor):
            The input IDs for purely MSA-based features. Should be of shape `(batch_size, msa_seq_length, msa_feat_dim)` and type `torch.FloatTensor`. Dummy input for now.
        structure_batch_mask (torch.Tensor):
            The batch mask to decide which protein sequences are purely structure-based. Should be of shape `(batch_size)` and type `torch.Tensor`. Should be paired with `structure_feats`. Dummpy input for now.
        msa_batch_mask (torch.Tensor):
            The batch mask to decide which protein sequences are purely MSA-based. Should be of shape `(batch_size)` and type `torch.Tensor`. Should be paired with `msa_feats`. Dummpy input for now.
        Nz:You must specify exactly one of input_ids or inputs_embedsr  r   r&   rU   r   )rV   rM   )r0   r   r   rb  rS  )r-   )r   r-   rS  ra  rb  rM  rN  rO  rP  rQ  rR  r2  r`  )r   rS  )rs   rw  r   r0   get_seq_lengthr=   rN   rD   rV   	unsqueezerx  r  r   r   r
   rz  r   r   r   )r/   r   r   r-   rS  r   ra  rb  r}  r~  r  r  rQ  rR  r   past_seen_tokensprotein_featsrP  protein_outputscausal_maskr   r`  decoder_layeroutputr3   r3   r4   rc     sz   "



zEvollaModel.forward)NNNNNNNNNNNNN)r5   r6   r7   r'   r,   r   r   r   r   r   r=   r  re   r   r  r   rh   r   rc   r8   r3   r3   r1   r4   rp    sb    	
rp  c                       s   e Zd Z fddZdd Zdd Zee								dd	ej	dB d
ej
dB dejdB dej	dB dej	dB dej
dB dedB deej
B fddZ  ZS )EvollaForProteinText2Textc                    s@   t  | t|| _|j| _tj|j| jdd| _| 	  d S r   )
r+   r,   rp  r  ru  r   rv   rp   lm_headr   r.   r1   r3   r4   r,   l  s
   
z"EvollaForProteinText2Text.__init__c                 C   s
   | j  S r*   )r  r   r   r3   r3   r4   r   t  r|  z.EvollaForProteinText2Text.get_input_embeddingsc                 C   s   | j |S r*   )r  r   r   r3   r3   r4   r   w  r   z.EvollaForProteinText2Text.set_input_embeddingsNr   r   r   r   labelsr}  r~  ra  logits_to_keepc	              	   K   s   | j d||||||d|	}
|
j}t|trt| dn|}| |dd|ddf }d}|dur@| jd||| jd|	}t|||
j	|
j
|
jd}|S )a,  
        protein_input_ids (torch.LongTensor):
            The input IDs for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.LongTensor`.
        protein_attention_mask (torch.Tensor):
            The attention mask for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.Tensor`.

        Example:

        ```python
        >>> from transformers import EvollaProcessor, EvollaForProteinText2Text
        >>> model = EvollaForProteinText2Text.from_pretrained("westlake/Evolla-10B-hf")
        >>> processor = EvollaProcessor.from_pretrained("westlake/Evolla-10B-hf")

        >>> protein_information = {
            "aa_seq": "your amino acid sequence",
            "foldseek": "your foldseek sequence",
        }
        >>> question = "What is the function of this protein?"
        >>> message = [
            {"role": "system", "content": "You are an AI expert that can answer any questions about protein."},
            {"role": "user", "content": question},
        ]

        >>> inputs = processor(proteins=[protein_information], messages_list=[message], return_tensors="pt", padding="longest")
        >>> outputs = model.generate(**inputs)

        >>> print(processor.batch_decode(outputs, skip_special_tokens=True))
        ```)r   r   r   r}  r~  ra  N)logitsr  ru  )lossr  rS  r   r   r3   )r  r   r   rg   slicer  loss_functionru  r   rS  r   r   )r/   r   r   r   r  r}  r~  ra  r  r   outputsr   slice_indicesr  r  
lm_outputsr3   r3   r4   rc   z  s0   *
z!EvollaForProteinText2Text.forward)NNNNNNNr   )r5   r6   r7   r,   r   r   r   r   r=   r  re   r  r   rg   rc   r8   r3   r3   r1   r4   r  k  s>    	r  )r  rp  rg  )Tdataclassesr   r=   r    r   r   cache_utilsr   r   
generationr   masking_utilsr	   r
   modeling_outputsr   r   r   r   modeling_utilsr   utilsr   r   r   utils.genericr   utils.output_capturingr   r   esm.modeling_esmr   r   r   r   r   r   r   r   r   llama.modeling_llamar    r!   r"   r#   r$   r%   configuration_evollar'   r(   
get_loggerr5   loggerr)   rB   rG   ro   rH   ri   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  rX  rY  rZ  r[  rg  rp  r  __all__r3   r3   r3   r4   <module>   sd   , 
-".:* oA V