from typing import Optional

import torch
from torch import Tensor
from torch.nn import Module

from ..attention import AttentionMask
from ..bert.layer import BertEncoderLayer
from ..output import PyTorchTransformerOutput
from .embeddings import RobertaEmbeddings
from .config import RobertaConfig


class RobertaEncoder(Module):
    def __init__(self, config: RobertaConfig):
        super().__init__()

        self.embeddings = RobertaEmbeddings(
            config.embedding, padding_idx=config.padding_idx
        )
        self.padding_idx = config.padding_idx
        self.max_seq_len = config.model_max_length
        self.layers = torch.nn.ModuleList(
            [
                BertEncoderLayer(config.layer, config.attention)
                for _ in range(config.layer.num_hidden_layers)
            ]
        )

    def _create_attention_mask(self, x: Tensor) -> AttentionMask:
        # Positions equal to padding_idx are masked out; all other tokens
        # participate in attention.
        return AttentionMask(x.ne(self.padding_idx))

    def forward(
        self,
        input_ids: Tensor,
        attention_mask: Optional[Tensor] = None,
        token_type_ids: Optional[Tensor] = None,
    ) -> PyTorchTransformerOutput:
        """
        Shapes:
            input_ids, attention_mask, token_type_ids - (batch, seq_len)
        """
        if attention_mask is None:
            attention_mask = self._create_attention_mask(input_ids)

        embeddings = self.embeddings(input_ids, token_type_ids, None)
        layer_output = embeddings

        # Run the encoder layer stack, collecting each layer's hidden states.
        layer_outputs = []
        for layer in self.layers:
            layer_output = layer(layer_output, attention_mask)
            layer_outputs.append(layer_output)

        return PyTorchTransformerOutput(
            embedding_output=embeddings, layer_hidden_states=layer_outputs
        )