o
    }oi                     @   s<   d dl Z d dlZd dlmZmZmZ G dd dejjZdS )    N)T5ConfigT5EncoderModelT5Tokenizerc                       s(   e Zd Zd fdd	Zd	ddZ  ZS )
	T5Encoder   Nc                    s   t    || _d| _tjd| jd| _|du r"tjddd| _dS t	d|  t
j|d}t
jt
jtd	}tj|t|dd
| _dS )z
        Initialize the T5 Encoder.

        :param max_seq_len: Maximum token length, defaults to 512
        :param encoder_path: Optional if loaded T5 on the disk, defaults to None
        r   zt5-11b)model_max_lengthNT)low_cpu_mem_usagezLoad T5 encoder from zt5xxl-encoder.binzt5encoder.json)configr   )super__init__max_seq_lenmodel_seq_lenr   from_pretrained	tokenizerr   modelprintospathjoindirname__file__r   from_json_file)selfr   encoder_path"hard_coded_encoder_weight_location"hard_coded_encoder_config_location	__class__ p/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.pyr      s   
zT5Encoder.__init__cudac           
      C   s   | j j|dd| jdd}|jj|d}|jj|d}t  | j||d}|j	
 }W d   n1 s5w   Y  |ddd| jf }|ddd| jf }t|jd D ]}||   }	d|| |	d< qW||fS )	z:
        Encode a batch of text to T5 embeddings.
        pt
max_lengthT)return_tensorspaddingr"   
truncation)device)	input_idsattention_maskNr   )r   batch_encode_plusr   r'   tor(   torchno_gradr   last_hidden_statedetachr   rangeshapesumitem)
r   
text_batchr&   encodedr'   	attn_maskoutputencoded_textbnumnvalid_elemr   r   r   encode/   s   
zT5Encoder.encode)r   N)r    )__name__
__module____qualname__r   r:   __classcell__r   r   r   r   r      s    r   )	r   r+   transformersr   r   r   nnModuler   r   r   r   r   <module>   s   