o
    ̳iw                     @   sT   d dl mZ d dlmZ d dlmZ ddedefddZdd
edede	fddZ
dS )    
t5_encoder)	T5EncoderT5Tokenizer   max_seq_lenreturnc                 C   s   t ddddddddd| d	
S )
a  
    Builder for the T5 v1.1 XXL (11B parameters) encoder.

    T5 paper: https://arxiv.org/abs/1910.10683

    1.1 release:
    https://github.com/google-research/text-to-text-transfer-transformer/blob/main/released_checkpoints.md#t511

    Args:
        max_seq_len (int): The maximum sequence length (context length) of the model.
            Default: 512

    Returns:
        T5Encoder: Instantiation of the T5 encoder
    i   i (  @             i}  gư>)
	embed_dimmlp_dim	num_headshead_dim
num_layersrel_pos_num_bucketsrel_pos_max_dist
vocab_sizenorm_epsr   r   )r    r   W/home/ubuntu/.local/lib/python3.10/site-packages/torchtune/models/t5/_model_builders.pyt5_v1_1_xxl_encoder   s   r   Tpathtruncatec                 C   s   t | ||dS )aF  
    Builder for the T5 tokenizer.

    Args:
        path (str): the path to the T5 sentencepiece tokenizer file
        max_seq_len (int): the context length
        truncate (bool): whether to truncate the token sequence when longer than max_seq_len

    Returns:
        T5Tokenizer: Instantiation of the T5 tokenizer
    )r   r   r   )r   r   r   r   r   r   t5_tokenizer*   s   r   N)r   )r   T)'torchtune.models.t5._component_buildersr   torchtune.models.t5._encoderr   torchtune.models.t5._tokenizerr   intr   strboolr   r   r   r   r   <module>   s
   