o
    }o™i„	  ã                   @   sÞ   d dl mZ d dlmZmZ eG dd„ dƒƒZeG dd„ deƒƒZeG dd„ deƒƒZd	Zej	Z
ej	Zd
ZdZdZdZdZdZdZdZdZdZdZejej	fejej	fdefdefdefdefdefdefdefdefg
ZdS )é    )Ú	dataclass)ÚCallableÚOptionalc                   @   sB   e Zd ZU dZeed< eed< eed< eed< dZe	e
 ed< dS )ÚMultiModalTokenzN
    Base class for multimodal tokens representing different media types.
    Ú	token_strÚtoken_indexÚ
media_typeÚuse_start_endNÚ
encoder_fn)Ú__name__Ú
__module__Ú__qualname__Ú__doc__ÚstrÚ__annotations__ÚintÚboolr
   r   r   © r   r   úg/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/vlm/qwen2vl/data/multimodal_tokens.pyr      s   
 r   c                   @   óB   e Zd ZU dZdZeed< dZeed< dZ	eed< dZ
eed	< d
S )ÚQwen2VLImageTokenzImage Token classz<|image_pad|>r   i8ÿÿÿr   Úimager   Fr	   N©r   r   r   r   r   r   r   r   r   r   r	   r   r   r   r   r   r       ó   
 r   c                   @   r   )ÚQwen2VLVideoTokenzVideo Token classz<|video_pad|>r   iÔþÿÿr   Úvideor   Fr	   Nr   r   r   r   r   r   *   r   r   iœÿÿÿi^P i_P i`P iaP ibP icP idP ieP i[P igP ihP z<|object_ref_start|>z<|object_ref_end|>z<|box_start|>z<|box_end|>z<|quad_start|>z<|quad_end|>z<|vision_start|>z<|vision_end|>N)Údataclassesr   Útypingr   r   r   r   r   ÚIGNORE_INDEXr   ÚIMAGE_TOKEN_INDEXÚVIDEO_TOKEN_INDEXÚOBJECT_REF_START_TOKEN_INDEXÚOBJECT_REF_END_TOKEN_INDEXÚBOX_START_TOKEN_INDEXÚBOX_END_TOKEN_INDEXÚQUAD_START_TOKEN_INDEXÚQUAD_END_TOKEN_INDEXÚVISION_START_TOKEN_INDEXÚVISION_END_TOKEN_INDEXÚPAD_TOKEN_INDEXÚHF_IMAGE_TOKEN_INDEXÚHF_VIDEO_TOKEN_INDEXr   ÚSPECIAL_TOKEN_MAPr   r   r   r   Ú<module>   sB   	


ö