o
    *i                  	   @   sr  d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlZ	d dl
mZ d dlmZmZ d dlmZmZ d dlmZmZ eeZe rHd dlZeG d	d
 d
ZeG dd dZdeeB defddZdZdZG dd deeZeG dd dZ dejdejfddZ!de	j"de#e$e$e$f de#e$e$e$f de	j"fddZ%dejde#e&e&f de	j"fd d!Z'G d"d# d#Z(dS )$    N)	dataclass)Enum)BytesIO)Image)SerializableImagedownload_image)assert_opencv_installedis_opencv_installed)
ImageChunkImageURLChunkc                   @   s(   e Zd ZU dZee ed< ejed< dS )ImageEncodingzA tokenized image.

    Attributes:
        tokens: The token ids.
        image: The image as a numpy array.

    Examples:
        >>> import numpy as np
        >>> image_encoding = ImageEncoding(tokens=[1, 2, 3], image=np.array([[0., 0.5, 1.]]))
    tokensimageN)	__name__
__module____qualname____doc__listint__annotations__npndarray r   r   c/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/mistral_common/tokens/tokenizers/image.pyr      s   
 r   c                   @   s*   e Zd ZU dZeed< eed< eed< dS )SpecialImageIDsa  Special image tokens ids.

    Attributes:
        img: The image token id.
        img_break: The image break token id.
        img_end: The image end token id.

    Examples:
        >>> special_image_ids = SpecialImageIDs(img=1, img_break=2, img_end=3)
    img	img_breakimg_endN)r   r   r   r   r   r   r   r   r   r   r   &   s
   
 r   chunkreturnc                 C   s   t | tr| jS |  dr$|  dd }t|}t	t
|S |  dr9t	t	|  dddS |  drFt|  S td	|   )
zGet a serializable image from a chunk.

    Args:
        chunk: The chunk to get the image from.

    Returns:
        The image as a PIL Image object.
    z
data:image,   filezfile:// rbhttpzUnsupported image url scheme )
isinstancer
   r   get_url
startswithsplitbase64	b64decoder   openr   replacer   RuntimeError)r   data
image_datar   r   r   image_from_chunk8   s   
	
r1   )g3<4'?gwgM?gy{ ?)gB91?gwt.?g	U?c                   @   s"   e Zd ZdZdZedddZdS )	MultiModalVersionzVersion of the image tokenizer.m1r   ImageConfigc                 C   s    | j dkr
tddS t| j  )Nr3      i   )namer4   NotImplementedErrorselfr   r   r   configY   s   

zMultiModalVersion.configN)r   r4   )r   r   r   r   r3   propertyr:   r   r   r   r   r2   T   s
    r2   c                   @   s.   e Zd ZU dZeed< eed< dZeed< dS )r4   z'Configuration for the image tokenizers.image_patch_sizemax_image_sizer!   spatial_merge_sizeN)r   r   r   r   r   r   r>   r   r   r   r   r4   a   s
   
 r4   r   c                 C   sJ   | j dkr| S | j dkr| d} td| jd}|| d|  |dS )zUConvert a PIL image to RGB.

    We ensure transparent background becomes white.
    RGBRGBAWHITE)r   r   )modeconvertr   newsizepaste)r   white_bgr   r   r   _convert_to_rgbj   s   



rH   np_imagemeanstdc                 C   s   | d } t | jdksJ d| j| jd t |  kr$t |ks3n J d| jd|d|| | | } | dddS )	a  Normalize a tensor image with mean and standard deviation.

    Args:
        np_image: Image to be normalized.
        mean: Mean for each channel.
        std: Standard deviation for each channel.

    Returns:
        Normalized image with shape (C, H, W).
    g     o@   znp_image.shape=   z, mean=z, std=r   r!   )lenshape	transpose)rI   rJ   rK   r   r   r   	normalizex   s
   @rQ   new_sizec                 C   s4   t   tjtjt| tjd|tjd}t|t	t
S )zTransform an image to a numpy array with the given size.

    Args:
        image: Image to be transformed.
        new_size: New size of the image.

    Returns:
        Transformed image with shape (C, H, W).
    )dtype)interpolation)r   cv2resizer   arrayrH   float32INTER_CUBICrQ   DATASET_MEANDATASET_STD)r   rR   rI   r   r   r   transform_image   s   
"r\   c                   @   s|   e Zd ZdZdededdfddZedefdd	Zd
e	j	de
eef fddZdeeB defddZedefddZdS )ImageEncoderz&Image encoder for the image tokenizer.image_configspecial_idsr   Nc                 C   s   || _ || _dS )zInitialize the image encoder.

        Args:
            image_config: Configuration for the image tokenizer.
            special_ids: Special image tokens ids.
        N)r^   r_   )r9   r^   r_   r   r   r   __init__   s   
zImageEncoder.__init__c                 C   s   | j S N)r^   r8   r   r   r   	mm_config   s   zImageEncoder.mm_configr   c                 C   s   |j \}}t|| jj || jj }|dkr"t|| }t|| }|d | jj| jj  d }|d | jj| jj  d }||fS )Nr!   )rE   maxr^   r=   roundr<   r>   )r9   r   whratiowidth_tokensheight_tokensr   r   r   _image_to_num_tokens   s   
z!ImageEncoder._image_to_num_tokenscontentc                 C   s   t |}| |\}}|dksJ |dksJ | jjg| | jjg | }| jj|d< || jj | jj || jj | jj f}t	||}t
||dS )zConverts an image chunk to an image encoding.

        Args:
            content: image chunk to be converted.

        Returns:
            Image encoding.
        r   )r   r   )r1   rj   r_   r   r   r   r^   r<   r>   r\   r   )r9   rk   r   re   rf   image_tokensnew_image_sizeprocessed_imager   r   r   __call__   s   	
zImageEncoder.__call__c                 C   s   | j jS ra   )r_   r   r8   r   r   r   image_token   s   zImageEncoder.image_token)r   r   r   r   r4   r   r`   r;   rb   r   tupler   rj   r
   r   r   rp   rq   r   r   r   r   r]      s    
r]   ))r*   loggingdataclassesr   enumr   ior   numpyr   PILr   mistral_common.imager   r   mistral_common.importsr   r	   &mistral_common.protocol.instruct.chunkr
   r   	getLoggerr   loggerrU   r   r   r1   rZ   r[   strr2   r4   rH   r   rr   floatrQ   r   r\   r]   r   r   r   r   <module>   sF    

"