o
    c۷i4                     @   s  d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
mZmZmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ G dd deeZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deeZG dd deZ G dd deZ!G dd  d eZ"G d!d" d"eZ#ee"eB eB e!B e B e#B ed#d$f Z$ee"eB eB e!B e B ed#d$f Z%d%e&eee&eef B f d&e$fd'd(Z'dS ))    N)Enum)Path)Literal)urlparse)
ConfigDictFieldValidationErrorfield_validator)	Annotated)EXPECTED_FORMAT_VALUESAudio)MistralBase)SerializableImagec                   @   s(   e Zd ZdZdZdZdZdZdZdZ	dS )	
ChunkTypesa  Enum for the types of chunks that can be sent to the model.

    Attributes:
       text: A text chunk.
       image: An image chunk.
       image_url: An image url chunk.
       input_audio: An input audio chunk.
       audio_url: An audio url chunk.

    Examples:
        >>> from mistral_common.protocol.instruct.chunk import ChunkTypes
        >>> chunk_type = ChunkTypes.text
    textimage	image_urlinput_audio	audio_urlthinkingN)
__name__
__module____qualname____doc__r   r   r   r   r   r    r   r   \/home/ubuntu/vllm_env/lib/python3.10/site-packages/mistral_common/protocol/instruct/chunk.pyr      s    r   c                   @   s   e Zd ZU dZeejejejej	ej
ejf ed< deeeeeef B f fddZedeeeeeef B f dd fddZd	S )
BaseContentChunkzBase class for all content chunks.

    Content chunks are used to send different types of content to the model.

    Attributes:
       type: The type of the chunk.
    typereturnc                 C   s   t dt| j )z_Converts the chunk to the OpenAI format.

        Should be implemented by subclasses.
        z%to_openai method not implemented for )NotImplementedErrorr   r   selfr   r   r   	to_openai8   s   zBaseContentChunk.to_openaiopenai_chunkc                 C   s   t d| j )zgConverts the OpenAI chunk to the Mistral format.

        Should be implemented by subclasses.
        z'from_openai method not implemented for )r   r   clsr#   r   r   r   from_openai?   s   zBaseContentChunk.from_openaiN)r   r   r   r   r   r   r   r   r   r   r   r   __annotations__dictstrr"   classmethodr&   r   r   r   r   r   &   s   
 "	,r   c                   @   s   e Zd ZU dZejZeej ed< e	ed< e
ddZdeeeeeef B f fddZed	eeeeeef B f dd fd
dZdS )
ImageChunkzImage chunk.

    Attributes:
       image: The image to be sent to the model.

    Examples:
        >>> from PIL import Image
        >>> image_chunk = ImageChunk(image=Image.new('RGB', (200, 200), color='blue'))
    r   r   Tarbitrary_types_allowedr   c                 C   s&   | j dhddidd }dd|idS )(Converts the chunk to the OpenAI format.r   add_format_prefixT)includecontextr   urlr   r   
model_dump)r!   base64_imager   r   r   r"   W   s   zImageChunk.to_openair#   c                 C   sp   | ddksJ ||d }t|trd|v sJ |td|d r/|d dd |d< | d|d iS )0Converts the OpenAI chunk to the Mistral format.r   r   r2   z^data:image/\w+;base64,,   r   )get
isinstancer(   rematchsplitmodel_validate)r%   r#   image_url_dictr   r   r   r&   \   s   zImageChunk.from_openaiN)r   r   r   r   r   r   r   r   r'   r   r   model_configr(   r)   r"   r*   r&   r   r   r   r   r+   H   s   
 

",r+   c                   @   s*   e Zd ZU dZeed< dZedB ed< dS )ImageURLzImage URL or a base64 encoded image.

    Attributes:
       url: The URL of the image.
       detail: The detail of the image.

    Examples:
       >>> image_url = ImageURL(url="https://example.com/image.png")
    r2   Ndetail)r   r   r   r   r)   r'   rC   r   r   r   r   rB   j   s   
 
rB   c                   @   s   e Zd ZU dZejZeej ed< e	e
B ed< eddZde
fddZdee
e
ee
e
f B f fd	d
Zedee
e
ee
e
f B f dd fddZdS )ImageURLChunkzImage URL chunk.

    Attributes:
       image_url: The URL of the image or a base64 encoded image to be sent to the model.

    Examples:
        >>> image_url_chunk = ImageURLChunk(image_url="data:image/png;base64,iVBORw0")
    r   r   Tr,   r   c                 C      t | jtr
| jjS | jS N)r;   r   rB   r2   r    r   r   r   get_url   s   zImageURLChunk.get_urlc                 C   s>   d|   i}t| jtr| jjdur| jj|d< d|d}|S )r.   r2   NrC   r   r3   )rG   r;   r   rB   rC   )r!   r@   out_dictr   r   r   r"      s   zImageURLChunk.to_openair#   c                 C   s   |  d|d iS )r7   r   r?   r$   r   r   r   r&      s   zImageURLChunk.from_openaiN)r   r   r   r   r   r   r   r   r'   rB   r)   r   rA   rG   r(   r"   r*   r&   r   r   r   r   rD   y   s   
 	
",rD   c                   @   sV   e Zd ZU dZeeB ed< eed< ededd fddZ	e
ddedefd	d
ZdS )RawAudioaK  Base64 encoded audio data.

    This class represents raw audio data encoded in base64 format.

    Attributes:
        data: The base64 encoded audio data, which can be a string or bytes.
        format: The format of the audio data.

    Examples:
        >>> audio = RawAudio(data="base64_encoded_audio_data", format="mp3")
    dataformataudior   c                 C   s   |j }||d}| ||dS )zCreates a RawAudio instance from an Audio object.

        Args:
            audio: An Audio object containing audio data, format, and duration.

        Returns:
            An AudioChunk instance initialized with the audio data.
        FrK   rL   )rL   	to_base64)r%   rM   rL   rK   r   r   r   
from_audio   s   
zRawAudio.from_audiovc                 C   s"   |t vrtdt  d| d|S )Nz`format` should be one of z. Got: `)r   r   r%   rQ   r   r   r   should_not_be_empty   s   zRawAudio.should_not_be_emptyN)r   r   r   r   r)   bytesr'   r*   r   rP   r	   rT   r   r   r   r   rJ      s   
 rJ   c                   @   s   e Zd ZU dZeed< dS )AudioURLzHAudio URL.

    Attributes:
        url: The URL of the audio file.
    r2   N)r   r   r   r   r)   r'   r   r   r   r   rV      s   
 rV   c                   @   s    e Zd ZdZdZdZdZdZdS )AudioURLTypezEnum for the types of audio URLs.

    Attributes:
        url: A URL.
        base64: A base64 encoded audio. Can be prefixed with `data:audio/<format>;base64,`.
        file: A file path.
        file_uri: A file URI (eg. `file:///path/to/file`).
    r2   base64filefile_uriN)r   r   r   r   r2   rX   rY   rZ   r   r   r   r   rW      s    	rW   c                   @   s   e Zd ZU dZejZeej ed< e	e
B ed< ede	fddZdefddZdee	e	ee	e	f B f fd	d
Zedee	e	ee	e	f B f dd fddZdS )AudioURLChunkzAudio URL chunk.

    Attributes:
        type: The type of the chunk, which is always `ChunkTypes.audio_url`.
        audio_url: The URL of the audio file.
    r   r   r   c                 C   rE   rF   )r;   r   rV   r2   r    r   r   r   r2      s   zAudioURLChunk.urlc                 C   st   t | jj}|dv rtjS |dkrtjS |dkrtjS zt| j}| }W n ty1   d}Y nw |r7tj	S tjS )a  Returns the type of the audio URL.

        Note:
            URLs should be either:
            - a valid URL (http:// or https://)
            - a valid file path (e.g. /path/to/file)
            - a valid file URI (e.g. file:///path/to/file)
            - a base64 encoded audio. It is assumed to be base64 encoded if it is not a valid URL or file path.

        Returns:
            The type of the audio URL.
        >   httphttpsrK   rY   F)
r   r2   schemerW   rX   rZ   r   existsOSErrorrY   )r!   
url_schemeurl_path
exist_pathr   r   r   get_url_type   s    
zAudioURLChunk.get_url_typec                 C   s&   t | jtr
|  S | jd| jidS )r.   r2   )r   r   )r;   r   rV   r5   r   r    r   r   r   r"     s   zAudioURLChunk.to_openair#   c                 C   
   |  |S r7   rI   r$   r   r   r   r&        
zAudioURLChunk.from_openaiN)r   r   r   r   r   r   r   r   r'   r)   rV   propertyr2   rW   rd   r(   r"   r*   r&   r   r   r   r   r[      s   
 " ,r[   c                   @   s   e Zd ZU dZejZeej ed< e	ed< e
dde	de	fddZededd fd	d
Zdeeeeeef B f fddZedeeeeeef B f dd fddZdS )
AudioChunka  Audio chunk containing raw audio data.

    This class represents a chunk of audio data that can be used as input.

    Attributes:
        type: The type of the chunk, which is always ChunkTypes.input_audio.
        input_audio: The RawAudio object containing the audio data.

    Examples:
        >>> audio_chunk = AudioChunk(input_audio=RawAudio(data="base64_encoded_audio_data", format="mp3"))
    r   r   rQ   r   c                 C   s   |j  std| d|S )Nz'`InputAudio` should not be empty. Got: rR   )rK   stripr   rS   r   r   r   rT   .  s   
zAudioChunk.should_not_be_emptyrM   c                 C   s   | t |dS )zCreates an AudioChunk instance from an Audio object.

        Args:
            audio: An Audio object containing audio data.

        Returns:
            An AudioChunk instance initialized with the audio data.
        )r   )rJ   rP   )r%   rM   r   r   r   rP   5  s   
zAudioChunk.from_audioc                 C   s@   t | jjtr| jjdn| jj}| jt|| jjd dS )zConverts the chunk to the OpenAI format.

        Returns:
            A dictionary representing the audio chunk in the OpenAI format.
        zutf-8rN   )r   r   )	r;   r   rK   rU   decoder   rJ   rL   r5   )r!   contentr   r   r   r"   A  s
   "zAudioChunk.to_openair#   c                 C   re   )a  Converts the OpenAI chunk to the Mistral format.

        Args:
            openai_chunk: A dictionary representing the audio chunk in the OpenAI format.

        Returns:
            An AudioChunk instance initialized with the data from the OpenAI chunk.
        rI   r$   r   r   r   r&   O  s   

zAudioChunk.from_openaiN)r   r   r   r   r   r   r   r   r'   rJ   r	   rT   r*   r   rP   r(   r)   r"   r&   r   r   r   r   ri     s   
 ",ri   c                   @   sz   e Zd ZU dZejZeej ed< e	ed< de
e	e	e
e	e	f B f fddZede
e	e	e
e	e	f B f dd fdd	Zd
S )	TextChunkzText chunk.

    Attributes:
      text: The text to be sent to the model.

    Examples:
        >>> text_chunk = TextChunk(text="Hello, how can I help you?")
    r   r   r   c                 C      |   S r.   r4   r    r   r   r   r"   i     zTextChunk.to_openair#   c                 C   re   rf   rI   r$   r   r   r   r&   m  rg   zTextChunk.from_openaiN)r   r   r   r   r   r   r   r   r'   r)   r(   r"   r*   r&   r   r   r   r   rm   \  s   
 	",rm   c                   @   s   e Zd ZU dZejZeej ed< e	ed< e
dddZeed< dee	e	ee	e	f B f fd	d
Zedee	e	ee	e	f B f dd fddZdS )
ThinkChunkzThinking chunk.

    Attributes:
        type: The type of the chunk, which is always ChunkTypes.thinking.
        thinking: The list of text chunks of the thinking.
        closed: Whether the thinking chunk is closed or not.
    r   r   Tz,Whether the thinking chunk is closed or not.)defaultdescriptionclosedr   c                 C   rn   ro   r4   r    r   r   r   r"     rp   zThinkChunk.to_openair#   c                 C   re   rf   rI   r$   r   r   r   r&     rg   zThinkChunk.from_openaiN)r   r   r   r   r   r   r   r   r'   r)   r   rt   boolr(   r"   r*   r&   r   r   r   r   rq   s  s   
 ",rq   r   )discriminatoropenai_content_chunksr   c                 C   s   |  d}|d u rtdt|}|tjkrt| S |tjkr%t| S |tjkr/t	| S |tj
kr9t| S |tjkrCt| S |tjkrMt| S td| )Nr   z%Content chunk must have a type field.zUnknown content chunk type: )r:   
ValueErrorr   r   rm   r&   r   rD   r   r+   r   ri   r   r[   r   rq   )rw   content_type_strcontent_typer   r   r   _convert_openai_content_chunks  s"   












r{   )(r<   enumr   pathlibr   typingr   urllib.parser   pydanticr   r   r   r	   typing_extensionsr
   mistral_common.audior   r   mistral_common.baser   mistral_common.imager   r)   r   r   r+   rB   rD   rJ   rV   rW   r[   ri   rm   rq   ContentChunkUserContentChunkr(   r{   r   r   r   r   <module>   s:    ""&'
>> *