o
    -iC                     @   s$  d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZ d dlmZ d dlZd dlmZ d dlZd dlmZmZ d d	lmZmZ d dlmZ d d
lmZm Z  d dl!m"Z" d dl#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ erddl,m-Z-m.Z.m/Z/ neZ-eZ.eZ/e"e0Z1eej2dZ3e4e3j5 edZ6e$ Z7e74dG dd dZ8dddej9de:de;de;fddZ<dddej9de:de;de;fddZ=ddd d!ejd"e;de;dB de;fd#d$Z>dd%d d!ejd"e;de;de;fd&d'Z?d(dd)ej@de;de;fd*d+ZAd(dd)ej@de;de;fd,d-ZBd.e/deCeDe;e:f  fd/d0ZEdd1d2d3eCe. d4ejFjGd5eHdeeDe;e:e-f ddf fd6d7ZI	dDd8e;d9eJe;ef dB deDej9e:eKB f fd:d;ZL	dDd<e;d=eJe;ef dB dejfd>d?ZM	dDd@e;dAeJe;ef dB deDej@eJe;ef f fdBdCZNdS )E    N)	Generator)ThreadPoolExecutor)groupby)Path)TYPE_CHECKINGAnyTypeVar)url2pathname)ImageUnidentifiedImageError)Url	parse_url)HTTPConnectionglobal_http_connection)init_logger)ExtensionManager   )AudioEmbeddingMediaIOAudioMediaIOImageEmbeddingMediaIOImageMediaIOMediaIOVideoMediaIO)BatchedTensorInputsMultiModalKwargsItemMultiModalPlaceholderDict)max_workers_Mhttpc                       s  e Zd Zdefddddeeeeef f dB dededee dB ddf
 fd	d
Z	de
dee defddZde
dee defddZde
ddfddZdddedee dedB defddZdddedee dedB defddZdedeejeeB f fddZdedeejeeB f fddZdd d!ed"edejfd#d$Zdd d!ed"edejfd%d&Zdd d'ed"edeejeeef f fd(d)Zdd d'ed"edeejeeef f fd*d+Zd,ede j!fd-d.Z"d,ede j!fd/d0Z#  Z$S )1MediaConnectorN )allowed_local_media_pathallowed_media_domainsmedia_io_kwargs
connectionr!   r"   returnc                   s|   t    |r	|ni | _|| _|r.t|}| s!td| d| s-td| dnd}|| _|du r9g }|| _	dS )aj  
        Args:
            media_io_kwargs: Additional args passed to process media
                             inputs, keyed by modalities. For example,
                             to set num_frames for video, set
                             `--media-io-kwargs '{"video":{"num_frames":40}}'`
            connection: HTTP connection client to download media contents.
            allowed_local_media_path: A local directory to load media files from.
            allowed_media_domains: If set, only media URLs that belong to this
                                   domain can be used for multi-modal inputs.
        z/Invalid `--allowed-local-media-path`: The path z does not exist.z must be a directory.N)
super__init__r#   r$   r   exists
ValueErroris_dirr!   r"   )selfr#   r$   r!   r"   allowed_local_media_path_	__class__ R/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/multimodal/utils.pyr'   ;   s0   


zMediaConnector.__init__url_specmedia_ioc           	      C   sT   |j pd}|dd\}}|dd\}}|d}|dkr$d}t||||S )Nr    ,r   ;/base64z,Only base64 data URLs are supported for now.)pathsplitlstripNotImplementedErrorload_base64)	r+   r1   r2   url_spec_path	data_specdata
media_type	data_typemsgr/   r/   r0   _load_data_urlj   s   

zMediaConnector._load_data_urlc                 C   sh   | j }|d u rtd|jpd}|jpd}tt|| }|| jvr/td| d| d|	|S )Nz=Cannot load local files without `--allowed-local-media-path`.r    zThe file path z2 must be a subpath of `--allowed-local-media-path z`.)
r!   RuntimeErrorr7   netlocr   r	   resolveparentsr)   	load_file)r+   r1   r2   r!   r<   url_spec_netlocfilepathr/   r/   r0   _load_file_url{   s   


zMediaConnector._load_file_urlc                 C   s2   | j r|j| j vrtd| j  d|j d S d S )Nz1The URL must be from one of the allowed domains: z. Input URL domain: )r"   hostnamer)   )r+   r1   r/   r/   r0   $_assert_url_in_allowed_media_domains   s   z3MediaConnector._assert_url_in_allowed_media_domainsfetch_timeouturlrN   c                C   s~   t |}|jr#|jdr#| | | j}|j||tjd}||S |jdkr.| 	||S |jdkr9| 
||S d}t|Nr   )timeoutallow_redirectsr>   filez0The URL must be either a HTTP, data or file URL.)r   scheme
startswithrL   r$   	get_bytesenvsVLLM_MEDIA_URL_ALLOW_REDIRECTS
load_bytesrB   rJ   r)   )r+   rO   r2   rN   r1   r$   r>   rA   r/   r/   r0   load_from_url   s    



zMediaConnector.load_from_urlc          
         s   t |}t }|jr3|jdr3| | | j}|j||tj	dI d H }|
t|j|}|I d H S |jdkrF|
t| j||}|I d H S |jdkrY|
t| j||}|I d H S d}	t|	rP   )r   asyncioget_running_looprT   rU   rL   r$   async_get_bytesrW   rX   run_in_executorglobal_thread_poolrY   rB   rJ   r)   )
r+   rO   r2   rN   r1   loopr$   r>   futurerA   r/   r/   r0   load_from_url_async   s2   







z"MediaConnector.load_from_url_async	audio_urlc                 C   s*   t di | jdi }| j||tjdS )z(
        Load audio from a URL.
        audiorM   Nr/   )r   r#   getrZ   rW   VLLM_AUDIO_FETCH_TIMEOUTr+   rc   audio_ior/   r/   r0   fetch_audio   s   zMediaConnector.fetch_audioc                    s2   t di | jdi }| j||tjdI dH S )z8
        Asynchronously fetch audio from a URL.
        rd   rM   Nr/   )r   r#   re   rb   rW   rf   rg   r/   r/   r0   fetch_audio_async   s   z MediaConnector.fetch_audio_asyncRGB
image_mode	image_urlrm   c             
   C   sX   t dd|i| jdi }z
| j||tjdW S  ty+ } ztt||d}~ww )z
        Load a PIL image from an HTTP or base64 data URL.

        By default, the image is converted into RGB format.
        rm   imagerM   Nr/   )	r   r#   re   rZ   rW   VLLM_IMAGE_FETCH_TIMEOUTr   r)   strr+   rn   rm   image_ioer/   r/   r0   fetch_image   s   zMediaConnector.fetch_imagec             
      s`   t dd|i| jdi }z| j||tjdI dH W S  ty/ } ztt||d}~ww )z
        Asynchronously load a PIL image from an HTTP or base64 data URL.

        By default, the image is converted into RGB format.
        rm   ro   rM   Nr/   )	r   r#   re   rb   rW   rp   r   r)   rq   rr   r/   r/   r0   fetch_image_async  s    z MediaConnector.fetch_image_async	video_urlc                C   sH   t dd|i| jdi }t|fi | jdi }| j||tjdS )z=
        Load video from an HTTP or base64 data URL.
        rm   ro   videorM   Nr/   )r   r#   re   r   rZ   rW   VLLM_VIDEO_FETCH_TIMEOUTr+   rw   rm   rs   video_ior/   r/   r0   fetch_video.  s   	zMediaConnector.fetch_videoc                   sP   t dd|i| jdi }t|fi | jdi }| j||tjdI dH S )z
        Asynchronously load video from an HTTP or base64 data URL.

        By default, the image is converted into RGB format.
        rm   ro   rx   rM   Nr/   )r   r#   re   r   rb   rW   ry   rz   r/   r/   r0   fetch_video_asyncB  s   z MediaConnector.fetch_video_asyncr>   c                 C      t  }|d|S )z2
        Load image embedding from a URL.
        r    )r   r;   )r+   r>   image_embedding_ior/   r/   r0   fetch_image_embeddingX     z$MediaConnector.fetch_image_embeddingc                 C   r~   )z2
        Load audio embedding from a URL.
        r    )r   r;   )r+   r>   audio_embedding_ior/   r/   r0   fetch_audio_embeddingc  r   z$MediaConnector.fetch_audio_embedding)%__name__
__module____qualname__r   dictrq   r   r   listr'   r   r   r   rB   rJ   rL   intrZ   rb   tuplenpndarrayfloatri   rj   r
   ru   rv   nptNDArrayr|   r}   torchTensorr   r   __classcell__r/   r/   r-   r0   r   9   s    
/


#
$






r   WAVformatrd   sampling_rater   r%   c                C   s   t  }|j| |f|dS )zEncode audio as base64.)audio_format)r   encode_base64)rd   r   r   rh   r/   r/   r0   encode_audio_base64o  s   r   c                C   4   t | ||d}tjd|  d}d| d| S )zEncode audio as a data URL.r   .rd   data:;base64,)r   	mimetypes	types_mapre   lower)rd   r   r   	audio_b64mimetyper/   r/   r0   encode_audio_urlz  s   r   rk   rm   r   ro   rm   c                C   s   t |d}|j| |dS )z
    Encode a pillow image to base64 format.

    By default, the image is converted into RGB format before being encoded.
    rl   )image_format)r   r   )ro   rm   r   rs   r/   r/   r0   encode_image_base64  s   
r   PNGc                C   r   )z|
    Encode a pillow image as a data URL.

    By default, the image is converted into RGB format before being encoded.
    r   r   ro   r   r   )r   r   r   re   r   )ro   rm   r   	image_b64r   r/   r/   r0   encode_image_url  s   r   JPEGframesc                C   s   t  }t|}|j| |dS )N)video_format)r   r   r   )r   r   rs   r{   r/   r/   r0   encode_video_base64  s   r   c                C   sD   t | |d}| dkrd}ntjd|  d}d| d| S )Nr   jpegz
video/jpegr   rx   r   r   )r   r   r   r   re   )r   r   	video_b64r   r/   r/   r0   encode_video_url  s
   r   mm_positionsc                 C   s0   dd |   D }t|dd d}dd |D S )a/  
    Given a `MultiModalPlaceholderDict`, output a sequence of keys to
    sort the dictionary by `offset` (starting index in the input sequence)
    in ascending order.

    Returns:
        A list of `(modality, idx)`, which can be used to access an item
        by `mm_positions[modality][idx]`.
    c                 s   s0    | ]\}}t |D ]
\}}|||fV  q
qd S N)	enumerate).0modalityitemsidxitemr/   r/   r0   	<genexpr>  s    z'argsort_mm_positions.<locals>.<genexpr>c                 S   s
   | d j S )N   )offset)xr/   r/   r0   <lambda>  s   
 z&argsort_mm_positions.<locals>.<lambda>keyc                 S   s   g | ]	\}}}||fqS r/   r/   )r   r   r   _r/   r/   r0   
<listcomp>  s    z(argsort_mm_positions.<locals>.<listcomp>)r   sorted)r   
flat_itemssorted_flat_itemsr/   r/   r0   argsort_mm_positions  s
   r   Fdevice
pin_memory	mm_kwargsr   r   c          	      c   s\    ddl m} t| dd dD ]\}}t|}||}|j||d}|t||fV  qdS )a  Group consecutive `MultiModalKwargsItem`s from `mm_kwargs` with the same
    modality together into the same `MultiModalKwargs` instance.

    Args:
        mm_kwargs: List of `MultiModalKwargsItem`.
        device: The device to place the grouped tensors on.
        pin_memory: Whether to pin memory for faster host-to-device transfer.

    Yields:
        A tuple `(modality, num_items, grouped_kwargs)`.
    r   )MultiModalKwargsItemsc                 S   s   | j S r   )r   )r   r/   r/   r0   r     s    z-group_mm_kwargs_by_modality.<locals>.<lambda>r   r   N)vllm.multimodal.inputsr   r   r   from_seqget_datalen)	r   r   r   r   r   r   	items_lstmm_kwargs_itemsmm_kwargs_datar/   r/   r0   group_mm_kwargs_by_modality  s   
r   rc   audio_io_kwargsc                 C   &   |sdnd|i}t |dd}|| S )a+  
    Args:
        audio_url: URL of the audio file to fetch.
        audio_io_kwargs: Additional kwargs passed to handle audio IO.

    Warning:
        This method has direct access to local files and is only intended
        to be called by user code. Never call this from the online server!
    Nrd   r5   r#   r!   )r   ri   )rc   r   r#   media_connectorr/   r/   r0   ri        
ri   rn   image_io_kwargsc                 C   r   )a+  
    Args:
        image_url: URL of the image file to fetch.
        image_io_kwargs: Additional kwargs passed to handle image IO.

    Warning:
        This method has direct access to local files and is only intended
        to be called by user code. Never call this from the online server!
    Nro   r5   r   )r   ru   )rn   r   r#   r   r/   r/   r0   ru     r   ru   rw   video_io_kwargsc                 C   r   )a+  
    Args:
        video_url: URL of the video file to fetch.
        video_io_kwargs: Additional kwargs passed to handle video IO.

    Warning:
        This method has direct access to local files and is only intended
        to be called by user code. Never call this from the online server!
    Nrx   r5   r   )r   r|   )rw   r   r#   r   r/   r/   r0   r|     r   r|   r   )Or[   atexitr   collections.abcr   concurrent.futuresr   	itertoolsr   pathlibr   typingr   r   r   urllib.requestr	   numpyr   numpy.typingr   r   PILr
   r   urllib3.utilr   r   	vllm.envsrW   vllm.connectionsr   r   vllm.loggerr   vllm.utils.registryr   mediar   r   r   r   r   r   inputsr   r   r   r   loggerVLLM_MEDIA_LOADING_THREAD_COUNTr_   registershutdownr   MEDIA_CONNECTOR_REGISTRYr   r   r   rq   r   r   r   r   r   r   r   r   r   r   typesDeviceboolr   r   r   ri   ru   r|   r/   r/   r/   r0   <module>   s
   	  ;







 

