o
    i!                     @   s   d dl Z d dlmZ d dlmZmZmZ ddlmZm	Z	m
Z
mZmZmZ ddlmZmZ e r;d dlmZ dd	lmZ e
 rHd dlZdd
lmZ e	 rWddlmZ ddlmZ eeZeeddG dd deZdS )    N)UserDict)AnyUnionoverload   )add_end_docstringsis_tf_availableis_torch_availableis_vision_availableloggingrequires_backends   )Pipelinebuild_pipeline_init_args)Image)
load_image)6MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES)9TF_MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES)stable_softmaxT)has_image_processorc                       s.  e Zd ZdZdZdZdZdZ fddZe	de
edf dee d	ed
eeeef  fddZe	de
ee ed f dee d	ed
eeeeef   fddZde
eee ded f dee d	ed
e
eeeef  eeeeef   f f fddZdddZ				dddZdd Zdd Z  ZS )#ZeroShotImageClassificationPipelineaL  
    Zero shot image classification pipeline using `CLIPModel`. This pipeline predicts the class of an image when you
    provide an image and a set of `candidate_labels`.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> classifier = pipeline(model="google/siglip-so400m-patch14-384")
    >>> classifier(
    ...     "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
    ...     candidate_labels=["animals", "humans", "landscape"],
    ... )
    [{'score': 0.965, 'label': 'animals'}, {'score': 0.03, 'label': 'humans'}, {'score': 0.005, 'label': 'landscape'}]

    >>> classifier(
    ...     "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
    ...     candidate_labels=["black and white", "photorealist", "painting"],
    ... )
    [{'score': 0.996, 'label': 'black and white'}, {'score': 0.003, 'label': 'photorealist'}, {'score': 0.0, 'label': 'painting'}]
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This image classification pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"zero-shot-image-classification"`.

    See the list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=zero-shot-image-classification).
    FTc                    s>   t  jdi | t| d | | jdkrt d S t d S )Nvisiontf )super__init__r   check_model_type	frameworkr   r   )selfkwargs	__class__r   i/home/ubuntu/.local/lib/python3.10/site-packages/transformers/pipelines/zero_shot_image_classification.pyr   H   s   

z,ZeroShotImageClassificationPipeline.__init__imagezImage.Imagecandidate_labelsr   returnc                 K      d S Nr   r   r#   r$   r   r   r   r"   __call__R      z,ZeroShotImageClassificationPipeline.__call__c                 K   r&   r'   r   r(   r   r   r"   r)   W   r*   c                    s:   d|v r	| d}|du rtdt j|fd|i|S )a  
        Assign labels to the image(s) passed as inputs.

        Args:
            image (`str`, `list[str]`, `PIL.Image` or `list[PIL.Image]`):
                The pipeline handles three types of images:

                - A string containing a http link pointing to an image
                - A string containing a local path to an image
                - An image loaded in PIL directly

            candidate_labels (`list[str]`):
                The candidate labels for this image. They will be formatted using *hypothesis_template*.

            hypothesis_template (`str`, *optional*, defaults to `"This is a photo of {}"`):
                The format used in conjunction with *candidate_labels* to attempt the image classification by
                replacing the placeholder with the candidate_labels. Pass "{}" if *candidate_labels* are
                already formatted.

            timeout (`float`, *optional*, defaults to None):
                The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
                the call may block forever.

        Return:
            A list of dictionaries containing one entry per proposed label. Each dictionary contains the
            following keys:
            - **label** (`str`) -- One of the suggested *candidate_labels*.
            - **score** (`float`) -- The score attributed by the model to that label. It is a value between
                0 and 1, computed as the `softmax` of `logits_per_image`.
        imagesNzSCannot call the zero-shot-image-classification pipeline without an images argument!r$   )pop
ValueErrorr   r)   r(   r    r   r"   r)   \   s
   %
Nc                 K   sf   i }d|v r|d |d< d|v r|d |d< d|v r |d |d< |d ur.t dt ||d< |i i fS )Nr$   timeouthypothesis_templatez^The `tokenizer_kwargs` argument is deprecated and will be removed in version 5 of Transformerstokenizer_kwargs)warningswarnFutureWarning)r   r0   r   preprocess_paramsr   r   r"   _sanitize_parameters   s   
z8ZeroShotImageClassificationPipeline._sanitize_parametersThis is a photo of {}.c           
         s   |d u ri }t ||d}| j|g| jd}| jdkr || j}||d<  fdd|D }ddi}d	| jjjv r@|jd
ddd || | j	|fd| ji|}	|	g|d< |S )N)r.   )r+   return_tensorsptr$   c                    s   g | ]}  |qS r   )format).0xr/   r   r"   
<listcomp>   s    zBZeroShotImageClassificationPipeline.preprocess.<locals>.<listcomp>paddingTsiglip
max_length@   )r>   r@   
truncationr7   text_inputs)
r   image_processorr   todtypemodelconfig
model_typeupdate	tokenizer)
r   r#   r$   r/   r.   r0   inputs	sequencestokenizer_default_kwargsrC   r   r<   r"   
preprocess   s   


z.ZeroShotImageClassificationPipeline.preprocessc                 C   s\   | d}| d}t|d tr|d }n|d d }| jdi ||}||jd}|S )Nr$   rC   r   )r$   logitsr   )r,   
isinstancer   rG   logits_per_image)r   model_inputsr$   rC   outputsmodel_outputsr   r   r"   _forward   s   


z,ZeroShotImageClassificationPipeline._forwardc                 C   s   | d}|d d }| jdkr,d| jjjv r,t|d}| }t	|t
s+|g}n5| jdkrG|jddd}| }t	|t
sF|g}n| jdkrYt|dd	}|  }ntd
| j dd tt||dd dD }|S )Nr$   rP   r   r8   r?   )dimr   )axiszUnsupported framework: c                 S   s   g | ]	\}}||d qS ))scorelabelr   )r:   rZ   candidate_labelr   r   r"   r=      s    zCZeroShotImageClassificationPipeline.postprocess.<locals>.<listcomp>c                 S   s
   | d  S )Nr   r   )r;   r   r   r"   <lambda>   s   
 zAZeroShotImageClassificationPipeline.postprocess.<locals>.<lambda>)key)r,   r   rG   rH   rI   torchsigmoidsqueezetolistrQ   listsoftmaxr   numpyr-   sortedzip)r   rU   r$   rP   probsscoresresultr   r   r"   postprocess   s,   




z/ZeroShotImageClassificationPipeline.postprocessr'   )Nr6   NN)__name__
__module____qualname____doc___load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   r   r   strrc   r   dictr)   r5   rO   rV   rk   __classcell__r   r   r    r"   r   !   sX     

&
+
r   ) r1   collectionsr   typingr   r   r   utilsr   r   r	   r
   r   r   baser   r   PILr   image_utilsr   r_   models.auto.modeling_autor   models.auto.modeling_tf_autor   tf_utilsr   
get_loggerrl   loggerr   r   r   r   r"   <module>   s"     
