o
    Gi>                     @  sh  d dl mZ d dlZd dlZd dlmZmZ d dlZddlm	Z	 ddl
mZmZ e rMd dlZddlmZ dd	lmZ e rBd dlZejejejejd
Zd dlmZ dBddZ												dCdDd-d.Z				dEdFd1d2Z									dGdHd3d4Z												dCdId6d7Z		dJdKd:d;Z dLd<d=Z!		dJdMd>d?Z"		dJdNd@dAZ#dS )O    )annotationsN)Literalcast   )	deprecate)is_safetensors_availableis_torch_available   )VaeImageProcessor)VideoProcessor)float16float32bfloat16uint8)Imagedatabytesreturnstrc                 C  sF   |  drdS |  drdS |  ds|  drdS |  dr!d	S d
S )Ns   jpegs   PNG

pngs   GIF87as   GIF89agifs   BMbmpunknown)
startswith)r    r   P/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/utils/remote_utils.pydetect_image_type0   s   


r   TpiljpgFbinaryendpointtensor'torch.Tensor'	processor-'VaeImageProcessor' | 'VideoProcessor' | None
do_scalingboolscaling_factorfloat | Noneshift_factoroutput_typeLiteral['mp4', 'pil', 'pt']return_typeimage_formatLiteral['png', 'jpg']partial_postprocessinput_tensor_typeLiteral['binary']output_tensor_typeheight
int | Nonewidthc                 C  st   |j dkr|d u r|d u rtd|dkr&|dkr&|	s&t|ttfs&td|r6|d u r8tdddd	d
 d S d S d S )N   z1`height` and `width` required for packed latents.ptr   z`processor` is required.r&   1.0.0zQ`do_scaling` is deprecated, pass `scaling_factor` and `shift_factor` if required.Fstandard_warn)ndim
ValueError
isinstancer
   r   r   )r!   r"   r$   r&   r(   r*   r+   r-   r.   r0   r1   r3   r4   r6   r   r   r   check_inputs_decode<   s"   
r?   responserequests.Responsec                 C  s  |dks|dkr-|d ur-| j }| j}t|d }|d }t| }	tjt||	d|}|dkr|rR|dkrJdd |D }
t	|
dkrH|
d	 }
|
S |dkrP|}
|
S |d u sZ|dkr^|}
|
S t
|trttttj |j|dd
d	 }
|
S ttj|j|dd
d	 }
|
S |dkr|dkr|d u rtt| j d}
t| j }||
_|
S |dkr|d ur|dkrdd |d	ddd  d  dD }
|
S |dkr|}
|
S |dkr|dkr| j }
|
S )Nr8   r   shapedtyperC   c                 S  s   g | ]	}t | qS r   )r   	fromarraynumpy.0imager   r   r   
<listcomp>o   s    z&postprocess_decode.<locals>.<listcomp>r   r   )r+   RGBc                 S  s   g | ]}t |qS r   )r   rE   rG   r   r   r   rJ      s    r	   r7      r   mp4)contentheadersjsonloads	DTYPE_MAPtorch
frombuffer	bytearrayreshapelenr>   r   r   listr   postprocess_videopostprocessopenioBytesIOconvertr   formatpermutefloatrF   roundastype)r@   r$   r+   r-   r0   output_tensor
parametersrB   rC   torch_dtypeoutputdetected_formatr   r   r   postprocess_decode^   s`   

$ri   c
                 C  s"  i }
|||t | jt| jdd d}|r|d ur||d< |r)|d ur)||d< |r4|d u r4||d< n|rB|d u rB|d u rB||d< |d urR|	d urR||d< |	|d< d	|
d
< d	|
d< |dkrk|dkrk|d u rkd|
d< n|dkr||dkr||d u r|d|
d< n|dkrd|
d< tj| d}|||
dS )N.)r.   r+   r0   rB   rC   r(   r*   r&   r4   r6   ztensor/binaryzContent-TypeAcceptr   r   z
image/jpegr   z	image/pngrM   z
text/plainr"   r   paramsrO   )rX   rB   r   rC   splitsafetensorsrS   _tobytes)r"   r$   r&   r(   r*   r+   r.   r0   r4   r6   rO   re   tensor_datar   r   r   prepare_decode   s8   


rs   8Image.Image | list[Image.Image] | bytes | 'torch.Tensor'c                 C  s   |
dkrt ddddd d}
|dkrt ddd	dd d}t| |||||||||	|
||| t||||||||	||d

}tj| fi |}|jsMt| t|||||	d}|S )aM  
    Hugging Face Hybrid Inference that allow running VAE decode remotely.

    Args:
        endpoint (`str`):
            Endpoint for Remote Decode.
        tensor (`torch.Tensor`):
            Tensor to be decoded.
        processor (`VaeImageProcessor` or `VideoProcessor`, *optional*):
            Used with `return_type="pt"`, and `return_type="pil"` for Video models.
        do_scaling (`bool`, default `True`, *optional*):
            **DEPRECATED**. **pass `scaling_factor`/`shift_factor` instead.** **still set
            do_scaling=None/do_scaling=False for no scaling until option is removed** When `True` scaling e.g. `latents
            / self.vae.config.scaling_factor` is applied remotely. If `False`, input must be passed with scaling
            applied.
        scaling_factor (`float`, *optional*):
            Scaling is applied when passed e.g. [`latents /
            self.vae.config.scaling_factor`](https://github.com/huggingface/diffusers/blob/7007febae5cff000d4df9059d9cf35133e8b2ca9/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L1083C37-L1083C77).
            - SD v1: 0.18215
            - SD XL: 0.13025
            - Flux: 0.3611
            If `None`, input must be passed with scaling applied.
        shift_factor (`float`, *optional*):
            Shift is applied when passed e.g. `latents + self.vae.config.shift_factor`.
            - Flux: 0.1159
            If `None`, input must be passed with scaling applied.
        output_type (`"mp4"` or `"pil"` or `"pt", default `"pil"):
            **Endpoint** output type. Subject to change. Report feedback on preferred type.

            `"mp4": Supported by video models. Endpoint returns `bytes` of video. `"pil"`: Supported by image and video
            models.
                Image models: Endpoint returns `bytes` of an image in `image_format`. Video models: Endpoint returns
                `torch.Tensor` with partial `postprocessing` applied.
                    Requires `processor` as a flag (any `None` value will work).
            `"pt"`: Support by image and video models. Endpoint returns `torch.Tensor`.
                With `partial_postprocess=True` the tensor is postprocessed `uint8` image tensor.

            Recommendations:
                `"pt"` with `partial_postprocess=True` is the smallest transfer for full quality. `"pt"` with
                `partial_postprocess=False` is the most compatible with third party code. `"pil"` with
                `image_format="jpg"` is the smallest transfer overall.

        return_type (`"mp4"` or `"pil"` or `"pt", default `"pil"):
            **Function** return type.

            `"mp4": Function returns `bytes` of video. `"pil"`: Function returns `PIL.Image.Image`.
                With `output_type="pil" no further processing is applied. With `output_type="pt" a `PIL.Image.Image` is
                created.
                    `partial_postprocess=False` `processor` is required. `partial_postprocess=True` `processor` is
                    **not** required.
            `"pt"`: Function returns `torch.Tensor`.
                `processor` is **not** required. `partial_postprocess=False` tensor is `float16` or `bfloat16`, without
                denormalization. `partial_postprocess=True` tensor is `uint8`, denormalized.

        image_format (`"png"` or `"jpg"`, default `jpg`):
            Used with `output_type="pil"`. Endpoint returns `jpg` or `png`.

        partial_postprocess (`bool`, default `False`):
            Used with `output_type="pt"`. `partial_postprocess=False` tensor is `float16` or `bfloat16`, without
            denormalization. `partial_postprocess=True` tensor is `uint8`, denormalized.

        input_tensor_type (`"binary"`, default `"binary"`):
            Tensor transfer type.

        output_tensor_type (`"binary"`, default `"binary"`):
            Tensor transfer type.

        height (`int`, **optional**):
            Required for `"packed"` latents.

        width (`int`, **optional**):
            Required for `"packed"` latents.

    Returns:
        output (`Image.Image` or `list[Image.Image]` or `bytes` or `torch.Tensor`).
    base64zinput_tensor_type='base64'r9   z9input_tensor_type='base64' is deprecated. Using `binary`.Fr:   r    zoutput_tensor_type='base64'z:output_tensor_type='base64' is deprecated. Using `binary`.)
r"   r$   r&   r(   r*   r+   r.   r0   r4   r6   )r@   r$   r+   r-   r0   )	r   r?   rs   requestspostokRuntimeErrorrP   ri   )r!   r"   r$   r&   r(   r*   r+   r-   r.   r0   r1   r3   r4   r6   kwargsr@   rg   r   r   r   remote_decode   sn   \r{   rI   'torch.Tensor' | Image.Imagec                 C  s   d S )Nr   )r!   rI   r(   r*   r   r   r   check_inputs_encodeS  s   r}   c                 C  sF   | j }| j}t|d }|d }t| }tjt||d|}|S )NrB   rC   rD   )	rN   rO   rP   rQ   rR   rS   rT   rU   rV   )r@   rd   re   rB   rC   rf   r   r   r   postprocess_encode\  s   r~   c                 C  s   i }i }|d ur||d< |d ur||d< t | tjr7tj|  d}t| j|d< t| j	
dd |d< nt }| j|dd	 | }|||d
S )Nr(   r*   r"   rB   rj   rk   rC   PNG)r_   rm   )r>   rS   Tensorrp   rq   
contiguousrX   rB   r   rC   ro   r\   r]   savegetvalue)rI   r(   r*   rO   re   r   bufferr   r   r   prepare_encodeh  s   r   c                 C  sN   t | ||| t|||d}tj| fi |}|js t| t|d}|S )a%  
    Hugging Face Hybrid Inference that allow running VAE encode remotely.

    Args:
        endpoint (`str`):
            Endpoint for Remote Decode.
        image (`torch.Tensor` or `PIL.Image.Image`):
            Image to be encoded.
        scaling_factor (`float`, *optional*):
            Scaling is applied when passed e.g. [`latents * self.vae.config.scaling_factor`].
            - SD v1: 0.18215
            - SD XL: 0.13025
            - Flux: 0.3611
            If `None`, input must be passed with scaling applied.
        shift_factor (`float`, *optional*):
            Shift is applied when passed e.g. `latents - self.vae.config.shift_factor`.
            - Flux: 0.1159
            If `None`, input must be passed with scaling applied.

    Returns:
        output (`torch.Tensor`).
    )rI   r(   r*   )r@   )r}   r   rv   rw   rx   ry   rP   r~   )r!   rI   r(   r*   rz   r@   rg   r   r   r   remote_encode~  s$   r   )r   r   r   r   )NTNNr   r   r   Fr    r    NN)r!   r   r"   r#   r$   r%   r&   r'   r(   r)   r*   r)   r+   r,   r-   r,   r.   r/   r0   r'   r1   r2   r3   r2   r4   r5   r6   r5   )Nr   r   F)
r@   rA   r$   r%   r+   r,   r-   r,   r0   r'   )	NTNNr   r   FNN)r"   r#   r$   r%   r&   r'   r(   r)   r*   r)   r+   r,   r.   r/   r0   r'   r4   r5   r6   r5   )r!   r   r"   r#   r$   r%   r&   r'   r(   r)   r*   r)   r+   r,   r-   r,   r.   r/   r0   r'   r1   r2   r3   r2   r4   r5   r6   r5   r   rt   )NN)r!   r   rI   r|   r(   r)   r*   r)   )r@   rA   )rI   r|   r(   r)   r*   r)   )
r!   r   rI   r|   r(   r)   r*   r)   r   r#   )$
__future__r   r\   rP   typingr   r   rv   deprecation_utilsr   import_utilsr   r   rS   image_processorr
   video_processorr   safetensors.torchrp   r   r   r   r   rR   PILr   r   r?   ri   rs   r{   r}   r~   r   r   r   r   r   r   <module>   s   
$7. 
	