o
    Git                  
   @   s  d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dl	m
Z
mZ ddlmZ ddlmZmZ ddlmZ ddlmZmZmZmZmZmZmZ dd	lmZ d
dlmZmZ ddl m!Z!m"Z"m#Z# e rqd dl$m%  m&Z' dZ(ndZ(e)e*Z+e rd dl,m-Z- e rd dl.Z.i dddgdddgdddgdddgdddgdddgddd gd!d"d#gd$d"d%gd&d'd(gd)d'd*gd+d'd,gd-d.d,gd/d.d0gd1d2d3gd4d2d5gd6d7d5gi d8d7d9gd:d;d9gd<d;d=gd>d=d=gd?d=d;gd@d9d;gdAd9d7gdBd5d7gdCd5d2gdDd3d2gdEd0d.gdFd,d.gdGd*d'gdHd(d'gdId%d"gdJd#d"gdKd dgddgddgddgddgddgddgdLZ/dMZ0				dVdNe1dB dOe2ej3B dB dPe4e1 dB dQe4e5 dB fdRdSZ6G dTdU dUeZ7dS )W    N)Callable)T5EncoderModelT5Tokenizer   )PixArtImageProcessor)AutoencoderKLPixArtTransformer2DModel)KarrasDiffusionSchedulers)BACKENDS_MAPPING	deprecateis_bs4_availableis_ftfy_availableis_torch_xla_availableloggingreplace_example_docstring)randn_tensor   )DiffusionPipelineImagePipelineOutput   )ASPECT_RATIO_256_BINASPECT_RATIO_512_BINASPECT_RATIO_1024_BINTF)BeautifulSoupz0.25g      @g      @z0.26g      @z0.27g      @z0.28g      @z0.32g      @g      @z0.33g      @z0.35g      @z0.4g      @g      @z0.42g      @z0.48g      @g      @z0.5g      @z0.52g      @z0.57g      @z0.6g      @z0.68g      @g      @z0.72g      @z0.78g      @z0.82g      @z0.88g      @z0.94g      @z1.0z1.07z1.13z1.21z1.29z1.38z1.46z1.67z1.75z2.0z2.09z2.4z2.5z2.89)z3.0z3.11z3.62z3.75z3.88z4.0aH  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import PixArtSigmaPipeline

        >>> # You can replace the checkpoint id with "PixArt-alpha/PixArt-Sigma-XL-2-512-MS" too.
        >>> pipe = PixArtSigmaPipeline.from_pretrained(
        ...     "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS", torch_dtype=torch.float16
        ... )
        >>> # Enable memory optimizations.
        >>> # pipe.enable_model_cpu_offload()

        >>> prompt = "A small cactus with a happy face in the Sahara desert."
        >>> image = pipe(prompt).images[0]
        ```
num_inference_stepsdevice	timestepssigmasc                 K   s  |dur|durt d|dur>dtt| jj v }|s(t d| j d| jd||d| | j}t	|}||fS |durpdtt| jj v }|sZt d| j d| jd||d	| | j}t	|}||fS | j|fd
|i| | j}||fS )a  
    Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
    custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

    Args:
        scheduler (`SchedulerMixin`):
            The scheduler to get timesteps from.
        num_inference_steps (`int`):
            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
            must be `None`.
        device (`str` or `torch.device`, *optional*):
            The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
        timesteps (`list[int]`, *optional*):
            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
            `num_inference_steps` and `sigmas` must be `None`.
        sigmas (`list[float]`, *optional*):
            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
            `num_inference_steps` and `timesteps` must be `None`.

    Returns:
        `tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
        second element is the number of inference steps.
    NzYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesr   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r   r   r   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r   r   r    )

ValueErrorsetinspect	signatureset_timesteps
parameterskeys	__class__r   len)	schedulerr   r   r   r   kwargsaccepts_timestepsaccept_sigmasr   r   j/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.pyretrieve_timesteps~   s2   r-   c                4       s  e Zd ZdZedZddgZdZde	de
dededef
 fd	d
Z										d=deee B dedededejdB dejdB dejdB dejdB dejdB dedefddZdd Z				d>d d!Zd?d"d#Zd$d% Zd@d&d'Ze ee			(			)				*							+						dAdeee B ded,ed-ee d.ee d/ededB d0edB d1edB d2ed3ej eej  B dB d4ejdB dejdB dejdB dejdB dejdB d5edB d6ed7e!eeejgdf dB d8eded9eded:e"e#B f0d;d<Z$  Z%S )BPixArtSigmaPipelinea  
    Pipeline for text-to-image generation using PixArt-Sigma.

    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
    library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

    Args:
        vae ([`AutoencoderKL`]):
            Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
        text_encoder ([`T5EncoderModel`]):
            Frozen text-encoder. PixArt-Alpha uses
            [T5](https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5EncoderModel), specifically the
            [t5-v1_1-xxl](https://huggingface.co/PixArt-alpha/PixArt-alpha/tree/main/t5-v1_1-xxl) variant.
        tokenizer (`T5Tokenizer`):
            Tokenizer of class
            [T5Tokenizer](https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5Tokenizer).
        transformer ([`PixArtTransformer2DModel`]):
            A text conditioned `PixArtTransformer2DModel` to denoise the encoded image latents. Initially published as
            [`Transformer2DModel`](https://huggingface.co/PixArt-alpha/PixArt-Sigma-XL-2-1024-MS/blob/main/transformer/config.json#L2)
            in the config, but the mismatch can be ignored.
        scheduler ([`SchedulerMixin`]):
            A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
    u5   [#®•©™&@·º½¾¿¡§~\)\(\]\[\}\{\|\\/\*]{1,}	tokenizertext_encoderztext_encoder->transformer->vaevaetransformerr(   c                    sX   t    | j|||||d t| dd r dt| jjjd  nd| _t	| jd| _
d S )N)r/   r0   r1   r2   r(   r1   r   r      )vae_scale_factor)super__init__register_modulesgetattrr'   r1   configblock_out_channelsr4   r   image_processor)selfr/   r0   r1   r2   r(   r&   r   r,   r6      s   

(zPixArtSigmaPipeline.__init__T r   NF,  promptdo_classifier_free_guidancenegative_promptnum_images_per_promptr   prompt_embedsnegative_prompt_embedsprompt_attention_masknegative_prompt_attention_maskclean_captionmax_sequence_lengthc              	   K   sh  d|v rd}t dd|dd |du r| j}|}|du rz| j||
d}| j|d|d	d	d
d}|j}| j|dd
dj}|jd |jd krdt||sd| j|dd|d df }t	
d| d|  |j}||}| j|||d}|d }| jdur| jj}n| jdur| jj}nd}|j||d}|j\}}}|d|d}||| |d}|d|}||| d}|r|du rt|tr|g| n|}| j||
d}|jd }| j|d|d	d	d	d
d}|j}	|	|}	| j|j||	d}|d }|r*|jd }|j||d}|d|d}||| |d}|	d|}	|	|| d}	nd}d}	||||	fS )az  
        Encodes the prompt into text encoder hidden states.

        Args:
            prompt (`str` or `list[str]`, *optional*):
                prompt to be encoded
            negative_prompt (`str` or `list[str]`, *optional*):
                The prompt not to guide the image generation. If not defined, one has to pass `negative_prompt_embeds`
                instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`). For
                PixArt-Alpha, this should be "".
            do_classifier_free_guidance (`bool`, *optional*, defaults to `True`):
                whether to use classifier free guidance or not
            num_images_per_prompt (`int`, *optional*, defaults to 1):
                number of images that should be generated per prompt
            device: (`torch.device`, *optional*):
                torch device to place the resulting embeddings on
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. For PixArt-Alpha, it's should be the embeddings of the ""
                string.
            clean_caption (`bool`, defaults to `False`):
                If `True`, the function will preprocess and clean the provided caption before encoding.
            max_sequence_length (`int`, defaults to 300): Maximum sequence length to use for the prompt.
        mask_featurezThe use of `mask_feature` is deprecated. It is no longer used in any computation and that doesn't affect the end results. It will be removed in a future version.z1.0.0F)standard_warnN)rH   
max_lengthTpt)paddingrL   
truncationadd_special_tokensreturn_tensorslongest)rN   rQ   r   zZThe following part of your input was truncated because T5 can only handle sequences up to z	 tokens: )attention_maskr   dtyper   )rN   rL   rO   return_attention_maskrP   rQ   )r   _execution_device_text_preprocessingr/   	input_idsshapetorchequalbatch_decodeloggerwarningrT   tor0   rV   r2   repeatview
isinstancestr)r<   r@   rA   rB   rC   r   rD   rE   rF   rG   rH   rI   r)   deprecation_messagerL   text_inputstext_input_idsuntruncated_idsremoved_textrV   bs_embedseq_len_uncond_tokensuncond_inputr   r   r,   encode_prompt   s   * 





	

z!PixArtSigmaPipeline.encode_promptc                 C   sX   dt t| jjj v }i }|r||d< dt t| jjj v }|r*||d< |S )Neta	generator)r    r!   r"   r(   stepr$   r%   )r<   rr   rq   accepts_etaextra_step_kwargsaccepts_generatorr   r   r,   prepare_extra_step_kwargs  s   z-PixArtSigmaPipeline.prepare_extra_step_kwargsc
           
      C   s  |d dks|d dkrt d| d| d|d u s(|d ur5t|tr(|dkr5t d| dt| d|d urH|d urHt d| d	| d
|d u rT|d u rTt d|d urkt|tskt|tskt dt| |d ur~|d ur~t d| d| d
|d ur|d urt d| d| d
|d ur|d u rt d|d ur|	d u rt d|d ur|d ur|j|jkrt d|j d|j d|j|	jkrt d|j d|	j dd S d S d S )Nr3   r   z7`height` and `width` have to be divisible by 8 but are z and .z5`callback_steps` has to be a positive integer but is z	 of type zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z and `negative_prompt_embeds`: z'Cannot forward both `negative_prompt`: zEMust provide `prompt_attention_mask` when specifying `prompt_embeds`.zWMust provide `negative_prompt_attention_mask` when specifying `negative_prompt_embeds`.zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` z`prompt_attention_mask` and `negative_prompt_attention_mask` must have the same shape when passed directly, but got: `prompt_attention_mask` z% != `negative_prompt_attention_mask` )r   rd   inttypere   listr[   )
r<   r@   heightwidthrB   callback_stepsrD   rE   rF   rG   r   r   r,   check_inputs  sl   z PixArtSigmaPipeline.check_inputsc                    s    rt  sttd d d td d  r0t s0ttd d d td d t|ttfs:|g}dt	f fdd	fd
d|D S )Nbs4rS   zSetting `clean_caption=True`z#Setting `clean_caption` to False...Fftfytextc                    s,    r | }  | } | S |   } | S N)_clean_captionlowerstrip)r   )rH   r<   r   r,   process  s   

z8PixArtSigmaPipeline._text_preprocessing.<locals>.processc                    s   g | ]} |qS r   r   ).0t)r   r   r,   
<listcomp>  s    z;PixArtSigmaPipeline._text_preprocessing.<locals>.<listcomp>)
r   r_   r`   r
   formatr   rd   tupler{   re   )r<   r   rH   r   )rH   r   r<   r,   rY     s   



z'PixArtSigmaPipeline._text_preprocessingc                 C   s  t |}t|}|  }tdd|}tdd|}tdd|}t|ddj}tdd|}td	d|}td
d|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}td d|}td!d|}t| j	d|}td"d|}t
d#}tt||d$krt|d|}t|}tt|}td%d|}td&d|}td'd|}td(d|}td)d|}td*d|}td+d|}td,d|}td-d|}td.d|}td/d0|}td1d2|}td3d|}|  td4d5|}td6d|}td7d|}td8d|}| S )9Nz<person>personzk\b((?:https?:(?:\/{1,3}|[a-zA-Z0-9%])|[a-zA-Z0-9.\-]+[.](?:com|co|ru|net|org|edu|gov|it)[\w/-]*\b\/?(?!@)))r>   zh\b((?:www:(?:\/{1,3}|[a-zA-Z0-9%])|[a-zA-Z0-9.\-]+[.](?:com|co|ru|net|org|edu|gov|it)[\w/-]*\b\/?(?!@)))zhtml.parser)featuresz
@[\w\d]+\bz[\u31c0-\u31ef]+z[\u31f0-\u31ff]+z[\u3200-\u32ff]+z[\u3300-\u33ff]+z[\u3400-\u4dbf]+z[\u4dc0-\u4dff]+z[\u4e00-\u9fff]+z|[\u002D\u058A\u05BE\u1400\u1806\u2010-\u2015\u2E17\u2E1A\u2E3A\u2E3B\u2E40\u301C\u3030\u30A0\uFE31\uFE32\uFE58\uFE63\uFF0D]+-u   [`´«»“”¨]"u   [‘’]'z&quot;?z&ampz"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} z\d:\d\d\s+$z\\nz
#\d{1,3}\bz	#\d{5,}\bz
\b\d{6,}\bz0[\S]+\.(?:png|jpg|jpeg|bmp|webp|eps|pdf|apk|mp4)z
[\"\']{2,}z[\.]{2,}z\s+\.\s+z	(?:\-|\_)r   z\b[a-zA-Z]{1,3}\d{3,15}\bz\b[a-zA-Z]+\d+[a-zA-Z]+\bz\b\d+[a-zA-Z]+\d+\bz!(worldwide\s+)?(free\s+)?shippingz(free\s)?download(\sfree)?z\bclick\b\s(?:for|on)\s\w+z9\b(?:png|jpg|jpeg|bmp|webp|eps|pdf|apk|mp4)(\simage[s]?)?z\bpage\s+\d+\bz*\b\d*[a-zA-Z]+\d+[a-zA-Z]+\d+[a-zA-Z\d]*\bu   \b\d+\.?\d*[xх×]\d+\.?\d*\bz
\b\s+\:\s+z: z(\D[,\./])\bz\1 z\s+z^[\"\']([\w\W]+)[\"\']$z\1z^[\'\_,\-\:;]z[\'\_,\-\:\-\+]$z^\.\S+$)re   ulunquote_plusr   r   resubr   r   bad_punct_regexcompiler'   findallr   fix_texthtmlunescape)r<   captionregex2r   r   r,   r     s   
	

z"PixArtSigmaPipeline._clean_captionc	           
      C   s   ||t || j t || j f}	t|tr(t||kr(tdt| d| d|d u r5t|	|||d}n||}|| jj	 }|S )Nz/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)rr   r   rV   )
ry   r4   rd   r{   r'   r   r   ra   r(   init_noise_sigma)
r<   
batch_sizenum_channels_latentsr|   r}   rV   r   rr   latentsr[   r   r   r,   prepare_latentsa  s    
z#PixArtSigmaPipeline.prepare_latents         @        pilr   r   r   guidance_scaler|   r}   rq   rr   r   output_typereturn_dictcallbackr~   use_resolution_binningreturnc           1      K   s  |p	| j jj| j }|	p| j jj| j }	|rR| j jjdkr t}n"| j jjdkr*t}n| j jjdkr4t}n| j jjdkr>t}ntd||	}}| j	j
||	|d\}}	| |||	||||||	 |durkt|trkd}n|duryt|tryt|}n|jd	 }| j}|d
k}| j|||||||||||d\}}}}|rtj||gd	d}tj||gd	d}trd}n|}t| j||||\}}| j jj} | || | ||	|j|||}| ||
}!ddd}"tt||| jj  d	}#| j|d}$t|D ]\}%}&|rt|gd n|}'| j |'|&}'|&}(t!|(sM|'j"j#dk})|'j"j#dk}*t|(t$r6|)s/|*r2tj%ntj&}+n|)s<|*r?tj'ntj(}+tj)|(g|+|'j"d}(nt|(jd	kr]|(d *|'j"}(|(+|'jd	 }(| j |'|||(|"ddd	 },|r|,,d\}-}.|-||.|-   },| j jj-d | kr|,j,dddd	 },n|,},| jj.|,|&|fi |!ddid	 }|%t|d ks|%d |#kr|%d | jj d	kr|$/  |dur|%| d	kr|%t0| jdd }/||/|&| trt12  qW d   n	1 sw   Y  |dks#| j3j4|*| j3j| j3jj5 ddd	 }0|r"| j	6|0||}0n|}0|dks2| j	j7|0|d}0| 8  |s<|0fS t9|0dS )uA  
        Function invoked when calling the pipeline for generation.

        Args:
            prompt (`str` or `list[str]`, *optional*):
                The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
                instead.
            negative_prompt (`str` or `list[str]`, *optional*):
                The prompt or prompts not to guide the image generation. If not defined, one has to pass
                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                less than `1`).
            num_inference_steps (`int`, *optional*, defaults to 100):
                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                expense of slower inference.
            timesteps (`list[int]`, *optional*):
                Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
                in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
                passed will be used. Must be in descending order.
            sigmas (`list[float]`, *optional*):
                Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
                their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                will be used.
            guidance_scale (`float`, *optional*, defaults to 4.5):
                Guidance scale as defined in [Classifier-Free Diffusion
                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
                the text `prompt`, usually at the expense of lower image quality.
            num_images_per_prompt (`int`, *optional*, defaults to 1):
                The number of images to generate per prompt.
            height (`int`, *optional*, defaults to self.unet.config.sample_size):
                The height in pixels of the generated image.
            width (`int`, *optional*, defaults to self.unet.config.sample_size):
                The width in pixels of the generated image.
            eta (`float`, *optional*, defaults to 0.0):
                Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
                applies to [`schedulers.DDIMScheduler`], will be ignored for others.
            generator (`torch.Generator` or `list[torch.Generator]`, *optional*):
                One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                to make generation deterministic.
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            prompt_attention_mask (`torch.Tensor`, *optional*): Pre-generated attention mask for text embeddings.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. For PixArt-Sigma this negative prompt should be "". If not
                provided, negative_prompt_embeds will be generated from `negative_prompt` input argument.
            negative_prompt_attention_mask (`torch.Tensor`, *optional*):
                Pre-generated attention mask for negative text embeddings.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~pipelines.stable_diffusion.IFPipelineOutput`] instead of a plain tuple.
            callback (`Callable`, *optional*):
                A function that will be called every `callback_steps` steps during inference. The function will be
                called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
            callback_steps (`int`, *optional*, defaults to 1):
                The frequency at which the `callback` function will be called. If not specified, the callback will be
                called at every step.
            clean_caption (`bool`, *optional*, defaults to `True`):
                Whether or not to clean the caption before creating embeddings. Requires `beautifulsoup4` and `ftfy` to
                be installed. If the dependencies are not installed, the embeddings will be created from the raw
                prompt.
            use_resolution_binning (`bool` defaults to `True`):
                If set to `True`, the requested height and width are first mapped to the closest resolutions using
                `ASPECT_RATIO_1024_BIN`. After the produced latents are decoded into images, they are resized back to
                the requested resolution. Useful for generating non-square images.
            max_sequence_length (`int` defaults to 300): Maximum sequence length to use with the `prompt`.

        Examples:

        Returns:
            [`~pipelines.ImagePipelineOutput`] or `tuple`:
                If `return_dict` is `True`, [`~pipelines.ImagePipelineOutput`] is returned, otherwise a `tuple` is
                returned where the first element is a list with the generated images
              @       zInvalid sample size)ratiosNr   r   g      ?)	rB   rC   r   rD   rE   rF   rG   rH   rI   )dimcpu)
resolutionaspect_ratio)totalr   mpsnpurU   F)encoder_hidden_statesencoder_attention_masktimestepadded_cond_kwargsr   r   orderlatent)r   )r   )images):r2   r9   sample_sizer4   ASPECT_RATIO_2048_BINr   r   r   r   r;   classify_height_width_binr   rd   re   r{   r'   r[   rX   rp   r\   catXLA_AVAILABLEr-   r(   in_channelsr   rV   rw   maxr   progress_bar	enumeratescale_model_input	is_tensorr   rz   floatfloat32float64int32int64tensorra   expandchunkout_channelsrs   updater8   xm	mark_stepr1   decodescaling_factorresize_and_crop_tensorpostprocessmaybe_free_model_hooksr   )1r<   r@   rB   r   r   r   r   rC   r|   r}   rq   rr   r   rD   rF   rE   rG   r   r   r   r~   rH   r   rI   r)   aspect_ratio_binorig_height
orig_widthr   r   rA   timestep_devicelatent_channelsru   r   num_warmup_stepsr   ir   latent_model_inputcurrent_timestepis_mpsis_npurV   
noise_prednoise_pred_uncondnoise_pred_textstep_idximager   r   r,   __call__w  s   o





$6
7(

zPixArtSigmaPipeline.__call__)
Tr>   r   NNNNNFr?   NNNN)Fr   )Nr>   r   NNr   r   NNr   NNNNNNr   TNr   TTr?   )&__name__
__module____qualname____doc__r   r   r   _optional_componentsmodel_cpu_offload_seqr   r   r   r   r	   r6   re   r{   boolry   r\   r   Tensorrp   rw   r   rY   r   r   no_gradr   EXAMPLE_DOC_STRINGr   	Generatorr   r   r   r   __classcell__r   r   r=   r,   r.      s   
	

 	

D
s
	
r.   r   )8r   r!   r   urllib.parseparser   typingr   r\   transformersr   r   r;   r   modelsr   r   
schedulersr	   utilsr
   r   r   r   r   r   r   utils.torch_utilsr   pipeline_utilsr   r   pipeline_pixart_alphar   r   r   torch_xla.core.xla_modelcore	xla_modelr   r   
get_loggerr   r_   r   r   r   r   r   ry   re   r   r{   r   r-   r.   r   r   r   r,   <module>   s   $	
	
 !"#,


;