o
    pih                     @   sX  d dl Z d dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZ d dlZd dlmZmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZmZmZ ddl m!Z! ddl"m#Z#m$Z$ e%e&Z'e rvd dl(m)Z) e r}d dl*Z*dZ+				dde	e, de	ee-ej.f  de	ee,  de	ee/  fddZ0G dd de#Z1dS )    N)ListOptionalTupleUnion)	AutoModelAutoTokenizer   )VaeImageProcessor)AutoencoderKL)get_2d_rotary_pos_embed_lumina)LuminaNextDiT2DModel)FlowMatchEulerDiscreteScheduler)BACKENDS_MAPPINGis_bs4_availableis_ftfy_availableloggingreplace_example_docstring)randn_tensor   )DiffusionPipelineImagePipelineOutput)BeautifulSoupav  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import LuminaText2ImgPipeline

        >>> pipe = LuminaText2ImgPipeline.from_pretrained(
        ...     "Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16
        ... ).cuda()
        >>> # Enable memory optimizations.
        >>> pipe.enable_model_cpu_offload()

        >>> prompt = "Upper body of a young woman in a Victorian-era outfit with brass goggles and leather straps. Background shows an industrial revolution cityscape with smoky skies and tall, metal structures"
        >>> image = pipe(prompt).images[0]
        ```
num_inference_stepsdevice	timestepssigmasc                 K   s  |dur|durt d|dur>dtt| jj v }|s(t d| j d| jd||d| | j}t	|}||fS |durpdtt| jj v }|sZt d| j d| jd||d	| | j}t	|}||fS | j|fd
|i| | j}||fS )a  
    Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
    custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

    Args:
        scheduler (`SchedulerMixin`):
            The scheduler to get timesteps from.
        num_inference_steps (`int`):
            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
            must be `None`.
        device (`str` or `torch.device`, *optional*):
            The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
        timesteps (`List[int]`, *optional*):
            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
            `num_inference_steps` and `sigmas` must be `None`.
        sigmas (`List[float]`, *optional*):
            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
            `num_inference_steps` and `timesteps` must be `None`.

    Returns:
        `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
        second element is the number of inference steps.
    NzYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesr   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r   r   r   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r   r   r    )

ValueErrorsetinspect	signatureset_timesteps
parameterskeys	__class__r   len)	schedulerr   r   r   r   kwargsaccepts_timestepsaccept_sigmasr   r   h/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/diffusers/pipelines/lumina/pipeline_lumina.pyretrieve_timestepsD   s2   r+   c                1       sp  e Zd ZdZedZg ZdZde	de
dededef
 fd	d
Z				dCdeeee f dedeej dee dee f
ddZ									dDdeeee f dedeeee f dedeej deej deej deej deej defddZdd Z				dEd d!ZdFd"d#Zd$d% ZdGd&d'Zed(d) Z ed*d+ Z!ed,d- Z"e# e$e%				.		/										0			1	2	dHdeeee f d3ee d4ee d5ed6ee d7e&deeee f d8ee& dee d9eeej'eej' f  d:eej deej deej deej deej d;ee d<eded=ed>ee& d?ee d@ee(e)f f,dAdBZ*  Z+S )ILuminaText2ImgPipelinea  
    Pipeline for text-to-image generation using Lumina-T2I.

    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
    library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

    Args:
        vae ([`AutoencoderKL`]):
            Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
        text_encoder ([`AutoModel`]):
            Frozen text-encoder. Lumina-T2I uses
            [T5](https://huggingface.co/docs/transformers/model_doc/t5#transformers.AutoModel), specifically the
            [t5-v1_1-xxl](https://huggingface.co/Alpha-VLLM/tree/main/t5-v1_1-xxl) variant.
        tokenizer (`AutoModel`):
            Tokenizer of class
            [AutoModel](https://huggingface.co/docs/transformers/model_doc/t5#transformers.AutoModel).
        transformer ([`Transformer2DModel`]):
            A text conditioned `Transformer2DModel` to denoise the encoded image latents.
        scheduler ([`SchedulerMixin`]):
            A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
    u5   [#®•©™&@·º½¾¿¡§~\)\(\]\[\}\{\|\\/\*]{1,}ztext_encoder->transformer->vaetransformerr&   vaetext_encoder	tokenizerc                    sn   t    | j|||||d d| _t| jd| _d| _t| dr+| jd ur+| jj	j
nd| _| j| j | _d S )N)r.   r/   r0   r-   r&      )vae_scale_factor   r-      )super__init__register_modulesr2   r	   image_processormax_sequence_lengthhasattrr-   configsample_sizedefault_sample_sizedefault_image_size)selfr-   r&   r.   r/   r0   r$   r   r*   r6      s$   


zLuminaText2ImgPipeline.__init__   NFpromptnum_images_per_promptr   clean_caption
max_lengthc                 C   sz  |p| j }t|tr|gn|}t|}| j||d}| j|d| jdddd}|j|}| j|dddj|}	|	j	d |j	d kret
||	se| j|	d d | jd	 df }
td
| j d|
  |j|}| j||dd}|jd }| jd ur| jj}n| jd ur| jj}nd }|j||d}|j	\}}}|d	|d	}||| |d}||d	}||| d}||fS )N)rD   r1   Tpt)pad_to_multiple_ofrE   
truncationpaddingreturn_tensorslongest)rI   rJ   rA   z]The following part of your input was truncated because Gemma can only handle sequences up to z	 tokens: attention_maskoutput_hidden_statesdtyper   )_execution_device
isinstancestrr%   _text_preprocessingr0   r9   	input_idstoshapetorchequalbatch_decodeloggerwarningrN   r/   hidden_statesrR   r-   repeatview)r?   rB   rC   r   rD   rE   
batch_sizetext_inputstext_input_idsuntruncated_idsremoved_textprompt_attention_maskprompt_embedsrR   _seq_lenr   r   r*   _get_gemma_prompt_embeds   sP   
 "




z/LuminaText2ImgPipeline._get_gemma_prompt_embedsTdo_classifier_free_guidancenegative_promptrh   negative_prompt_embedsrg   negative_prompt_attention_maskc              
   K   s  |du r| j }t|tr|gn|}|durt|}n|jd }|du r.| j||||
d\}}|r|du r|dur:|nd}t|trF||g n|}|durct|t|urctdt| dt| dt|trl|g}n|t|krtd| d	t| d
| d	| d	|jd }| j	|d|ddd}|j
|}|j|}	| j||	dd}| jj}|jd }|j\}}}|j||d}|d|d}||| |d}|	|d}	|	|| d}	||||	fS )af  
        Encodes the prompt into text encoder hidden states.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                prompt to be encoded
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt not to guide the image generation. If not defined, one has to pass `negative_prompt_embeds`
                instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`). For
                Lumina-T2I, this should be "".
            do_classifier_free_guidance (`bool`, *optional*, defaults to `True`):
                whether to use classifier free guidance or not
            num_images_per_prompt (`int`, *optional*, defaults to 1):
                number of images that should be generated per prompt
            device: (`torch.device`, *optional*):
                torch device to place the resulting embeddings on
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. For Lumina-T2I, it's should be the embeddings of the "" string.
            clean_caption (`bool`, defaults to `False`):
                If `True`, the function will preprocess and clean the provided caption before encoding.
            max_sequence_length (`int`, defaults to 256): Maximum sequence length to use for the prompt.
        Nr   )rB   rC   r   rD    z?`negative_prompt` should be the same type to `prompt`, but got z != .z`negative_prompt`: z has batch size z, but `prompt`: zT. Please make sure that passed `negative_prompt` matches the batch size of `prompt`.rA   rE   TrF   )rI   rE   rH   rJ   rM   rP   rQ   rL   )rS   rT   rU   r%   rY   rk   type	TypeErrorr   r0   rW   rX   rN   r/   rR   r_   r`   ra   )r?   rB   rl   rm   rC   r   rh   rn   rg   ro   rD   r'   rb   prompt_max_lengthnegative_text_inputsnegative_text_input_idsnegative_dtyperi   rj   r   r   r*   encode_prompt   st   '





z$LuminaText2ImgPipeline.encode_promptc                 C   sX   dt t| jjj v }i }|r||d< dt t| jjj v }|r*||d< |S )Neta	generator)r   r   r    r&   stepr"   r#   )r?   rz   ry   accepts_etaextra_step_kwargsaccepts_generatorr   r   r*   prepare_extra_step_kwargsj  s   z0LuminaText2ImgPipeline.prepare_extra_step_kwargsc	           	      C   s~  |d dks|d dkrt d| d| d|d ur*|d ur*t d| d| d|d u r6|d u r6t d	|d urMt|tsMt|tsMt d
t| |d ur`|d ur`t d| d| d|d urs|d urst d| d| d|d ur|d u rt d|d ur|d u rt d|d ur|d ur|j|jkrt d|j d|j d|j|jkrt d|j d|j dd S d S d S )Nr1   r   z7`height` and `width` have to be divisible by 8 but are z and rq   zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z and `negative_prompt_embeds`: z'Cannot forward both `negative_prompt`: zEMust provide `prompt_attention_mask` when specifying `prompt_embeds`.zWMust provide `negative_prompt_attention_mask` when specifying `negative_prompt_embeds`.zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` z`prompt_attention_mask` and `negative_prompt_attention_mask` must have the same shape when passed directly, but got: `prompt_attention_mask` z% != `negative_prompt_attention_mask` )r   rT   rU   listrr   rY   )	r?   rB   heightwidthrm   rh   rn   rg   ro   r   r   r*   check_inputs{  s^   z#LuminaText2ImgPipeline.check_inputsc                    s    rt  sttd d d td d  r0t s0ttd d d td d t|ttfs:|g}dt	f fdd	fd
d|D S )Nbs4rL   zSetting `clean_caption=True`z#Setting `clean_caption` to False...Fftfytextc                    s,    r | }  | } | S |   } | S N)_clean_captionlowerstrip)r   )rD   r?   r   r*   process  s   

z;LuminaText2ImgPipeline._text_preprocessing.<locals>.processc                    s   g | ]} |qS r   r   ).0t)r   r   r*   
<listcomp>  s    z>LuminaText2ImgPipeline._text_preprocessing.<locals>.<listcomp>)
r   r]   r^   r   formatr   rT   tupler   rU   )r?   r   rD   r   )rD   r   r?   r*   rV     s   



z*LuminaText2ImgPipeline._text_preprocessingc                 C   s  t |}t|}|  }tdd|}tdd|}tdd|}t|ddj}tdd|}td	d|}td
d|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}tdd|}td d|}td!d|}t| j	d|}td"d|}t
d#}tt||d$krt|d|}t|}tt|}td%d|}td&d|}td'd|}td(d|}td)d|}td*d|}td+d|}td,d|}td-d|}td.d|}td/d0|}td1d2|}td3d|}|  td4d5|}td6d|}td7d|}td8d|}| S )9Nz<person>personzk\b((?:https?:(?:\/{1,3}|[a-zA-Z0-9%])|[a-zA-Z0-9.\-]+[.](?:com|co|ru|net|org|edu|gov|it)[\w/-]*\b\/?(?!@)))rp   zh\b((?:www:(?:\/{1,3}|[a-zA-Z0-9%])|[a-zA-Z0-9.\-]+[.](?:com|co|ru|net|org|edu|gov|it)[\w/-]*\b\/?(?!@)))zhtml.parser)featuresz
@[\w\d]+\bz[\u31c0-\u31ef]+z[\u31f0-\u31ff]+z[\u3200-\u32ff]+z[\u3300-\u33ff]+z[\u3400-\u4dbf]+z[\u4dc0-\u4dff]+z[\u4e00-\u9fff]+z|[\u002D\u058A\u05BE\u1400\u1806\u2010-\u2015\u2E17\u2E1A\u2E3A\u2E3B\u2E40\u301C\u3030\u30A0\uFE31\uFE32\uFE58\uFE63\uFF0D]+-u   [`´«»“”¨]"u   [‘’]'z&quot;?z&ampz"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} z\d:\d\d\s+$z\\nz
#\d{1,3}\bz	#\d{5,}\bz
\b\d{6,}\bz0[\S]+\.(?:png|jpg|jpeg|bmp|webp|eps|pdf|apk|mp4)z
[\"\']{2,}z[\.]{2,}z\s+\.\s+z	(?:\-|\_)r   z\b[a-zA-Z]{1,3}\d{3,15}\bz\b[a-zA-Z]+\d+[a-zA-Z]+\bz\b\d+[a-zA-Z]+\d+\bz!(worldwide\s+)?(free\s+)?shippingz(free\s)?download(\sfree)?z\bclick\b\s(?:for|on)\s\w+z9\b(?:png|jpg|jpeg|bmp|webp|eps|pdf|apk|mp4)(\simage[s]?)?z\bpage\s+\d+\bz*\b\d*[a-zA-Z]+\d+[a-zA-Z]+\d+[a-zA-Z\d]*\bu   \b\d+\.?\d*[xх×]\d+\.?\d*\bz
\b\s+\:\s+z: z(\D[,\./])\bz\1 z\s+z^[\"\']([\w\W]+)[\"\']$z\1z^[\'\_,\-\:;]z[\'\_,\-\:\-\+]$z^\.\S+$)rU   ulunquote_plusr   r   resubr   r   bad_punct_regexcompiler%   findallr   fix_texthtmlunescape)r?   captionregex2r   r   r*   r     s   
	

z%LuminaText2ImgPipeline._clean_captionc	           
      C   sz   ||t || j t || j f}	t|tr(t||kr(tdt| d| d|d u r6t|	|||d}|S ||}|S )Nz/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)rz   r   rR   )intr2   rT   r   r%   r   r   rX   )
r?   rb   num_channels_latentsr   r   rR   r   rz   latentsrY   r   r   r*   prepare_latentsA  s    
z&LuminaText2ImgPipeline.prepare_latentsc                 C      | j S r   _guidance_scaler?   r   r   r*   guidance_scaleU     z%LuminaText2ImgPipeline.guidance_scalec                 C   s
   | j dkS )NrA   r   r   r   r   r*   rl   \  s   
z2LuminaText2ImgPipeline.do_classifier_free_guidancec                 C   r   r   )_num_timestepsr   r   r   r*   num_timesteps`  r   z$LuminaText2ImgPipeline.num_timesteps         @pilr3         ?r   r   r   r   r   r   rz   r   output_typereturn_dictr9   scaling_watershedproportional_attnreturnc           /      C   s  |p| j | j }|p| j | j }| j||||||||d i }|dur+t|tr+d}n|dur9t|tr9t|}n|jd }|rI| jd d |d< t	
|| | jd  }| j}|dk}| j||||	|||||||d	\}}}}|rtj||gdd
}tj||gdd
}t| j||||\}}| jjj}| ||	 ||||j||
|}| j|d}t|D ]\}}|rt|gd n|}|} t| s|jjdk}!t| tr|!rtjntj}"n|!rtjntj}"tj | g|"|jd} nt| jdkr| d !|j} | "|jd } d| | jjj#  } | d |k r|}#d}$nd}#|}$t$| jj%dd|#|$d}%| j|| |||%|ddd }&|&j&ddd
d }&|r|&ddddf |&ddddf }'}(tj'|'t|'d dd
\})}*|*||)|*   }+tj|+|+gdd
}'tj|'|(gdd
}&|&j&ddd
\}&},|j}-|& }&| jj(|&||ddd }|j|-krtj)j*+ r|!|-}|,  qW d   n	1 sw   Y  |dks|| j-jj. }| j-j/|ddd }.| j0j1|.|d}.n|}.| 2  |s|.fS t3|.dS )u  
        Function invoked when calling the pipeline for generation.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
                instead.
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the image generation. If not defined, one has to pass
                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                less than `1`).
            num_inference_steps (`int`, *optional*, defaults to 30):
                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                expense of slower inference.
            timesteps (`List[int]`, *optional*):
                Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
                in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
                passed will be used. Must be in descending order.
            sigmas (`List[float]`, *optional*):
                Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
                their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                will be used.
            guidance_scale (`float`, *optional*, defaults to 4.0):
                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
                `guidance_scale` is defined as `w` of equation 2. of [Imagen
                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
                1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
                usually at the expense of lower image quality.
            num_images_per_prompt (`int`, *optional*, defaults to 1):
                The number of images to generate per prompt.
            height (`int`, *optional*, defaults to self.unet.config.sample_size):
                The height in pixels of the generated image.
            width (`int`, *optional*, defaults to self.unet.config.sample_size):
                The width in pixels of the generated image.
            eta (`float`, *optional*, defaults to 0.0):
                Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
                [`schedulers.DDIMScheduler`], will be ignored for others.
            generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                to make generation deterministic.
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                tensor will ge generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            prompt_attention_mask (`torch.Tensor`, *optional*): Pre-generated attention mask for text embeddings.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. For Lumina-T2I this negative prompt should be "". If not
                provided, negative_prompt_embeds will be generated from `negative_prompt` input argument.
            negative_prompt_attention_mask (`torch.Tensor`, *optional*):
                Pre-generated attention mask for negative text embeddings.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~pipelines.stable_diffusion.IFPipelineOutput`] instead of a plain tuple.
            clean_caption (`bool`, *optional*, defaults to `True`):
                Whether or not to clean the caption before creating embeddings. Requires `beautifulsoup4` and `ftfy` to
                be installed. If the dependencies are not installed, the embeddings will be created from the raw
                prompt.
            max_sequence_length (`int` defaults to 120):
                Maximum sequence length to use with the `prompt`.
            callback_on_step_end (`Callable`, *optional*):
                A function that calls at the end of each denoising steps during the inference. The function is called
                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
                `callback_on_step_end_tensor_inputs`.
            callback_on_step_end_tensor_inputs (`List`, *optional*):
                The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                `._callback_tensor_inputs` attribute of your pipeline class.

        Examples:

        Returns:
            [`~pipelines.ImagePipelineOutput`] or `tuple`:
                If `return_dict` is `True`, [`~pipelines.ImagePipelineOutput`] is returned, otherwise a `tuple` is
                returned where the first element is a list with the generated images
        )rh   rn   rg   ro   NrA   r      r   base_sequence_lengthr   )	rm   rC   r   rh   rn   rg   ro   rD   r9   )dim)totalmpsrQ   i  )linear_factor
ntk_factorF)r_   timestepencoder_hidden_statesencoder_maskimage_rotary_embcross_attention_kwargsr   r   )r   latent)r   )images)4r=   r2   r   rT   rU   r   r%   rY   r>   mathsqrtrS   rx   rZ   catr+   r&   r-   r;   in_channelsr   rR   progress_bar	enumerate	is_tensorr   rr   floatfloat32float64int32int64tensorrX   expandnum_train_timestepsr   head_dimchunksplitr{   backendsr   is_availableupdater.   scaling_factordecoder8   postprocessmaybe_free_model_hooksr   )/r?   rB   r   r   r   r   r   rm   r   rC   rz   r   rh   rn   rg   ro   r   r   rD   r9   r   r   r   rb   r   r   rl   latent_channelsr   ir   latent_model_inputcurrent_timestepis_mpsrR   r   r   r   
noise_prednoise_pred_epsnoise_pred_restnoise_pred_cond_epsnoise_pred_uncond_epsnoise_pred_halfri   latents_dtypeimager   r   r*   __call__d  s  k





	*



W
zLuminaText2ImgPipeline.__call__)rA   NFN)	TNrA   NNNNNFNNNN)Fr   )NNNr   Nr   NNrA   NNNNNNr   TTr3   r   T),__name__
__module____qualname____doc__r   r   r   _optional_componentsmodel_cpu_offload_seqr   r   r
   r   r   r6   r   rU   r   r   r   rZ   r   boolrk   Tensorrx   r   r   rV   r   r   propertyr   rl   r   no_gradr   EXAMPLE_DOC_STRINGr   	Generatorr   r   r   __classcell__r   r   r@   r*   r,      s$   
;	

n

;
r


	

r,   r   )2r   r   r   r   urllib.parseparser   typingr   r   r   r   rZ   transformersr   r   r8   r	   modelsr
   models.embeddingsr   $models.transformers.lumina_nextdit2dr   
schedulersr   utilsr   r   r   r   r   utils.torch_utilsr   pipeline_utilsr   r   
get_loggerr   r]   r   r   r   r   r   rU   r   r   r+   r,   r   r   r   r*   <module>   sH   



;