o
    Gi`                  
   @   sv  d dl Z d dlZd dlZddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZmZ dd	lmZmZmZ d
dlmZ e	eZ				d0dedB deejB dB dee dB dee dB fddZ				d1dedededefddZ	d2dejdejdB defd d!Z d"d# Z!G d$d% d%eZ"G d&d' d'eZ#G d(d) d)eZ$G d*d+ d+eZ%G d,d- d-eZ&G d.d/ d/eZ'dS )3    N   )FluxPipeline)FlowMatchEulerDiscreteScheduler)logging)randn_tensor   )ModularPipelineBlocksPipelineState)ComponentSpec
InputParamOutputParam   )FluxModularPipelinenum_inference_stepsdevice	timestepssigmasc                 K   s  |dur|durt d|dur>dtt| jj v }|s(t d| j d| jd||d| | j}t	|}||fS |durpdtt| jj v }|sZt d| j d| jd||d	| | j}t	|}||fS | j|fd
|i| | j}||fS )a  
    Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
    custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

    Args:
        scheduler (`SchedulerMixin`):
            The scheduler to get timesteps from.
        num_inference_steps (`int`):
            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
            must be `None`.
        device (`str` or `torch.device`, *optional*):
            The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
        timesteps (`list[int]`, *optional*):
            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
            `num_inference_steps` and `sigmas` must be `None`.
        sigmas (`list[float]`, *optional*):
            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
            `num_inference_steps` and `timesteps` must be `None`.

    Returns:
        `tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
        second element is the number of inference steps.
    NzYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesr   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r   r   r   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r   r   r    )

ValueErrorsetinspect	signatureset_timesteps
parameterskeys	__class__r   len)	schedulerr   r   r   r   kwargsaccepts_timestepsaccept_sigmasr   r   c/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/modular_pipelines/flux/before_denoise.pyretrieve_timesteps!   s2   r"               ?ffffff?base_seq_lenmax_seq_len
base_shift	max_shiftc                 C   s,   || ||  }|||  }| | | }|S Nr   )image_seq_lenr'   r(   r)   r*   mbmur   r   r!   calculate_shift]   s   r0   sampleencoder_output	generatorsample_modec                 C   sR   t | dr|dkr| j|S t | dr|dkr| j S t | dr%| jS td)Nlatent_distr1   argmaxlatentsz3Could not access latents of provided encoder_output)hasattrr5   r1   moder7   AttributeError)r2   r3   r4   r   r   r!   retrieve_latentsk   s   

r;   c
              	   C   s   t || d t || d  }
|d u rtdd| |n|}t|jdr+|jjr+d }t|
|jdd|jdd|jd	d
|jdd}t|||	||d\}}| jj	rgt
jdg||	t
jd}||}nd }||||fS )Nr   g      ?r   use_flow_sigmasbase_image_seq_lenr#   max_image_seq_lenr$   r)   r%   r*   r&   )r   r/   r   dtype)intnplinspacer8   configr<   r0   getr"   guidance_embedstorchfullfloat32expand)transformerr   
batch_sizeheightwidthvae_scale_factorr   guidance_scaler   r   r,   r/   r   guidancer   r   r!   $_get_initial_timesteps_and_optionalsx   s"    rR   c                   @   s   e Zd ZdZedee fddZedefddZ	edee
 fddZedee fd	d
Ze dededefddZdS )FluxSetTimestepsStepfluxreturnc                 C      t dtgS Nr   r
   r   selfr   r   r!   expected_components      z(FluxSetTimestepsStep.expected_componentsc                 C      dS Nz6Step that sets the scheduler's timesteps for inferencer   rY   r   r   r!   description      z FluxSetTimestepsStep.descriptionc                 C   s\   t dddt dt dt dddt dtjd	t d
ddt dtd	t dtd	t ddtddg	S )Nr   2   defaultr   r   rP         @r7   	type_hintnum_images_per_promptr   rM   rN   rL   TNumber of prompts, the final batch size of model inputs should be `batch_size * num_images_per_prompt`. Can be generated in input step.requiredrf   r_   )r   rG   TensorrA   rY   r   r   r!   inputs   s   




zFluxSetTimestepsStep.inputsc                 C   ,   t dtjddt dtddt dtjddgS Nr   z"The timesteps to use for inferencerf   r_   r   z:The number of denoising steps to perform at inference timerQ   zOptional guidance to be used.r   rG   rk   rA   rY   r   r   r!   intermediate_outputs      z)FluxSetTimestepsStep.intermediate_outputs
componentsstatec                 C   s   |  |}|j|_|j}|j}|j|j }t||||j|j	|j
|j|j|j|j
\}}}	}
||_||_|	|_|
|_|jd | || ||fS )Nr   )get_block_state_execution_devicer   r   rK   rL   rg   rR   rM   rN   rO   r   rP   r   r   rQ   set_begin_indexset_block_staterZ   rs   rt   block_stater   rK   rL   r   r   r   rQ   r   r   r!   __call__   s0   
zFluxSetTimestepsStep.__call__N)__name__
__module____qualname__
model_namepropertylistr
   r[   strr_   r   rl   r   rq   rG   no_gradr   r	   r{   r   r   r   r!   rS      s    rS   c                   @   s   e Zd ZdZedee fddZedefddZ	edee
 fddZedee fd	d
Zedd Ze dededefddZdS )FluxImg2ImgSetTimestepsSteprT   rU   c                 C   rV   rW   rX   rY   r   r   r!   r[      r\   z/FluxImg2ImgSetTimestepsStep.expected_componentsc                 C   r]   r^   r   rY   r   r   r!   r_      r`   z'FluxImg2ImgSetTimestepsStep.descriptionc                 C   sZ   t dddt dt dt dddt dd	dt d
ddt dtdt dtdt ddtddg	S )Nr   ra   rb   r   r   strengthg333333?rP   rd   rg   r   rM   re   rN   rL   Trh   ri   )r   rA   rY   r   r   r!   rl      s   





z"FluxImg2ImgSetTimestepsStep.inputsc                 C   rm   rn   rp   rY   r   r   r!   rq     rr   z0FluxImg2ImgSetTimestepsStep.intermediate_outputsc                 C   sZ   t || |}tt|| d}| j|| j d  }t| dr'| || j  ||| fS )Nr   rw   )minrA   maxr   orderr8   rw   )r   r   r   r   init_timestept_startr   r   r   r!   get_timesteps  s   
z)FluxImg2ImgSetTimestepsStep.get_timestepsrs   rt   c                 C   s   |  |}|j|_|jp|j|_|jp|j|_|j}|j}|j	|j
 }t||||j|j|j|j|j|j|j
\}}}	}
| |||j|j\}}||_||_|	|_|
|_| || ||fS r+   )ru   rv   r   rM   default_heightrN   default_widthr   rK   rL   rg   rR   rO   r   rP   r   r   r   r   rQ   rx   ry   r   r   r!   r{     s8   
z$FluxImg2ImgSetTimestepsStep.__call__N)r|   r}   r~   r   r   r   r
   r[   r   r_   r   rl   r   rq   staticmethodr   rG   r   r   r	   r{   r   r   r   r!   r      s    
r   c                   @   s   e Zd ZdZedee fddZedefddZ	edee
 fddZedee fd	d
Zedd Ze	dddZe dededefddZdS )FluxPrepareLatentsSteprT   rU   c                 C   s   g S r+   r   rY   r   r   r!   r[   C  r`   z*FluxPrepareLatentsStep.expected_componentsc                 C   r]   )NzWPrepare latents step that prepares the latents for the text-to-image generation processr   rY   r   r   r!   r_   G  r`   z"FluxPrepareLatentsStep.descriptionc                 C   sV   t dtdt dtdt dtjd B dt dtddt dt d	d
tddt dtjddgS )NrM   re   rN   r7   rg   r   )rf   rc   r3   rL   Trh   ri   r@   zThe dtype of the model inputsro   )r   rA   rG   rk   r@   rY   r   r   r!   rl   K  s   

zFluxPrepareLatentsStep.inputsc                 C      t dtjddgS )Nr7   z4The initial latents to use for the denoising processro   r   rG   rk   rY   r   r   r!   rq   \  s   z+FluxPrepareLatentsStep.intermediate_outputsc              	   C   sl   |j d ur|j | jd  dks|jd ur2|j| jd  dkr4td| j d|j  d|j d d S d S d S )Nr   r   z-`height` and `width` have to be divisible by z	 but are  and .)rM   rO   rN   loggerwarning)rs   rz   r   r   r!   check_inputsd  s   z#FluxPrepareLatentsStep.check_inputsNc	           
      C   s   dt || jd   }dt || jd   }||||f}	|d ur'|j||dS t|tr?t||kr?tdt| d| dt|	|||d}t	|||||}|S )Nr   r?   z/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)r3   r   r@   )
rA   rO   to
isinstancer   r   r   r   r   _pack_latents)
comprL   num_channels_latentsrM   rN   r@   r   r3   r7   shaper   r   r!   prepare_latentsm  s   z&FluxPrepareLatentsStep.prepare_latentsrs   rt   c                 C   s   |  |}|jp
|j|_|jp|j|_|j|_|j|_| || |j	|j
 }| |||j|j|j|j|j|j|j	|_| || ||fS r+   )ru   rM   r   rN   r   rv   r   r   r   rL   rg   r   r@   r3   r7   rx   )rZ   rs   rt   rz   rL   r   r   r!   r{     s(   
zFluxPrepareLatentsStep.__call__r+   )r|   r}   r~   r   r   r   r
   r[   r   r_   r   rl   r   rq   r   r   r   rG   r   r   r	   r{   r   r   r   r!   r   @  s"    

r   c                   @   s   e Zd ZdZedefddZedee fddZ	edee
 fddZedee fd	d
Zedd Ze dededefddZdS )FluxImg2ImgPrepareLatentsSteprT   rU   c                 C   r]   )Nz^Step that adds noise to image latents for image-to-image. Should be run after `set_timesteps`,r   rY   r   r   r!   r_     r`   z)FluxImg2ImgPrepareLatentsStep.descriptionc                 C   rV   rW   rX   rY   r   r   r!   r[     r\   z1FluxImg2ImgPrepareLatentsStep.expected_componentsc                 C   s4   t ddtjddt ddtjddt ddtjddgS )	Nr7   TzCThe initial random noised, can be generated in prepare latent step.)namerj   rf   r_   image_latentszmThe image latents to use for the denoising process. Can be generated in vae encoder and packed in input step.r   zWThe timesteps to use for the denoising process. Can be generated in set_timesteps step.)r   rG   rk   rY   r   r   r!   rl     s&   z$FluxImg2ImgPrepareLatentsStep.inputsc                 C   r   )Ninitial_noisez8The initial random noised used for inpainting denoising.)r   rf   r_   r   rY   r   r   r!   rq     s   z2FluxImg2ImgPrepareLatentsStep.intermediate_outputsc                 C   sR   | j d |j d krtd| j d  d|j d  | jdkr'td| j d S )Nr   zE`image_latents` must have have same batch size as `latents`, but got r   r   z=`image_latents` must have 3 dimensions (patchified), but got )r   r   ndimr   r7   r   r   r!   r     s   
z*FluxImg2ImgPrepareLatentsStep.check_inputsrs   rt   c                 C   sj   |  |}| j|j|jd |jd d |jjd }|j|_|j	|j||j|_| 
|| ||fS )Nr   r   r   )ru   r   r   r7   r   repeatr   r   r   scale_noiserx   )rZ   rs   rt   rz   latent_timestepr   r   r!   r{     s   

z&FluxImg2ImgPrepareLatentsStep.__call__N)r|   r}   r~   r   r   r   r_   r   r
   r[   r   rl   r   rq   r   r   rG   r   r   r	   r{   r   r   r   r!   r     s    	
	r   c                   @   d   e Zd ZdZedefddZedee fddZ	edee
 fddZd	ed
edefddZdS )FluxRoPEInputsSteprT   rU   c                 C   r]   )NzStep that prepares the RoPE inputs for the denoising process. Should be placed after text encoder and latent preparation steps.r   rY   r   r   r!   r_     r`   zFluxRoPEInputsStep.descriptionc                 C   s    t dddt dddt ddgS )NrM   T)r   rj   rN   prompt_embedsr   r   rY   r   r   r!   rl     s   

zFluxRoPEInputsStep.inputsc                 C   (   t ddtt ddt ddtt ddgS Ntxt_idsdenoiser_input_fieldszEThe sequence lengths of the prompt embeds, used for RoPE calculation.)r   kwargs_typerf   r_   img_idszEThe sequence lengths of the image latents, used for RoPE calculation.r   r   rA   rY   r   r   r!   rq        z'FluxRoPEInputsStep.intermediate_outputsrs   rt   c           	      C   s   |  |}|j}|j|j}}t|jd dj|j|jd|_dt	|j
|jd   }dt	|j|jd   }td |d |d |||_| || ||fS )Nr   r   r?   r   )ru   r   r   r@   rG   zerosr   r   r   rA   rM   rO   rN   r   _prepare_latent_image_idsr   rx   )	rZ   rs   rt   rz   r   r   r@   rM   rN   r   r   r!   r{     s   
zFluxRoPEInputsStep.__call__Nr|   r}   r~   r   r   r   r_   r   r   rl   r   rq   r   r	   r{   r   r   r   r!   r     s    r   c                   @   r   )FluxKontextRoPEInputsStepzflux-kontextrU   c                 C   r]   )NzStep that prepares the RoPE inputs for the denoising process of Flux Kontext. Should be placed after text encoder and latent preparation steps.r   rY   r   r   r!   r_   *  r`   z%FluxKontextRoPEInputsStep.descriptionc                 C   s,   t ddt ddt ddt ddt ddgS )Nimage_heightr   image_widthrM   rN   r   r   rY   r   r   r!   rl   .  s   z FluxKontextRoPEInputsStep.inputsc                 C   r   r   r   rY   r   r   r!   rq   8  r   z.FluxKontextRoPEInputsStep.intermediate_outputsrs   rt   c                 C   s4  |  |}|j}|j|j}}t|jd dj|j|jd|_d }t	|dd d ur[t	|dd d ur[dt
|j|jd   }dt
|j|jd   }	td |d |	d ||}d|d< dt
|j|jd   }
dt
|j|jd   }td |
d |d ||}|d urtj||gdd	}||_| || ||fS )
Nr   r   r?   r   r   r   ).r   r   )dim)ru   r   r   r@   rG   r   r   r   r   getattrrA   r   rO   r   r   r   rM   rN   catr   rx   )rZ   rs   rt   rz   r   r   r@   r   image_latent_heightimage_latent_widthrM   rN   
latent_idsr   r   r!   r{   I  s.   
z"FluxKontextRoPEInputsStep.__call__Nr   r   r   r   r!   r   '  s    	r   )NNNN)r#   r$   r%   r&   )Nr1   )(r   numpyrB   rG   	pipelinesr   
schedulersr   utilsr   utils.torch_utilsr   modular_pipeliner   r	   modular_pipeline_utilsr
   r   r   r   
get_loggerr|   r   rA   r   r   r   floatr"   r0   rk   	Generatorr;   rR   rS   r   r   r   r   r   r   r   r   r!   <module>   sh   



>

"LZhM2