o
    ٷi|                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlZ	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	l
mZ d d
lmZmZmZmZ d dlmZ d dlmZmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- e.e/Z0defddZ1				d*de2de2de3de3fdd Z4				d+d!e2dB d"e5e
j6B dB d#e7e2 dB d$e7e3 dB d%e8e
j9e2f f
d&d'Z:G d(d) d)ej;e!Z<dS ),    N)Iterable)Any)VaeImageProcessor)TextualInversionLoaderMixin)AutoencoderKL)FlowMatchEulerDiscreteScheduler)randn_tensor)nn)CLIPTextModelCLIPTokenizerT5EncoderModelT5TokenizerFast)AutoWeightsLoader)DiffusionOutputOmniDiffusionConfig)CFGParallelMixin)'get_classifier_free_guidance_world_size)get_local_device)DiffusersPipelineLoader)FluxTransformer2DModel)OmniDiffusionRequest)!download_weights_from_hf_specific	od_configc                    s   | j dkr	dd S | j}tj|r|}nt|d dg}tj|d}t|}t	|}d|v r;dt
|d d  nd	}W d    n1 sGw   Y  t|d d
 dtjf fdd}|S )Nlatentc                 S   s   | S N )xr   r   a/home/ubuntu/.local/lib/python3.10/site-packages/vllm_omni/diffusion/models/flux/pipeline_flux.py<lambda>(   s    z,get_flux_post_process_func.<locals>.<lambda>*zvae/config.jsonblock_out_channels         )vae_scale_factorimagesc                    s
     | S r   )postprocess)r%   image_processorr   r   post_process_func6   s   
z5get_flux_post_process_func.<locals>.post_process_func)output_typemodelospathexistsr   joinopenjsonloadlenr   torchTensor)r   
model_name
model_pathvae_config_pathf
vae_configr$   r)   r   r'   r   get_flux_post_process_func$   s   


"r;               ?ffffff?base_seq_lenmax_seq_len
base_shift	max_shiftc                 C   s,   || ||  }|||  }| | | }|S r   r   )image_seq_lenr@   rA   rB   rC   mbmur   r   r   calculate_shift<   s   rH   num_inference_stepsdevice	timestepssigmasreturnc                 K   s  |dur|durt d|dur>dtt| jj v }|s(t d| j d| jd||d| | j}t	|}||fS |durpdtt| jj v }|sZt d| j d| jd||d	| | j}t	|}||fS | j|fd
|i| | j}||fS )a  
    Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
    custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

    Args:
        scheduler (`SchedulerMixin`):
            The scheduler to get timesteps from.
        num_inference_steps (`int`):
            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
            must be `None`.
        device (`str` or `torch.device`, *optional*):
            The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
        timesteps (`List[int]`, *optional*):
            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
            `num_inference_steps` and `sigmas` must be `None`.
        sigmas (`List[float]`, *optional*):
            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
            `num_inference_steps` and `timesteps` must be `None`.

    Returns:
        `tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
        second element is the number of inference steps.
    NzYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesrK   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)rK   rJ   rL   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)rL   rJ   rJ   r   )

ValueErrorsetinspect	signatureset_timesteps
parameterskeys	__class__rK   r3   )	schedulerrI   rJ   rK   rL   kwargsaccepts_timestepsaccept_sigmasr   r   r   retrieve_timestepsJ   s2   rZ   c                /       s  e Zd Zdddedef fddZ								dVdd	Z		
		dWdeee B dedede	j
dB fddZ	
dXdeee B defddZ	
			dYdeee B deee B dede	jdB de	jdB defddZedd Zedd Zedd Z	dZdd Zd!d" Zed#d$ Zed%d& Zed'd( Zed)d* Zed+d, Z	-d[de	jde	jd.e	jd/e	jd0e	jd1e	jd2e	jd3e	jd4e	jd5ed6e	jd7ed8ed9e	jfd:d;Zd7ed<efd=d>Zddddd?ddd@ddAd
dddddddBdCdd0gdfdDe deee B dB deee B dB dEeee B dB dFeee B dB d7edGedB dHedB dIedJee dB dKededLe	j!ee	j! B dB d0e	jdB de	jdB de	jdB d.e	jdB d/e	jdB dMedB dNedOe"ee#f dB dPee def.dQdRZ$dSe%e&ee	jf  d9e'e fdTdUZ(  Z)S )\FluxPipeline )prefixr   r]   c                   s  t    || _tj|jdd dddg| _t | _|j}t	j
|}tj|d|d| _tj|d|d| _tj|d|d| _tj|d	|d| j| _t|d
| _tj|d|d| _tj|d|d| _d | _t| d	d rvdt| jjj d  nd| _!t"| dr| jd ur| jj#nd| _$d| _%d S )Ntransformerztransformer.T)model_or_path	subfolderrevisionr]   fall_back_to_ptrV   )r`   local_files_onlytext_encodertext_encoder_2vae)r   	tokenizertokenizer_2r!   r"   r#   M      )&super__init__r   r   ComponentSourcer+   weights_sourcesr   rJ   r,   r-   r.   r   from_pretrainedrV   r
   rd   r   re   r   torf   r   r^   r   rg   r   rh   stagegetattrr3   configr    r$   hasattrmodel_max_lengthtokenizer_max_lengthdefault_sample_size)selfr   r]   r+   rc   rU   r   r   rl      sF   

(
zFluxPipeline.__init__Nc              	   C   s  || j d  dks|| j d  dkr$td| j d  d| d| d |d ur7|d ur7td| d| d	|d urJ|d urJtd
| d| d	|d u rV|d u rVtd|d urmt|tsmt|tsmtdt| |d urt|tst|tstdt| |d ur|d urtd| d| d	|d ur|d urtd| d| d	|d ur|	d u rtd|d ur|
d u rtd|d ur|dkrtd| d S d S )Nr!   r   z-`height` and `width` have to be divisible by z	 but are z and z(. Dimensions will be resized accordinglyzCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.z Cannot forward both `prompt_2`: zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z4`prompt_2` has to be of type `str` or `list` but is z'Cannot forward both `negative_prompt`: z and `negative_prompt_embeds`: z)Cannot forward both `negative_prompt_2`: zIf `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`.zIf `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`.   z8`max_sequence_length` cannot be greater than 512 but is )r$   loggerwarningrN   
isinstancestrlisttype)rx   promptprompt_2heightwidthnegative_promptnegative_prompt_2prompt_embedsnegative_prompt_embedspooled_prompt_embedsnegative_pooled_prompt_embeds"callback_on_step_end_tensor_inputsmax_sequence_lengthr   r   r   check_inputs   s^   $zFluxPipeline.check_inputsr"   rz   r   num_images_per_promptr   dtypec              	   C   s  t |tr|gn|}t|}t | tr| || j}| j|d|ddddd}|j}| j|dddj}|jd |jd kr]t	||s]| j
|d d |d	 df }	td
| d|	  | j|| jddd }
| jj}|
j|| jd}
|
j\}}}|
d	|d	}
|
|| |d}
|
S )N
max_lengthTFpt)paddingr   
truncationreturn_lengthreturn_overflowing_tokensreturn_tensorslongestr   r   r"   zXThe following part of your input was truncated because `max_sequence_length` is set to  	 tokens: output_hidden_statesr   r   rJ   )r}   r~   r3   r   maybe_convert_promptrh   	input_idsshaper4   equalbatch_decoder{   r|   re   rp   rJ   r   repeatview)rx   r   r   r   r   
batch_sizetext_inputstext_input_idsuntruncated_idsremoved_textr   _seq_lenr   r   r   _get_t5_prompt_embeds   s>   
	  z"FluxPipeline._get_t5_prompt_embedsc           	   	   C   s  t |tr|gn|}t|}t | tr| || j}| j|d| jddddd}|j}| j|dddj}|jd |jd kr`t	
||s`| j|d d | jd	 df }td
| j d|  | j|| jdd}|j}|j| jj| jd}|d	|}||| d}|S )Nr   TFr   )r   r   r   r   r   r   r   r   r   r"   z\The following part of your input was truncated because CLIP can only handle sequences up to r   r   r   )r}   r~   r3   r   r   rg   rv   r   r   r4   r   r   r{   r|   rd   rp   rJ   pooler_outputr   r   r   )	rx   r   r   r   r   r   r   r   r   r   r   r   _get_clip_prompt_embeds,  s<   

 "z$FluxPipeline._get_clip_prompt_embedsr   r   r   c           	      C   s   t |tr|gn|}|du r+|p|}t |tr|gn|}| j||d}| j|||d}| jdur4| jjn| jj}t|j	d dj
| j|d}|||fS )a  

        Args:
            prompt (`str` or `List[str]`, *optional*):
                prompt to be encoded
            prompt_2 (`str` or `List[str]`, *optional*):
                The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
                used in all text-encoders
            num_images_per_prompt (`int`):
                number of images that should be generated per prompt
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
                If not provided, pooled text embeddings will be generated from `prompt` input argument.
        N)r   r   )r   r   r   r"      rJ   r   )r}   r~   r   r   rd   r   r^   r4   zerosr   rp   rJ   )	rx   r   r   r   r   r   r   r   text_idsr   r   r   encode_promptU  s    
zFluxPipeline.encode_promptc           	      C   s|   t ||d}|d t |d d d f  |d< |d t |d d d f  |d< |j\}}}||| |}|j||dS )Nr   ).r"   ).r!   r   )r4   r   aranger   reshaperp   )	r   r   r   rJ   r   latent_image_idslatent_image_id_heightlatent_image_id_widthlatent_image_id_channelsr   r   r   _prepare_latent_image_ids  s   ""z&FluxPipeline._prepare_latent_image_idsc                 C   sR   |  |||d d|d d} | dddddd} | ||d |d  |d } | S )Nr!   r      r"   r      )r   permuter   )latentsr   num_channels_latentsr   r   r   r   r   _pack_latents  s   zFluxPipeline._pack_latentsc                 C   s   | j \}}}dt||d   }dt||d   }| ||d |d |d dd} | dddddd} | ||d ||} | S )Nr!   r   r   r   r"   r   )r   intr   r   r   )r   r   r   r$   r   num_patcheschannelsr   r   r   _unpack_latents  s    zFluxPipeline._unpack_latentsc	                 C   s   dt || jd   }dt || jd   }||||f}	|d ur6| ||d |d ||}
|j||d|
fS t|trNt||krNtdt| d| dt|	|||d}| 	|||||}| ||d |d ||}
||
fS )Nr!   r   z/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)	generatorrJ   r   )
r   r$   r   rp   r}   r   r3   rN   r   r   )rx   r   r   r   r   r   rJ   r   r   r   r   r   r   r   prepare_latents  s    zFluxPipeline.prepare_latentsc              	   C   s|   |d u rt dd| |n|}t|| jjdd| jjdd| jjdd| jjd	d
}t| j|||d\}}||fS )N      ?r"   base_image_seq_lenr<   max_image_seq_lenr=   rB   r>   rC   r?   )rL   rG   )nplinspacerH   rV   rs   getrZ   )rx   rI   rL   rD   rG   rK   r   r   r   prepare_timesteps  s   
zFluxPipeline.prepare_timestepsc                 C      | j S r   )_guidance_scalerx   r   r   r   guidance_scale     zFluxPipeline.guidance_scalec                 C   r   r   )_joint_attention_kwargsr   r   r   r   joint_attention_kwargs  r   z#FluxPipeline.joint_attention_kwargsc                 C   r   r   )_num_timestepsr   r   r   r   num_timesteps  r   zFluxPipeline.num_timestepsc                 C   r   r   )_current_timestepr   r   r   r   current_timestep  r   zFluxPipeline.current_timestepc                 C   r   r   )
_interruptr   r   r   r   	interrupt  r   zFluxPipeline.interruptFr   r   r   r   r   negative_text_idsrK   do_true_cfgguidancetrue_cfg_scalecfg_normalizerM   c                 C   s   | j d |
| j_t|	D ]N\}}| jrq|| _||jd j	|j
|jd}||d |||||| jdd	}|
rI||d |||||| jdd	}nd}| |
||||}| ||||
}q|S )z0Diffusion loop with optional image conditioning.r   r   i  F)	hidden_statestimestepr   pooled_projectionsencoder_hidden_statestxt_idsimg_idsr   return_dictN)rV   set_begin_indexr^   r   	enumerater   r   expandr   rp   rJ   r   r   predict_noise_maybe_with_cfgscheduler_step_maybe_with_cfg)rx   r   r   r   r   r   r   r   r   rK   r   r   r   r   itr   positive_kwargsnegative_kwargs
noise_predr   r   r   diffuse  sL   	zFluxPipeline.diffusehas_neg_promptc                 C   s:   t  dkrdS |dkrtd dS |std dS dS )Nr"   Tz?CFG parallel is NOT working correctly when true_cfg_scale <= 1.FzeCFG parallel is NOT working correctly when there is no negative prompt or negative prompt embeddings.)r   r{   r|   )rx   r   r   r   r   r   check_cfg_parallel_validity9  s   

z(FluxPipeline.check_cfg_parallel_validityr      g      @pilTreqr   r   r   r   rI   rL   r   r   r*   r   r   r   c           "      C   s  dd |j D p	|}tdd |j D rd}n|j r"dd |j D }|jjp+| j| j }|jjp5| j| j }|jjp;|	}	|jjpA|
}
|jj	durL|jj	n|}|jj
pS|}|jjpY|}|jjdkrd|jjn|}| j||||||||||||d || _|| _d| _d	| _|durt|trd
}n|durt|trt|}n|jd }|dup|duo|du}|d
ko|}| || | j||||||d\}}}d}|r| j||||||d\}}}| jjd }| || ||||j| j||\}}| |	|
|jd
 \}}	t|| _| jj rt!j"d
g|t!j#d} | $|jd } nd} | j%du r&i | _| j&||||||||||| |d	d}d| _|dkrC|}!n| '|||| j}|| j(j)j* | j(j)j+ }| j(j,|d	dd }!t-|!dS )zForward pass for flux.c                 S   s(   g | ]}t |tr|n|d pdqS )r   r\   r}   r~   r   .0pr   r   r   
<listcomp>e     ( z(FluxPipeline.forward.<locals>.<listcomp>c                 s   s(    | ]}t |tp|d du V  qdS )r   Nr   r   r   r   r   	<genexpr>f  s   & z'FluxPipeline.forward.<locals>.<genexpr>Nc                 S   s(   g | ]}t |trd n|dpd qS )r\   r   r   r   r   r   r   r   i  r   r   )r   r   r   r   r   r   r   r   Fr"   )r   r   r   r   r   r   r   )r   )r   r   )r   )output).promptsallsampling_paramsr   rw   r$   r   rI   rL   r   r   r   num_outputs_per_promptr   r   r   r   r   r}   r~   r   r3   r   r   r   r^   in_channelsr   r   rJ   r   r   guidance_embedsr4   fullfloat32r   r   r   r   rf   rs   scaling_factorshift_factordecoder   )"rx   r   r   r   r   r   r   r   r   rI   rL   r   r   r   r   r   r   r   r   r*   r   r   r   r   r   r   r   r   r   r   r   rK   r   imager   r   r   forwardH  s   






zFluxPipeline.forwardweightsc                 C   s   t | }||S r   )r   load_weights)rx   r  loaderr   r   r   r    s   
zFluxPipeline.load_weights)NNNNNNNN)Nr"   rz   N)r"   )r"   NNrz   r   )F)*__name__
__module____qualname__r   r~   rl   r   r   r   r4   r   r   r   FloatTensorr   staticmethodr   r   r   r   r   propertyr   r   r   r   r   r5   boolfloatr   r   r   	Generatordictr   r  r   tuplerO   r  __classcell__r   r   ry   r   r[      sZ   <
E

0

-


1



#




	

E	


 ,)r[   )r<   r=   r>   r?   )NNNN)=rP   r1   loggingr,   collections.abcr   typingr   numpyr   r4   diffusers.image_processorr   diffusers.loadersr   ,diffusers.models.autoencoders.autoencoder_klr   9diffusers.schedulers.scheduling_flow_match_euler_discreter   diffusers.utils.torch_utilsr   r	   transformersr
   r   r   r    vllm.model_executor.models.utilsr   vllm_omni.diffusion.datar   r   ,vllm_omni.diffusion.distributed.cfg_parallelr   .vllm_omni.diffusion.distributed.parallel_stater   %vllm_omni.diffusion.distributed.utilsr   1vllm_omni.diffusion.model_loader.diffusers_loaderr   vllm_omni.diffusion.models.fluxr   vllm_omni.diffusion.requestr   2vllm_omni.model_executor.model_loader.weight_utilsr   	getLoggerr  r{   r;   r   r  rH   r~   rJ   r   r  r5   rZ   Moduler[   r   r   r   r   <module>   sr   





;