o
    ۷i                     @  s   d dl mZ d dlmZmZ d dlZd dlZd dlZddl	m
Z
 ddlmZ ddlmZ ddlmZ d	d
lmZmZ d	dlmZmZmZ eeZG dd deZG dd deZdS )    )annotations)AnyUnionN   )
FrozenDict)AutoencoderKLFlux2)Flux2ImageProcessor)logging   )ModularPipelineBlocksPipelineState)ComponentSpec
InputParamOutputParamc                   @  sZ   e Zd ZdZedddZedddZedd
dZedddZ	e
 dddZdS )Flux2UnpackLatentsStepflux2returnstrc                 C     dS )Nz5Step that unpacks the latents from the denoising step selfr   r   `/home/ubuntu/vllm_env/lib/python3.10/site-packages/diffusers/modular_pipelines/flux2/decoders.pydescription%      z"Flux2UnpackLatentsStep.descriptionlist[tuple[str, Any]]c                 C  s$   t ddtjddt ddtjddgS )NlatentsT,The denoised latents from the denoising steprequired	type_hintr   
latent_idsz0Position IDs for the latents, used for unpackingr   torchTensorr   r   r   r   inputs)   s   zFlux2UnpackLatentsStep.inputs	list[str]c                 C  s   t dtjddgS )Nr   zDThe denoise latents from denoising step, unpacked with position IDs.r    r   )r   r#   r$   r   r   r   r   intermediate_outputs:   s   z+Flux2UnpackLatentsStep.intermediate_outputsxtorch.Tensorx_idsc                 C  s   g }t | |D ]b\}}|j\}}|dddf tj}|dddf tj}t|d }	t|d }
||
 | }tj|	|
 |f|j|jd}|	d|
dd|| ||	|
|ddd}|| qtj|ddS )aH  
        Unpack latents using position IDs to scatter tokens into place.

        Args:
            x: Packed latents tensor of shape (B, seq_len, C)
            x_ids: Position IDs tensor of shape (B, seq_len, 4) with (T, H, W, L) coordinates

        Returns:
            Unpacked latents tensor of shape (B, C, H, W)
        N   r
   )devicedtyper   )dim)zipshapetor#   int64maxzerosr-   r.   scatter_	unsqueezeexpandviewpermuteappendstack)r)   r+   x_listdatapos_chh_idsw_idshwflat_idsoutr   r   r   _unpack_latents_with_idsD   s   
z/Flux2UnpackLatentsStep._unpack_latents_with_idsstater   c                 C  s<   |  |}|j}|j}| ||}||_| || ||fS )N)get_block_stater   r!   rI   set_block_state)r   
componentsrJ   block_stater   r!   r   r   r   __call__c   s   
zFlux2UnpackLatentsStep.__call__Nr   r   r   r   r   r&   )r)   r*   r+   r*   r   r*   rJ   r   r   r   )__name__
__module____qualname__
model_namepropertyr   r%   r(   staticmethodrI   r#   no_gradrO   r   r   r   r   r   "   s    	r   c                   @  sf   e Zd ZdZedddZedddZedd
dZedddZe	dd Z
e dddZdS )Flux2DecodeStepr   r   list[ComponentSpec]c                 C  s$   t dtt dttdddddgS )Nvaeimage_processor       )vae_scale_factorvae_latent_channelsfrom_config)configdefault_creation_method)r   r   r   r   r   r   r   r   expected_componentsu   s   z#Flux2DecodeStep.expected_componentsr   c                 C  r   )NzbStep that decodes the denoised latents into images using Flux2 VAE with batch norm denormalizationr   r   r   r   r   r      r   zFlux2DecodeStep.descriptionr   c                 C  s   t dddt ddtjddgS )Noutput_typepil)defaultr   Tr   r   r"   r   r   r   r   r%      s   
zFlux2DecodeStep.inputsr&   c                 C  s&   t dtttjj tjtjf ddgS )NimageszUThe generated images, can be a list of PIL.Image.Image, torch.Tensor or a numpy arrayr'   )	r   r   listPILImager#   r$   npndarrayr   r   r   r   r(      s   z$Flux2DecodeStep.intermediate_outputsc                 C  sZ   | j \}}}}| ||d dd||} | dddddd} | ||d |d |d } | S )z2Convert patchified latents back to regular format.   r
   r   r,      r   )r2   reshaper;   )r   
batch_sizenum_channels_latentsheightwidthr   r   r   _unpatchify_latents   s
   z#Flux2DecodeStep._unpatchify_latentsrJ   r   c                 C  s   |  |}|j}|j}|jjdddd|j|j}t	
|jjdddd|jj |j|j}|| | }| |}|j|ddd |_|jj|j|jd|_| || ||fS )Nr,   r/   F)return_dictr   )rg   )rK   r]   r   bnrunning_meanr:   r3   r-   r.   r#   sqrtrunning_varrd   batch_norm_epsrw   decoderj   r^   postprocessrg   rL   )r   rM   rJ   rN   r]   r   latents_bn_meanlatents_bn_stdr   r   r   rO      s   
 "
zFlux2DecodeStep.__call__N)r   r\   rP   rQ   rR   rS   )rT   rU   rV   rW   rX   rf   r   r%   r(   rY   rw   r#   rZ   rO   r   r   r   r   r[   r   s    	
r[   )
__future__r   typingr   r   numpyrn   rl   r#   configuration_utilsr   modelsr   pipelines.flux2.image_processorr   utilsr	   modular_pipeliner   r   modular_pipeline_utilsr   r   r   
get_loggerrT   loggerr   r[   r   r   r   r   <module>   s   
P