o
    ۷iA                     @   s   d dl mZ d dlZd dlmZ ddlmZmZ ddlmZm	Z	 ddl
mZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZ e	eZeG dd deZG dd deeZG dd deZdS )    )	dataclassN)nn   )ConfigMixinregister_to_config)
BaseOutputlogging   )AttentionProcessor)-HunyuanCombinedTimestepTextSizeStyleEmbedding
PatchEmbedPixArtAlphaTextProjection)
ModelMixin)HunyuanDiTBlock   )zero_modulec                   @   s   e Zd ZU eej ed< dS )HunyuanControlNetOutputcontrolnet_block_samplesN)__name__
__module____qualname__tupletorchTensor__annotations__ r   r   e/home/ubuntu/vllm_env/lib/python3.10/site-packages/diffusers/models/controlnets/controlnet_hunyuan.pyr   #   s   
 r   c                       s   e Zd Ze												
		
			d,dededededB dedB dededededededededef fddZe	de
eef fd d!Zd"ee
eef B fd#d$Ze	d-d%d&Z	'								d.d(ejd)efd*d+Z  ZS )/HunyuanDiT2DControlNetModelr      X   Ngelu-approximate      (         @      M      Tconditioning_channelsnum_attention_headsattention_head_dimin_channels
patch_sizeactivation_fntransformer_num_layers	mlp_ratiocross_attention_dimcross_attention_dim_t5pooled_projection_dimtext_lentext_len_t5"use_style_cond_and_image_meta_sizec                    s   t    |_|| _t||d dd_ttj	|| tj
d_t|||||d d_t|||||d_tg _t fddt|	d	 d
 D _tt||_ttjD ]}t||}t|}j| qhd S )N   	silu_fp32)in_featureshidden_sizeout_featuresact_fn)dtype)heightwidthr,   	embed_dimr-   pos_embed_type)r3   seq_lenr1   r6   c                    s2   g | ]}t jjj tj d ddqS )TF)dimr*   r.   ff_inner_dimr1   qk_normskip)r   	inner_dimconfigr*   int).0layerr.   r1   r0   selfr   r   
<listcomp>b   s    
z8HunyuanDiT2DControlNetModel.__init__.<locals>.<listcomp>r	   r   )super__init__	num_headsrG   r   text_embedderr   	Parameterr   randnfloat32text_embedding_paddingr   	pos_embedr   time_extra_emb
ModuleListcontrolnet_blocksrangeblocksr   Linearinput_blocklenappend)rM   r)   r*   r+   r,   r-   r.   sample_sizer:   r/   r0   r1   r2   r3   r4   r5   r6   _controlnet_block	__class__rL   r   rP   )   sN   

		
z$HunyuanDiT2DControlNetModel.__init__returnc                    sL   i }dt dtjjdtt tf f fdd |  D ]
\}} ||| q|S )z
        Returns:
            `dict` of attention processors: A dictionary containing all attention processors used in the model with
            indexed by its weight name.
        namemodule
processorsc                    sL   t |dr|jdd||  d< | D ]\}} |  d| || q|S )Nget_processorT)return_deprecated_lora
.processor.)hasattrrj   named_children)rg   rh   ri   sub_namechildfn_recursive_add_processorsr   r   rs      s
   
zPHunyuanDiT2DControlNetModel.attn_processors.<locals>.fn_recursive_add_processors)strr   r   Moduledictr
   ro   )rM   ri   rg   rh   r   rr   r   attn_processorsu   s
   &	z+HunyuanDiT2DControlNetModel.attn_processors	processorc                    s   t | j }t|tr"t ||kr"tdt | d| d| ddtdtjj	f fdd | 
 D ]
\}} ||| q3d	S )
a2  
        Sets the attention processor to use to compute attention.

        Parameters:
            processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
                The instantiated processor class or a dictionary of processor classes that will be set as the processor
                for **all** `Attention` layers. If `processor` is a dict, the key needs to define the path to the
                corresponding cross attention processor. This is strongly recommended when setting trainable attention
                processors.
        z>A dict of processors was passed, but the number of processors z0 does not match the number of attention layers: z. Please make sure to pass z processor classes.rg   rh   c                    sb   t |drt|ts|| n|||  d | D ]\}} |  d| || qd S )Nset_processorrl   rm   )rn   
isinstancerv   ry   popro   )rg   rh   rx   rp   rq   fn_recursive_attn_processorr   r   r}      s   

zSHunyuanDiT2DControlNetModel.set_attn_processor.<locals>.fn_recursive_attn_processorN)r_   rw   keysrz   rv   
ValueErrorrt   r   r   ru   ro   )rM   rx   countrg   rh   r   r|   r   set_attn_processor   s   
z.HunyuanDiT2DControlNetModel.set_attn_processorc                 C   s   |j }|j}|j}|j}|j}	|j}
|j}|j}|j}|j	}|j
}|j}|j}|}|p-|j}| ||||||	|
|||||||d}|rU|j| dd}td|d   |S )N)r)   r/   r.   r+   r1   r2   r:   r,   r0   r*   r-   ra   r4   r5   F)strictz0controlnet load from Hunyuan-DiT. missing_keys: r   )rH   r.   r+   r1   r2   r:   r,   r0   r*   r-   ra   r4   r5   r/   load_state_dict
state_dictloggerwarning)clstransformerr)   r/   load_weights_from_transformerrH   r.   r+   r1   r2   r:   r,   r0   r*   r-   ra   r4   r5   
controlnetkeyr   r   r   from_transformer   sF   
z,HunyuanDiT2DControlNetModel.from_transformer      ?controlnet_condconditioning_scalec                    s>  |j dd \}}| |}|| | | }| j|||	|
|jd}|j \}}}| |d|j d }|||d}tj||gdd}tj||gdd}|	d
 }t||| j}d}t| jD ]\}}|||||d	}||f }qdd}t|| jD ]\}}||}||f }q~ fd
d|D }|s|fS t|dS )  
        The [`HunyuanDiT2DControlNetModel`] forward method.

        Args:
        hidden_states (`torch.Tensor` of shape `(batch size, dim, height, width)`):
            The input tensor.
        timestep ( `torch.LongTensor`, *optional*):
            Used to indicate denoising step.
        controlnet_cond ( `torch.Tensor` ):
            The conditioning input to ControlNet.
        conditioning_scale ( `float` ):
            Indicate the conditioning scale.
        encoder_hidden_states ( `torch.Tensor` of shape `(batch size, sequence len, embed dims)`, *optional*):
            Conditional embeddings for cross attention layer. This is the output of `BertModel`.
        text_embedding_mask: torch.Tensor
            An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. This is the output
            of `BertModel`.
        encoder_hidden_states_t5 ( `torch.Tensor` of shape `(batch size, sequence len, embed dims)`, *optional*):
            Conditional embeddings for cross attention layer. This is the output of T5 Text Encoder.
        text_embedding_mask_t5: torch.Tensor
            An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. This is the output
            of T5 Text Encoder.
        image_meta_size (torch.Tensor):
            Conditional embedding indicate the image sizes
        style: torch.Tensor:
            Conditional embedding indicate the style
        image_rotary_emb (`torch.Tensor`):
            The image rotary embeddings to apply on query and key tensors during attention calculation.
        return_dict: bool
            Whether to return a dictionary.
        N)hidden_dtyper   )rC   r	   r   )tembencoder_hidden_statesimage_rotary_embc                    s   g | ]}|  qS r   r   )rJ   sampler   r   r   rN   /  s    z7HunyuanDiT2DControlNetModel.forward.<locals>.<listcomp>)r   )shaperW   r^   rX   r=   rR   viewr   cat	unsqueezeboolwhererV   	enumerater\   ziprZ   r   )rM   hidden_statestimestepr   r   r   text_embedding_maskencoder_hidden_states_t5text_embedding_mask_t5image_meta_sizestyler   return_dictr>   r?   r   
batch_sizesequence_lengthrb   block_res_samplesrK   blockcontrolnet_block_res_samplesblock_res_samplerc   r   r   r   forward   s@   /

z#HunyuanDiT2DControlNetModel.forward)r   r   r   NNr    r!   r"   r#   r$   r%   r&   r%   r'   r(   T)r   NT	r   NNNNNNNT)r   r   r   r   rI   rt   floatr   rP   propertyrv   r
   rw   r   classmethodr   r   r   r   __classcell__r   r   rd   r   r   (   s    
K .r   c                       sH   e Zd ZdZ fddZ									ddejdefd	d
Z  Z	S ) HunyuanDiT2DMultiControlNetModela  
    `HunyuanDiT2DMultiControlNetModel` wrapper class for Multi-HunyuanDiT2DControlNetModel

    This module is a wrapper for multiple instances of the `HunyuanDiT2DControlNetModel`. The `forward()` API is
    designed to be compatible with `HunyuanDiT2DControlNetModel`.

    Args:
        controlnets (`list[HunyuanDiT2DControlNetModel]`):
            Provides additional conditioning to the unet during the denoising process. You must set multiple
            `HunyuanDiT2DControlNetModel` as a list.
    c                    s   t    t|| _d S )N)rO   rP   r   rY   nets)rM   controlnetsrd   r   r   rP   D  s   
z)HunyuanDiT2DMultiControlNetModel.__init__r   NTr   r   c                 C   sv   t t||| jD ]/\}\}}}||||||||||	|
||d}|dkr'|}q	dd t|d |d D }|f}q	|S )r   )r   r   r   r   r   r   r   r   r   r   r   r   r   c                 S   s   g | ]\}}|| qS r   r   )rJ   control_block_sampleblock_sampler   r   r   rN     s    z<HunyuanDiT2DMultiControlNetModel.forward.<locals>.<listcomp>)r   r   r   )rM   r   r   r   r   r   r   r   r   r   r   r   r   iimagescaler   block_samplescontrol_block_samplesr   r   r   r   H  s,    .z(HunyuanDiT2DMultiControlNetModel.forwardr   )
r   r   r   __doc__rP   r   r   r   r   r   r   r   rd   r   r   7  s"    	r   )dataclassesr   r   r   configuration_utilsr   r   utilsr   r   attention_processorr
   
embeddingsr   r   r   modeling_utilsr   #transformers.hunyuan_transformer_2dr   r   r   
get_loggerr   r   r   r   r   r   r   r   r   <module>   s"   
  