o
    ۷i"                     @   s   d dl mZ d dlmZ d dlZd dlmZ ddlmZmZ ddl	m
Z
 ddlmZmZmZ d	d
lmZ d	dlmZmZ d	dlmZ d	dlmZ d	dlmZmZ d	dlmZ ddlmZ ee Z!eG dd deZ"G dd deeee
Z#dS )    )	dataclass)AnyN)nn   )ConfigMixinregister_to_config)PeftAdapterMixin)
BaseOutputapply_lora_scalelogging   )AttentionMixin)
PatchEmbedPixArtAlphaTextProjection)Transformer2DModelOutput)
ModelMixin)AdaLayerNormSingleRMSNormSanaTransformerBlock   )zero_modulec                   @   s   e Zd ZU eej ed< dS )SanaControlNetOutputcontrolnet_block_samplesN)__name__
__module____qualname__tupletorchTensor__annotations__ r!   r!   b/home/ubuntu/vllm_env/lib/python3.10/site-packages/diffusers/models/controlnets/controlnet_sana.pyr   $   s   
 r   c                &       s*  e Zd ZdZddgZddgZe								
										d4dededB dededededB dedB dedB dededede	ded ed!e	d"ed#edB d$df$ fd%d&Z
ed'	(				d5d)ejd*ejd+ejd,ejd-ed.ejdB d/ejdB d'eeef dB d0e	d$eejd1f eB fd2d3Z  ZS )6SanaControlNetModelTr   r   patch_embednorm    F         p      	        @        Fr   ư>Nin_channelsout_channelsnum_attention_headsattention_head_dim
num_layersnum_cross_attention_headscross_attention_head_dimcross_attention_dimcaption_channels	mlp_ratiodropoutattention_biassample_size
patch_sizenorm_elementwise_affinenorm_epsinterpolation_scalereturnc                    s   t    |p|}	 t||||||d urdnd d| _t| _t|	d| _tddd| _	t
 	
fddt|D | _t
g | _tt
| _tt| jD ]}t
}t|}| j| qbd	| _d S )
Nsincos)heightwidthr=   r0   	embed_dimr@   pos_embed_type)in_featureshidden_sizegh㈵>T)epselementwise_affinec                    s*   g | ]}t 	
 d qS ))r:   r5   r6   r7   r;   r>   r?   r9   r   ).0_r;   r3   r7   r6   r:   	inner_dimr9   r>   r?   r2   r5   r!   r"   
<listcomp>[   s     z0SanaControlNetModel.__init__.<locals>.<listcomp>F)super__init__r   r$   r   
time_embedr   caption_projectionr   caption_normr   
ModuleListrangetransformer_blockscontrolnet_blocksr   Linearinput_blocklenappendgradient_checkpointing)selfr0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rL   controlnet_block	__class__rM   r"   rQ   .   s6   


zSanaControlNetModel.__init__attention_kwargs      ?hidden_statesencoder_hidden_statestimestepcontrolnet_condconditioning_scaleencoder_attention_maskattention_maskreturn_dict.c
                    s  |d ur|j dkrd||j d }|d}|d ur0|j dkr0d||j d }|d}|j\}
}}}| jj}|| || }}| |}|| | ||j }| j	||
|jd\}}| 
|}||
d|jd }| |}d}t r| jr| jD ]}| ||||||||}||f }qn| jD ]}||||||||}||f }qd}t|| jD ]\}}||}||f }q fdd|D }|	s|fS t|d	S )
Nr   r   g     )
batch_sizehidden_dtyper!   c                    s   g | ]}|  qS r!   r!   )rK   samplerh   r!   r"   rO      s    z/SanaControlNetModel.forward.<locals>.<listcomp>)r   )ndimtodtype	unsqueezeshapeconfigr=   r$   rZ   rR   rS   viewrT   r   is_grad_enabledr]   rW   _gradient_checkpointing_funcziprX   r   )r^   rd   re   rf   rg   rh   ri   rj   rb   rk   rl   num_channelsrC   rD   ppost_patch_heightpost_patch_widthembedded_timestepblock_res_samplesblockcontrolnet_block_res_samplesblock_res_sampler_   r!   rp   r"   forwardx   sf   








	
zSanaControlNetModel.forward)r&   r&   r'   r&   r(   r)   r*   r+   r,   r-   r.   Fr&   r   Fr/   N)rc   NNNT)r   r   r    _supports_gradient_checkpointing_no_split_modules _skip_layerwise_casting_patternsr   intfloatboolrQ   r
   r   r   
LongTensordictstrr   r   r   r   __classcell__r!   r!   r`   r"   r#   )   s    	
I	
r#   )$dataclassesr   typingr   r   r   configuration_utilsr   r   loadersr   utilsr	   r
   r   	attentionr   
embeddingsr   r   modeling_outputsr   modeling_utilsr   normalizationr   r   transformers.sana_transformerr   
controlnetr   
get_loggerr   loggerr   r#   r!   r!   r!   r"   <module>   s$   
