o
    ۷iB                     @   s   d dl Z d dlmZ d dlZd dlmZ d dlm	Z	 ddl
mZmZ ddlmZmZ ddlmZmZ ddlmZ dd	lmZmZmZ eeZe jjG d
d deZG dd dejZ eG dd dejeeZ!dS )    N)
FrozenDict   )ConfigMixinflax_register_to_config)
BaseOutputlogging   )FlaxTimestepEmbeddingFlaxTimesteps)FlaxModelMixin)FlaxCrossAttnDownBlock2DFlaxDownBlock2DFlaxUNetMidBlock2DCrossAttnc                   @   s&   e Zd ZU dZejed< ejed< dS )FlaxControlNetOutputz
    The output of [`FlaxControlNetModel`].

    Args:
        down_block_res_samples (`jnp.ndarray`):
        mid_block_res_sample (`jnp.ndarray`):
    down_block_res_samplesmid_block_res_sampleN)__name__
__module____qualname____doc__jnpndarray__annotations__ r   r   b/home/ubuntu/vllm_env/lib/python3.10/site-packages/diffusers/models/controlnets/controlnet_flax.pyr   "   s   
 
r   c                   @   sZ   e Zd ZU eed< dZeedf ed< ejZ	ej	ed< ddd	Z
d
ejdejfddZdS )#FlaxControlNetConditioningEmbeddingconditioning_embedding_channels       `      .block_out_channelsdtypereturnNc                 C   s   t d tj| jd dd| jd| _g }tt| jd D ]-}| j| }| j|d  }tj|dd| jd}|	| tj|ddd| jd}|	| q|| _
tj| jddtj tj | jd	| _d S )
NFlax classes are deprecated and will be removed in Diffusers v1.0.0. We recommend migrating to PyTorch classes or pinning your version of Diffusers.r   r   r      r)   r(   )kernel_sizepaddingr#   r)   )r   r   r*   stridesr+   r#   r*   r+   kernel_init	bias_initr#   )loggerwarningnnConvr"   r#   conv_inrangelenappendblocksr   initializers
zeros_initconv_out)selfr9   i
channel_inchannel_outconv1conv2r   r   r   setup5   sJ   

z)FlaxControlNetConditioningEmbedding.setupconditioningc                 C   s@   |  |}t|}| jD ]}||}t|}q| |}|S )N)r5   r3   silur9   r<   )r=   rD   	embeddingblockr   r   r   __call__`   s   



z,FlaxControlNetConditioningEmbedding.__call__r$   N)r   r   r   intr   r"   tupler   float32r#   rC   r   rH   r   r   r   r   r   0   s   
 
+r   c                   @   s  e Zd ZU dZdZeed< dZeed< dZe	e
df ed< d	Zee	edf B ed
< dZe	edf ed< dZeed< dZee	edf B ed< dZee	edf B dB ed< dZeed< dZeed< d	Zeed< ejZejed< dZeed< dZeed< dZe
ed< dZe	edf ed < d!ejd"efd#d$Zd1d%d&Z 	'			d2d(ej!d)ej!eB eB d*ej!d+ej!d,ed-ed.ed"e"e	e	ej!df ej!f B fd/d0Z#dS )3FlaxControlNetModelu
  
    A ControlNet model.

    This model inherits from [`FlaxModelMixin`]. Check the superclass documentation for it’s generic methods
    implemented for all models (such as downloading or saving).

    This model is also a Flax Linen [`flax.linen.Module`](https://flax.readthedocs.io/en/latest/flax.linen.html#module)
    subclass. Use it as a regular Flax Linen module and refer to the Flax documentation for all matters related to its
    general usage and behavior.

    Inherent JAX features such as the following are supported:

    - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
    - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
    - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
    - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)

    Parameters:
        sample_size (`int`, *optional*):
            The size of the input sample.
        in_channels (`int`, *optional*, defaults to 4):
            The number of channels in the input sample.
        down_block_types (`tuple[str]`, *optional*, defaults to `("FlaxCrossAttnDownBlock2D", "FlaxCrossAttnDownBlock2D", "FlaxCrossAttnDownBlock2D", "FlaxDownBlock2D")`):
            The tuple of downsample blocks to use.
        block_out_channels (`tuple[int]`, *optional*, defaults to `(320, 640, 1280, 1280)`):
            The tuple of output channels for each block.
        layers_per_block (`int`, *optional*, defaults to 2):
            The number of layers per block.
        attention_head_dim (`int` or `tuple[int]`, *optional*, defaults to 8):
            The dimension of the attention heads.
        num_attention_heads (`int` or `tuple[int]`, *optional*):
            The number of attention heads.
        cross_attention_dim (`int`, *optional*, defaults to 768):
            The dimension of the cross attention features.
        dropout (`float`, *optional*, defaults to 0):
            Dropout probability for down, up and bottleneck blocks.
        flip_sin_to_cos (`bool`, *optional*, defaults to `True`):
            Whether to flip the sin to cos in the time embedding.
        freq_shift (`int`, *optional*, defaults to 0): The frequency shift to apply to the time embedding.
        controlnet_conditioning_channel_order (`str`, *optional*, defaults to `rgb`):
            The channel order of conditional image. Will convert to `rgb` if it's `bgr`.
        conditioning_embedding_out_channels (`tuple`, *optional*, defaults to `(16, 32, 96, 256)`):
            The tuple of output channel for each block in the `conditioning_embedding` layer.
    r   sample_size   in_channels)CrossAttnDownBlock2DrQ   rQ   DownBlock2D.down_block_typesFonly_cross_attention)i@  i     rU   r"   r   layers_per_block   attention_head_dimNnum_attention_headsrU   cross_attention_dimg        dropoutuse_linear_projectionr#   Tflip_sin_to_cosr   
freq_shiftrgb%controlnet_conditioning_channel_orderr   #conditioning_embedding_out_channelsrngr$   c                 C   s   d| j | j| jf}tj|tjd}tjdtjd}tjdd| jftjd}dd| jd | jd f}tj|tjd}tj	
|\}}	||	d}
| |
||||d S )Nr)   r#   )r)   r   rW   )paramsr[   rd   )rP   rN   r   zerosrL   onesint32rZ   jaxrandomsplitinit)r=   rb   sample_shapesample	timestepsencoder_hidden_statescontrolnet_cond_shapecontrolnet_cond
params_rngdropout_rngrngsr   r   r   init_weights   s   
z FlaxControlNetModel.init_weightsc                 C   sR  t d | j}|d d }| jp| j}tj|d ddd| jd| _t	|d | j
| jjd| _t|| jd	| _t|d | jd
| _| j}t|trQ|ft| j }t|tr^|ft| j }g }g }|d }tj|ddtj tj | jd}|| t| jD ]x\}	}
|}||	 }|	t|d k}|
dkrt||| j| j||	 | | j ||	 | jd	}nt!||| j| j| | jd}|| t"| jD ]}tj|ddtj tj | jd}|| q|stj|ddtj tj | jd}|| q|| _#|| _$|d }t%|| j|d | j | jd| _&tj|ddtj tj | jd| _'d S )Nr%   r   rO   r&   r(   r'   r,   )r]   r^   rc   )r   r"   VALIDr.   r)   rQ   )	rP   out_channelsr[   
num_layersrY   add_downsampler\   rT   r#   )rP   rw   r[   rx   ry   r#   )rP   r[   rY   r\   r#   )(r1   r2   r"   rY   rX   r3   r4   r#   r5   r
   r]   configr^   	time_projr	   time_embeddingr   ra   controlnet_cond_embeddingrT   
isinstanceboolr7   rS   rJ   r:   r;   r8   	enumerater   r[   rV   r\   r   r6   down_blockscontrolnet_down_blocksr   	mid_blockcontrolnet_mid_block)r=   r"   time_embed_dimrY   rT   r   r   output_channelcontrolnet_blockr>   down_block_typeinput_channelis_final_block
down_block_mid_block_channelr   r   r   rC      s   	



	
zFlaxControlNetModel.setup      ?rm   rn   ro   rq   conditioning_scalereturn_dicttrainc                    s  | j }|dkrtj|dd}t|tjstj|gtjd}nt|tjr8t|jdkr8|j	tj
d}t|d}| |}	| |	}	t|d}| |}t|d}| |}||7 }|f}
| jD ]!}t|tru|||	|| d\}}n
|||	| d\}}|
|7 }
qb| j||	|| d}d}t|
| jD ]\}}||}||f7 }q|}
| |} fd	d
|
D }
| 9 }|s|
|fS t|
|dS )a  
        Args:
            sample (`jnp.ndarray`): (batch, channel, height, width) noisy inputs tensor
            timestep (`jnp.ndarray` or `float` or `int`): timesteps
            encoder_hidden_states (`jnp.ndarray`): (batch_size, sequence_length, hidden_size) encoder hidden states
            controlnet_cond (`jnp.ndarray`): (batch, channel, height, width) the conditional input tensor
            conditioning_scale (`float`, *optional*, defaults to `1.0`): the scale factor for controlnet outputs
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput`] instead of
                a plain tuple.
            train (`bool`, *optional*, defaults to `False`):
                Use deterministic functions and disable dropout when not training.

        Returns:
            [`~models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput`] or `tuple`:
                [`~models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput`] if `return_dict` is True, otherwise
                a `tuple`. When returning a tuple, the first element is the sample tensor.
        bgrr)   )axisrc   r   )r   r   r   r)   )deterministicr   c                    s   g | ]}|  qS r   r   ).0rm   r   r   r   
<listcomp>  s    z0FlaxControlNetModel.__call__.<locals>.<listcomp>)r   r   )r`   r   flipr   r   arrayrg   r7   shapeastyperL   expand_dimsr|   r}   	transposer5   r~   r   r   r   zipr   r   r   )r=   rm   rn   ro   rq   r   r   r   channel_ordert_embr   r   res_samples!controlnet_down_block_res_samplesdown_block_res_sampler   r   r   r   r   rH   E  sF   







zFlaxControlNetModel.__call__rI   )r   TF)$r   r   r   r   rN   rJ   r   rP   rS   rK   strrT   r   r"   rV   rX   rY   rZ   r[   floatr\   r   rL   r#   r]   r^   r`   ra   rh   Arrayr   ru   rC   r   r   rH   r   r   r   r   rM   m   sR   
 -
 	rM   )"flax
flax.linenlinenr3   rh   	jax.numpynumpyr   flax.core.frozen_dictr   configuration_utilsr   r   utilsr   r   embeddings_flaxr	   r
   modeling_flax_utilsr   unets.unet_2d_blocks_flaxr   r   r   
get_loggerr   r1   struct	dataclassr   Moduler   rM   r   r   r   r   <module>   s    
=