o
    pi.                     @   sp   d dl mZmZ d dlZddlmZmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZ eeZG dd	 d	ZdS )
    )OptionalUnionN   )BasicTransformerBlockFreeNoiseTransformerBlock)CrossAttnDownBlockMotionDownBlockMotionUpBlockMotion)logging)randn_tensorc                   @   s   e Zd ZdZdeeeef fddZdeeeef fddZ			d"de
d	e
d
e
de
de
dejdejdeej deej fddZ				d#dee
 de
dededdf
ddZd$ddZed d! ZdS )%AnimateDiffFreeNoiseMixinz>Mixin class for [FreeNoise](https://arxiv.org/abs/2310.15169).blockc                 C   s   |j D ]i}t|j}t|D ]]}t|j| tr&|j| | j| j| j	 qt|j| t
s0J |j| }t|j|j|j|j|j|j|j|j|j|j|j| j| j| j	dj| j| jd|j|< |j| j| dd qqdS )z:Helper function to enable FreeNoise in transformer blocks.)dimnum_attention_headsattention_head_dimdropoutcross_attention_dimactivation_fnattention_biasonly_cross_attentiondouble_self_attentionpositional_embeddingsnum_positional_embeddingscontext_lengthcontext_strideweighting_schemedevicedtypeTstrictN)motion_moduleslentransformer_blocksrange
isinstancer   set_free_noise_properties_free_noise_context_length_free_noise_context_stride_free_noise_weighting_schemer   r   r   r   r   r   r   r   r   r   r   r   tor   r   load_state_dict
state_dict)selfr   motion_modulenum_transformer_blocksibasic_transfomer_block r2   b/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/diffusers/pipelines/free_noise_utils.py_enable_free_noise_in_block#   sD   





z5AnimateDiffFreeNoiseMixin._enable_free_noise_in_blockc                 C   s   |j D ]K}t|j}t|D ]?}t|j| trM|j| }t|j|j|j	|j
|j|j|j|j|j|j|jdj| j| jd|j|< |j| j| dd qqdS )z;Helper function to disable FreeNoise in transformer blocks.)r   r   r   r   r   r   r   r   r   r   r   r   Tr   N)r!   r"   r#   r$   r%   r   r   r   r   r   r   r   r   r   r   r   r   r   r*   r   r   r+   r,   )r-   r   r.   r/   r0   free_noise_transfomer_blockr2   r2   r3   _disable_free_noise_in_blockI   s4   




z6AnimateDiffFreeNoiseMixin._disable_free_noise_in_blockN
batch_sizenum_channels_latents
num_framesheightwidthr   r   	generatorlatentsc
              	   C   s.  t |trt||krtdt| d| d| jdkr | jn|}
|||
|| j || j f}|	d u rCt||||d}	| jdkrB|	S n'|	d|krL|	S |	d| jkretd| d	| j d
|	d |		|}	| jdkrt
| j|| jD ]u}td|| j }t||| j }|| }|dkr nZttt
||}|tj||d }|}t||| }||| kr|	d d d d |f |	d d d d ||f< qw|| }|d | }|	d d d d |f |	d d d d ||f< qwn| jdkr|| j d | j }tj|	g| dd}	|	d d d d d |f }	|	S )Nz/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.repeat_context)r<   r   r   randomr   z_You have passed `latents` as a parameter to FreeNoise. The expected number of frames is either z or z, but found shuffle_contextr   )r<      )r   )r%   listr"   
ValueErrorr'   vae_scale_factorr   _free_noise_noise_typesizer*   r$   r(   maxmintorch
LongTensorrandpermcat)r-   r7   r8   r9   r:   r;   r   r   r<   r=   context_num_framesshaper0   window_start
window_endwindow_lengthindicesshuffled_indicescurrent_startcurrent_endprefix_lengthnum_repeatsr2   r2   r3   _prepare_latents_free_noisee   s`   


..z5AnimateDiffFreeNoiseMixin._prepare_latents_free_noise      pyramidr@   r   r   r   
noise_typereturnc           	      C   s   dg}g d}|| j jjkrtd|d| j jjd ||vr+td| d|||vr9td| d	||p?| j jj| _|| _|| _|| _	g | j
j| j
j| j
j}|D ]}| | qZd
S )a.  
        Enable long video generation using FreeNoise.

        Args:
            context_length (`int`, defaults to `16`, *optional*):
                The number of video frames to process at once. It's recommended to set this to the maximum frames the
                Motion Adapter was trained with (usually 16/24/32). If `None`, the default value from the motion
                adapter config is used.
            context_stride (`int`, *optional*):
                Long videos are generated by processing many frames. FreeNoise processes these frames in sliding
                windows of size `context_length`. Context stride allows you to specify how many frames to skip between
                each window. For example, a context length of 16 and context stride of 4 would process 24 frames as:
                    [0, 15], [4, 19], [8, 23] (0-based indexing)
            weighting_scheme (`str`, defaults to `pyramid`):
                Weighting scheme for averaging latents after accumulation in FreeNoise blocks. The following weighting
                schemes are supported currently:
                    - "pyramid"
                        Peforms weighted averaging with a pyramid like weight pattern: [1, 2, 3, 2, 1].
            noise_type (`str`, defaults to "shuffle_context"):
                TODO
        r[   )r@   r>   r?   zYou have set context_length=zH which is greater than self.motion_adapter.config.motion_max_seq_length=z*. This can lead to bad generation results.z0The parameter `weighting_scheme` must be one of z, but got weighting_scheme=z*The parameter `noise_type` must be one of z, but got noise_type=N)motion_adapterconfigmotion_max_seq_lengthloggerwarningrC   r'   r(   r)   rE   unetdown_blocks	mid_block	up_blocksr4   )	r-   r   r   r   r\   allowed_weighting_schemeallowed_noise_typeblocksr   r2   r2   r3   enable_free_noise   s(   z+AnimateDiffFreeNoiseMixin.enable_free_noisec                 C   s:   d | _ g | jj| jj| jj}|D ]}| | qd S )N)r'   rc   rd   re   rf   r6   )r-   ri   r   r2   r2   r3   disable_free_noise   s
   z,AnimateDiffFreeNoiseMixin.disable_free_noisec                 C   s   t | do	| jd uS )Nr'   )hasattrr'   )r-   r2   r2   r3   free_noise_enabled   s   z,AnimateDiffFreeNoiseMixin.free_noise_enabled)NN)rY   rZ   r[   r@   )r]   N)__name__
__module____qualname____doc__r   r   r   r	   r4   r6   intrI   r   r   r   	GeneratorTensorrX   strrj   rk   propertyrm   r2   r2   r2   r3   r       sV    &%	

L

4r   )typingr   r   rI   models.attentionr   r   models.unets.unet_motion_modelr   r   r	   utilsr
   utils.torch_utilsr   
get_loggerrn   ra   r   r2   r2   r2   r3   <module>   s   
