o
    pi                     @   sJ   d dl Z d dlmZmZ d dlZd dlmZ ddlmZ G dd dZ	dS )    N)TupleUnion   )randn_tensorc                   @   s   e Zd ZdZ						d'deded	ed
ededefddZdd Z	e
dd Zdeedf deeejf ded
edededejfddZdejdejdejdejfddZdejd ed!edejd"ejd#ejfd$d%Zd&S )(FreeInitMixinzMixin class for FreeInit.   Fbutterworth         ?	num_itersuse_fast_samplingmethodorderspatial_stop_frequencytemporal_stop_frequencyc                 C   s(   || _ || _|| _|| _|| _|| _dS )a   Enables the FreeInit mechanism as in https://arxiv.org/abs/2312.07537.

        This implementation has been adapted from the [official repository](https://github.com/TianxingWu/FreeInit).

        Args:
            num_iters (`int`, *optional*, defaults to `3`):
                Number of FreeInit noise re-initialization iterations.
            use_fast_sampling (`bool`, *optional*, defaults to `False`):
                Whether or not to speedup sampling procedure at the cost of probably lower quality results. Enables the
                "Coarse-to-Fine Sampling" strategy, as mentioned in the paper, if set to `True`.
            method (`str`, *optional*, defaults to `butterworth`):
                Must be one of `butterworth`, `ideal` or `gaussian` to use as the filtering method for the FreeInit low
                pass filter.
            order (`int`, *optional*, defaults to `4`):
                Order of the filter used in `butterworth` method. Larger values lead to `ideal` method behaviour
                whereas lower values lead to `gaussian` method behaviour.
            spatial_stop_frequency (`float`, *optional*, defaults to `0.25`):
                Normalized stop frequency for spatial dimensions. Must be between 0 to 1. Referred to as `d_s` in the
                original implementation.
            temporal_stop_frequency (`float`, *optional*, defaults to `0.25`):
                Normalized stop frequency for temporal dimensions. Must be between 0 to 1. Referred to as `d_t` in the
                original implementation.
        N)_free_init_num_iters_free_init_use_fast_sampling_free_init_method_free_init_order!_free_init_spatial_stop_frequency"_free_init_temporal_stop_frequency)selfr   r   r   r   r   r    r   a/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/diffusers/pipelines/free_init_utils.pyenable_free_init   s    
zFreeInitMixin.enable_free_initc                 C   s
   d| _ dS )z+Disables the FreeInit mechanism if enabled.N)r   r   r   r   r   disable_free_initB   s   
zFreeInitMixin.disable_free_initc                 C   s   t | do	| jd uS )Nr   )hasattrr   r   r   r   r   free_init_enabledF   s   zFreeInitMixin.free_init_enabledshape.devicefilter_typereturnc              	      s  |d |d |d }}}	t |}
dks|dkr|
S |dkr) fdd}n|dkr4fd	d}n|d
kr?fdd}ntdt|D ]<}t|D ]5}t|	D ].}| d| | d  d d| | d d  d| |	 d d  }|||
d|||f< qSqMqG|
|S )zLReturns the FreeInit filter based on filter type and other input conditions.r   r   c                    s   dd| d      S )N   r   r   xr   r   r   r   retrieve_mask]   s   z?FreeInitMixin._get_free_init_freq_filter.<locals>.retrieve_maskgaussianc                    s   t dd d   |  S )Nr%   r   )mathexpr'   r   r   r   r*   a   s   idealc                    s   |  d krdS dS )Nr   r&   r   r   r'   r.   r   r   r*   e   s   z;`filter_type` must be one of gaussian, butterworth or idealr   r&   .)torchzerosNotImplementedErrorrangeto)r   r   r    r!   r   r   r   timeheightwidthmaskr*   thwd_squarer   r)   r   _get_free_init_freq_filterJ   s0   

	z(FreeInitMixin._get_free_init_freq_filterr(   noiselow_pass_filterc                 C   sz   t j|dd}t j|dd}t j|dd}t j|dd}d| }|| }|| }|| }	t j|	dd}	t j|	ddj}
|
S )zNoise reinitialization.)r#   r$   r%   )dimr&   )fftfftnfftshift	ifftshiftifftnreal)r   r(   r>   r?   x_freq
noise_freqhigh_pass_filter
x_freq_lownoise_freq_highx_freq_mixedx_mixedr   r   r   _apply_freq_filterv   s   z FreeInitMixin._apply_freq_filterlatentsfree_init_iterationnum_inference_stepsdtype	generatorc                 C   s
  |dkr|   | _nV|j}dg|dd  R }| j||| j| j| j| jd}	| j	j
jd }
t|d f|
 }| j	j|| j||djtjd}t|||tjd}| j|||	d}||}| jrstdt|| j |d  }|dkr| j	j||d || j	jfS )	Nr   r&   )r   r    r!   r   r   r   )original_samplesr>   	timesteps)rR   )r   rS   r    rR   )r?   )r    )detachclone_free_init_initial_noiser   r=   r   r   r   r   	schedulerconfignum_train_timestepsr0   fulllong	add_noiser4   float32r   rN   r   maxintr   set_timestepsrU   )r   rO   rP   rQ   r    rR   rS   latent_shapefree_init_filter_shapefree_init_freq_filtercurrent_diffuse_timestepdiffuse_timestepsz_tz_randr   r   r   _apply_free_init   sD   		
zFreeInitMixin._apply_free_initN)r   Fr   r	   r
   r
   )__name__
__module____qualname____doc__ra   boolstrfloatr   r   propertyr   r   r   r0   rR   Tensorr=   rN   r    	Generatorrj   r   r   r   r   r      sj    
'


",r   )
r,   typingr   r   r0   	torch.fftrA   utils.torch_utilsr   r   r   r   r   r   <module>   s   