"""
PyTorch utilities: Utilities related to PyTorch
"""

from __future__ import annotations

import functools
import os

from . import logging
from .import_utils import is_torch_available, is_torch_mlu_available, is_torch_npu_available, is_torch_version


if is_torch_available():
    import torch
    from torch.fft import fftn, fftshift, ifftn, ifftshift

    # Per-backend dispatch tables keyed by device type string ("cuda", "xpu", "cpu", "mps", "default").
    BACKEND_SUPPORTS_TRAINING = {"cuda": True, "xpu": True, "cpu": True, "mps": False, "default": True}
    BACKEND_EMPTY_CACHE = {
        "cuda": torch.cuda.empty_cache, "xpu": torch.xpu.empty_cache,
        "cpu": None, "mps": torch.mps.empty_cache, "default": None,
    }
    BACKEND_DEVICE_COUNT = {
        "cuda": torch.cuda.device_count, "xpu": torch.xpu.device_count,
        "cpu": lambda: 0, "mps": lambda: 0, "default": 0,
    }
    BACKEND_MANUAL_SEED = {
        "cuda": torch.cuda.manual_seed, "xpu": torch.xpu.manual_seed,
        "cpu": torch.manual_seed, "mps": torch.mps.manual_seed, "default": torch.manual_seed,
    }
    BACKEND_RESET_PEAK_MEMORY_STATS = {
        "cuda": torch.cuda.reset_peak_memory_stats, "xpu": getattr(torch.xpu, "reset_peak_memory_stats", None),
        "cpu": None, "mps": None, "default": None,
    }
    BACKEND_RESET_MAX_MEMORY_ALLOCATED = {
        "cuda": torch.cuda.reset_max_memory_allocated, "xpu": getattr(torch.xpu, "reset_peak_memory_stats", None),
        "cpu": None, "mps": None, "default": None,
    }
    BACKEND_MAX_MEMORY_ALLOCATED = {
        "cuda": torch.cuda.max_memory_allocated, "xpu": getattr(torch.xpu, "max_memory_allocated", None),
        "cpu": None, "mps": None, "default": None,
    }
    BACKEND_SYNCHRONIZE = {
        "cuda": torch.cuda.synchronize, "xpu": getattr(torch.xpu, "synchronize", None),
        "cpu": None, "mps": None, "default": None,
    }


logger = logging.get_logger(__name__)

try:
    from torch._dynamo import allow_in_graph as maybe_allow_in_graph
except (ImportError, ModuleNotFoundError):

    def maybe_allow_in_graph(cls):
        return cls


def _device_agnostic_dispatch(device: str, dispatch_table: dict[str, callable], *args, **kwargs):
    # Fall back to the "default" entry for unknown device strings.
    if device not in dispatch_table:
        return dispatch_table["default"](*args, **kwargs)

    fn = dispatch_table[device]

    # Some entries are plain values (e.g. `0` or `None`) rather than callables; return them as-is.
    if not callable(fn):
        return fn

    return fn(*args, **kwargs)


# Thin wrappers that dispatch to the backend-specific implementation for the given device string.
def backend_manual_seed(device: str, seed: int):
    return _device_agnostic_dispatch(device, BACKEND_MANUAL_SEED, seed)


def backend_synchronize(device: str):
    return _device_agnostic_dispatch(device, BACKEND_SYNCHRONIZE)


def backend_empty_cache(device: str):
    return _device_agnostic_dispatch(device, BACKEND_EMPTY_CACHE)


def backend_device_count(device: str):
    return _device_agnostic_dispatch(device, BACKEND_DEVICE_COUNT)


def backend_reset_peak_memory_stats(device: str):
    return _device_agnostic_dispatch(device, BACKEND_RESET_PEAK_MEMORY_STATS)


def backend_reset_max_memory_allocated(device: str):
    return _device_agnostic_dispatch(device, BACKEND_RESET_MAX_MEMORY_ALLOCATED)


def backend_max_memory_allocated(device: str):
    return _device_agnostic_dispatch(device, BACKEND_MAX_MEMORY_ALLOCATED)


def backend_supports_training(device: str):
    if not is_torch_available():
        return False

    if device not in BACKEND_SUPPORTS_TRAINING:
        device = "default"

    return BACKEND_SUPPORTS_TRAINING[device]


def randn_tensor(
    shape: tuple | list,
    generator: list["torch.Generator"] | "torch.Generator" | None = None,
    device: str | "torch.device" | None = None,
    dtype: "torch.dtype" | None = None,
    layout: "torch.layout" | None = None,
):
    """A helper function to create random tensors on the desired `device` with the desired `dtype`. When
    passing a list of generators, you can seed each batch size individually. If CPU generators are passed, the tensor
    is always created on the CPU.
    """
    # device on which the tensor is created defaults to the requested device
    rand_device = device
    batch_size = shape[0]

    layout = layout or torch.strided
    device = device or torch.device("cpu")

    if generator is not None:
        gen_device_type = generator.device.type if not isinstance(generator, list) else generator[0].device.type
        if gen_device_type != device.type and gen_device_type == "cpu":
            rand_device = "cpu"
            if device != "mps":
                logger.info(
                    f"The passed generator was created on 'cpu' even though a tensor on {device} was expected."
                    f" Tensors will be created on 'cpu' and then moved to {device}. Note that one can probably"
                    f" slightly speed up this function by passing a generator that was created on the {device} device."
                )
        elif gen_device_type != device.type and gen_device_type == "cuda":
            raise ValueError(f"Cannot generate a {device} tensor from a generator of type {gen_device_type}.")

    # make sure a generator list of length 1 is treated like a non-list
    if isinstance(generator, list) and len(generator) == 1:
        generator = generator[0]

    if isinstance(generator, list):
        shape = (1,) + shape[1:]
        latents = [
            torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype, layout=layout)
            for i in range(batch_size)
        ]
        latents = torch.cat(latents, dim=0).to(device)
    else:
        latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype, layout=layout).to(device)

    return latents
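# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module): how `randn_tensor` and the
# device-agnostic seeding helper above are typically combined. The batch size
# of 4, the per-sample seeds, and the latent shape are illustrative assumptions.
def _example_seeded_latents():
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Seed the global RNG for the chosen backend via the dispatch table.
    backend_manual_seed(device, 0)
    # One CPU generator per sample so each batch element is independently reproducible.
    generators = [torch.Generator(device="cpu").manual_seed(i) for i in range(4)]
    return randn_tensor((4, 4, 64, 64), generator=generators, device=torch.device(device))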
def is_compiled_module(module) -> bool:
    """Check whether the module was compiled with torch.compile()"""
    if is_torch_version("<", "2.0.0") or not hasattr(torch, "_dynamo"):
        return False
    return isinstance(module, torch._dynamo.eval_frame.OptimizedModule)


def unwrap_module(module):
    """Unwraps a module if it was compiled with torch.compile()"""
    return module._orig_mod if is_compiled_module(module) else module


def fourier_filter(x_in: "torch.Tensor", threshold: int, scale: int) -> "torch.Tensor":
    """Fourier filter as introduced in FreeU (https://huggingface.co/papers/2309.11497).

    This version of the method comes from here:
    https://github.com/huggingface/diffusers/pull/5164#issuecomment-1732638706
    """
    x = x_in
    B, C, H, W = x.shape

    # Non-power of 2 images must be float32
    if (W & (W - 1)) != 0 or (H & (H - 1)) != 0:
        x = x.to(dtype=torch.float32)
    # fftn does not support bfloat16
    elif x.dtype == torch.bfloat16:
        x = x.to(dtype=torch.float32)

    # FFT
    x_freq = fftn(x, dim=(-2, -1))
    x_freq = fftshift(x_freq, dim=(-2, -1))

    B, C, H, W = x_freq.shape
    mask = torch.ones((B, C, H, W), device=x.device)

    crow, ccol = H // 2, W // 2
    mask[..., crow - threshold : crow + threshold, ccol - threshold : ccol + threshold] = scale
    x_freq = x_freq * mask

    # IFFT
    x_freq = ifftshift(x_freq, dim=(-2, -1))
    x_filtered = ifftn(x_freq, dim=(-2, -1)).real

    return x_filtered.to(dtype=x_in.dtype)
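# Usage sketch (not part of the original module): attenuating the low-frequency
# band of a skip-connection feature map, as FreeU does. The feature-map shape and
# the 0.9 scale are illustrative assumptions.
def _example_fourier_filter():
    skip_features = torch.randn(1, 320, 64, 64)
    # The (2*threshold)^2 window around the spectrum centre is multiplied by 0.9;
    # all other frequencies pass through unchanged.
    return fourier_filter(skip_features, threshold=1, scale=0.9)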
def apply_freeu(
    resolution_idx: int, hidden_states: "torch.Tensor", res_hidden_states: "torch.Tensor", **freeu_kwargs
) -> tuple["torch.Tensor", "torch.Tensor"]:
    """Applies the FreeU mechanism as introduced in https://arxiv.org/abs/2309.11497. Adapted from the official
    code repository: https://github.com/ChenyangSi/FreeU.

    Args:
        resolution_idx (`int`): Integer denoting the UNet block where FreeU is being applied.
        hidden_states (`torch.Tensor`): Inputs to the underlying block.
        res_hidden_states (`torch.Tensor`): Features from the skip block corresponding to the underlying block.
        s1 (`float`): Scaling factor for stage 1 to attenuate the contributions of the skip features.
        s2 (`float`): Scaling factor for stage 2 to attenuate the contributions of the skip features.
        b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
        b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
    """
    if resolution_idx == 0:
        num_half_channels = hidden_states.shape[1] // 2
        hidden_states[:, :num_half_channels] = hidden_states[:, :num_half_channels] * freeu_kwargs["b1"]
        res_hidden_states = fourier_filter(res_hidden_states, threshold=1, scale=freeu_kwargs["s1"])
    if resolution_idx == 1:
        num_half_channels = hidden_states.shape[1] // 2
        hidden_states[:, :num_half_channels] = hidden_states[:, :num_half_channels] * freeu_kwargs["b2"]
        res_hidden_states = fourier_filter(res_hidden_states, threshold=1, scale=freeu_kwargs["s2"])

    return hidden_states, res_hidden_states
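# Usage sketch (not part of the original module): how an upsampling block might
# call `apply_freeu` on its backbone/skip features. The scaling factors below are
# the values the FreeU authors suggest for Stable Diffusion 1.5 and are used here
# purely as an illustrative assumption, not values defined in this module.
def _example_apply_freeu(hidden_states, res_hidden_states):
    return apply_freeu(
        0,  # resolution_idx of the first up block
        hidden_states,
        res_hidden_states,
        s1=0.9,
        s2=0.2,
        b1=1.5,
        b2=1.6,
    )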
def get_torch_cuda_device_capability():
    if torch.cuda.is_available():
        device = torch.device("cuda")
        compute_capability = torch.cuda.get_device_capability(device)
        compute_capability = f"{compute_capability[0]}.{compute_capability[1]}"
        return float(compute_capability)
    else:
        return None
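# Usage sketch (not part of the original module): choosing a dtype from the CUDA
# compute capability. The 8.0 threshold (Ampere, the first generation with full
# bfloat16 support) is an assumption made for this example.
def _example_pick_dtype():
    capability = get_torch_cuda_device_capability()
    if capability is not None and capability >= 8.0:
        return torch.bfloat16
    return torch.float32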
@functools.lru_cache
def get_device():
    if torch.cuda.is_available():
        return "cuda"
    elif is_torch_npu_available():
        return "npu"
    elif hasattr(torch, "xpu") and torch.xpu.is_available():
        return "xpu"
    elif torch.backends.mps.is_available():
        return "mps"
    elif is_torch_mlu_available():
        return "mlu"
    else:
        return "cpu"
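# Usage sketch (not part of the original module): `get_device` returns a plain
# device-type string, so it can be passed directly to `.to()` to place a model on
# the best available accelerator. `model` is a hypothetical `torch.nn.Module`.
def _example_move_to_accelerator(model):
    return model.to(get_device())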
def empty_device_cache(device_type: str | None = None):
    if device_type is None:
        device_type = get_device()
    if device_type in ("cpu",):
        return
    device_mod = getattr(torch, device_type, torch.cuda)
    device_mod.empty_cache()


def device_synchronize(device_type: str | None = None):
    if device_type is None:
        device_type = get_device()
    device_mod = getattr(torch, device_type, torch.cuda)
    device_mod.synchronize()
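# Usage sketch (not part of the original module): accurate wall-clock timing of an
# accelerator workload needs a synchronize before reading the clock; emptying the
# cache afterwards returns unused blocks to the allocator. `run_pipeline` is a
# hypothetical callable supplied by the caller.
def _example_timed_run(run_pipeline):
    import time

    device_synchronize()
    start = time.perf_counter()
    result = run_pipeline()
    device_synchronize()
    elapsed = time.perf_counter() - start
    empty_device_cache()
    return result, elapsed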
def enable_full_determinism():
    """
    Helper function for reproducible behavior during distributed training. See
    - https://pytorch.org/docs/stable/notes/randomness.html for pytorch
    """
    # Enabling PyTorch deterministic mode may require either 'CUDA_LAUNCH_BLOCKING'
    # or 'CUBLAS_WORKSPACE_CONFIG' to be set, depending on the CUDA version, so set both.
    os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"
    torch.use_deterministic_algorithms(True)

    # Enable cuDNN deterministic mode and disable TF32 matmuls.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cuda.matmul.allow_tf32 = False


def disable_full_determinism():
    os.environ["CUDA_LAUNCH_BLOCKING"] = "0"
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ""
    torch.use_deterministic_algorithms(False)


if is_torch_available():
    torch_device = get_device()