o
    پi                      @   sZ  d Z ddlmZmZ ddlZddlmZ z
ddlm	Z	 dZ
W n ey)   dZ
Y nw zddlmZmZ dZW n eyA   dZY nw eed	Zdad+defddZd+defddZdd Zd,ddZ			d-dejdedeej deej dedejfddZ			d-dejdee deej deej dedejfddZ		d.dejdee deej defd d	Z		d.dejdee deej dedejf
d!d"Z		d.dejdee deej defd#d$Z		d.dejdee deej dedejf
d%d&Z		d.dejdee deej defd'd(Z 		d.dejdee deej dedejf
d)d*Z!dS )/a   'Fast' Normalization Functions

For GroupNorm and LayerNorm these functions bypass typical AMP upcast to float32.

Additionally, for LayerNorm, the APEX fused LN is used if available (which also does not upcast)

Hacked together by / Copyright 2022 Ross Wightman
    )ListOptionalN)
functional)fused_layer_norm_affineTF)fused_rms_norm_affinefused_rms_normrms_normcudadevicec              	   C   sL   zt | W S  ttfy%   | dkrt   Y S | dksJ t   Y S w Ncpur	   )torchget_autocast_dtypeAttributeError	TypeErrorget_autocast_cpu_dtypeget_autocast_gpu_dtyper
    r   I/home/ubuntu/.local/lib/python3.10/site-packages/timm/layers/fast_norm.pyr   !   s   r   c                 C   sH   zt | W S  ty#   | dkrt   Y S | dksJ t   Y S w r   )r   is_autocast_enabledr   is_autocast_cpu_enabledr   r   r   r   r   -   s   r   c                   C   s   t S N_USE_FAST_NORMr   r   r   r   is_fast_norm9   s   r   c                 C   s   | a d S r   r   )enabler   r   r   set_fast_norm=   s   r   h㈵>x
num_groupsweightbiasepsreturnc                 C   s   t j rt| ||||S t| jjr1t| jj}| 	||	||d ur+|	|nd } }}t j
j| jjdd t| ||||W  d    S 1 sOw   Y  d S NFdevice_typeenabled)r   jitis_scriptingF
group_normr   r
   typer   toampautocast)r   r    r!   r"   r#   dtr   r   r   fast_group_normB   s   
.$r2   normalized_shapec                 C   s   t j rt| ||||S trt| ||||S t| jj	r;t
| jj	}| ||||d ur5||nd } }}t jj| jj	dd t| ||||W  d    S 1 sYw   Y  d S r%   )r   r)   r*   r+   
layer_normhas_apexr   r   r
   r-   r   r.   r/   r0   )r   r3   r!   r"   r#   r1   r   r   r   fast_layer_normW   s   
.$r6   c                 C   s   t |}| d}tj r|dksJ tj|ddd}nttd| d d}tj||dd}| t	||  } |d urC| | } | S )N      dimTr;   keepdim)
lenpowr   r)   r*   mean	unsqueezetuplerangersqrtr   r3   r!   r#   	norm_ndimvdimsr   r   r   r   o   s   

c                 C   s   t j rt| |||S tr|d u rt| ||S t| |||S t| jj	r6t
| jj	}| |||} }t jj| jj	dd# trLt| |||} nt| |||} W d    | S W d    | S 1 sfw   Y  | S r%   )r   r)   r*   r   has_apex_rmsnormr   r   r   r
   r-   r   r.   r/   r0   has_torch_rms_normr+   r   r3   r!   r#   r1   r   r   r   fast_rms_norm   s(   


rL   c                 C   s\   t |dksJ | d}tj|ddd}| t||  } |d ur,| |dddd } | S )Nr8   r7   Tr<   r9   )r>   r?   r   r@   rD   reshape)r   r3   r!   r#   rG   r   r   r   
rms_norm2d   s   
rN   c                 C   s   t j rt| |||S tr0| dddd} |d u r!t| ||} nt| |||} | dddd} t| j	j
rGt| j	j
}| |||} }t jj| j	j
dd t| |||} W d    | S 1 sdw   Y  | S )Nr   r7      r8   Fr&   )r   r)   r*   rN   rI   permuter   r   r   r
   r-   r   r.   r/   r0   rK   r   r   r   fast_rms_norm2d   s"   

rQ   c                 C   s   t |}tj r|dksJ tj| ddd}nttd| d d}tj| |dd}| t||  } |d ur>| | } | S )Nr8   r9   r:   Tr<   )	r>   r   r)   r*   varrA   rB   rC   rD   rE   r   r   r   simple_norm   s   
rS   c                 C   s   t j rt| |||S t| jjr#t| jj}| |||} }t j	j
| jjdd t| |||} W d    | S 1 s@w   Y  | S r%   )r   r)   r*   rS   r   r
   r-   r   r.   r/   r0   rK   r   r   r   fast_simple_norm   s   

rT   )r	   )T)NNr   )Nr   )"__doc__typingr   r   r   torch.nnr   r+   #apex.normalization.fused_layer_normr   r5   ImportErrorr   r   rI   hasattrrJ   r   strr   r   r   r   Tensorintfloatr2   r6   r   rL   rN   rQ   rS   rT   r   r   r   r   <module>   s    





"

!
