o
    ÓÙ¾i&6  ã                	   @   sŠ  d Z ddlmZmZ ddlZddlmZ ddlm  mZ	 ddl
mZ ddlmZ d/defd	d
„Zd/defdd„Zd/defdd„Zd0deeee f defdd„Zd1dededefdd„Zd2dedededefdd„Zd3dedefdd„ZG dd„ dejƒZG dd „ d ejƒZG d!d"„ d"ejƒZG d#d$„ d$ejƒZG d%d&„ d&eƒZG d'd(„ d(ejƒZG d)d*„ d*eƒZG d+d,„ d,ejƒZ G d-d.„ d.e ƒZ!dS )4a7   EvoNorm in PyTorch

Based on `Evolving Normalization-Activation Layers` - https://arxiv.org/abs/2004.02967
@inproceedings{NEURIPS2020,
 author = {Liu, Hanxiao and Brock, Andy and Simonyan, Karen and Le, Quoc},
 booktitle = {Advances in Neural Information Processing Systems},
 editor = {H. Larochelle and M. Ranzato and R. Hadsell and M. F. Balcan and H. Lin},
 pages = {13539--13550},
 publisher = {Curran Associates, Inc.},
 title = {Evolving Normalization-Activation Layers},
 url = {https://proceedings.neurips.cc/paper/2020/file/9d4c03631b8b0c85ae08bf05eda37d0f-Paper.pdf},
 volume = {33},
 year = {2020}
}

An attempt at getting decent performing EvoNorms running in PyTorch.
While faster than other PyTorch impl, still quite a ways off the built-in BatchNorm
in terms of memory usage and throughput on GPUs.

I'm testing these modules on TPU w/ PyTorch XLA. Promising start but
currently working around some issues with builtin torch/tensor.var/std. Unlike
GPU, similar train speeds for EvoNormS variants and BatchNorm.

Hacked together by / Copyright 2020 Ross Wightman
é    )ÚSequenceÚUnionNé   )Úcreate_act_layer)Ú_assertçñhãˆµøä>Úepsc                 C   s2   |   ¡ jdddd |¡ ¡  | j¡}| | j¡S )N©é   é   FT©ÚdimÚunbiasedÚkeepdim)ÚfloatÚvarÚaddÚsqrtÚtoÚdtypeÚexpandÚshape©Úxr   Ústd© r   úH/home/ubuntu/.local/lib/python3.10/site-packages/timm/layers/evo_norm.pyÚinstance_std$   s   &r   c                 C   s"   t | dd |¡ ¡ }| | j¡S )Nr	   )r   )Ú
manual_varr   r   r   r   r   r   r   r   Úinstance_std_tpu)   s   r   c                 C   s4   |   ¡  ¡ jddd |¡ ¡  | j¡}| | j¡S )Nr	   T©r   r   )	r   ÚsquareÚmeanr   r   r   r   r   r   )r   r   Úrmsr   r   r   Úinstance_rms/   s   (r$   Fr   Údiff_sqmc                 C   sT   | j |dd}|r| |  j |dd||   d¡}|S | | | |  j |dd}|S )NTr    r   )r"   Úclamp)r   r   r%   Úxmr   r   r   r   r   4   s    ÿr   é    ÚgroupsÚflattenc           
      C   s°   | j \}}}}| j}t|| dkdƒ |r/|  ||d¡} |  ¡ jdddd |¡ ¡  |¡}	n|  |||| ||¡} |  ¡ jdddd |¡ ¡  |¡}	|	 	| j ¡ ||||¡S )	Nr   Ú éÿÿÿÿr
   FTr   ©r
   r   é   )
r   r   r   Úreshaper   r   r   r   r   r   )
r   r)   r   r*   ÚBÚCÚHÚWÚx_dtyper   r   r   r   Ú	group_std>   s   &$r5   c           
      C   sˆ   | j \}}}}t|| dkdƒ |r!|  ||d¡} t| d|d}	n|  |||| ||¡} t| d|d}	|	 |¡ ¡  | j ¡ ||||¡S )Nr   r+   r,   )r   r%   r-   )r   r   r/   r   r   r   r   )
r   r)   r   r%   r*   r0   r1   r2   r3   r   r   r   r   Úgroup_std_tpuK   s   "r6   c           	      C   sz   | j \}}}}t|| dkdƒ | j}|  |||| ||¡} |  ¡  ¡ jddd |¡ ¡  	|¡}| 
| j ¡ ||||¡S )Nr   r+   r-   Tr    )r   r   r   r/   r   r!   r"   r   Úsqrt_r   r   )	r   r)   r   r0   r1   r2   r3   r4   r#   r   r   r   Ú	group_rmsZ   s   &r8   c                       ó.   e Zd Zd
‡ fdd„	Zdd„ Zdd	„ Z‡  ZS )ÚEvoNorm2dB0Tçš™™™™™¹?çü©ñÒMbP?c                    sx   t ƒ  ¡  || _|| _|| _t t |¡¡| _	t t 
|¡¡| _|r*t t |¡¡nd | _|  dt |¡¡ |  ¡  d S ©NÚrunning_var)ÚsuperÚ__init__Ú	apply_actÚmomentumr   ÚnnÚ	ParameterÚtorchÚonesÚweightÚzerosÚbiasÚvÚregister_bufferÚreset_parameters©ÚselfÚnum_featuresrA   rB   r   Ú_©Ú	__class__r   r   r@   d   s   
zEvoNorm2dB0.__init__c                 C   ó<   t j | j¡ t j | j¡ | jd urt j | j¡ d S d S ©N©rC   ÚinitÚones_rG   Úzeros_rI   rJ   ©rN   r   r   r   rL   o   ó
   
ÿzEvoNorm2dB0.reset_parametersc           	      C   s  t | ¡ dkdƒ |j}d}| jd urp| jrB| ¡ jddd}| ¡ |jd  }| j	 
| j	d| j  | ¡ | j ||d    ¡ n| j	}| | j¡ ¡  |¡ |¡ |¡}| j |¡ |¡}|| t|| jƒ }|| |¡ }|| j |¡ |¡ | j |¡ |¡ S ©Nr.   úexpected 4D input©r   r,   r   r   )r   r
   r   F)r   r   r   )r   r   r   rJ   Útrainingr   r   Únumelr   r>   Úcopy_rB   Údetachr   r   r7   r   ÚviewÚ	expand_asr   ÚmaxrG   rI   )	rN   r   r4   Úv_shaper   ÚnÚleftrJ   Úrightr   r   r   Úforwardu   s$   
ÿÿ"(zEvoNorm2dB0.forward)Tr;   r<   ©Ú__name__Ú
__module__Ú__qualname__r@   rL   ri   Ú__classcell__r   r   rQ   r   r:   c   s    r:   c                       r9   )ÚEvoNorm2dB1Tr;   r   c                    ó^   t ƒ  ¡  || _|| _|| _t t |¡¡| _	t t 
|¡¡| _|  dt |¡¡ |  ¡  d S r=   ©r?   r@   rA   rB   r   rC   rD   rE   rF   rG   rH   rI   rK   rL   rM   rQ   r   r   r@   ‹   ó   
zEvoNorm2dB1.__init__c                 C   ó    t j | j¡ t j | j¡ d S rT   ©rC   rV   rW   rG   rX   rI   rY   r   r   r   rL   •   ó   zEvoNorm2dB1.reset_parametersc                 C   sú   t | ¡ dkdƒ |j}d}| jri| jrE| ¡ jddd}| ¡ |jd  }| j	 
| j	d| j  | ¡  | j	j¡| j ||d    ¡ n| j	}| |¡ |¡}| | j¡ ¡ }|d t|| jƒ }|| |¡ }|| j |¡ |¡ | j |¡ |¡ S r[   ©r   r   r   rA   r^   r   r   r_   r   r>   r`   rB   ra   r   rb   r   r   r7   r$   rd   rG   rI   ©rN   r   r4   re   r   rf   rg   rh   r   r   r   ri   ™   s$   "ÿÿ(zEvoNorm2dB1.forward©Tr;   r   rj   r   r   rQ   r   ro   Š   ó    
ro   c                       r9   )ÚEvoNorm2dB2Tr;   r   c                    rp   r=   rq   rM   rQ   r   r   r@   ®   rr   zEvoNorm2dB2.__init__c                 C   rs   rT   rt   rY   r   r   r   rL   ¸   ru   zEvoNorm2dB2.reset_parametersc                 C   sö   t | ¡ dkdƒ |j}d}| jrg| jrE| ¡ jddd}| ¡ |jd  }| j	 
| j	d| j  | ¡  | j	j¡| j ||d    ¡ n| j	}| |¡ |¡}| | j¡ ¡ }t|| jƒ| }|| |¡ }|| j |¡ |¡ | j |¡ |¡ S r[   rv   rw   r   r   r   ri   ¼   s$   "ÿÿ(zEvoNorm2dB2.forwardrx   rj   r   r   rQ   r   rz   ­   ry   rz   c                       s.   e Zd Zd‡ fdd„	Zdd„ Zd	d
„ Z‡  ZS )ÚEvoNorm2dS0r(   NTr   c                    s†   t ƒ  ¡  || _|r|| dksJ ‚|| | _n|| _|| _t t |¡¡| _	t t 
|¡¡| _|r:t t |¡¡nd | _|  ¡  d S ©Nr   )r?   r@   rA   r)   r   rC   rD   rE   rF   rG   rH   rI   rJ   rL   ©rN   rO   r)   Ú
group_sizerA   r   rP   rQ   r   r   r@   Ñ   s   
zEvoNorm2dS0.__init__c                 C   rS   rT   rU   rY   r   r   r   rL   ß   rZ   zEvoNorm2dS0.reset_parametersc                 C   s€   t | ¡ dkdƒ |j}d}| jd ur,| j |¡ |¡}|||  ¡  t|| j| j	ƒ }|| j
 |¡ |¡ | j |¡ |¡ S ©Nr.   r\   r]   )r   r   r   rJ   rb   r   Úsigmoidr5   r)   r   rG   rI   )rN   r   r4   re   rJ   r   r   r   ri   å   s   
 (zEvoNorm2dS0.forward)r(   NTr   rj   r   r   rQ   r   r{   Ð   s    r{   c                       s&   e Zd Zd	‡ fdd„	Zdd„ Z‡  ZS )
ÚEvoNorm2dS0ar(   NTr<   c                    s   t ƒ j|||||d d S )N)r)   r~   rA   r   ©r?   r@   r}   rQ   r   r   r@   ð   s   

ÿzEvoNorm2dS0a.__init__c                 C   sˆ   t | ¡ dkdƒ |j}d}t|| j| jƒ}| jd ur,| j |¡ |¡}|||  	¡  }|| }|| j
 |¡ |¡ | j |¡ |¡ S r   )r   r   r   r5   r)   r   rJ   rb   r   r€   rG   rI   )rN   r   r4   re   ÚdrJ   r   r   r   ri   ô   s   
(zEvoNorm2dS0a.forward)r(   NTr<   ©rk   rl   rm   r@   ri   rn   r   r   rQ   r   r   ï   s    r   c                       ó2   e Zd Z		d‡ fdd„	Zdd„ Zd	d
„ Z‡  ZS )ÚEvoNorm2dS1r(   NTr   c                    sž   t ƒ  ¡  |p	tj}|| _|d ur|rt|ƒ| _nt ¡ | _|r.|| dks(J ‚|| | _n|| _|| _	d| _
t t |¡¡| _t t |¡¡| _|  ¡  d S )Nr   F)r?   r@   rC   ÚSiLUrA   r   ÚactÚIdentityr)   r   Úpre_act_normrD   rE   rF   rG   rH   rI   rL   ©rN   rO   r)   r~   rA   Ú	act_layerr   rP   rQ   r   r   r@     s   


zEvoNorm2dS1.__init__c                 C   rs   rT   rt   rY   r   r   r   rL     ru   zEvoNorm2dS1.reset_parametersc                 C   ód   t | ¡ dkdƒ |j}d}| jr|  |¡t|| j| jƒ }|| j 	|¡ 
|¡ | j 	|¡ 
|¡ S r   )r   r   r   rA   rˆ   r5   r)   r   rG   rb   r   rI   ©rN   r   r4   re   r   r   r   ri     ó   (zEvoNorm2dS1.forward©r(   NTNr   rj   r   r   rQ   r   r†      s    þr†   c                       ó*   e Zd Z		d	‡ fdd„	Zdd„ Z‡  ZS )
ÚEvoNorm2dS1ar(   NTr<   c                    ó   t ƒ j||||||d d S ©N)r)   r~   rA   rŒ   r   r‚   r‹   rQ   r   r   r@   $  ó   
ÿzEvoNorm2dS1a.__init__c                 C   ó^   t | ¡ dkdƒ |j}d}|  |¡t|| j| jƒ }|| j |¡ 	|¡ | j
 |¡ 	|¡ S r   )r   r   r   rˆ   r5   r)   r   rG   rb   r   rI   rŽ   r   r   r   ri   *  ó
   (zEvoNorm2dS1a.forward©r(   NTNr<   r„   r   r   rQ   r   r’   #  ó
    þr’   c                       r…   )ÚEvoNorm2dS2r(   NTr   c                    s˜   t ƒ  ¡  |p	tj}|| _|d ur|rt|ƒ| _nt ¡ | _|r.|| dks(J ‚|| | _n|| _|| _	t 
t |¡¡| _t 
t |¡¡| _|  ¡  d S r|   )r?   r@   rC   r‡   rA   r   rˆ   r‰   r)   r   rD   rE   rF   rG   rH   rI   rL   r‹   rQ   r   r   r@   3  s   


zEvoNorm2dS2.__init__c                 C   rs   rT   rt   rY   r   r   r   rL   G  ru   zEvoNorm2dS2.reset_parametersc                 C   r   r   )r   r   r   rA   rˆ   r8   r)   r   rG   rb   r   rI   rŽ   r   r   r   ri   K  r   zEvoNorm2dS2.forwardr   rj   r   r   rQ   r   rš   2  s    þrš   c                       r‘   )
ÚEvoNorm2dS2ar(   NTr<   c                    r“   r”   r‚   r‹   rQ   r   r   r@   U  r•   zEvoNorm2dS2a.__init__c                 C   r–   r   )r   r   r   rˆ   r8   r)   r   rG   rb   r   rI   rŽ   r   r   r   ri   [  r—   zEvoNorm2dS2a.forwardr˜   r„   r   r   rQ   r   r›   T  r™   r›   )r   )F)r(   r   F)r(   r   FF)r(   r   )"Ú__doc__Útypingr   r   rE   Útorch.nnrC   Útorch.nn.functionalÚ
functionalÚFÚ
create_actr   Útrace_utilsr   r   r   r   r$   ÚintÚboolr   r5   r6   r8   ÚModuler:   ro   rz   r{   r   r†   r’   rš   r›   r   r   r   r   Ú<module>   s.     
	'###"