o
    پi=                     @   s   d Z ddlZddlmZ ddlm  mZ ddlmZ 						dd	e	d
e
de	dededefddZ					ddejd	e	d
e
de	dedefddZG dd dejZd d	e	dedefddZG dd dejZdS )!a>   DropBlock, DropPath

PyTorch implementations of DropBlock and DropPath (Stochastic Depth) regularization layers.

Papers:
DropBlock: A regularization method for convolutional networks (https://arxiv.org/abs/1810.12890)

Deep Networks with Stochastic Depth (https://arxiv.org/abs/1603.09382)

Code:
DropBlock impl inspired by two Tensorflow impl that I liked:
 - https://github.com/tensorflow/tpu/blob/master/models/official/resnet/resnet_model.py#L74
 - https://github.com/clovaai/assembled-cnn/blob/master/nets/blocks.py

Hacked together by / Copyright 2020 Ross Wightman
    N   )ndgrid皙?         ?F	drop_prob
block_sizegamma_scale
with_noiseinplace	batchwisec                 C   s  | j \}}}	}
|
|	 }t|t|
|	}|| | |d  |
| d |	| d   }ttj|
| jdtj|	| jd\}}||d k||
|d d  k @ ||d k||	|d d  k @ @ }t|dd|	|
fj| jd}|r|tj	d||	|
f| j| jd}nt
| }d| | | dkj| jd}tj| |d|d d }|r|rtjd||	|
f| j| jdnt| }|r| ||d|   | S | | |d|   } | S | |jtjd d | j}|r| ||  | S | | | } | S )a	   DropBlock. See https://arxiv.org/pdf/1810.12890.pdf

    DropBlock with an experimental gaussian noise option. This layer has been tested on a few training
    runs with success, but needs further validation and possibly optimization for lower runtime impact.
       r   )devicedtype)r   r   kernel_sizestridepaddinggHz>)shapeminr   torcharanger   reshapetor   rand	rand_likeF
max_pool2drandn
randn_likemul_add_numelfloat32sumadd)xr   r   r	   r
   r   r   BCHW
total_sizeclipped_block_sizegammaw_ih_ivalid_blockuniform_noise
block_masknormal_noisenormalize_scale r6   D/home/ubuntu/.local/lib/python3.10/site-packages/timm/layers/drop.pydrop_block_2d   sD   &
*	(r8   r'   c                 C   s"  | j \}}}}	|	| }
t|t|	|}|| |
 |d  |	| d || d   }t| |}tj|| j|d|d d}|ret| 	 }|rY| 
d| ||  | S | d|  ||  } | S d| }| |jtjd d j| jd}|r| 
||  | S | | | } | S )z DropBlock. See https://arxiv.org/pdf/1810.12890.pdf

    DropBlock with an experimental gaussian noise option. Simplied from above without concern for valid
    block mask at edges.
    r   r   r   r   r   gư>)r   r   r   
empty_like
bernoulli_r   r   r   r   normal_r!   r"   r#   r$   r%   r&   )r'   r   r   r	   r
   r   r(   r)   r*   r+   r,   r-   r.   r3   r4   r5   r6   r6   r7   drop_block_fast_2dN   s0   
*r<   c                       sV   e Zd ZdZ							ddeded	ed
edededef fddZdd Z  Z	S )DropBlock2dz9 DropBlock. See https://arxiv.org/pdf/1810.12890.pdf
    r   r   r   FTr   r   r	   r
   r   r   fastc                    s<   t t|   || _|| _|| _|| _|| _|| _|| _	d S N)
superr=   __init__r   r	   r   r
   r   r   r>   )selfr   r   r	   r
   r   r   r>   	__class__r6   r7   rA   y   s   	
zDropBlock2d.__init__c                 C   sR   | j r| js|S | jrt|| j| j| j| j| jS t|| j| j| j| j| j| j	S r?   )
trainingr   r>   r<   r   r	   r
   r   r8   r   rB   r'   r6   r6   r7   forward   s   zDropBlock2d.forward)r   r   r   FFFT)
__name__
__module____qualname____doc__floatintboolrA   rG   __classcell__r6   r6   rC   r7   r=   u   s2    r=           TrE   scale_by_keepc                 C   s`   |dks|s| S d| }| j d fd| jd   }| ||}|dkr,|r,|| | | S )a(  Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
    'survival rate' as the argument.

    rP   r   r   )r   )r   ndim	new_emptyr:   div_)r'   r   rE   rQ   	keep_probr   random_tensorr6   r6   r7   	drop_path   s   

rW   c                       s<   e Zd ZdZddedef fddZdd	 Zd
d Z  Z	S )DropPathz^Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).
    rP   Tr   rQ   c                    s   t t|   || _|| _d S r?   )r@   rX   rA   r   rQ   )rB   r   rQ   rC   r6   r7   rA      s   
zDropPath.__init__c                 C   s   t || j| j| jS r?   )rW   r   rE   rQ   rF   r6   r6   r7   rG         zDropPath.forwardc                 C   s   dt | jddS )Nz
drop_prob=   z0.3f)roundr   )rB   r6   r6   r7   
extra_repr   rY   zDropPath.extra_repr)rP   T)
rH   rI   rJ   rK   rL   rN   rA   rG   r\   rO   r6   r6   rC   r7   rX      s
    rX   )r   r   r   FFF)r   r   r   FF)rP   FT)rK   r   torch.nnnntorch.nn.functional
functionalr   gridr   rL   rM   rN   r8   Tensorr<   Moduler=   rW   rX   r6   r6   r6   r7   <module>   sZ    
8
'!