o
    ߥi3                     @   s   d dl Z d dlmZ ddedefddZG dd	 d	ejZdddZdddZ	G dd dejZ
G dd dejZG dd dejZdS )    N        F	drop_probtrainingc                 C   sd   |dks|s| S d| }| j d fd| jd   }|tj|| j| jd }|  | || }|S )a/  Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
    the original name is misleading as 'Drop Connect' is a.sh different form of dropout in a.sh separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
    changing the layer and argument names to 'drop path' rather than mix DropConnect as a.sh layer name and use
    'survival rate' as the argument.
    r      r   r   )dtypedevice)shapendimtorchrandr   r   floor_div)xr   r   	keep_probr	   random_tensoroutput r   \/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/multi_modal/ofa/resnet.py	drop_path   s   
r   c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )DropPathz^Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).
    Nc                    s   t t|   || _d S N)superr   __init__r   )selfr   	__class__r   r   r   +   s   
zDropPath.__init__c                 C   s   t || j| jS r   )r   r   r   r   r   r   r   r   forward/   s   zDropPath.forwardr   )__name__
__module____qualname____doc__r   r   __classcell__r   r   r   r   r   '   s    r   r   c              
   C   s   t j| |d|||d|dS )z3x3 convolution with padding   F)kernel_sizestridepaddinggroupsbiasdilationnnConv2d)	in_planes
out_planesr&   r(   r*   r   r   r   conv3x33   s   r0   c                 C   s   t j| |d|ddS )z1x1 convolutionr   F)r%   r&   r)   r+   )r.   r/   r&   r   r   r   conv1x1@   s   
r1   c                       s6   e Zd ZdZ						d fdd	Zdd Z  ZS )	
BasicBlockr   N@   c	           	         s   t t|   |d u rtj}|dks|dkrtd|dkr"tdt|||| _||| _	tj
dd| _t||| _||| _|| _|| _d S )Nr   r3   z3BasicBlock only supports groups=1 and base_width=64z(Dilation > 1 not supported in BasicBlockTinplace)r   r2   r   r,   BatchNorm2d
ValueErrorNotImplementedErrorr0   conv1bn1ReLUreluconv2bn2
downsampler&   )	r   inplanesplanesr&   r?   r(   
base_widthr*   
norm_layerr   r   r   r   I   s$   	


zBasicBlock.__init__c                 C   s   J r   )r9   r:   r<   r=   r>   r?   r   r   identityoutr   r   r   r   d   s   zBasicBlock.forward)r   Nr   r3   r   Nr   r    r!   	expansionr   r   r#   r   r   r   r   r2   F   s    r2   c                       s8   e Zd ZdZ							d
 fdd	Zdd	 Z  ZS )
Bottleneck   r   Nr3   r   c
                    s   t t|   |d u rtj}t||d  | }
t||
| _||
| _t	|
|
|||| _
||
| _t|
|| j | _||| j | _tjdd| _|| _|| _|	dkrZt|	| _d S t | _d S )Ng      P@Tr4   r   )r   rI   r   r,   r6   intr1   r9   r:   r0   r=   r>   rH   conv3bn3r;   r<   r?   r&   r   Identityr   )r   r@   rA   r&   r?   r(   rB   r*   rC   drop_path_ratewidthr   r   r   r      s&   



zBottleneck.__init__c                 C   s   |}|  |}| |}| |}| |}| |}| |}| |}| |}| jd ur4| |}|| | }| |}|S r   )	r9   r:   r<   r=   r>   rL   rM   r?   r   rD   r   r   r   r      s   










zBottleneck.forward)r   Nr   r3   r   Nr   rG   r   r   r   r   rI   x   s    rI   c                       sN   e Zd ZdZ						d fdd	Z			dd	d
Zdd Zdd Z  ZS )ResNeta  
    Deep residual network, copy from https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py.

    You can see more details from https://arxiv.org/abs/1512.03385

    step 1. Get image embedding with `7` as the patch image size, `2` as stride.
    step 2. Do layer normalization, relu activation and max pooling.
    step 3. Go through three times residual branch.
    Fr   r3   Nr   c           	         s  t t|   |du rtj}|| _d| _d| _|du rg d}t|dkr,t	d
||| _|| _tjd| jdddd	d
| _|| j| _tjdd| _tjdddd| _| jtd|d |d| _| jtd|d d|d |d| _| jtd|d d|d |d| _|  D ].}t|tjrtjj|jddd qt|tjtjtjfrtj |jd tj |j!d q|r|  D ]!}t|trtj |j"jd qt|t#rtj |j$jd qdS dS )a  
        Args:
            layers (`Tuple[int]`): There are three layers in resnet, so the length
                of layers should greater then three. And each element in `layers` is
                the number of `Bottleneck` in relative residual branch.
            zero_init_residual (`bool`, **optional**, default to `False`):
                Whether or not to zero-initialize the last BN in each residual branch.
            groups (`int`, **optional**, default to `1`):
                The number of groups. So far, only the value of `1` is supported.
            width_per_group (`int`, **optional**, default to `64`):
                The width in each group. So far, only the value of `64` is supported.
            replace_stride_with_dilation (`Tuple[bool]`, **optional**, default to `None`):
                Whether or not to replace stride with dilation in each residual branch.
            norm_layer (`torch.nn.Module`, **optional**, default to `None`):
                The normalization module. If `None`, will use  `torch.nn.BatchNorm2d`.
            drop_path_rate (`float`, **optional**, default to 0.0):
                Drop path rate. See more details about drop path from
                https://arxiv.org/pdf/1605.07648v4.pdf.
        Nr3   r   )FFFr$   zHreplace_stride_with_dilation should be None or a 3-element tuple, got {}      F)r%   r&   r'   r)   Tr4   )r%   r&   r'   r   )rO      )r&   dilaterO      fan_outr<   )modenonlinearity)%r   rQ   r   r,   r6   _norm_layerr@   r*   lenr7   formatr(   rB   r-   r9   r:   r;   r<   	MaxPool2dmaxpool_make_layerrI   layer1layer2layer3modules
isinstanceinitkaiming_normal_weightSyncBatchNorm	GroupNorm	constant_r)   rM   r2   r>   )	r   layerszero_init_residualr(   width_per_groupreplace_stride_with_dilationrC   rO   mr   r   r   r      sz   

zResNet.__init__c                 C   s   | j }d}| j}	|r|  j|9  _d}|dks| j||j kr2tt| j||j ||||j }g }
|
|| j|||| j| j	|	| ||j | _dd t
d||D }td|D ]}|
|| j|| j| j	| j||| d q\tj|
 S )a5  
        Making a single residual branch.

        step 1. If dilate==`True`, switch the value of dilate and stride.
        step 2. If the input dimension doesn't equal to th output output dimension
            in `block`, initialize a down sample module.
        step 3. Build a sequential of `blocks` number of `block`.

        Args:
            block (`torch.nn.Module`): The basic block in residual branch.
            planes (`int`): The output dimension of each basic block.
            blocks (`int`): The number of `block` in residual branch.
            stride (`int`, **optional**, default to `1`):
                The stride using in conv.
            dilate (`bool`, **optional**, default to `False`):
                Whether or not to replace dilate with stride.
            drop_path_rate (`float`, **optional**, default to 0.0):
                Drop path rate. See more details about drop path from
                https://arxiv.org/pdf/1605.07648v4.pdf.

        Returns:
            A sequential of basic layer with type `torch.nn.Sequential[block]`
        Nr   c                 S   s   g | ]}|  qS r   )item).0r   r   r   r   
<listcomp>C  s    z&ResNet._make_layer.<locals>.<listcomp>r   )r(   rB   r*   rC   rO   )rZ   r*   r@   rH   r,   
Sequentialr1   appendr(   rB   r   linspacerange)r   blockrA   blocksr&   rU   rO   rC   r?   previous_dilationrk   dprir   r   r   r_     s@   

zResNet._make_layerc                 C   sJ   |  |}| |}| |}| |}| |}| |}| |}|S r   )r9   r:   r<   r^   r`   ra   rb   r   r   r   r   _forward_implQ  s   






zResNet._forward_implc                 C   s
   |  |S r   )r|   r   r   r   r   r   [  s   
zResNet.forward)Fr   r3   NNr   )r   Fr   )	r   r    r!   r"   r   r_   r|   r   r#   r   r   r   r   rQ      s    Y
>
rQ   )r   F)r   r   r   r   )r   torch.nnr,   floatboolr   Moduler   r0   r1   r2   rI   rQ   r   r   r   r   <module>   s   

2;