o
    ߥi]'                     @   sD  d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlZ	d dl
Z
d dlmZ d dlm  mZ d dlmZ d dlmZ ddlmZmZ G d	d
 d
ejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZ				d ddZ G dd de
jjZ!dS )!    N)OrderedDict)reduce)mul)Image)models   )convert_weightsload_pretrainedc                       s2   e Zd ZdZd fdd	ZdejfddZ  ZS )	
Bottleneck   r   c                    s  t    tj||ddd| _t|| _tj||dddd| _t|| _|dkr/t	|nt
 | _tj||| j ddd| _t|| j | _tjdd| _d | _|| _|dksb||tj krttdt	|fd	tj||| j dddd
fdt|| j fg| _d S d S )Nr   F)bias   )paddingr   T)inplacez-10)strider   1)super__init__nnConv2dconv1BatchNorm2dbn1conv2bn2	AvgPool2dIdentityavgpool	expansionconv3bn3ReLUrelu
downsampler   r
   
Sequentialr   )selfinplanesplanesr   	__class__ e/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/cv/image_probing_model/backbone.pyr      s6   

zBottleneck.__init__xc                 C   st   |}|  | | |}|  | | |}| |}| | |}| jd ur/| |}||7 }|  |}|S N)	r#   r   r   r   r   r   r!   r    r$   )r&   r-   identityoutr+   r+   r,   forward9   s   



zBottleneck.forward)r   )	__name__
__module____qualname__r   r   torchTensorr1   __classcell__r+   r+   r)   r,   r
      s    "r
   c                	       s:   e Zd Z	d
dedededef fddZdd	 Z  ZS )AttentionPool2dNspacial_dim	embed_dim	num_heads
output_dimc                    st   t    tt|d d ||d  | _t||| _t||| _	t||| _
t||p2|| _|| _d S )N   r   g      ?)r   r   r   	Parameterr5   randnpositional_embeddingLineark_projq_projv_projc_projr;   )r&   r9   r:   r;   r<   r)   r+   r,   r   K   s   

zAttentionPool2d.__init__c              	   C   s4  | |jd |jd |jd |jd  ddd}tj|jddd|gdd}|| jd d d d d f |j }t	j
di d|d	|d
|d|jd d| jd| jjd| jjd| jjdd dt| jj| jj| jjgdd dd ddddd| jjd| jjddd| jdd\}}|d S )Nr   r   r=   r   T)dimkeepdimrF   querykeyvalueembed_dim_to_checkr;   q_proj_weightk_proj_weightv_proj_weightin_proj_weightin_proj_biasbias_kbias_vadd_zero_attnF	dropout_pout_proj_weightout_proj_biasuse_separate_proj_weighttrainingneed_weightsr+   )reshapeshapepermuter5   catmeanr@   todtypeFmulti_head_attention_forwardr;   rC   weightrB   rD   r   rE   rZ   )r&   r-   _r+   r+   r,   r1   Y   sb   
$

	
zAttentionPool2d.forwardr.   )r2   r3   r4   intr   r1   r7   r+   r+   r)   r,   r8   I   s    r8   c                       s(   e Zd ZdZdejf fddZ  ZS )	LayerNormz*Subclass torch's LayerNorm to handle fp16.r-   c                    s$   |j }t |tj}||S r.   )rb   r   r1   typer5   float32)r&   r-   	orig_typeretr)   r+   r,   r1   z   s   
zLayerNorm.forward)r2   r3   r4   __doc__r5   r6   r1   r7   r+   r+   r)   r,   rh   w   s    rh   c                   @   s   e Zd ZdejfddZdS )	QuickGELUr-   c                 C   s   |t d|  S )NgZd;?)r5   sigmoidr&   r-   r+   r+   r,   r1      s   zQuickGELU.forwardN)r2   r3   r4   r5   r6   r1   r+   r+   r+   r,   rn      s    rn   c                       sP   e Zd Z	ddededejf fddZdejfdd	Zdejfd
dZ  Z	S )ResidualAttentionBlockNd_modeln_head	attn_maskc              
      sr   t    t||| _t|| _ttdt	||d fdt
 fdt	|d |fg| _t|| _|| _d S )Nc_fcr   gelurE   )r   r   r   MultiheadAttentionattnrh   ln_1r%   r   rA   rn   mlpln_2rt   )r&   rr   rs   rt   r)   r+   r,   r      s   



zResidualAttentionBlock.__init__r-   c                 C   s>   | j d ur| j j|j|jdnd | _ | j|||d| j dd S )Nrb   deviceF)r[   rt   r   )rt   ra   rb   r}   rx   rp   r+   r+   r,   	attention   s   
z ResidualAttentionBlock.attentionc                 C   s   i }|  |}|ddd|d|< | |}|ddd|d|< || }| | |}|ddd|d|< || }||fS )Nr   r   r=   zlayer_{}_pre_attnzlayer_{}_attnzlayer_{}_mlp)ry   r^   formatr~   rz   r{   )r&   r-   idxfeaturesx_normrx   rz   r+   r+   r,   r1      s   

zResidualAttentionBlock.forwardr.   )
r2   r3   r4   rg   r5   r6   r   r~   r1   r7   r+   r+   r)   r,   rq      s    rq   c                	       sD   e Zd Z	ddedededejf fddZdejfd	d
Z  ZS )TransformerNwidthlayersheadsrt   c                    sJ   t    || _|| _t | _t|D ]}t|||}| j	| qd S r.   )
r   r   r   r   r   
ModuleList	resblocksrangerq   append)r&   r   r   r   rt   iblockr)   r+   r,   r      s   

zTransformer.__init__r-   c                 C   s8   i }t | jD ]\}}|||\}}|| q||fS r.   )	enumerater   update)r&   r-   r   r   r   block_featsr+   r+   r,   r1      s
   zTransformer.forwardr.   )	r2   r3   r4   rg   r5   r6   r   r1   r7   r+   r+   r)   r,   r      s    r   c                       sH   e Zd Zdedededededef fddZdd
ejfddZ  ZS )VisualTransformerinput_resolution
patch_sizer   r   r   r<   c                    s   t    t|||||| || _|| _tjd|||dd| _|d }t|t	
| | _t|t	
|| d d | | _t|| _t|||| _t|| _t|t	
|| | _d S )Nr   F)in_channelsout_channelskernel_sizer   r   g      r=   r   )r   r   printr   r<   r   r   r   r>   r5   r?   class_embeddingr@   rh   ln_prer   transformerln_postproj)r&   r   r   r   r   r   r<   scaler)   r+   r,   r      s(   




zVisualTransformer.__init__Tr-   c                 C   s   |  |}||jd |jd d}|ddd}tj|jd d|jd |j|jd}tj| j	
|j| |gdd}|| j
|j }| |}|ddd}| |\}}|ddd}| |d d dd d f }|rt||d< |S | jd ur~|| j }|S )Nr   r   rM   r=   r|   rH   
pre_logits)r   r\   r]   r^   r5   zerosrb   r}   r_   r   ra   r@   r   r   r   r   )r&   r-   
return_allr   r   r+   r+   r,   r1      s*   
 


zVisualTransformer.forward)T)	r2   r3   r4   rg   r   r5   r6   r1   r7   r+   r+   r)   r,   r      s    r   c                       $   e Zd Z fddZdd Z  ZS )CLIPNetc                    s   t t|   |dkrtddddddd| _d S |dv r)tdd	ddddd| _d S |d
v r:tddddd	dd| _d S td| )NCLIP_ViTB32       i      i   )r   r   r   r   r   r<   )CLIP_ViTB16CLIP_ViTB16_FP16   )CLIP_ViTL14CLIP_ViTL14_FP16   i      z Unsupported arch_name for CLIP, )r   r   r   r   clipKeyError)r&   	arch_name
pretrainedkwargsr)   r+   r,   r      s:   	zCLIPNet.__init__c                 C   s   |  |}|S r.   )r   )r&   
input_dataoutputr+   r+   r,   r1     s   
zCLIPNet.forwardr2   r3   r4   r   r1   r7   r+   r+   r)   r,   r      s    !r   	CLIP_RN50F c                 K   s>   t d| d d|}|r| drt|j t|j|| |S )N)r   r   FP16r+   )r   endswithr   r   r	   )r   use_pretrain	load_from
state_dictr   modelr+   r+   r,   CLIP   s   

r   c                       r   )ProbingModelc                    s"   t t|   tj||| _d S r.   )r   r   r   r5   r   rA   linear)r&   	feat_sizenum_classesr)   r+   r,   r   /  s   zProbingModel.__init__c                 C   s
   |  |S r.   )r   rp   r+   r+   r,   r1   3  s   
zProbingModel.forwardr   r+   r+   r)   r,   r   -  s    r   )r   Fr   N)"mathsyscollectionsr   	functoolsr   operatorr   numpynpr5   torch.nnr   torch.nn.functional
functionalrc   PILr   torchvisionr   utilsr   r	   Moduler
   r8   rh   rn   rq   r   r   r   r   r   r+   r+   r+   r,   <module>   s4   5.	%6(
