o
    i$                  	   @   s  d Z ddlZddlZddlmZ ddlmZmZ ddl	Z	ddl	m
Z
 ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZ ddlmZmZmZ ddlmZ eeZ eeddG dd deZ!dd Z"dd Z#dKde	j$de%de&de	j$fddZ'G dd  d e
j(Z)G d!d" d"e
j(Z*G d#d$ d$e
j(Z+G d%d& d&e
j(Z,G d'd( d(e
j(Z-G d)d* d*e
j(Z.G d+d, d,e
j(Z/G d-d. d.e
j(Z0G d/d0 d0e
j(Z1G d1d2 d2e
j(Z2G d3d4 d4e
j(Z3G d5d6 d6eZ4G d7d8 d8e
j(Z5eG d9d: d:eZ6eG d;d< d<e6Z7G d=d> d>e
j(Z8G d?d@ d@e
j(Z9G dAdB dBe
j(Z:G dCdD dDe
j(Z;G dEdF dFe
j(Z<edGdG dHdI dIe6Z=g dJZ>dS )Lz"PyTorch Swin2SR Transformer model.    N)	dataclass)OptionalUnion)nn   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputImageSuperResolutionOutput)PreTrainedModel) find_pruneable_heads_and_indicesmeshgridprune_linear_layer)ModelOutputauto_docstringlogging   )Swin2SRConfigzQ
    Swin2SR encoder's outputs, with potential hidden states and attentions.
    )custom_introc                   @   sL   e Zd ZU dZeej ed< dZee	ej  ed< dZ
ee	ej  ed< dS )Swin2SREncoderOutputNlast_hidden_statehidden_states
attentions)__name__
__module____qualname__r   r   torchFloatTensor__annotations__r   tupler    r    r    `/home/ubuntu/.local/lib/python3.10/site-packages/transformers/models/swin2sr/modeling_swin2sr.pyr   %   s   
 r   c                 C   sR   | j \}}}}| ||| ||| ||} | dddddd d|||}|S )z2
    Partitions the given input into windows.
    r   r   r            shapeviewpermute
contiguous)input_featurewindow_size
batch_sizeheightwidthnum_channelswindowsr    r    r!   window_partition2   s   $r2   c                 C   sN   | j d }| d|| || |||} | dddddd d|||} | S )z?
    Merges windows to produce higher resolution features.
    r%   r   r   r   r"   r#   r$   r&   )r1   r,   r.   r/   r0   r    r    r!   window_reverse?   s   
$r3           Finput	drop_probtrainingreturnc                 C   sd   |dks|s| S d| }| j d fd| jd   }|tj|| j| jd }|  | || }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    r4   r   r   )r   )dtypedevice)r'   ndimr   randr9   r:   floor_div)r5   r6   r7   	keep_probr'   random_tensoroutputr    r    r!   	drop_pathJ   s   
rB   c                       sT   e Zd ZdZddee ddf fddZdejdejfdd	Z	de
fd
dZ  ZS )Swin2SRDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr6   r8   c                    s   t    || _d S N)super__init__r6   )selfr6   	__class__r    r!   rF   b   s   

zSwin2SRDropPath.__init__r   c                 C   s   t || j| jS rD   )rB   r6   r7   rG   r   r    r    r!   forwardf   s   zSwin2SRDropPath.forwardc                 C   s   d| j  S )Nzp=)r6   rG   r    r    r!   
extra_repri   s   zSwin2SRDropPath.extra_reprrD   )r   r   r   __doc__r   floatrF   r   TensorrK   strrM   __classcell__r    r    rH   r!   rC   _   s
    rC   c                       s>   e Zd ZdZ fddZdeej deej	 fddZ
  ZS )Swin2SREmbeddingsz?
    Construct the patch and optional position embeddings.
    c                    s`   t    t|| _| jj}|jr tt	d|d |j
| _nd | _t|j| _|j| _d S )Nr   )rE   rF   Swin2SRPatchEmbeddingspatch_embeddingsnum_patchesuse_absolute_embeddingsr   	Parameterr   zeros	embed_dimposition_embeddingsDropouthidden_dropout_probdropoutr,   )rG   configrV   rH   r    r!   rF   r   s   

zSwin2SREmbeddings.__init__pixel_valuesr8   c                 C   s4   |  |\}}| jd ur|| j }| |}||fS rD   )rU   r[   r^   )rG   r`   
embeddingsoutput_dimensionsr    r    r!   rK      s
   


zSwin2SREmbeddings.forward)r   r   r   rN   rF   r   r   r   r   rP   rK   rR   r    r    rH   r!   rS   m   s    &rS   c                       sD   e Zd Zd fdd	Zdeej deejee	 f fddZ
  ZS )	rT   Tc                    s   t    |j}|j|j}}t|tjjr|n||f}t|tjjr%|n||f}|d |d  |d |d  g}|| _	|d |d  | _
tj||j||d| _|r[t|j| _d S d | _d S )Nr   r   )kernel_sizestride)rE   rF   rZ   
image_size
patch_size
isinstancecollectionsabcIterablepatches_resolutionrV   r   Conv2d
projection	LayerNorm	layernorm)rG   r_   normalize_patchesr0   re   rf   rk   rH   r    r!   rF      s   
  zSwin2SRPatchEmbeddings.__init__ra   r8   c                 C   sN   |  |}|j\}}}}||f}|ddd}| jd ur#| |}||fS )Nr"   r   )rm   r'   flatten	transposero   )rG   ra   _r.   r/   rb   r    r    r!   rK      s   


zSwin2SRPatchEmbeddings.forward)T)r   r   r   rF   r   r   r   r   rP   intrK   rR   r    r    rH   r!   rT      s    .rT   c                       (   e Zd ZdZ fddZdd Z  ZS )Swin2SRPatchUnEmbeddingszImage to Patch Unembeddingc                    s   t    |j| _d S rD   )rE   rF   rZ   )rG   r_   rH   r    r!   rF      s   
z!Swin2SRPatchUnEmbeddings.__init__c                 C   s2   |j \}}}|dd|| j|d |d }|S )Nr   r"   r   )r'   rr   r(   rZ   )rG   ra   x_sizer-   height_widthr0   r    r    r!   rK      s   "z Swin2SRPatchUnEmbeddings.forwardr   r   r   rN   rF   rK   rR   r    r    rH   r!   rv      s    rv   c                	       sh   e Zd ZdZejfdee dedejddf fddZ	d	d
 Z
dejdeeef dejfddZ  ZS )Swin2SRPatchMerginga'  
    Patch Merging Layer.

    Args:
        input_resolution (`tuple[int]`):
            Resolution of input feature.
        dim (`int`):
            Number of input channels.
        norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`):
            Normalization layer class.
    input_resolutiondim
norm_layerr8   Nc                    sB   t    || _|| _tjd| d| dd| _|d| | _d S )Nr#   r"   Fbias)rE   rF   r{   r|   r   Linear	reductionnorm)rG   r{   r|   r}   rH   r    r!   rF      s
   
zSwin2SRPatchMerging.__init__c                 C   sF   |d dkp|d dk}|r!ddd|d d|d f}t j||}|S )Nr"   r   r   )r   
functionalpad)rG   r+   r.   r/   
should_pad
pad_valuesr    r    r!   	maybe_pad   s
   zSwin2SRPatchMerging.maybe_padr+   input_dimensionsc                 C   s   |\}}|j \}}}|||||}| |||}|d d dd ddd dd d f }|d d dd ddd dd d f }	|d d dd ddd dd d f }
|d d dd ddd dd d f }t||	|
|gd}||dd| }| |}| |}|S )Nr   r"   r   r%   r#   )r'   r(   r   r   catr   r   )rG   r+   r   r.   r/   r-   r|   r0   input_feature_0input_feature_1input_feature_2input_feature_3r    r    r!   rK      s   $$$$

zSwin2SRPatchMerging.forward)r   r   r   rN   r   rn   r   rt   ModulerF   r   r   rP   rK   rR   r    r    rH   r!   rz      s
    **rz   c                       sb   e Zd Zddgf fdd	Z			ddejdeej deej d	ee d
e	ej f
ddZ
  ZS )Swin2SRSelfAttentionr   c              
      s  t    || dkrtd| d| d|| _t|| | _| j| j | _t|tj	j
r0|n||f| _|| _ttdt|ddf | _ttjddd	d
tjd	dtjd|dd
| _tj| jd d  | jd tjd }tj| jd d  | jd tjd }tt||gddddd d}|d dkr|d d d d d d df  |d d   < |d d d d d d df  |d d   < n3|dkr|d d d d d d df  | jd d   < |d d d d d d df  | jd d   < |d9 }t|t t!|d  t" d }|#t$| j% j&}| j'd|dd t| jd }	t| jd }
tt|	|
gdd}t(|d}|d d d d d f |d d d d d f  }|ddd }|d d d d df  | jd d 7  < |d d d d df  | jd d 7  < |d d d d df  d| jd  d 9  < |)d}| j'd|dd tj| j| j|j*d
| _+tj| j| jdd
| _,tj| j| j|j*d
| _-t.|j/| _0d S )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()
   r   r"   i   Tr~   inplaceFr9   ij)indexing         ?relative_coords_table
persistentr%   relative_position_index)1rE   rF   
ValueErrornum_attention_headsrt   attention_head_sizeall_head_sizerg   rh   ri   rj   r,   pretrained_window_sizer   rX   r   logoneslogit_scale
Sequentialr   ReLUcontinuous_position_bias_mlparangeint64rO   stackr   r)   r*   	unsqueezesignlog2absmathtonext
parametersr9   register_bufferrq   sumqkv_biasquerykeyvaluer\   attention_probs_dropout_probr^   )rG   r_   r|   	num_headsr,   r   relative_coords_hrelative_coords_wr   coords_hcoords_wcoordscoords_flattenrelative_coordsr   rH   r    r!   rF      s`   
"&((
,.
..&,((,
zSwin2SRSelfAttention.__init__NFr   attention_mask	head_maskoutput_attentionsr8   c                 C   s"  |j \}}}| ||d| j| jdd}| ||d| j| jdd}	| ||d| j| jdd}
tj	j
|ddtj	j
|	dddd }tj| jtdd }|| }| | jd| j}|| jd | jd | jd  | jd | jd  d}|ddd }d	t| }||d }|d ur|j d }||| || j|||dd }||dd }|d| j||}tj	j|dd}| |}|d ur|| }t||
}|dddd
 }| d d | jf }||}|r||f}|S |f}|S )Nr%   r   r"   r|   g      Y@)maxr      r   )r'   r   r(   r   r   rr   r   r   r   r   	normalizer   clampr   r   r   expr   r   r   r,   r)   r*   sigmoidr   softmaxr^   matmulsizer   )rG   r   r   r   r   r-   r|   r0   query_layer	key_layervalue_layerattention_scoresr   relative_position_bias_tablerelative_position_bias
mask_shapeattention_probscontext_layernew_context_layer_shapeoutputsr    r    r!   rK   *  sd   &


zSwin2SRSelfAttention.forwardNNF)r   r   r   rF   r   rP   r   r   boolr   rK   rR   r    r    rH   r!   r      s     @r   c                       s8   e Zd Z fddZdejdejdejfddZ  ZS )Swin2SRSelfOutputc                    s*   t    t||| _t|j| _d S rD   )rE   rF   r   r   denser\   r   r^   rG   r_   r|   rH   r    r!   rF   t  s   
zSwin2SRSelfOutput.__init__r   input_tensorr8   c                 C      |  |}| |}|S rD   r   r^   )rG   r   r   r    r    r!   rK   y     

zSwin2SRSelfOutput.forwardr   r   r   rF   r   rP   rK   rR   r    r    rH   r!   r   s  s    $r   c                       sd   e Zd Zd fdd	Zdd Z			ddejd	eej d
eej dee	 de
ej f
ddZ  ZS )Swin2SRAttentionr   c                    sL   t    t||||t|tjjr|n||fd| _t||| _	t
 | _d S )Nr_   r|   r   r,   r   )rE   rF   r   rg   rh   ri   rj   rG   r   rA   setpruned_heads)rG   r_   r|   r   r,   r   rH   r    r!   rF     s   
	zSwin2SRAttention.__init__c                 C   s   t |dkrd S t|| jj| jj| j\}}t| jj|| j_t| jj|| j_t| jj	|| j_	t| j
j|dd| j
_| jjt | | j_| jj| jj | j_| j|| _d S )Nr   r   r   )lenr   rG   r   r   r   r   r   r   r   rA   r   r   union)rG   headsindexr    r    r!   prune_heads  s   zSwin2SRAttention.prune_headsNFr   r   r   r   r8   c                 C   s6   |  ||||}| |d |}|f|dd   }|S Nr   r   )rG   rA   )rG   r   r   r   r   self_outputsattention_outputr   r    r    r!   rK     s   zSwin2SRAttention.forwardr   r   )r   r   r   rF   r   r   rP   r   r   r   r   rK   rR   r    r    rH   r!   r     s"    r   c                       2   e Zd Z fddZdejdejfddZ  ZS )Swin2SRIntermediatec                    sJ   t    t|t|j| | _t|jt	rt
|j | _d S |j| _d S rD   )rE   rF   r   r   rt   	mlp_ratior   rg   
hidden_actrQ   r   intermediate_act_fnr   rH   r    r!   rF     s
   
zSwin2SRIntermediate.__init__r   r8   c                 C   r   rD   )r   r   rJ   r    r    r!   rK        

zSwin2SRIntermediate.forwardr   r    r    rH   r!   r     s    r   c                       r   )Swin2SROutputc                    s4   t    tt|j| || _t|j| _	d S rD   )
rE   rF   r   r   rt   r   r   r\   r]   r^   r   rH   r    r!   rF     s   
zSwin2SROutput.__init__r   r8   c                 C   r   rD   r   rJ   r    r    r!   rK     r   zSwin2SROutput.forwardr   r    r    rH   r!   r     s    r   c                       s   e Zd Z	d fdd	Zdeeeef eeef f fddZdd	 Zd
d Z		dde	j
deeef dee	j dee dee	j
e	j
f f
ddZ  ZS )Swin2SRLayerr4   r   c           	         s   t    || _| |j|jf||f\}}|d | _|d | _t|||| jt|tj	j
r/|n||fd| _tj||jd| _|dkrGt|nt | _t||| _t||| _tj||jd| _d S )Nr   r   epsr4   )rE   rF   r{   _compute_window_shiftr,   
shift_sizer   rg   rh   ri   rj   	attentionr   rn   layer_norm_epslayernorm_beforerC   IdentityrB   r   intermediater   rA   layernorm_after)	rG   r_   r|   r{   r   drop_path_rater   r   r,   rH   r    r!   rF     s*   


	zSwin2SRLayer.__init__r8   c                 C   s6   dd t | j|D }dd t | j||D }||fS )Nc                 S   s    g | ]\}}||kr|n|qS r    r    ).0rwr    r    r!   
<listcomp>  s     z6Swin2SRLayer._compute_window_shift.<locals>.<listcomp>c                 S   s"   g | ]\}}}||krd n|qS r   r    )r  r  r  sr    r    r!   r    s   " )zipr{   )rG   target_window_sizetarget_shift_sizer,   r   r    r    r!   r     s   z"Swin2SRLayer._compute_window_shiftc              	   C   s  | j dkrtjd||df|d}td| j t| j | j  t| j  d f}td| j t| j | j  t| j  d f}d}|D ]}|D ]}	||d d ||	d d f< |d7 }qDq@t|| j}
|
d| j| j }
|
d|
d }||dkd|dkd}|S d }|S )Nr   r   r   r%   r"   g      Yr4   )	r   r   rY   slicer,   r2   r(   r   masked_fill)rG   r.   r/   r9   img_maskheight_sliceswidth_slicescountheight_slicewidth_slicemask_windows	attn_maskr    r    r!   get_attn_mask  s.   

zSwin2SRLayer.get_attn_maskc                 C   sR   | j || j   | j  }| j || j   | j  }ddd|d|f}tj||}||fS )Nr   )r,   r   r   r   )rG   r   r.   r/   	pad_right
pad_bottomr   r    r    r!   r     s
   zSwin2SRLayer.maybe_padNFr   r   r   r   c                 C   s  |\}}|  \}}}	|}
|||||	}| |||\}}|j\}}}}| jdkr9tj|| j | j fdd}n|}t|| j}|d| j| j |	}| j	|||j
d}|d ur_||j}| j||||d}|d }|d| j| j|	}t|| j||}| jdkrtj|| j| jfdd}n|}|d dkp|d dk}|r|d d d |d |d d f  }|||| |	}| |}|
| | }| |}| |}|| | | }|r||d	 f}|S |f}|S )
Nr   )r   r"   )shiftsdimsr%   r   )r   r   r$   r   )r   r(   r   r'   r   r   rollr2   r,   r  r9   r   r:   r   r3   r*   r   rB   r  rA   r  )rG   r   r   r   r   r.   r/   r-   rs   channelsshortcutr   
height_pad	width_padshifted_hidden_stateshidden_states_windowsr  attention_outputsr   attention_windowsshifted_windows
was_paddedlayer_outputlayer_outputsr    r    r!   rK     sH   

$


zSwin2SRLayer.forward)r4   r   r   NF)r   r   r   rF   r   rt   r   r  r   r   rP   r   r   r   rK   rR   r    r    rH   r!   r     s&    &
r   c                       s`   e Zd ZdZd fdd	Z		ddejdeeef d	e	ej
 d
e	e deej f
ddZ  ZS )Swin2SRStagezh
    This corresponds to the Residual Swin Transformer Block (RSTB) in the original implementation.
    r   c                    s   t     | _| _t fddt|D | _ jdkr.t	ddd| _
n6 jdkrdtt	d dddtjdd	d
t	d d dddtjdd	d
t	d ddd| _
t dd| _t | _d S )Nc              
      s6   g | ]}t  |d  dkrdn jd  dqS )r"   r   )r_   r|   r{   r   r   r   )r   r,   )r  ir_   r|   r{   r   r   r    r!   r  T  s    	z)Swin2SRStage.__init__.<locals>.<listcomp>1convr   r   3convr#   皙?Tnegative_sloper   r   F)rp   )rE   rF   r_   r|   r   
ModuleListrangelayersresi_connectionrl   convr   	LeakyReLUrT   patch_embedrv   patch_unembed)rG   r_   r|   r{   depthr   rB   r   rH   r,  r!   rF   O  s(   
	

zSwin2SRStage.__init__NFr   r   r   r   r8   c                 C   s   |}|\}}t | jD ]\}}	|d ur|| nd }
|	|||
|}|d }q||||f}| ||}| |}| |\}}|| }||f}|rO||dd  7 }|S r   )	enumerater4  r9  r6  r8  )rG   r   r   r   r   residualr.   r/   r+  layer_modulelayer_head_maskr(  rb   rs   stage_outputsr    r    r!   rK   q  s   

zSwin2SRStage.forwardr   r)  )r   r   r   rN   rF   r   rP   r   rt   r   r   r   rK   rR   r    r    rH   r!   r*  J  s     &
r*  c                       sp   e Zd Z fddZ				ddejdeeef deej	 d	ee
 d
ee
 dee
 deeef fddZ  ZS )Swin2SREncoderc                    sn   t    t j| _ | _dd tjd jt	 jddD t
 fddt| jD | _d| _d S )Nc                 S   s   g | ]}|  qS r    )item)r  xr    r    r!   r    s    z+Swin2SREncoder.__init__.<locals>.<listcomp>r   cpu)r:   c                    sd   g | ].}t   jd  d f j|  j| t jd| t jd|d   d dqS )r   r   N)r_   r|   r{   r:  r   rB   r   )r*  rZ   depthsr   r   )r  	stage_idxr_   dpr	grid_sizer    r!   r    s    
*F)rE   rF   r   rD  
num_stagesr_   r   linspacer  r   r   r2  r3  stagesgradient_checkpointing)rG   r_   rH  rH   rF  r!   rF     s   
$

zSwin2SREncoder.__init__NFTr   r   r   r   output_hidden_statesreturn_dictr8   c                 C   s   d}|rdnd }|rdnd }	|r||f7 }t | jD ];\}
}|d ur&||
 nd }|||||}|d }|d }|d |d f}||f7 }|rK||f7 }|rU|	|dd  7 }	q|sdtdd |||	fD S t|||	d	S )
Nr    r   r   r   r%   r"   c                 s   s    | ]	}|d ur|V  qd S rD   r    )r  vr    r    r!   	<genexpr>  s    z)Swin2SREncoder.forward.<locals>.<genexpr>r   r   r   )r;  rK  r   r   )rG   r   r   r   r   rM  rN  all_input_dimensionsall_hidden_statesall_self_attentionsr+  stage_moduler>  r(  rb   r    r    r!   rK     s0   	


zSwin2SREncoder.forward)NFFT)r   r   r   rF   r   rP   r   rt   r   r   r   r   r   rK   rR   r    r    rH   r!   r@    s*    

r@  c                   @   s*   e Zd ZU eed< dZdZdZdd ZdS )Swin2SRPreTrainedModelr_   swin2srr`   Tc                 C   sx   t |tjtjfr%tjjj|jj| j	j
d |jdur#|jj  dS dS t |tjr:|jj  |jjd dS dS )zInitialize the weights)stdNr   )rg   r   r   rl   r   inittrunc_normal_weightdatar_   initializer_ranger   zero_rn   fill_)rG   moduler    r    r!   _init_weights  s   
z$Swin2SRPreTrainedModel._init_weightsN)	r   r   r   r   r   base_model_prefixmain_input_namesupports_gradient_checkpointingra  r    r    r    r!   rV    s   
 rV  c                       s   e Zd Z fddZdd Zdd Zdd Ze								dd
ej	de
ej	 de
e de
e de
e deeef fddZ  ZS )Swin2SRModelc                    s   t  | || _|jdkr!|jdkr!tg ddddd}ntdddd}| j	d|dd |j
| _
t|j|jddd| _t|| _t|| jjjd| _tj|j|jd| _t|| _t|j|jddd| _|   d S )	Nr   )gw#?g8EGr?gB`"?r   meanFr   )rH  r   )rE   rF   r_   r0   num_channels_outr   tensorr(   rY   r   	img_ranger   rl   rZ   first_convolutionrS   ra   r@  rU   rk   encoderrn   r   ro   rv   r9  conv_after_body	post_init)rG   r_   rf  rH   r    r!   rF     s   

zSwin2SRModel.__init__c                 C   s   | j jS rD   )ra   rU   rL   r    r    r!   get_input_embeddings  s   z!Swin2SRModel.get_input_embeddingsc                 C   s*   |  D ]\}}| jj| j| qdS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsrk  layerr   r   )rG   heads_to_prunerp  r   r    r    r!   _prune_heads  s   zSwin2SRModel._prune_headsc           	      C   sn   |  \}}}}| jj}|||  | }|||  | }tj|d|d|fd}| j|}|| | j }|S )Nr   reflect)	r   r_   r,   r   r   r   rf  type_asri  )	rG   r`   rs   r.   r/   r,   modulo_pad_heightmodulo_pad_widthrf  r    r    r!   pad_and_normalize	  s   zSwin2SRModel.pad_and_normalizeNr`   r   r   rM  rN  r8   c                 C   s   |d ur|n| j j}|d ur|n| j j}|d ur|n| j j}| |t| j j}|j\}}}}| |}| 	|}	| 
|	\}
}| j|
|||||d}|d }| |}| |||f}| ||	 }|sp|f|dd   }|S t||j|jdS )Nr   r   rM  rN  r   r   rQ  )r_   r   rM  use_return_dictget_head_maskr   rD  r'   rw  rj  ra   rk  ro   r9  rl  r	   r   r   )rG   r`   r   r   rM  rN  rs   r.   r/   ra   embedding_outputr   encoder_outputssequence_outputrA   r    r    r!   rK     s:   	

	
zSwin2SRModel.forward)NNNN)r   r   r   rF   rn  rr  rw  r   r   r   r   r   r   r   r	   rK   rR   r    r    rH   r!   re    s.    
re  c                       ru   )UpsamplezUpsample module.

    Args:
        scale (`int`):
            Scale factor. Supported scales: 2^n and 3.
        num_features (`int`):
            Channel number of intermediate features.
    c                    s   t    || _||d @ dkr<ttt|D ] }| d| t	|d| ddd | d| t
d qd S |dkrTt	|d| ddd| _t
d| _d S td	| d
)Nr   r   convolution_r#   r   pixelshuffle_r"   	   zScale z/ is not supported. Supported scales: 2^n and 3.)rE   rF   scaler3  rt   r   r   
add_moduler   rl   PixelShuffleconvolutionpixelshuffler   )rG   r  num_featuresr+  rH   r    r!   rF   [  s   
$zUpsample.__init__c                 C   s|   | j | j d @ dkr-ttt| j D ]}| d| |}| d| |}q|S | j dkr<| |}| |}|S )Nr   r   r  r  r   )r  r3  rt   r   r   __getattr__r  r  )rG   hidden_stater+  r    r    r!   rK   j  s   


zUpsample.forwardry   r    r    rH   r!   r~  Q  s    	r~  c                       ru   )UpsampleOneStepa  UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle)

    Used in lightweight SR to save parameters.

    Args:
        scale (int):
            Scale factor. Supported scales: 2^n and 3.
        in_channels (int):
            Channel number of intermediate features.
        out_channels (int):
            Channel number of output features.
    c                    s6   t    t||d | ddd| _t|| _d S )Nr"   r   r   )rE   rF   r   rl   r6  r  pixel_shuffle)rG   r  in_channelsout_channelsrH   r    r!   rF     s   
zUpsampleOneStep.__init__c                 C   r   rD   )r6  r  )rG   rB  r    r    r!   rK     r   zUpsampleOneStep.forwardry   r    r    rH   r!   r  w  s    r  c                       $   e Zd Z fddZdd Z  ZS )PixelShuffleUpsamplerc                    sV   t    t|j|ddd| _tjdd| _t|j	|| _
t||jddd| _d S Nr   r   Tr   )rE   rF   r   rl   rZ   conv_before_upsampler7  
activationr~  upscaleupsamplerg  final_convolutionrG   r_   r  rH   r    r!   rF     s
   
zPixelShuffleUpsampler.__init__c                 C   s,   |  |}| |}| |}| |}|S rD   )r  r  r  r  )rG   r}  rB  r    r    r!   rK     s
   



zPixelShuffleUpsampler.forwardr   r   r   rF   rK   rR   r    r    rH   r!   r    s    r  c                       r  )NearestConvUpsamplerc                    s   t    |jdkrtdt|j|ddd| _tjdd| _	t||ddd| _
t||ddd| _t||ddd| _t||jddd| _tjddd| _d S )	Nr#   zNThe nearest+conv upsampler only supports an upscale factor of 4 at the moment.r   r   Tr   r/  r0  )rE   rF   r  r   r   rl   rZ   r  r7  r  conv_up1conv_up2conv_hrrg  r  lrelur  rH   r    r!   rF     s   

zNearestConvUpsampler.__init__c              	   C   sn   |  |}| |}| | tjjj|ddd}| | tjjj|ddd}| 	| | 
|}|S )Nr"   nearest)scale_factormode)r  r  r  r  r   r   r   interpolater  r  r  )rG   r}  reconstructionr    r    r!   rK     s   

zNearestConvUpsampler.forwardr  r    r    rH   r!   r    s    r  c                       r  )PixelShuffleAuxUpsamplerc              	      s   t    |j| _t|j|ddd| _t|j|ddd| _tj	dd| _
t||jddd| _ttd|dddtj	dd| _t|j|| _t||jddd| _d S r  )rE   rF   r  r   rl   r0   conv_bicubicrZ   r  r7  r  conv_auxr   conv_after_auxr~  r  rg  r  r  rH   r    r!   rF     s   
$z!PixelShuffleAuxUpsampler.__init__c                 C   s   |  |}| |}| |}| |}| |}| |d d d d d || j d || j f |d d d d d || j d || j f  }| |}||fS rD   )r  r  r  r  r  r  r  r  )rG   r}  bicubicr.   r/   auxr  r    r    r!   rK     s   




0*
z PixelShuffleAuxUpsampler.forwardr  r    r    rH   r!   r    s    r  zm
    Swin2SR Model transformer with an upsampler head on top for image super resolution and restoration.
    c                       sz   e Zd Z fddZe						ddeej deej deej dee	 dee	 d	ee	 d
e
eef fddZ  ZS )Swin2SRForImageSuperResolutionc                    s   t  | t|| _|j| _|j| _d}| jdkr!t||| _n4| jdkr-t||| _n(| jdkr=t	|j|j
|j| _n| jdkrIt||| _nt|j
|jddd| _|   d S )N@   r  pixelshuffle_auxpixelshuffledirectnearest+convr   r   )rE   rF   re  rW  	upsamplerr  r  r  r  r  rZ   rg  r  r   rl   r  rm  r  rH   r    r!   rF     s   




z'Swin2SRForImageSuperResolution.__init__Nr`   r   labelsr   rM  rN  r8   c                 C   s^  |dur|n| j j}d}|durtd|jdd \}}	| j jdkr5tjj||| j |	| j fddd}
| j	|||||d}|d	 }| jd
v rN| 
|}n!| jdkrh| 
||
||	\}}|| j	j | j	j }n|| | }|| j	j | j	j }|ddddd|| j d|	| j f }|s|f|dd  }|dur|f| S |S t|||j|jdS )a  
        Example:
         ```python
         >>> import torch
         >>> import numpy as np
         >>> from PIL import Image
         >>> import requests

         >>> from transformers import AutoImageProcessor, Swin2SRForImageSuperResolution

         >>> processor = AutoImageProcessor.from_pretrained("caidas/swin2SR-classical-sr-x2-64")
         >>> model = Swin2SRForImageSuperResolution.from_pretrained("caidas/swin2SR-classical-sr-x2-64")

         >>> url = "https://huggingface.co/spaces/jjourney1125/swin2sr/resolve/main/samples/butterfly.jpg"
         >>> image = Image.open(requests.get(url, stream=True).raw)
         >>> # prepare image for the model
         >>> inputs = processor(image, return_tensors="pt")

         >>> # forward pass
         >>> with torch.no_grad():
         ...     outputs = model(**inputs)

         >>> output = outputs.reconstruction.data.squeeze().float().cpu().clamp_(0, 1).numpy()
         >>> output = np.moveaxis(output, source=0, destination=-1)
         >>> output = (output * 255.0).round().astype(np.uint8)  # float32 to uint8
         >>> # you can visualize `output` with `Image.fromarray`
         ```Nz'Training is not supported at the momentr"   r  r  F)r   r  align_cornersrx  r   )r  r  r  r   )lossr  r   r   )r_   ry  NotImplementedErrorr'   r  r   r   r  r  rW  r  ri  rf  r  r
   r   r   )rG   r`   r   r  r   rM  rN  r  r.   r/   r  r   r}  r  r  rA   r    r    r!   rK     sJ   %

,z&Swin2SRForImageSuperResolution.forward)NNNNNN)r   r   r   rF   r   r   r   r   
LongTensorr   r   r   r
   rK   rR   r    r    rH   r!   r    s0    
r  )r  re  rV  )r4   F)?rN   collections.abcrh   r   dataclassesr   typingr   r   r   r   activationsr   modeling_layersr   modeling_outputsr	   r
   modeling_utilsr   pytorch_utilsr   r   r   utilsr   r   r   configuration_swin2srr   
get_loggerr   loggerr   r2   r3   rP   rO   r   rB   r   rC   rS   rT   rv   rz   r   r   r   r   r   r   r*  r@  rV  re  r~  r  r  r  r  r  __all__r    r    r    r!   <module>   sf   
 7 /}GBk&q