o
    .wiB                     @   s  d dl Z d dlZd dlmZmZmZmZ d dlZd dlmZm	Z	 d dl
mZ d dlmZ dddd	Zes8d
dgZdFdedede	jjjfddZG dd dej	jZG dd dej	jZG dd dej	jZdGdededefddZdHdedeedf defdd ZdId"ed#edefd$d%ZdJd'ed(edefd)d*Z G d+d, d,e	jZ!G d-d. d.e	jZ"G d/d0 d0e	jZ#G d1d2 d2e#Z$d3ed4edefd5d6Z%d7ed8ede	jd4edeeeeef f f
d9d:Z&dKd<ed=eeef d>ed? defd@dAZ'	B	;	dLd7ed8edCedD d>ed? d4edefdEd
Z(dS )M    N)List
NamedTupleOptionalUnion)Tensornn)Literal)_TORCHVISION_AVAILABLESqueezeNet1_1_WeightsAlexNet_WeightsVGG16_Weights)squeezenet1_1alexnetvgg16)learned_perceptual_image_patch_similarity_get_tv_model_featuresFnet
pretrainedreturnc                 C   sZ   t stdddl}|r!t|jt|  }t|j| |jd}|jS t|j| dd}|jS )aA  Get torchvision network.

    Args:
        net: Name of network
        pretrained: If pretrained weights should be used

    >>> _ = _get_tv_model_features("alexnet", pretrained=True)
    >>> _ = _get_tv_model_features("squeezenet1_1", pretrained=True)
    >>> _ = _get_tv_model_features("vgg16", pretrained=True)

    zSTorchvision is not installed. Please install torchvision to use this functionality.r   N)weights)r	   ModuleNotFoundErrortorchvisiongetattrmodels_weight_mapDEFAULTfeatures)r   r   r   model_weightsmodel r   `/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/torchmetrics/functional/image/lpips.pyr   -   s   c                       B   e Zd ZdZddededdf fdd	Zd
edefddZ  Z	S )
SqueezeNetzSqueezeNet implementation.FTrequires_gradr   r   Nc           
   	      s   t    td|}d| _g }tdtddtddtddtddtddtdd	g}|D ]}tj }|D ]}|t	|||  q7|
| q.t|| _|s\|  D ]}	d
|	_qVd S d S )Nr               
            F)super__init__r   N_slicesrangetorchr   
Sequential
add_modulestrappend
ModuleListslices
parametersr#   )
selfr#   r   pretrained_featuresr6   feature_rangesfeature_rangeseqiparam	__class__r   r    r-   H   s    

:
zSqueezeNet.__init__xc                 C   s:   G dd dt }g }| jD ]}||}|| q|| S )Process input.c                   @   sF   e Zd ZU eed< eed< eed< eed< eed< eed< eed< dS )	z*SqueezeNet.forward.<locals>._SqueezeOutputrelu1relu2relu3relu4relu5relu6relu7N__name__
__module____qualname__r   __annotations__r   r   r   r    _SqueezeOutput]   s   
 rO   )r   r6   r4   )r8   rA   rO   relusslice_r   r   r    forwardZ   s   	
zSqueezeNet.forwardFT
rK   rL   rM   __doc__boolr-   r   r   rR   __classcell__r   r   r?   r    r"   E   s    r"   c                       r!   )AlexnetzAlexnet implementation.FTr#   r   r   Nc                    s2  t    td|}tj | _tj | _tj | _tj | _	tj | _
d| _tdD ]}| jt|||  q/tddD ]}| jt|||  qBtddD ]}| jt|||  qUtddD ]}| j	t|||  qhtddD ]}| j
t|||  q{|s|  D ]}d|_qd S d S )Nr   r&   r%   r'   r(   r*   Fr,   r-   r   r0   r   r1   slice1slice2slice3slice4slice5r.   r/   r2   r3   r7   r#   )r8   r#   r   alexnet_pretrained_featuresrA   r>   r?   r   r    r-   p   .   

zAlexnet.__init__rA   c           	      C   f   |  |}|}| |}|}| |}|}| |}|}| |}|}G dd dt}||||||S )rB   c                   @   6   e Zd ZU eed< eed< eed< eed< eed< dS )z(Alexnet.forward.<locals>._AlexnetOutputsrC   rD   rE   rF   rG   NrJ   r   r   r   r    _AlexnetOutputs      
 rc   rZ   r[   r\   r]   r^   r   )	r8   rA   hh_relu1h_relu2h_relu3h_relu4h_relu5rc   r   r   r    rR         




zAlexnet.forwardrS   rT   r   r   r?   r    rX   m       rX   c                       r!   )Vgg16zVgg16 implementation.FTr#   r   r   Nc                    s2  t    td|}tj | _tj | _tj | _tj | _	tj | _
d| _tdD ]}| jt|||  q/tddD ]}| jt|||  qBtddD ]}| jt|||  qUtddD ]}| j	t|||  qhtddD ]}| j
t|||  q{|s|  D ]}d|_qd S d S )	Nr   r&      	            FrY   )r8   r#   r   vgg_pretrained_featuresrA   r>   r?   r   r    r-      r`   zVgg16.__init__rA   c           	      C   ra   )rB   c                   @   rb   )z"Vgg16.forward.<locals>._VGGOutputsrelu1_2relu2_2relu3_3relu4_3relu5_3NrJ   r   r   r   r    _VGGOutputs   rd   rz   re   )	r8   rA   rf   	h_relu1_2	h_relu2_2	h_relu3_3	h_relu4_3	h_relu5_3rz   r   r   r    rR      rl   zVgg16.forwardrS   rT   r   r   r?   r    rn      rm   rn   Tin_tenskeep_dimc                 C   s   | j ddg|dS )z2Spatial averaging over height and width of images.r%      )keepdimmean)r   r   r   r   r    _spatial_average   s   r   @   r   out_hw.c                 C   s   t j|ddd| S )z+Upsample input with bilinear interpolation.bilinearF)sizemodealign_corners)r   Upsample)r   r   r   r   r    	_upsample   s   r   :0yE>in_featepsc                 C   s&   t |t j| d ddd }| | S )zNormalize input tensor.r%      T)dimr   )r0   sqrtsum)r   r   norm_factorr   r   r    _normalize_tensor   s   r   r   rA   r   c                 C   sN   | j d |kr| j d |krtjjj| ||fddS tjjj| ||fdddS )zlhttps://github.com/toshas/torch-fidelity/blob/master/torch_fidelity/sample_similarity_lpips.py#L127C22-L132.area)r   r   F)r   r   )shaper0   r   
functionalinterpolate)rA   r   r   r   r    _resize_tensor   s   r   c                       sF   e Zd ZU dZeed< eed< d fddZdedefd	d
Z  ZS )ScalingLayerzScaling layer.shiftscaler   Nc                    sb   t    | jdtg dd d d d d f dd | jdtg dd d d d d f dd d S )Nr   )gQgI+gMbȿF)
persistentr   )gZd;O?gy&1?g?)r,   r-   register_bufferr0   r   )r8   r?   r   r    r-      s   
*.zScalingLayer.__init__inpc                 C   s   || j  | j S rB   )r   r   )r8   r   r   r   r    rR      s   zScalingLayer.forward)r   N)	rK   rL   rM   rU   r   rN   r-   rR   rW   r   r   r?   r    r      s   
 r   c                	       sF   e Zd ZdZddedededdf fd	d
ZdedefddZ  Z	S )NetLinLayerz,A single linear layer which does a 1x1 conv.r   Fchn_inchn_outuse_dropoutr   Nc              	      sH   t    |rt gng }|tj||dddddg7 }tj| | _d S )Nr   r   F)stridepaddingbias)r,   r-   r   DropoutConv2dr1   r   )r8   r   r   r   layersr?   r   r    r-      s   
zNetLinLayer.__init__rA   c                 C   s
   |  |S r   )r   )r8   rA   r   r   r    rR     s   
zNetLinLayer.forward)r   F)
rK   rL   rM   rU   intrV   r-   r   rR   rW   r   r   r?   r    r      s     	r   c                       s   e Zd Z									ddeded ded	ed
ededee dedee ddf fddZ	dde	de	dedede
e	ee	ee	 f f f
ddZ  ZS )_LPIPSTalexFNr   r   r   vggsqueezespatial	pnet_rand	pnet_tuner   
model_path	eval_moderesizer   c
              	      s  t    || _|| _|| _|| _|	| _t | _| jdv r%t	}
g d| _
n| jdkr2t}
g d| _
n| jdkr>t}
g d| _
t| j
| _|
| j | jd| _t| j
d |d	| _t| j
d
 |d	| _t| j
d |d	| _t| j
d |d	| _t| j
d |d	| _| j| j| j| j| jg| _| jdkrt| j
d |d	| _t| j
d |d	| _|  j| j| jg7  _t| j| _|r|du rtjtjt | jdd| d}| j!t"j#|dddd |r| $  | js| % D ]}d|_&qdS dS )a  Initializes a perceptual loss torch.nn.Module.

        Args:
            pretrained: This flag controls the linear layers should be pretrained version or random
            net: Indicate backbone to use, choose between ['alex','vgg','squeeze']
            spatial: If input should be spatial averaged
            pnet_rand: If backbone should be random or use imagenet pre-trained weights
            pnet_tune: If backprop should be enabled for both backbone and linear layers
            use_dropout: If dropout layers should be added
            model_path: Model path to load pretained models from
            eval_mode: If network should be in evaluation mode
            resize: If input should be resized to this size

        )r   r   )r            r   r   )r        r   r   r   )r   r   r   r   r   r   r   )r   r#   r   )r   r   r%   r   ro   r&      Nz..zlpips_models/z.pthcpu)map_locationF)strict)'r,   r-   	pnet_typer   r   r   r   r   scaling_layerrn   chnsrX   r"   lenLr   r   lin0lin1lin2lin3lin4linslin5lin6r   r5   ospathabspathjoininspectgetfileload_state_dictr0   loadevalr7   r#   )r8   r   r   r   r   r   r   r   r   r   net_typer>   r?   r   r    r-   
  sT   





z_LPIPS.__init__in0in1retperlayer	normalizec              	   C   sN  |rd| d }d| d }|  ||  |}}| jd ur,t|| jd}t|| jd}| j|| j|}}i i i }	}
}t| jD ]}t|| t|| |	|< |
|< |	| |
|  d ||< qFg }t| jD ]-}| jr|	t
| j| || t|jdd  d qm|	t| j| || dd qmt|}|r||fS |S )Nr%   r   )r   )r   T)r   )r   r   r   r   rR   r/   r   r   r   r4   r   r   tupler   r   r   )r8   r   r   r   r   	in0_input	in1_inputouts0outs1feats0feats1diffskkresvalr   r   r    rR   U  s*   
"0"z_LPIPS.forward)	Tr   FFFTNTN)FF)rK   rL   rM   rV   r   r   r3   r   r-   r   r   r   r   rR   rW   r   r   r?   r    r   	  sV    	
Lr   c                       s*   e Zd ZdZdedd f fddZ  ZS )_NoTrainLpipsz8Wrapper to make sure LPIPS never leaves evaluation mode.r   r   c                    s   t  dS )z.Force network to always be in evaluation mode.F)r,   train)r8   r   r?   r   r    r   {  s   z_NoTrainLpips.train)rK   rL   rM   rU   rV   r   rW   r   r   r?   r    r   x  s    r   imgr   c                 C   sD   |r|   dko|  dkn|  dk}| jdko!| jd dko!|S )z1Check that input is a valid image to the network.g      ?g        r   ro   r   r   )maxminndimr   )r   r   value_checkr   r   r    
_valid_img  s    r   img1img2c                 C   s   t | |r
t ||s4td| j d|j d|  |  g d| | g d|r,ddgnddg d|| ||d	 }|| jd fS )
NzeExpected both input arguments to be normalized tensors with shape [N, 3, H, W]. Got input with shape z and z and values in range z+ when all values are expected to be in the r   r   r   z range.)r   )r   
ValueErrorr   r   r   r   )r   r   r   r   lossr   r   r    _lpips_update  s    r   r   
sum_scorestotal	reduction)r   r   c                 C   s   |dkr| | S | S )Nr   r   )r   r   r   r   r   r    _lpips_compute  s   r   r   r   r   c                 C   s:   t |dj| j| jd}t| |||\}}t| ||S )ao  The Learned Perceptual Image Patch Similarity (`LPIPS_`) calculates perceptual similarity between two images.

    LPIPS essentially computes the similarity between the activations of two image patches for some pre-defined network.
    This measure has been shown to match human perception well. A low LPIPS score means that image patches are
    perceptual similar.

    Both input image patches are expected to have shape ``(N, 3, H, W)``. The minimum size of `H, W` depends on the
    chosen backbone (see `net_type` arg).

    Args:
        img1: first set of images
        img2: second set of images
        net_type: str indicating backbone network type to use. Choose between `'alex'`, `'vgg'` or `'squeeze'`
        reduction: str indicating how to reduce over the batch dimension. Choose between `'sum'` or `'mean'`.
        normalize: by default this is ``False`` meaning that the input is expected to be in the [-1,1] range. If set
            to ``True`` will instead expect input to be in the ``[0,1]`` range.

    Example:
        >>> from torch import rand
        >>> from torchmetrics.functional.image.lpips import learned_perceptual_image_patch_similarity
        >>> img1 = (rand(10, 3, 100, 100) * 2) - 1
        >>> img2 = (rand(10, 3, 100, 100) * 2) - 1
        >>> learned_perceptual_image_patch_similarity(img1, img2, net_type='squeeze')
        tensor(0.1005)

    )r   )devicedtype)r   tor   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r    r     s   !)F)T)r   )r   )r   r   )r   r   F))r   r   typingr   r   r   r   r0   r   r   typing_extensionsr   torchmetrics.utilities.importsr	   r   __doctest_skip__r3   rV   modules	containerr1   r   Moduler"   rX   rn   r   r   r   r   floatr   r   r   r   r   r   r   r   r   r   r   r   r   r    <module>   sX   (22 o0(