o
    .wi/                     @   sH  d dl Z d dlmZ d dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZ er8d dlmZ d dlmZ d	d
dZe
d ZdZerGesJdgZG dd dejZG dd deZ	d%dedejdededeeeeef f f
ddZ	d&dedeeef de
d defdd Z	!		"	d'ded#ede
d dededefd$dZdS )(    N)Union)Tensornn	normalize)Literal)_TORCH_GREATER_EQUAL_2_2_TORCHVISION_AVAILABLE)
transforms)resnet50)      )r   d   )kadid10kkoniq10kz=https://github.com/miccunifi/ARNIQA/releases/download/weightsarniqac                	       s   e Zd ZdZddeddf fddZddd	Zddedede	eef fddZ
dedefddZddededefddZ  ZS )_ARNIQAzInitializes a No-Reference Image Quality Assessment ARNIQA torch.nn.Module.

    Args:
        regressor_dataset: dataset used for training the regressor, choose between [``koniq10k``, ``kadid10k``]

    r   regressor_datasetreturnNc                    s   t    tstdtstdt }||vr$td| d| d|| _	g d| _
g d| _t }|jj| _tjt| d d  }|| _t| jd	 d
| _|   dtjdd fdd}|| j || j d S )Nz'ARNIQA metric requires PyTorch >= 2.2.0zARNIQA metric requires that torchvision is installed. Either install as `pip install torchmetrics[image]` or `pip install torchvision`.z,Argument `regressor_dataset` must be one of 
, but got .)g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?   r   moduler   c                 S   s    |    |  D ]}d|_qd S )NF)eval
parametersrequires_grad)r   p r   a/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/torchmetrics/functional/image/arniqa.py_freezeU   s   z!_ARNIQA.__init__.<locals>._freeze)super__init__r   RuntimeErrorr	   ModuleNotFoundError_AVAILABLE_REGRESSOR_DATASETSkeys
ValueErrorr   imagenet_norm_meanimagenet_norm_stdr   fcin_featuresfeat_dimr   
SequentiallistchildrenencoderLinear	regressor_load_weightsModule)selfr   valid_regressor_datasetsr0   r    	__class__r   r   r"   8   s0   




z_ARNIQA.__init__c                 C   s   t jjt dddd}dd | D }| jj|dd t < tj	dt
d	d
 t jjt d| j dddd }|d|d< |dd|d< | jj|dd W d   dS 1 s`w   Y  dS )z/Loads the weights of the encoder and regressor.z/ARNIQA.pthTcpu)progressmap_locationc                 S   s&   i | ]\}}d |vr| dd|qS )	projectorzmodel. )replace).0kvr   r   r   
<dictcomp>b   s     z)_ARNIQA._load_weights.<locals>.<dictcomp>)strictignoreztorch.serialization)categoryr   z/regressor_z.pthweightsweightbiasesr   biasN)torchhubload_state_dict_from_url	_base_urlitemsr0   load_state_dictwarningscatch_warningsfilterwarningsUserWarningr   
state_dictpop	unsqueezer2   )r5   encoder_state_dictfiltered_encoder_state_dictregressor_state_dictr   r   r   r3   ]   s$   
"z_ARNIQA._load_weightsFimgr   c                 C   sd   |j dd \}}t|d |d f|}|r.tj| j| jd|}tj| j| jd|}||fS )zPreprocesses the input to the model.

        Obtains the half-scale version of the input image and applies normalization if needed.

        Nr   )meanstd)shaper
   Resize	Normalizer(   r)   )r5   rZ   r   hwimg_dsr   r   r   _preprocess_inputq   s   z_ARNIQA._preprocess_inputscorec                 C   s   t | j \}}|| ||  S )zKScales the quality score to be in the [0, 1] range, where higher is better.)r%   r   )r5   re   	min_score	max_scorer   r   r   _scale_score~   s   z_ARNIQA._scale_scorec                 C   sz   |  ||\}}| |}|d| j}t|dd}| |}|d| j}t|dd}t||f}| |}| |S )Nr   r   )dim)	rd   r0   viewr,   normalize_fnrJ   hstackr2   rh   )r5   rZ   r   rc   img_fimg_ds_ffre   r   r   r   forward   s   



z_ARNIQA.forward)r   )r   NF)__name__
__module____qualname____doc___TYPE_REGRESSOR_DATASETr"   r3   r   booltuplerd   rh   rp   __classcell__r   r   r7   r   r   0   s    
%  r   c                       s*   e Zd ZdZdedd f fddZ  ZS )_NoTrainArniqaz9Wrapper to make sure ARNIQA never leaves evaluation mode.moder   c                    s   t  dS )z.Force network to always be in evaluation mode.F)r!   train)r5   r{   r7   r   r   r|      s   z_NoTrainArniqa.train)rr   rs   rt   ru   rw   r|   ry   r   r   r7   r   rz      s    rz   FrZ   modelr   autocastr   c                 C   s   | j dkr| jd dkstd| j d|  dkr!|  dks2|r2td|   d	|   d|rVtjj| jj	| j
d
 || |d}W d   n1 sPw   Y  n|j| j
d| |d}| | jd fS )a  Update step for ARNIQA metric.

    Args:
        img: the input image
        model: the pre-trained model
        normalize: boolean indicating whether the input image is normalized
        autocast: boolean indicating whether to use automatic mixed precision

       r      z?Input image must have shape [N, 3, H, W]. Got input with shape r   g      ?g        zdInput image values must be in the [0, 1] range when normalize==True. Got input with values in range z and )device_typedtyper   N)r   r   )ndimr^   r'   maxminrJ   ampr~   devicetyper   tosqueeze)rZ   r}   r   r~   lossr   r   r   _arniqa_update   s"   r   r\   scores
num_scores	reduction)sumr\   nonec                 C   s(   |   }|dkr
| S |dkr|| S |S )zCompute step for ARNIQA metric.r   r\   )r   )r   r   r   
sum_scoresr   r   r   _arniqa_compute   s   r   r   Tr   c           	      C   sp   d}||vrt d| d| t|tst d| t|dj| j| jd}t| |||d\}}t|||S )a  ARNIQA: leArning distoRtion maNifold for Image Quality Assessment metric.

    `ARNIQA`_ is a No-Reference Image Quality Assessment metric that predicts the technical quality of an image with
    a high correlation with human opinions. ARNIQA consists of an encoder and a regressor. The encoder is a ResNet-50
    model trained in a self-supervised way to model the image distortion manifold to generate similar representation for
    images with similar distortions, regardless of the image content. The regressor is a linear model trained on IQA
    datasets using the ground-truth quality scores. ARNIQA extracts the features from the full- and half-scale versions
    of the input image and then outputs a quality score in the [0, 1] range, where higher is better.

    The input image is expected to have shape ``(N, 3, H, W)``. The image should be in the [0, 1] range if `normalize`
    is set to ``True``, otherwise it should be normalized with the ImageNet mean and standard deviation.

    .. note::
        Using this metric requires you to have ``torchvision`` package installed. Either install as
        ``pip install torchmetrics[image]`` or ``pip install torchvision``.

    Args:
        img: the input image
        regressor_dataset: dataset used for training the regressor. Choose between [``koniq10k``, ``kadid10k``].
            ``koniq10k`` corresponds to the `KonIQ-10k`_ dataset, which consists of real-world images with authentic
            distortions. ``kadid10k`` corresponds to the `KADID-10k`_ dataset, which consists of images with
            synthetically generated distortions.
        reduction: indicates how to reduce over the batch dimension. Choose between [``sum``, ``mean``, ``none``].
        normalize: by default this is ``True`` meaning that the input is expected to be in the [0, 1] range. If set
            to ``False`` will instead expect input to be already normalized with the ImageNet mean and standard
            deviation.
        autocast: boolean indicating whether to use automatic mixed precision

    Returns:
        A tensor in the [0, 1] range, where higher is better, representing the ARNIQA score of the input image. If
        `reduction` is set to ``none``, the output will have shape ``(N,)``, otherwise it will be a scalar tensor.

    Raises:
        ModuleNotFoundError:
            If ``torchvision`` package is not installed
        ValueError:
            If ``regressor_dataset`` is not in [``"kadid10k"``, ``"koniq10k"``]
        ValueError:
            If ``reduction`` is not in [``"sum"``, ``"mean"``, ``"none"``]
        ValueError:
            If ``normalize`` is not a bool
        ValueError:
            If the input image is not a valid image tensor with shape [N, 3, H, W].
        ValueError:
            If the input image values are not in the [0, 1] range when ``normalize`` is set to ``True``

    Examples:
        >>> from torch import rand
        >>> from torchmetrics.functional.image.arniqa import arniqa
        >>> img = rand(8, 3, 224, 224)
        >>> # Non-normalized input
        >>> arniqa(img, regressor_dataset='koniq10k', normalize=True)
        tensor(0.5308)


        >>> from torch import rand
        >>> from torchmetrics.functional.image.arniqa import arniqa
        >>> from torchvision.transforms import Normalize
        >>> img = rand(8, 3, 224, 224)
        >>> img = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(img)
        >>> # Normalized input
        >>> arniqa(img, regressor_dataset='koniq10k', normalize=False)
        tensor(0.5065)

    )r\   r   r   z$Argument `reduction` must be one of r   z.Argument `normalize` should be a bool but got )r   )r   r   )r   r~   )	r'   
isinstancerw   rz   r   r   r   r   r   )	rZ   r   r   r   r~   valid_reductionr}   r   r   r   r   r   r      s   H
rq   )r\   )r   r\   TF)rP   typingr   rJ   r   r   torch.nn.functionalr   rk   typing_extensionsr   torchmetrics.utilities.importsr   r	   torchvisionr
   torchvision.modelsr   r%   rv   rM   __doctest_skip__r4   r   rz   rw   rx   intr   r   r   r   r   r   r   <module>   st   e	


