o
    	Ti                     @   s   d dl Z d dlZd dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
mZmZ G dd dejZG dd dejjZd	d
 ZdS )    N)hf_hub_download)EntryNotFoundError)	CLIPModelis_torch_npu_availableis_torch_xpu_availablec                       s$   e Zd Z fddZdd Z  ZS )MLPc                    sb   t    ttddtdtddtdtddtdtddtdd| _d S )	Ni   i   g?   @   g?      )super__init__nn
SequentialLinearDropoutlayers)self	__class__ P/home/ubuntu/.local/lib/python3.10/site-packages/trl/models/auxiliary_modules.pyr      s   






zMLP.__init__c                 C   s
   |  |S )N)r   )r   embedr   r   r   forward'   s   
zMLP.forward)__name__
__module____qualname__r   r   __classcell__r   r   r   r   r      s    r   c                       s(   e Zd ZdZ fddZdd Z  ZS )AestheticScorera  
    This model attempts to predict the aesthetic score of an image. The aesthetic score is a numerical approximation of
    how much a specific image is liked by humans on average. This is from
    https://github.com/christophschuhmann/improved-aesthetic-predictor
    c                   s   t    td| _tjjg dg dd| _d| _	t
 | _zt||}W n ty6   tj||}Y nw tj|tddd}| j| || _|   d S )	Nzopenai/clip-vit-large-patch14)g3<4'?gwgM?gy{ ?)gB91?gwt.?g	U?)meanstd   cpuT)map_locationweights_only)r   r   r   from_pretrainedcliptorchvision
transforms	Normalize	normalizetarget_sizer   mlpr   r   ospathjointorchloaddeviceload_state_dictdtypeeval)r   r4   model_idmodel_filenamecached_path
state_dictr   r   r   r   2   s    
zAestheticScorer.__init__c                 C   sp   t |  j}tj| j|}| || j	|}| j
j|d}|tjj|ddd }| |d}|S )N)pixel_valuesT)dimkeepdimr   )next
parametersr2   r'   r(   Resizer+   r*   tor4   r&   get_image_featuresr0   linalgvector_normr,   squeeze)r   imagesr2   r   rewardr   r   r   __call__C   s   zAestheticScorer.__call__)r   r   r   __doc__r   rH   r   r   r   r   r   r   +   s    r   c                    sH   t | |tjd t r   nt r   n    fdd}|S )N)r6   r7   r4   c                    s   |  dd}  | }|i fS )Nr   r   )clamp)rF   promptsmetadatascoresscorerr   r   _fn[   s   zaesthetic_scorer.<locals>._fn)r   r0   float32r   npur   xpucuda)hub_model_idr7   rP   r   rN   r   aesthetic_scorerN   s   

rV   )r-   r0   torch.nnr   r'   huggingface_hubr   huggingface_hub.utilsr   transformersr   r   r   Moduler   r   rV   r   r   r   r   <module>   s   #