o
    Gi8                     @   sP  d dl Zd dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZ d dlmZmZ ddlmZmZ eeZejdduZejdduZejd	duZer[d dlZd d
lmZ nederpd dlmZ d dlm Z m!Z! nederd dl"m#Z# d dl$m%Z% nedd ddZ&d!ddZ'dd Z(		d"ddZ)	d#ddZ*dd Z+dS )$    N)ImageImageOps)InterpolationMode)	normalizeresize   )
get_logger
load_imageinsightfaceconsisid_eva_clipfacexlib)FaceAnalysiszPinsightface is not available. Please install it using 'pip install insightface'.)create_model_and_transforms)OPENAI_DATASET_MEANOPENAI_DATASET_STDz\consisid_eva_clip is not available. Please install it using 'pip install consisid_eva_clip'.)init_parsing_model)FaceRestoreHelperzJfacexlib is not available. Please install it using 'pip install facexlib'.   c                 C   sd   | j dd \}}t|||kr| S |t|| }t|| }t|| }tj| ||ftjd} | S )a  
    Resize the input image to a specified long edge while maintaining aspect ratio.

    Args:
        image (numpy.ndarray): Input image (H x W x C or H x W).
        resize_long_edge (int): The target size for the long edge of the image. Default is 768.

    Returns:
        numpy.ndarray: Resized image with the long edge matching `resize_long_edge`, while maintaining the aspect
        ratio.
    N   )interpolation)shapemaxintcv2r   INTER_LANCZOS4)imageresize_long_edgehwk r    _/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/pipelines/consisid/consisid_utils.pyresize_numpy_image_long'   s   r"   Tc                    s4   dd  t | tr fdd| D S  | S )aO  Numpy array to tensor.

    Args:
        imgs (list[ndarray] | ndarray): Input images.
        bgr2rgb (bool): Whether to change bgr to rgb.
        float32 (bool): Whether to change to float32.

    Returns:
        list[tensor] | tensor: Tensor images. If returned results only have
            one element, just return tensor.
    c                 S   sX   | j d dkr|r| jdkr| d} t| tj} t| ddd} |r*| 	 } | S )Nr   r   float64float32r      )
r   dtypeastyper   cvtColorCOLOR_BGR2RGBtorch
from_numpy	transposefloat)imgbgr2rgbr$   r    r    r!   	_totensorK   s   

zimg2tensor.<locals>._totensorc                    s   g | ]} |qS r    r    ).0r.   r0   r/   r$   r    r!   
<listcomp>V   s    zimg2tensor.<locals>.<listcomp>)
isinstancelist)imgsr/   r$   r    r2   r!   
img2tensor>   s   

r7   c                 C   s\   d| ddddf  d| ddddf   d| ddddf   }| dddd}|S )	a  
    Converts an RGB image to grayscale by applying the standard luminosity formula.

    Args:
        img (torch.Tensor): The input image tensor with shape (batch_size, channels, height, width).
                             The image is expected to be in RGB format (3 channels).

    Returns:
        torch.Tensor: The grayscale image tensor with shape (batch_size, 3, height, width).
                      The grayscale values are replicated across all three channels.
    gA`"?Nr   r%   gbX9?r   gv/?r   )repeat)r.   xr    r    r!   to_grayZ   s   Hr:   c                    s0  |    t|tj}||}t|dkr)t|dd dd }|d }|d }nd}d}| | | jd	d
 |du rA| j	d }| 
  t| jdkrPtd| jd }|du rctd ||}t|||}|jdkrv|d}|
rt|d	ddd }||}| t|g dg dd   jdd	d g d}t fdd|D  }t|}t||t|}t|||}nt|	tj}t|d	ddd }||}| }}t||jt j!}t|||}|||dd	dd\}}t"|ddd	}t#||}tj$||gdd}||||fS )a  
    Process face embeddings from an image, extracting relevant features such as face embeddings, landmarks, and parsed
    face features using a series of face detection and alignment tools.

    Args:
        face_helper_1: Face helper object (first helper) for alignment and landmark detection.
        clip_vision_model: Pre-trained CLIP vision model used for feature extraction.
        face_helper_2: Face helper object (second helper) for embedding extraction.
        eva_transform_mean: Mean values for image normalization before passing to EVA model.
        eva_transform_std: Standard deviation values for image normalization before passing to EVA model.
        app: Application instance used for face detection.
        device: Device (CPU or GPU) where the computations will be performed.
        weight_dtype: Data type of the weights for precision (e.g., `torch.float32`).
        image: Input image in RGB format with pixel values in the range [0, 255].
        original_id_image: (Optional) Original image for feature extraction if `is_align_face` is False.
        is_align_face: Boolean flag indicating whether face alignment should be performed.

    Returns:
        tuple:
            - id_cond: Concatenated tensor of Ante face embedding and CLIP vision embedding
            - id_vit_hidden: Hidden state of the CLIP vision model, a list of tensors.
            - return_face_features_image_2: Processed face features image after normalization and parsing.
            - face_kps: Keypoints of the face detected in the image.
    r   c                 S   s0   | d d | d d  | d d | d d   S )Nbboxr   r   r   r%   r    )r9   r    r    r!   <lambda>   s   0 z)process_face_embeddings.<locals>.<lambda>)key	embeddingkpsNT)only_center_facezfacexlib align face failzMFailed to detect face using insightface. Extracting embedding with align facer%   )r/   g     o@)g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?)dimkeepdim)r               	         c                 3   s    | ]} |kV  qd S )Nr    )r1   iparsing_outr    r!   	<genexpr>   s    z*process_face_embeddings.<locals>.<genexpr>F)return_all_featuresreturn_hiddenshuffler   )rB   )%	clean_allr   r(   COLOR_RGB2BGRgetlensorted
read_imageget_face_landmarks_5all_landmarks_5align_warp_facecropped_facesRuntimeErrorloggerwarningget_featr*   r+   tondim	unsqueezer7   
face_parser   argmaxsumbool	ones_likewherer:   r   
image_sizer   BICUBICnormdivcat)face_helper_1clip_vision_modelface_helper_2eva_transform_meaneva_transform_stdappdeviceweight_dtyper   original_id_imageis_align_face	image_bgr	face_infoid_ante_embeddingface_kps
align_faceinputbg_labelbgwhite_imagereturn_face_features_imagereturn_face_features_image_2original_image_bgrface_features_imageid_cond_vitid_vit_hiddenid_cond_vit_normid_condr    rL   r!   process_face_embeddingsk   sp   &













r   c
                 C   s   t |trtt|dd}
nttt	|d}
t
|
d}
|
}t| ||||||||
||	\}}}}|  }| }|ddd}| d }|tj}tt	|}
|||
|fS )a  
    Process face embeddings from an input image for inference, including alignment, feature extraction, and embedding
    concatenation.

    Args:
        face_helper_1: Face helper object (first helper) for alignment and landmark detection.
        clip_vision_model: Pre-trained CLIP vision model used for feature extraction.
        face_helper_2: Face helper object (second helper) for embedding extraction.
        eva_transform_mean: Mean values for image normalization before passing to EVA model.
        eva_transform_std: Standard deviation values for image normalization before passing to EVA model.
        app: Application instance used for face detection.
        device: Device (CPU or GPU) where the computations will be performed.
        weight_dtype: Data type of the weights for precision (e.g., `torch.float32`).
        img_file_path: Path to the input image file (string) or a numpy array representing an image.
        is_align_face: Boolean flag indicating whether face alignment should be performed (default: True).

    Returns:
        tuple:
            - id_cond: Concatenated tensor of Ante face embedding and CLIP vision embedding.
            - id_vit_hidden: Hidden state of the CLIP vision model, a list of tensors.
            - image: Processed face image after feature extraction and alignment.
            - face_kps: Keypoints of the face detected in the image.
    )r   RGBi   r%   r   r      )r4   strnparrayr	   convertr   exif_transposer   	fromarrayr"   r   cpudetachsqueezepermutenumpyr'   uint8)rn   ro   rp   rq   rr   rs   rt   ru   img_file_pathrw   r   rv   r   r   align_crop_face_imager{   tensorr    r    r!   process_face_embeddings_infer   s2   
%
r   c                 C   sT  t ddddd|tj| dd}d|_td	|tj| dd
|_tjj|  ddgd}|j	dd t
dtj| dddd\}}}|j}t|dt}t|dt}	t|ttfs]|fd }t|	ttfsi|	fd }	|}|	}	tdtj| ddgd}
|
j	ddd |j  |j  |  |j| |j| |j||d ||||
||	fS )a  
    Prepare all face models for the facial recognition task.

    Parameters:
    - model_path: Path to the directory containing model files.
    - device: The device (e.g., 'cuda', 'xpu', 'cpu') where models will be loaded.
    - dtype: Data type (e.g., torch.float32) for model inference.

    Returns:
    - face_helper_1: First face restoration helper.
    - face_helper_2: Second face restoration helper.
    - face_clip_model: CLIP model for face extraction.
    - eva_transform_mean: Mean value for image normalization.
    - eva_transform_std: Standard deviation value for image normalization.
    - face_main_model: Main face analysis model.
    r%   i   )r%   r%   retinaface_resnet50pngface_encoder)upscale_factor	face_size
crop_ratio	det_modelsave_extrt   model_rootpathNbisenet)
model_namert   r   z./face_encoder/models/antelopev2/glintr100.onnxCUDAExecutionProvider)	providersr   )ctx_idzEVA02-CLIP-L-14-336zEVA02_CLIP_L_336_psz14_s6B.ptT)force_custom_clip
image_mean	image_stdr   
antelopev2)namerootr   )  r   )r   det_size)r&   )r   ospathjoinrc   r   r
   	model_zoo	get_modelpreparer   visualgetattrr   r   r4   r5   tupler   face_detevalr`   )
model_pathrt   r&   rn   rp   model_face_clip_modelrq   rr   face_main_modelr    r    r!   prepare_face_models#  sT   	



r   )r   )TT)NT)T),importlib.util	importlibr   r   r   r   r*   PILr   r   torchvision.transformsr   !torchvision.transforms.functionalr   r   utilsr   r	   __name__r]   util	find_spec_insightface_available_consisid_eva_clip_available_facexlib_availabler
   insightface.appr   ImportErrorr   r   consisid_eva_clip.constantsr   r   facexlib.parsingr   &facexlib.utils.face_restoration_helperr   r"   r7   r:   r   r   r   r    r    r    r!   <module>   sD    


z
H