o
    ٷi                      @   sx   d dl Z d dlZd dlmZ d dlmZ d dlmZ e eZ	G dd dej
Zdedefd	d
ZdedefddZdS )    N)SAM2Base)compare_tensors_with_tolerance)nnc                	       s   e Zd Zdef fddZe dejdejdejdejfdd	Zdejdejd
ejfddZ	dejdejd
ejfddZ
  ZS )SAM2PromptEncoder	sam_modelc                    s   t    |j| _|| _d S )N)super__init__sam_prompt_encoderprompt_encodermodel)selfr   	__class__ g/home/ubuntu/.local/lib/python3.10/site-packages/onnxruntime/transformers/models/sam2/prompt_encoder.pyr      s   

zSAM2PromptEncoder.__init__point_coordspoint_labelsinput_maskshas_input_masksc                 C   s,   |  ||}| ||}| j }|||fS )aj  Encode prompts.

           Args:
            point_coords (torch.Tensor): [L, P, 2] shape and float32 dtype and contains the absolute pixel
                                         coordinate in (x, y) format of the P input points in image of size 1024x1024.
            point_labels (torch.Tensor): shape [L, P] and int32 dtype, where 1 means
                                         positive (foreground), 0 means negative (background), -1 means padding,
                                         2 (box left upper corner), 3 (box right bottom corner).
            input_masks (torch.Tensor): [L, 1, H/4, W/4]. Low resolution mask input to the model.
                                        Typically coming from a previous iteration.
            has_input_masks (torch.Tensor): [L]. 1.0 if input_masks is used, 0.0 otherwise.
        Returns:
            sparse_embeddings (torch.Tensor): [L, P+1, 256], embedding for points and boxes.
            dense_embeddings (torch.Tensor):  [L, 256, 64, 64]. embedding for input masks.
            image_pe (torch.Tensor, optional): [1, 256, 64, 64]. image positional encoding.
        )_embed_points_embed_masksr
   get_dense_pe)r   r   r   r   r   sparse_embeddingsdense_embeddingsimage_per   r   r   forward   s   

zSAM2PromptEncoder.forwardreturnc                 C   s4  |d }t j|jd ddf|jd}t j|jd df|jd }t j||gdd}t j||gdd}|d d d d df | jj |d d d d df< |d d d d df | jj |d d d d df< | jj	
|}|d|}||dk }|| jjj|dk  }t| jjD ]}|| jj| j||k  }q|S )Ng      ?r         )device)dim)torchzerosshaper   onescatr   
image_sizer
   pe_layer_pe_encoding	unsqueeze	expand_asnot_a_point_embedweightrangenum_point_embeddingspoint_embeddings)r   r   r   padding_pointpadding_labelpoint_embeddingir   r   r   r   3   s   00zSAM2PromptEncoder._embed_pointsc                 C   sV   | j |}| j jjdddd}td|j || d| |  }td|j |S )Nr   r!   zno_mask_embedding.shape: %sg      ?zmask_embedding.shape: %s)r
   mask_downscalingno_mask_embedr-   reshapeloggerinfor$   )r   r   r   mask_embeddingno_mask_embeddingr   r   r   r   J   s   zSAM2PromptEncoder._embed_masks)__name__
__module____qualname__r   r   r"   no_gradTensorr   r   r   __classcell__r   r   r   r   r      s    $r   
sam2_modelonnx_model_pathc                 C   sD  t |  }d}d}tjdd||dftjd}tjdd||ftjd}tj|dddtjd}tjdtjd}|||||\}	}
}t	d	|j
 t	d
|j
 t	d|j
 t	d|j
 t	d|	j
 t	d|
j
 t	d|j
 tjj|||||f|dddg dg dddddddddidddddidd	 td| d S )Nr      r      lowhighsizedtyper      rJ   zpoint_coords.shape: %szpoint_labels.shape: %szinput_masks.shape: %szhas_input_masks.shape: %szsparse_embeddings.shape: %szdense_embeddings.shape: %szimage_pe.shape: %sT   r   r   r   r   )r   r   r   
num_labels
num_points)r   r   znum_points+1)r   r   r   r   r   )export_paramsopset_versiondo_constant_foldinginput_namesoutput_namesdynamic_axesz#prompt encoder onnx model saved to )r   cpur"   randintfloatint32r#   r%   r8   r9   r$   onnxexportprint)rB   rC   sam2_prompt_encoderrO   rP   r   r   r   r   r   r   r   r   r   r   export_prompt_encoder_onnxS   sD   

r_   c                    s  t |  }d}d}tjdd||dftjd}tjdd||ftjd}tj|dddtjd}tjdtjd}|||||\}	}
}dd l}|j	|d	gd
}|
   fddtt D }td| | fddttD }td| ||| | | | d}t|D ]\}}td||| j q|\}}}td|	t|ddrtd|
t|ddrtd|t|ddrtd|  d S td|  d S )Nr      r   rE   r   rF   rK   rL   CPUExecutionProvider)	providersc                       g | ]} | j qS r   name.0r4   )model_inputsr   r   
<listcomp>       z,test_prompt_encoder_onnx.<locals>.<listcomp>zinput_names: %sc                    rc   r   rd   rf   )model_outputsr   r   ri      rj   zoutput_names: %srN   zoutput %s shape: %sr   g?)mismatch_percentage_tolerancer   r   zonnx model has been verified: z onnx model verification failed: )r   rW   r"   rX   rY   rZ   randr%   onnxruntimeInferenceSession
get_inputsr.   lenr8   r9   get_outputsrunnumpy	enumerater$   r   tensorr]   )rB   rC   r^   rO   rP   r   r   r   r   r   r   r   rn   ort_sessionrT   rU   outputsr4   output_nameort_sparse_embeddingsort_dense_embeddingsort_image_per   )rh   rk   r   test_prompt_encoder_onnx   s\   



r}   )loggingr"   sam2.modeling.sam2_baser   
sam2_utilsr   r   	getLoggerr<   r8   Moduler   strr_   r}   r   r   r   r   <module>   s"   
D
/