o
    ٷi"                  	   @   s   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 d dlm
Z
 e eZG dd de
jZ			dd
e	dededefddZd
e	dededefddZdS )    N)SAM2ImageEncoderrandom_sam2_input_image)SAM2PromptEncoder)SAM2Base)nnc                       sh   e Zd Z	ddedededdf fddZe d	ejd
ejdejdejdejdejfddZ	  Z
S )SAM2MaskDecoderT	sam_modelmultimask_outputdynamic_multimask_via_stabilityreturnNc                    s0   t    |j| _|j| _|| _|| _|| _d S )N)	super__init__sam_mask_decodermask_decodersam_prompt_encoderprompt_encodermodelr	   r
   )selfr   r	   r
   	__class__ e/home/ubuntu/.local/lib/python3.10/site-packages/onnxruntime/transformers/models/sam2/mask_decoder.pyr      s   

zSAM2MaskDecoder.__init__image_features_0image_features_1image_embeddingsimage_pesparse_embeddingsdense_embeddingsc           
      C   s   | j j|||||jd dk||gd\}}}	}	| jr8|ddddddddf }|ddddf }||fS | jrH| j ||\}}||fS |ddddddddf }|ddddf }||fS )a  
        Decode masks from image and prompt embeddings. Only support H=W=1024.

        Args:
            image_features_0 (torch.Tensor): [1, 32, H/4, W/4]. high resolution features of level 0 from image encoder.
            image_features_1 (torch.Tensor): [1, 64, H/8, W/8]. high resolution features of level 1 from image encoder.
            image_embeddings (torch.Tensor): [1, 256, H/16, W/16]. image embedding from image encoder.
            image_pe (torch.Tensor): [1, 256, H/16, W/16]. image positional encoding.
            sparse_embeddings (torch.Tensor): [L, P+1, 256], embedding for points and boxes.
            dense_embeddings (torch.Tensor):  [L, 256, H/16, W/16]. embedding for input masks.

        Returns:
            low_res_masks (torch.Tensor, optional): [1, M, H/4, W/4]. low resolution masks.
            iou_predictions (torch.Tensor): [1, M]. scores for M masks.
        r      )r   r   sparse_prompt_embeddingsdense_prompt_embeddingsrepeat_imagehigh_res_featuresN)r   predict_masksshaper	   r
    _dynamic_multimask_via_stability)
r   r   r   r   r   r   r   low_res_masksiou_predictions_r   r   r   forward   s(   	  zSAM2MaskDecoder.forward)T)__name__
__module____qualname__r   boolr   torchno_gradTensorr)   __classcell__r   r   r   r   r      s2    r   TF
sam2_modelonnx_model_pathr	   r
   c                 C   s  t |  }t }t|  }||\}}	}
td|j td|	j td|
j d}d}tjdd||dftj	d}tjdd	||ftj	d}tj
|d	d
d
tj	d}tjd	tj	d}|||||\}}}td|j td|j td|j t| ||}||	|
|||f}|| \}}td|j td|j t ; |stjdtjjd tjdtd tjj|||dddg dddgdddddiddiddidd	 W d    n1 sw   Y  td| d S )Nzimage_features_0.shape: %szimage_features_1.shape: %szimage_embeddings.shape: %s      r      lowhighsizedtyper      r;   zsparse_embeddings.shape: %szdense_embeddings.shape: %szimage_pe.shape: %szlow_res_masks.shape: %sziou_predictions.shape: %signore)categoryT   r   r   r   r   r   r   r&   r'   
num_labelsznum_points+1)r   r   )r   r   r&   r'   )export_paramsopset_versiondo_constant_foldinginput_namesoutput_namesdynamic_axesz mask decoder onnx model saved to)r   cpur   r   loggerinfor$   r.   randintfloatzerosonesr   warningscatch_warningsfilterwarningsjitTracerWarningUserWarningonnxexportprint)r2   r3   r	   r
   verbosesam2_prompt_encoderimagesam2_encoderr   r   r   rB   
num_pointspoint_coordspoint_labelsinput_maskshas_input_masksr   r   r   sam2_mask_decoderinputsr&   r'   r   r   r   export_mask_decoder_onnxR   sZ   

rd   c               
      s  t |  }t }t|  }||\}}}	d}
d}tjdd|
|dftjd}tjdd|
|ftjd}tj|
dddtjd}tjdtjd}|||||\}}}t	| ||}|||	|||f}|| \}}dd l
}|j|d	gd
}|   fddtt D }td| | fddttD }td| ||| | |	 | | | d}t|D ]\}}td||| j q|\}}tjj|t|ddd tjj|t|ddd td|  d S )Nr      r   r6   r4   r7   r<   r=   CPUExecutionProvider)	providersc                       g | ]} | j qS r   name.0i)model_inputsr   r   
<listcomp>       z*test_mask_decoder_onnx.<locals>.<listcomp>zinput_names: %sc                    rh   r   ri   rk   )model_outputsr   r   ro      rp   zoutput_names: %srA   zoutput %s shape: %sg{Gzt?g-C6?)atolrtolzonnx model has been verified: )r   rI   r   r   r.   rL   rM   randrO   r   onnxruntimeInferenceSession
get_inputsrangelenrJ   rK   get_outputsrunnumpy	enumerater$   testingassert_closetensorrX   ) r2   r3   r	   r
   rZ   r[   r\   r   r   r   rB   r]   r^   r_   r`   ra   r   r   r   rb   rc   r&   r'   ru   ort_sessionrF   rG   outputsrm   output_nameort_low_res_masksort_iou_predictionsr   )rn   rq   r   test_mask_decoder_onnx   sP   
r   )TF)loggingrP   r.   image_encoderr   r   r   r   sam2.modeling.sam2_baser   r   	getLoggerr*   rJ   Moduler   strr-   rd   r   r   r   r   r   <module>   s:   
E
E