o
    i6                     @   s(  d dl Z d dlZd dlmZ d dlmZmZ d dlZd dl	Z	d dl
mZ d dlmZ dd Zdd	 Zd
e	jfddZdd ZG dd dZddde	dfddZddde	dfddZdde	dfddZdd Zdd Zdeeje	jf fd d!Zdeeje	jf d"eeeeef fd#d$ZdS )%    N)Thread)TupleUnion)Image)tqdmc                  C   s   t j rKt jdjdk } t jdjdk}|s!tjdtdd tdd t j	
d	d d D }|d
k rCtjdt j	 dtdd |d
k pI| }nd} d}d}| ||fS )Nr         zSFlash Attention is disabled as it requires a GPU with Ampere (8.0) CUDA capability.   category
stacklevelc                 s   s    | ]}t |V  qd S N)int).0v r   S/home/ubuntu/.local/lib/python3.10/site-packages/torchao/_models/sam2/utils/misc.py	<genexpr>   s    z$get_sdpa_settings.<locals>.<genexpr>.)r	   r	   zYou are using PyTorch zw without Flash Attention v2 support. Consider upgrading to PyTorch 2.2+ for Flash Attention v2 (which could be faster).TF)torchcudais_availableget_device_propertiesmajorwarningswarnUserWarningtuple__version__split)old_gpuuse_flash_attnpytorch_versionmath_kernel_onr   r   r   get_sdpa_settings   s*   
"
r$   c                 C   s"   ddl m} || tj S )a  
    Get the connected components (8-connectivity) of binary masks of shape (N, 1, H, W).

    Inputs:
    - mask: A binary mask tensor of shape (N, 1, H, W), where 1 is foreground and 0 is
            background.

    Outputs:
    - labels: A tensor of shape (N, 1, H, W) containing the connected component labels
              for foreground pixels and 0 for background pixels.
    - counts: A tensor of shape (N, 1, H, W) containing the area of the connected
              components for foreground pixels and 0 for background pixels.
    r   )_C)sam2r%   get_connected_componnetstor   uint8
contiguous)maskr%   r   r   r   get_connected_components0   s   r,   masksc                 C   s  | j \}}}}| j}tj||tjd}tj||tjd}tj||dd\}}	|d |d||}|	d |d||}	tjt| ||	ddd\}
}tj
t| |d	ddd\}}tjt| |	|	ddd\}}tj
t| |	d	ddd\}}tj|
|||fdd}|S )	z
    compute bounding box given an input mask

    Inputs:
    - masks: [B, 1, H, W] masks, dtype=torch.Tensor

    Returns:
    - box_coords: [B, 1, 4], contains (x, y) coordinates of top left and bottom right box corners, dtype=torch.Tensor
    )devicedtypexy)indexing)NN.   dim)shaper.   r   arangeint32meshgridexpandminwhereflattenmaxstack)r-   B_hwr.   xsysgrid_xsgrid_ysmin_xsmax_xsmin_ysmax_ysbbox_coordsr   r   r   mask_to_boxC   s   
""""rN   c                 C   sx   t | }t|d||f}|jtjkr|d }ntd|j d|  t	
|ddd}|j\}}|||fS )NRGB     o@zUnknown image dtype: z on r	   r   r2   )r   opennparrayconvertresizer/   r)   RuntimeErrorr   
from_numpypermutesize)img_path
image_sizeimg_pilimg_npimgvideo_widthvideo_heightr   r   r   _load_img_as_tensor]   s   



ra   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	AsyncVideoFrameLoaderzZ
    A list of video frames to be load asynchronously without blocking session start.
    c                    sx   | _ | _| _| _| _d gt|  _d  _d  _d  _	| _
 d  fdd}t|dd _ j  d S )Nr   c               
      sZ   zt tt jddD ]}  |  qW d S  ty, } z
| _W Y d }~d S d }~ww )Nframe loading (JPEG)desc)r   rangelenimages__getitem__	Exception	exception)neselfr   r   _load_frames   s   z4AsyncVideoFrameLoader.__init__.<locals>._load_framesT)targetdaemon)	img_pathsr[   offload_video_to_cpuimg_meanimg_stdrg   rh   rk   r`   r_   compute_deviceri   r   threadstart)ro   rs   r[   rt   ru   rv   rw   rp   r   rn   r   __init__n   s   	
zAsyncVideoFrameLoader.__init__c                 C   s   | j d urtd| j | j| }|d ur|S t| j| | j\}}}|| _|| _|| j8 }|| j	 }| j
s=|j| jdd}|| j|< |S )NzFailure in frame loading threadT)non_blocking)rk   rV   rh   ra   rs   r[   r`   r_   ru   rv   rt   r(   rw   )ro   indexr^   r`   r_   r   r   r   ri      s    





z!AsyncVideoFrameLoader.__getitem__c                 C   s
   t | jS r   )rg   rh   rn   r   r   r   __len__   s   
zAsyncVideoFrameLoader.__len__N)__name__
__module____qualname____doc__rz   ri   r}   r   r   r   r   rb   i   s
    &rb   )g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?Fr   c           
   	   C   sv   t | t}t | t}|otj| d dv }	|s|	r$t| |||||dS |r7tj| r7t| ||||||dS t	d)z
    Load the video frames from video_path. The frames are resized to image_size as in
    the model and are loaded to GPU if offload_video_to_cpu=False. This is used by the demo.
    r4   )z.mp4z.MP4)
video_pathr[   rt   ru   rv   rw   )r   r[   rt   ru   rv   async_loading_framesrw   z;Only MP4 video and JPEG folder are supported at this moment)

isinstancebytesstrospathsplitext!load_video_frames_from_video_fileisdir!load_video_frames_from_jpg_imagesNotImplementedError)
r   r[   rt   ru   rv   r   rw   is_bytesis_stris_mp4_pathr   r   r   load_video_frames   s2   


r   c                    sX  t | trtj| r|  ntddd t D }|jdd d t|}|dkr3t	d   fd	d|D }	t
j|t
jd
ddddf }t
j|t
jd
ddddf }|rlt|	|||||}
|
|
j|
jfS t
j|d||t
jd
}tt|	ddD ]\}}t||\||< }}q|s||}||}||}||8 }|| }|||fS )aX  
    Load the video frames from a directory of JPEG files ("<frame_index>.jpg" format).

    The frames are resized to image_size x image_size and are loaded to GPU if
    `offload_video_to_cpu` is `False` and to CPU if `offload_video_to_cpu` is `True`.

    You can load a frame asynchronously by setting `async_loading_frames` to `True`.
    ak  Only JPEG frames are supported at this moment. For video files, you may use ffmpeg (https://ffmpeg.org/) to extract frames into a folder of JPEG files, such as 
```
ffmpeg -i <your_video>.mp4 -q:v 2 -start_number 0 <output_dir>/'%05d.jpg'
```
where `-q:v` generates high-quality JPEG frames and `-start_number 0` asks ffmpeg to start the JPEG file from 00000.jpg.c                 S   s$   g | ]}t j|d  dv r|qS )r4   )z.jpgz.jpegz.JPGz.JPEG)r   r   r   )r   pr   r   r   
<listcomp>   s
    z5load_video_frames_from_jpg_images.<locals>.<listcomp>c                 S   s   t tj| d S )Nr   )r   r   r   r   )r   r   r   r   <lambda>   s    z3load_video_frames_from_jpg_images.<locals>.<lambda>)keyr   zno images found in c                    s   g | ]	}t j |qS r   )r   r   join)r   
frame_name
jpg_folderr   r   r      s    r/   N   rc   rd   )r   r   r   r   r   r   listdirsortrg   rV   r   tensorfloat32rb   r`   r_   zeros	enumerater   ra   r(   )r   r[   rt   ru   rv   r   rw   frame_names
num_framesrs   lazy_imagesrh   rl   rZ   r`   r_   r   r   r   r      sF   




r   c                 C   s   ddl }tj|tjdddddf }tj|tjdddddf }|jd ||  j\}}}	g }
|j| ||dD ]}|
	|
ddd q=tj|
dd d	 }
|sf|
|}
||}||}|
|8 }
|
| }
|
||fS )
z(Load the video frames from a video file.r   Nr   r   )widthheightr	   r2   r5   rP   )decordr   r   r   bridge
set_bridgeVideoReadernextr7   appendrX   r@   floatr(   )r   r[   rt   ru   rv   rw   r   r`   r_   rB   rh   framer   r   r   r     s    	



r   c              
   C   s   |dksJ d| }zt | dk\}}|dk||k@ }t|d| } W | S  tyD } ztj| dtdd |} W Y d}~| S d}~ww )zY
    A post processor to fill small holes in mask scores with area under `max_area`.
    r   zmax_area must be positiveg?a*  

Skipping the post-processing step due to the error above. You can still use SAM 2 and it's OK to ignore the error above, although some post-processing functionality may be limited (which doesn't affect the results in most cases; see https://github.com/facebookresearch/sam2/blob/main/INSTALL.md).r	   r
   N)r,   r   r=   rj   r   r   r   )r+   max_area
input_masklabelsareasis_holerm   r   r   r   fill_holes_in_mask_scores9  s"   r   c                 C   sJ   | du r
||}}nt j| d |gdd}t j| d |gdd}||dS )zDAdd new points and labels to previous point inputs (add at the end).Npoint_coordsr2   r5   point_labels)r   r   )r   cat)old_point_inputs
new_points
new_labelspointsr   r   r   r   concat_pointsV  s
   
r   imagec                 C   sh   t | tjr| jd d S t | tjr| j\}}}||fS t | tr+| j\}}||fS tdt	|  )Nr	   z;Only support np.ndarray, torch.Tensoror PIL Image, but got )
r   rR   ndarrayr7   r   Tensorr   rY   r   type)r   rB   rC   rD   r   r   r   get_image_sizea  s   

r   crop_boxc                 C   sj   |\}}}}t | tjr| ||||d d f S t | tjr,| d d ||||f S tdt|  )NzAExpected image to be of type np.ndarray or torch.Tensor, but got )r   rR   r   r   r   
ValueErrorr   )r   r   x0y0x1y1r   r   r   
crop_imagep  s   r   )r   r   	threadingr   typingr   r   numpyrR   r   PILr   r   r$   r,   r   rN   ra   rb   r.   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sF   H
-
G
 