o
    }oiE.                     @   s  d Z ddlZddlZddlmZ ddlZddlZddlZddl	m
Z
mZ ejdZZeeejjZdZdZdDded	ed
ejjfddZ		dEdejjejjB ded
dfddZd
ee fddZdFdeded
efddZded
ejfddZ ded
ejfddZ!dFdejde"d
ejfddZ#dFdejde"d
ejfdd Z$dedejfd!d"Z%dGdedejd$e"d
dfd%d&Z&eed'fd(ejd)ej'd	ed*e"d
ej(f
d+d,Z)dHd-ej(d*e"d
ejfd.d/Z*efd0ejd1e"d
e+ejee" f fd2d3Z,eefd0ejd4e"d1e"d
e+ejee" f fd5d6Z-d0ejd7ee" d
ejfd8d9Z.d0ejd7ee" d
ejfd:d;Z/dEded<efd=d>Z0ded<e1d?efd@dAZ2d
e
fdBdCZ3dS )Iz.Utility functions for the inference libraries.    N)glob)TokenizerConfigsTokenizerModelscuda      jit_filepathdevicereturnc                 C   s   t j| }| |S )a  Loads a torch.jit.ScriptModule from a filepath.

    Args:
        jit_filepath: The filepath to the JIT-compiled model.
        device: The device to load the model onto, default=cuda.
    Returns:
        The JIT compiled model loaded to device and on eval mode.
    )torchjitloadevalto)r   r	   model r   b/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/common/video_tokenizers/utils.pyload_jit_model!   s   	r   r   c                 C   s   t j| | dS )zSaves a torch.jit.ScriptModule or torch.jit.RecursiveScriptModule to file.

    Args:
        model: JIT compiled model loaded onto `config.checkpoint.jit.device`.
        jit_filepath: The filepath to the JIT-compiled model.
    N)r   r   save)r   r   r   r   r   save_jit_model.   s   
r   c                 C   s   t tt| }tt|S )z+Returns a list of filepaths from a pattern.)sortedr   strlistset)input_pattern	filepathsr   r   r   get_filepaths;   s   r   filepath
output_dirc                 C   s>   |p
t j|  d}| dt j|  }t j|dd |S )z9Returns the output filepath for the given input filepath.z/reconstructions/T)exist_ok)ospathdirnamebasenamemakedirs)r   r   output_filepathr   r   r   get_output_filepathA   s   r'   c                 C   sJ   t | }|jdkrtj|gd dd}|jd dkr#|dddf }|S )zReads an image from a filepath.

    Args:
        filepath: The filepath to the image.

    Returns:
        The image as a numpy array, layout HxWxC, range [0..255], uint8 dtype.
          axis   .N)media
read_imagendimnpstackshaper   imager   r   r   r/   I   s   
	
r/   c                 C   sJ   t | }|jdkrtj|gd dd}|jd dkr#|dddf }|S )zReads a video from a filepath.

    Args:
        filepath: The filepath to the video.
    Returns:
        The video as a numpy array, layout TxHxWxC, range [0..255], uint8 dtype.
    r)   r*   r+   r-   .N)r.   
read_videor0   r1   r2   r3   )r   videor   r   r   r6   ]   s   

r6   r5   
short_sizec                 C      |du r| S | j dd \}}||kr-|t|| | d }}|d dkr(|n|d }nt|| | d |}}|d dkrB|n|d }tj| ||fdS )	zResizes an image to have the short side of `short_size`.

    Args:
        image: The image to resize, layout HxWxC, of any range.
        short_size: The size of the short side.
    Returns:
        The resized image.
    Nr*         ?r(   r      r3   )r3   intr.   resize_image)r5   r8   heightwidth
height_new	width_newr   r   r   r?   p      	r?   r7   c                 C   r9   )	zResizes a video to have the short side of `short_size`.

    Args:
        video: The video to resize, layout TxHxWxC, of any range.
        short_size: The size of the short side.
    Returns:
        The resized video.
    Nr:   r*   r;   r(   r   r<   r=   )r3   r>   r.   resize_video)r7   r8   r@   rA   rB   rC   r   r   r   rE      rD   rE   c                 C   s   t | |S )zWrites an image to a filepath.)r.   write_imager4   r   r   r   rF      s   rF      fpsc                 C   s   t j| ||dS )zWrites a video to a filepath.)rH   )r.   write_video)r   r7   rH   r   r   r   rI      s   rI   r*   input_imagedtype	range_minc                 C   sr   | j }ttd|dd ttd|dd  }| dt| t }|dkr.d| d }t|||S )zConverts image(dtype=np.uint8) to `dtype` in range [0..255].

    Args:
        input_image: A batch of images in range [0..255], BxHxWx3 layout.
    Returns:
        A torch.Tensor of layout Bx3xHxW in range [-1..1], dtype.
    r<   r*   Nr          @      ?)	r0   r   range	transposetuple_UINT8_MAX_Fr   
from_numpyr   )rJ   rK   r	   rL   r0   indicesr5   r   r   r   numpy2tensor   s   ,rV   input_tensorc                 C   sb   |dkr|   d d } | j}| dd  }|dttd| d }|t d	 	t
jS )
zConverts tensor in [-1,1] to image(dtype=np.uint8) in range [0..255].

    Args:
        input_tensor: Input image tensor of Bx3xHxW layout, range [-1..1].
    Returns:
        A numpy image of layout BxHxWx3, range [0..255], uint8 dtype.
    r*   rO   rN   r   r<   rM   r(   )r<   r;   )floatr0   clampcpunumpyrQ   rR   rP   rS   astyper1   uint8)rW   rL   r0   output_imager   r   r   tensor2numpy   s   r_   batchspatial_alignc                 C   s   | j dd \}}|}|| dkr|||  nd}|| dkr%|||  nd}|d? |d? ||d?  ||d?  g}tj| d|d? ||d?  f|d? ||d?  fdfdd} | |fS )zPads a batch of images to be divisible by `spatial_align`.

    Args:
        batch: The batch of images to pad, layout BxHxWx3, in any range.
        align: The alignment to pad to.
    Returns:
        The padded batch and the crop region.
    r<   r)   r   r   r   constantmoder3   r1   pad)r`   ra   r@   rA   alignheight_to_padwidth_to_padcrop_regionr   r   r   pad_image_batch   s&   	


rl   temporal_alignc              	   C   s  | j dd \}}}|}|| dkr|||  nd}|| dkr&|||  nd}|}|d | dkr:||d |  nd}	|	d? |d? |d? ||	d?  ||d?  ||d?  g}
tj| dd|d? ||d?  f|d? ||d?  fdfdd} tj| d|	d? |	|	d?  fdddfdd} | |
fS )	a[  Pads a batch of videos to be divisible by `temporal_align` or `spatial_align`.

    Zero pad spatially. Reflection pad temporally to handle causality better.
    Args:
        batch: The batch of videos to pad., layout BxFxHxWx3, in any range.
        align: The alignment to pad to.
    Returns:
        The padded batch and the crop region.
    r*   r   r<   rb   rc   rd   edgerf   )r`   rm   ra   
num_framesr@   rA   rh   ri   rj   frames_to_padrk   r   r   r   pad_video_batch   sD   $


rr   rk   c                 C   sF   t |dks
J d|\}}}}}}| d||||||ddf S )a  Unpads video with `crop_region`.

    Args:
        batch: A batch of numpy videos, layout BxFxHxWxC.
        crop_region: [f1,y1,x1,f2,y2,x2] first, top, left, last, bot, right crop indices.

    Returns:
        np.ndarray: Cropped numpy video, layout BxFxHxWxC.
       zcrop_region should be len of 6..Nlen)r`   rk   f1y1x1f2y2x2r   r   r   unpad_video_batch'  s   
"r|   c                 C   s<   t |dks
J d|\}}}}| d||||ddf S )zUnpads image with `crop_region`.

    Args:
        batch: A batch of numpy images, layout BxHxWxC.
        crop_region: [y1,x1,y2,x2] top, left, bot, right crop indices.

    Returns:
        np.ndarray: Cropped numpy image, layout BxHxWxC.
    r-   zcrop_region should be len of 4..Nrt   )r`   rk   rw   rx   rz   r{   r   r   r   unpad_image_batch6  s   
r}   tokenizer_configc                 C   s0   |d }t | jdi |}tj| }||fS )Nnamer   )r   valuer   r   r   )r   r~   tokenizer_namer   ckptsr   r   r   get_pytorch_modelE  s   r   
model_typec                 C   sZ   t | |\}}|dkr| }n|dkr| }|j| dd | |d |S )z(Loads a torch.nn.Module from a filepath.encdecF)strictrK   )r   encoder_jitdecoder_jitload_state_dict
state_dictr   r   )r   r~   r   r	   r   r   r   r   r   load_pytorch_modelL  s   
r   c                 C   sX   t d| }|r*| \}}}t| j}|tt|d |tt|d |S dS )z+return tokeinzer config from tokenizer namez#Cosmos-Tokenizer-(\D+)(\d+)x(\d+).*)spatial_compression)temporal_compressionN)rematchgroupsr   r   updatedictr>   )tokenizer_typer   r   temporalspatialr~   r   r   r   get_tokenizer_configW  s   
r   )Nr   )NN)N)rG   )r*   )4__doc__r!   r   r   mediapyr.   r[   r1   r   1nemo.collections.common.video_tokenizers.networksr   r   bfloat16_DTYPE_DEVICErX   iinfor]   maxrS   _SPATIAL_ALIGN_TEMPORAL_ALIGNr   r   ScriptModuler   RecursiveScriptModuler   r   r   r'   ndarrayr/   r6   r>   r?   rE   rF   rI   rK   TensorrV   r_   rR   rl   rr   r|   r}   r   r   r   r   r   r   r   r   <module>   s|   

*#
7