o
    .i*                  
   @   s4  d dl Z d dlmZmZ d dlmZmZmZmZm	Z	m
Z
 d dlZd dlZddlmZ ddlmZ ddlmZ ddlmZmZ dd	lmZ erRd d
lmZ ddlmZ G dd de	ZeG dd dZddde fddZ!dddefddZ"dej#defddZ$	d de%dee&e%e
e'e%f f  de%ddfddZ(dS )!    N)	dataclassfield)TYPE_CHECKINGAnyClassVarOptional	TypedDictUnion   )config)DownloadConfig)
array_cast)is_local_pathxopen)string_to_dictVideoReader   )FeatureTypec                   @   s&   e Zd ZU ee ed< ee ed< dS )ExamplepathbytesN)__name__
__module____qualname__r   str__annotations__r    r   r   K/home/ubuntu/.local/lib/python3.10/site-packages/datasets/features/video.pyr      s   
 r   c                
   @   s  e Zd ZU dZdZeed< dZee	 ed< dZ
ee	 ed< ee e dZee ed	< ed d
d
dZe	ed< dd Zdee	eeejdf defddZ	ddee	ef deee	eee	f f  ddfddZdedee	df f fddZdeejejej f dejfddZ!dS )Videoa%  
    **Experimental.** Video [`Feature`] to read video data from a video file.

    Input: The Video feature accepts as input:
    - A `str`: Absolute path to the video file (i.e. random access is allowed).
    - A `dict` with the keys:

        - `path`: String with relative path of the video file in a dataset repository.
        - `bytes`: Bytes of the video file.

      This is useful for archived files with sequential access.

    - A `torchvision.io.VideoReader`: torchvision video reader object.

    Args:
        mode (`str`, *optional*):
            The mode to convert the video to. If `None`, the native mode of the video is used.
        decode (`bool`, defaults to `True`):
            Whether to decode the video data. If `False`,
            returns the underlying dictionary in the format `{"path": video_path, "bytes": video_bytes}`.

    Examples:

    ```py
    >>> from datasets import Dataset, Video
    >>> ds = Dataset.from_dict({"video":["path/to/Screen Recording.mov"]}).cast_column("video", Video())
    >>> ds.features["video"]
    Video(decode=True, id=None)
    >>> ds[0]["video"]
    <torchvision.io.video_reader.VideoReader object at 0x325b1aae0>
    >>> ds = ds.cast_column('video', Video(decode=False))
    {'bytes': None,
     'path': 'path/to/Screen Recording.mov'}
    ```
    TdecodeNidztorchvision.io.VideoReaderdtyper   r   pa_typeF)defaultinitrepr_typec                 C   s   | j S N)r$   )selfr   r   r   __call__G      zVideo.__call__valuer   returnc                 C   s   t jr
ddlm} nd}t|trt|}t|tr |ddS t|t	r*d|dS t|tj
r4t|S |durAt||rAt|S t|tru|d|d}}|dur`tj|r`d|dS |dush|durm||dS td| d	td
t| )zEncode example into a format for Arrow.

        Args:
            value (`str`, `np.ndarray`, `VideoReader` or `dict`):
                Data passed as input to Video feature.

        Returns:
            `dict` with "path" and "bytes" fields
        r   r   Nr   r   r   r   r#   zTA video sample should have one of 'path' or 'bytes' but they are missing or None in .z!Unsupported encode_example type: )r   TORCHVISION_AVAILABLEtorchvision.ior   
isinstancelistnparrayr   r   ndarrayencode_np_arrayencode_torchvision_videodictgetosr   isfile
ValueError	TypeErrortype)r*   r-   r   r   bytes_r   r   r   encode_exampleJ   s.   










zVideo.encode_exampletoken_per_repo_idc                 C   s   | j stdtjrddlm} ntd|du ri }t|tr&|d}}n	|d |d }}|du rO|du r?t	d| d	t
|rH||}nt||d
}n||}||d|_|S )a|  Decode example video file into video data.

        Args:
            value (`str` or `dict`):
                A string with the absolute video file path, a dictionary with
                keys:

                - `path`: String with absolute or relative video file path.
                - `bytes`: The bytes of the video file.
            token_per_repo_id (`dict`, *optional*):
                To access and decode
                video files from private repositories on the Hub, you can pass
                a dictionary repo_id (`str`) -> token (`bool` or `str`).

        Returns:
            `torchvision.io.VideoReader`
        zMDecoding is disabled for this feature. Please use Video(decode=True) instead.r   r   z9To support decoding videos, please install 'torchvision'.Nr   r   zBA video should have one of 'path' or 'bytes' but both are None in r0   )rC   r/   )r    RuntimeErrorr   r1   r2   r   ImportErrorr3   r   r>   r   hf_video_reader_hf_encoded)r*   r-   rC   r   r   rA   videor   r   r   decode_examplev   s&   

zVideo.decode_exampler   c                 C   s(   ddl m} | jr| S |d|ddS )zfIf in the decodable state, return the feature itself, otherwise flatten the feature into a dictionary.r   )Valuebinarystringr#   )featuresrJ   r    )r*   rJ   r   r   r   flatten   s   zVideo.flattenstoragec                 C   s  t j|jr%t jdgt| t  d}t jj||gddg|	 d}nt j
|jrJt jdgt| t  d}t jj||gddg|	 d}nt j|jr|jddkr_|d}nt jdgt| t  d}|jddkr{|d}nt jdgt| t  d}t jj||gddg|	 d}n4t j|jrt jdd | D t  d}t jdgt| t  d}t jj||gddg|	 d}t|| jS )	a'  Cast an Arrow array to the Video arrow storage type.
        The Arrow types that can be converted to the Video pyarrow storage type are:

        - `pa.string()` - it must contain the "path" data
        - `pa.binary()` - it must contain the video bytes
        - `pa.struct({"bytes": pa.binary()})`
        - `pa.struct({"path": pa.string()})`
        - `pa.struct({"bytes": pa.binary(), "path": pa.string()})`  - order doesn't matter
        - `pa.list(*)` - it must contain the video array data

        Args:
            storage (`Union[pa.StringArray, pa.StructArray, pa.ListArray]`):
                PyArrow array to cast.

        Returns:
            `pa.StructArray`: Array in the Video arrow storage type, that is
                `pa.struct({"bytes": pa.binary(), "path": pa.string()})`.
        N)r@   r   r   )maskr   c                 S   s*   g | ]}|d urt t|d nd qS )Nr   )r8   r5   r6   ).0arrr   r   r   
<listcomp>   s   * z&Video.cast_storage.<locals>.<listcomp>)patypes	is_stringr@   r6   lenrK   StructArrayfrom_arraysis_null	is_binaryrL   	is_structget_field_indexr   is_list	to_pylistr   r$   )r*   rO   bytes_array
path_arrayr   r   r   cast_storage   s0      zVideo.cast_storager)   )"r   r   r   __doc__r    boolr   r!   r   r   r"   r   rT   structrK   rL   r$   r   r   r(   r+   r	   r   r   r5   r7   rB   r:   rI   rN   StringArrayrX   	ListArrayrb   r   r   r   r   r      s&   
 $$"/

3(r   rH   r   r.   c                 C      t  )zPConvert a torchvision Video object to bytes using native compression if possibleNotImplementedErrorrH   r   r   r   video_to_bytes   s   rl   c                 C   s   t | dr| jS td)NrG   zXEncoding a VideoReader that doesn't come from datasets.Video.decode() is not implemented)hasattrrG   rj   rk   r   r   r   r9      s
   
r9   r6   c                 C   rh   r)   ri   )r6   r   r   r   r8      r,   r8   r   rC   streamc                 C   s$  dd l }ddlm} ddlm} |d u ri }| dd }|tjr&tj	ntj
}t||}|d ur9||d nd }	t|	d}
t| d|
d	}t|}| |_|jd
kr]td|j |j|dd|_|dd }t|ddkrwdnt|dd }||i|_|jjdi |j|_|S )Nr   )get_video_backendr   z::repo_id)tokenrb)download_configpyavz9Unsupported video backend for VideoReader from HF files: ignore)metadata_errors:r   r   )avtorchvisionro   r2   r   split
startswithr   HF_ENDPOINTHUB_DATASETS_URLHUB_DATASETS_HFFS_URLr   r;   r   r   object__new__backendrD   open	containerrW   intpyav_streamr    _c)r   rC   rn   ry   ro   r   
source_urlpatternsource_url_fieldsrr   rt   fvrstream_type	stream_idr   r   r   rF      s*   



(
rF   )NrH   ))r<   dataclassesr   r   typingr   r   r   r   r   r	   numpyr5   pyarrowrT    r   download.download_configr   tabler   utils.file_utilsr   r   utils.py_utilsr   r2   r   rM   r   r   r   r   rl   r9   r7   r8   r   r:   rd   rF   r   r   r   r   <module>   s<      K	