o
    8wit                     @   s~   d dl Z d dlmZ d dlmZ d dlZd dlZddl	m
Z
 ddlmZ ddlmZ er0d dlZG d	d
 d
eedef ZdS )    N)Mapping)TYPE_CHECKING   )config)
map_nested   )TensorFormatterc                       s   e Zd Zd fdd	Zdd Zdd Zdd	 Zd
efddZde	j
defddZde	j
ddfddZde	j
defddZ  ZS )TFFormatterNc                    s"   t  j||d || _dd l}d S )N)featurestoken_per_repo_idr   )super__init__tf_tensor_kwargs
tensorflow)selfr
   r   r   tf	__class__ ]/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/datasets/formatting/tf_formatter.pyr   !   s   zTFFormatter.__init__c                    s`   dd l t tr. r.t fdd D r S t fdd D r.j S  S )Nr   c                 3   s<    | ]}t |jo|j d  jko|j d  jkV  qdS )r   N)
isinstanceTensorshapedtype.0xcolumnr   r   r   	<genexpr>*   s    ,
z+TFFormatter._consolidate.<locals>.<genexpr>c                 3   s<    | ]}t |jjfo|jd ko|j d jkV  qdS )r   r   N)r   r   RaggedTensorndimr   r   r   r   r   r   .   s
    *
)r   r   listallstackragged)r   r   r   r   r   _consolidate&   s   
zTFFormatter._consolidatec                 C   s  dd l }|d u r
|S i }t|tjtjfr#t|jtjr#d|ji}nt|tjtjfr9t|jtj	r9d|j
i}tjrQdtjv rQdd l}t||jjrQt|}tjrfdtjv rfddlm} t||rf|S tjrdtjv rddlm}m} t|||fr|S |j|fi i || jS )Nr   r   PILtorchvision)VideoReader
torchcodec)AudioDecoderVideoDecoder)r   r   npnumberndarray
issubdtyper   integerint64floatingfloat32r   PIL_AVAILABLEsysmodules	PIL.ImageImageasarrayTORCHVISION_AVAILABLEtorchvision.ior)   TORCHCODEC_AVAILABLEtorchcodec.decodersr+   r,   convert_to_tensorr   )r   valuer   default_dtyper'   r)   r+   r,   r   r   r   
_tensorize7   s*   ""


zTFFormatter._tensorizec                    s   dd l }tjr#dtjv r#dd l}t||jr# |	 
  d S t|dr2t||js2| }t|tjrJ|jtkrI  fdd|D S nt|ttfr]  fdd|D S  |S )Nr   torchr   	__array__c                       g | ]}  |qS r   recursive_tensorizer   	substructr   r   r   
<listcomp>d       z4TFFormatter._recursive_tensorize.<locals>.<listcomp>c                    rE   r   rF   rH   rJ   r   r   rK   f   rL   )r   r   TORCH_AVAILABLEr6   r7   rC   r   r   rB   detachcpunumpyhasattrrD   r-   r/   r   objectr&   r"   tuple)r   data_structr   rC   r   rJ   r   _recursive_tensorizeV   s   

z TFFormatter._recursive_tensorizerT   c                 C   s   t | j|ddS )NF)map_list)r   rU   )r   rT   r   r   r   rG   i   s   zTFFormatter.recursive_tensorizepa_tablereturnc                 C   s$   |   |}| j|}| |S N)numpy_arrow_extractorextract_rowpython_features_decoder
decode_rowrG   )r   rW   rowr   r   r   
format_rowl   s   
zTFFormatter.format_row	tf.Tensorc                 C   s:   |   |}| j||jd }| |}| |}|S )Nr   )rZ   extract_columnr\   decode_columncolumn_namesrG   r&   )r   rW   r   r   r   r   format_columnq   s
   

zTFFormatter.format_columnc                 C   sD   |   |}| j|}| |}|D ]}| || ||< q|S rY   )rZ   extract_batchr\   decode_batchrG   r&   )r   rW   batchcolumn_namer   r   r   format_batchx   s   
zTFFormatter.format_batch)NN)__name__
__module____qualname__r   r&   rB   rU   dictrG   paTabler   r_   rd   ri   __classcell__r   r   r   r   r	       s    r	   r`   )r6   collections.abcr   typingr   rP   r-   pyarrowrn    r   utils.py_utilsr   
formattingr   r   r   r	   r   r   r   r   <module>   s   