o
    .i                     @   s   U d dl Z d dlmZ d dlmZmZ d dlZd dlZ	ddl
mZ ddlmZ ddlmZ dd	lmZ er=d dlZd dlZe Zdaee ed
< G dd deedef ZdS )    N)Mapping)TYPE_CHECKINGOptional   )config)
get_logger)
map_nested   )TensorFormatterDEVICE_MAPPINGc                       s   e Zd Zd fdd	Zedeedf fddZdd	 Zd
d Z	dd Z
defddZdejdefddZdejddfddZdejdefddZ  ZS )JaxFormatterNc              
      s   t  j||d dd l}ddlm} t||r$td| dt| dt|tr+|nt|	 d | _
td u r<|  a| j
tt vrhtd| j
 dtt  d	t|	 d  d
 t|	 d | _
|| _d S )N)featurestoken_per_repo_idr   )Devicez	Expected z to be a `str` not z, as `jaxlib.xla_extension.Device` is not serializable neither with `pickle` nor with `dill`. Instead you can surround the device with `str()` to get its string identifier that will be internally mapped to the actual `jaxlib.xla_extension.Device`.zDevice with string identifier z) not listed among the available devices: z), so falling back to the default device: .)super__init__jaxjaxlib.xla_clientr   
isinstance
ValueErrortypestrdevicesdevicer   _map_devices_to_strlistkeysloggerwarningjnp_array_kwargs)selfr   r   r   r    r   r   	__class__ U/home/ubuntu/.local/lib/python3.10/site-packages/datasets/formatting/jax_formatter.pyr   '   s(   
 


zJaxFormatter.__init__returnzjaxlib.xla_extension.Devicec                  C   s   dd l } dd |  D S )Nr   c                 S   s   i | ]}t ||qS r$   )r   ).0r   r$   r$   r%   
<dictcomp>F       z4JaxFormatter._map_devices_to_str.<locals>.<dictcomp>)r   r   )r   r$   r$   r%   r   B   s   z JaxFormatter._map_devices_to_strc                    sL   dd l dd lm} t tr$ r$t fdd D r$|j ddS  S )Nr   c                 3   s<    | ]}t |jo|j d  jko|j d  jkV  qdS )r   N)r   Arrayshapedtype)r'   xcolumnr   r$   r%   	<genexpr>M   s    ,
z,JaxFormatter._consolidate.<locals>.<genexpr>)axis)r   	jax.numpynumpyr   r   allstack)r!   r/   jnpr$   r.   r%   _consolidateH   s   zJaxFormatter._consolidatec                 C   s  dd l }dd lm} t|tttd fr|S t|tjtj	fr+t
|jtjr+| S i }t|tjtj	frNt
|jtjrN|jjrHd|ji}nd|ji}nt|tjtj	frdt
|jtjrdd|ji}tjr|dtjv r|dd l}t||jjr|t|}tjrdtjv rddlm} t||r|S td u r|  a| t| j!  |j"|fi i || j#W  d    S 1 sw   Y  d S )Nr   r,   PILtorchvision)VideoReader)$r   r2   r3   r   r   bytesr   np	characterndarray
issubdtyper,   tolistnumberintegerr   jax_enable_x64int64int32floatingfloat32PIL_AVAILABLEsysmodules	PIL.ImageImageasarrayTORCHVISION_AVAILABLEtorchvision.ior:   r   r   default_devicer   arrayr    )r!   valuer   r6   default_dtyper8   r:   r$   r$   r%   
_tensorizeS   s4   """


$zJaxFormatter._tensorizec                    s   dd l }tjr#dtjv r#dd l}t||jr# |	 
  d S t|dr2t||js2| }t|tjrJ|jtkrI  fdd|D S nt|ttfr]  fdd|D S  |S )Nr   torchr$   	__array__c                       g | ]}  |qS r$   recursive_tensorizer'   	substructr!   r$   r%   
<listcomp>   r)   z5JaxFormatter._recursive_tensorize.<locals>.<listcomp>c                    rW   r$   rX   rZ   r\   r$   r%   r]      r)   )r   r   TORCH_AVAILABLErI   rJ   rU   r   TensorrT   detachcpur3   hasattrr*   rV   r<   r>   r,   objectr7   r   tuple)r!   data_structr   rU   r$   r\   r%   _recursive_tensorize~   s   

z!JaxFormatter._recursive_tensorizere   c                 C   s   t | j|ddS )NF)map_list)r   rf   )r!   re   r$   r$   r%   rY      s   z JaxFormatter.recursive_tensorizepa_tablec                 C   s$   |   |}| j|}| |S N)numpy_arrow_extractorextract_rowpython_features_decoder
decode_rowrY   )r!   rh   rowr$   r$   r%   
format_row   s   
zJaxFormatter.format_row	jax.Arrayc                 C   s:   |   |}| j||jd }| |}| |}|S )Nr   )rj   extract_columnrl   decode_columncolumn_namesrY   r7   )r!   rh   r/   r$   r$   r%   format_column   s
   

zJaxFormatter.format_columnc                 C   sD   |   |}| j|}| |}|D ]}| || ||< q|S ri   )rj   extract_batchrl   decode_batchrY   r7   )r!   rh   batchcolumn_namer$   r$   r%   format_batch   s   
zJaxFormatter.format_batch)NNN)__name__
__module____qualname__r   staticmethoddictr   r   r7   rT   rf   rY   paTabler   ro   rt   ry   __classcell__r$   r$   r"   r%   r   &   s    +r   rp   )rI   collections.abcr   typingr   r   r3   r<   pyarrowr    r   utils.loggingr   utils.py_utilsr   
formattingr
   r   jaxlibr   r   r~   __annotations__r   r$   r$   r$   r%   <module>   s   
