o
    8wi                     @   s   U d dl Z d dlmZ d dlmZmZ d dlZd dlZ	ddl
mZ ddlmZ ddlmZ dd	lmZ er=d dlZd dlZe Zdaee ed
< G dd deedef ZdS )    N)Mapping)TYPE_CHECKINGOptional   )config)
get_logger)
map_nested   )TensorFormatterDEVICE_MAPPINGc                       s   e Zd Zd fdd	Zedeedf fddZdd	 Zd
d Z	dd Z
defddZdejdefddZdejddfddZdejdefddZ  ZS )JaxFormatterNc              
      s   t  j||d dd l}ddlm} t||r$td| dt| dt|tr+|nt|	 d | _
td u r<|  a| j
tt vrhtd| j
 dtt  d	t|	 d  d
 t|	 d | _
|| _d S )N)featurestoken_per_repo_idr   )Devicez	Expected z to be a `str` not z, as `jaxlib.xla_extension.Device` is not serializable neither with `pickle` nor with `dill`. Instead you can surround the device with `str()` to get its string identifier that will be internally mapped to the actual `jaxlib.xla_extension.Device`.zDevice with string identifier z) not listed among the available devices: z), so falling back to the default device: .)super__init__jaxjaxlib.xla_clientr   
isinstance
ValueErrortypestrdevicesdevicer   _map_devices_to_strlistkeysloggerwarningjnp_array_kwargs)selfr   r   r   r    r   r   	__class__ ^/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/datasets/formatting/jax_formatter.pyr   '   s(   
 


zJaxFormatter.__init__returnzjaxlib.xla_extension.Devicec                  C   s   dd l } dd |  D S )Nr   c                 S   s   i | ]}t ||qS r$   )r   ).0r   r$   r$   r%   
<dictcomp>F       z4JaxFormatter._map_devices_to_str.<locals>.<dictcomp>)r   r   )r   r$   r$   r%   r   B   s   z JaxFormatter._map_devices_to_strc                    sL   dd l dd lm} t tr$ r$t fdd D r$|j ddS  S )Nr   c                 3   s<    | ]}t |jo|j d  jko|j d  jkV  qdS )r   N)r   Arrayshapedtype)r'   xcolumnr   r$   r%   	<genexpr>M   s    ,
z,JaxFormatter._consolidate.<locals>.<genexpr>)axis)r   	jax.numpynumpyr   r   allstack)r!   r/   jnpr$   r.   r%   _consolidateH   s   zJaxFormatter._consolidatec           	      C   s  dd l }dd lm} t|tttd fr|S t|tjtj	fr+t
|jtjr+| S i }t|tjtj	frNt
|jtjrN|jjrHd|ji}nd|ji}nt|tjtj	frdt
|jtjrdd|ji}tjr|dtjv r|dd l}t||jjr|t|}tjrdtjv rddlm} t||r|S tjrdtjv rddlm }m!} t|||fr|S t"d u r| # a"|$t"| j%  |j&|fi i || j'W  d    S 1 sw   Y  d S )Nr   r,   PILtorchvision)VideoReader
torchcodec)AudioDecoderVideoDecoder)(r   r2   r3   r   r   bytesr   np	characterndarray
issubdtyper,   tolistnumberintegerr   jax_enable_x64int64int32floatingfloat32PIL_AVAILABLEsysmodules	PIL.ImageImageasarrayTORCHVISION_AVAILABLEtorchvision.ior:   TORCHCODEC_AVAILABLEtorchcodec.decodersr<   r=   r   r   default_devicer   arrayr    )	r!   valuer   r6   default_dtyper8   r:   r<   r=   r$   r$   r%   
_tensorizeS   s<   """


$zJaxFormatter._tensorizec                    s   dd l }tjr#dtjv r#dd l}t||jr# |	 
  d S t|dr2t||js2| }t|tjrJ|jtkrI  fdd|D S nt|ttfr]  fdd|D S  |S )Nr   torchr$   	__array__c                       g | ]}  |qS r$   recursive_tensorizer'   	substructr!   r$   r%   
<listcomp>   r)   z5JaxFormatter._recursive_tensorize.<locals>.<listcomp>c                    r\   r$   r]   r_   ra   r$   r%   rb      r)   )r   r   TORCH_AVAILABLErL   rM   rZ   r   TensorrY   detachcpur3   hasattrr*   r[   r?   rA   r,   objectr7   r   tuple)r!   data_structr   rZ   r$   ra   r%   _recursive_tensorize   s   

z!JaxFormatter._recursive_tensorizerj   c                 C   s   t | j|ddS )NF)map_list)r   rk   )r!   rj   r$   r$   r%   r^      s   z JaxFormatter.recursive_tensorizepa_tablec                 C   s$   |   |}| j|}| |S N)numpy_arrow_extractorextract_rowpython_features_decoder
decode_rowr^   )r!   rm   rowr$   r$   r%   
format_row   s   
zJaxFormatter.format_row	jax.Arrayc                 C   s:   |   |}| j||jd }| |}| |}|S )Nr   )ro   extract_columnrq   decode_columncolumn_namesr^   r7   )r!   rm   r/   r$   r$   r%   format_column   s
   

zJaxFormatter.format_columnc                 C   sD   |   |}| j|}| |}|D ]}| || ||< q|S rn   )ro   extract_batchrq   decode_batchr^   r7   )r!   rm   batchcolumn_namer$   r$   r%   format_batch   s   
zJaxFormatter.format_batch)NNN)__name__
__module____qualname__r   staticmethoddictr   r   r7   rY   rk   r^   paTabler   rt   ry   r~   __classcell__r$   r$   r"   r%   r   &   s    0r   ru   )rL   collections.abcr   typingr   r   r3   r?   pyarrowr    r   utils.loggingr   utils.py_utilsr   
formattingr
   r   jaxlibr   r   r   __annotations__r   r$   r$   r$   r%   <module>   s   
