o
    پi#                     @   s  d Z ddlZddlZddlZddlZddlmZ ddlmZmZm	Z	m
Z
mZmZ ddlZddlmZ ddlmZ ddlmZ dd	lmZ eeZd
ZG dd dZdejde	de
e fddZ				ddee	 dee deeeee
f  defddZ G dd deZ!dS )aS   A dataset reader that reads tarfile based datasets

This reader can extract image samples from:
* a single tar of image files
* a folder of multiple tarfiles containing imagefiles
* a tar of tars containing image files

Labels are based on the combined folder and/or tar name structure.

Hacked together by / Copyright 2020 Ross Wightman
    N)glob)ListTupleDictSetOptionalUnion)natural_key   )load_class_map)get_img_extensions)Readerz_tarinfos.picklec                   @   s,   e Zd ZddejdejfddZdd ZdS )	TarStateNtftic                 C   s   || _ || _i | _d S N)r   r   children)selfr   r    r   Y/home/ubuntu/.local/lib/python3.10/site-packages/timm/data/readers/reader_image_in_tar.py__init__!   s   
zTarState.__init__c                 C   s
   d | _ d S r   )r   r   r   r   r   reset&      
zTarState.reset)NN)__name__
__module____qualname__tarfileTarFileTarInfor   r   r   r   r   r   r      s    r   r   parent_info
extensionsc                 C   s  d}t | D ]\}}| sqtj|j\}}tj|\}}	|	 }	|	dkrwtj| 	|dd<}
t
|jtj|d ||g g d}|t|
||d7 }t| d|j d	t|d
  d |d | W d    n1 sqw   Y  q|	|v r|d
 | |d7 }q|S )Nr   .tarr|)fileobjmodepathnamer&   r   r   samplesr!   z"/?. Extracted child tarinfos from . r)   z images.r   r
   )	enumerateisfileosr&   splitsplitextlowerr   openextractfiledictr(   join_extract_tarinfo_loggerdebuglenappend)r   r    r!   sample_countir   dirnamebasenamer(   extctf
child_infor   r   r   r6   *   s,   &r6   Tclass_name_to_idxcache_tarinfosortc                    sB  |st ddnt|}d}tj| r7tj| d  dks!J | g}tj| \} }tj|d }d}n| tjj	tjj	d }t
tj| ddd}t|}td	d
 |D }	|sgJ d|  dtd|	d dd tg d}
d}|d u r|	dkrdnd}|rd| t }tj| |}tj|rtd| d t|d}t|}
W d    n1 sw   Y  t|
d |ksJ dnt|D ]b\}}|rdntjtj|d }tj|dd7}ttj|| |d g g d}t|||d}t|d }t| d| d | d!| d"| d#
 W d    n	1 s%w   Y  |
d | q|rZtd$| d t|d%}t|
| W d    n	1 sUw   Y  g g dd u rgdg }dd&d'd(  fd)d*}td+ |
d D ]D}|rd n|d, }t }d}|d D ]}|||d-}|rt|d. d/|j|d, < ||7 }q||||d-7 }|r|||f q~
rt t!tt"d0}d1d2 t|D td3 fd4d
t#D }|rt!|d5d6 d0}t#| \}t$%t$%|}td7t d8t| d9 ||fS ):NT)as_setFr"   r   z*.tar)	recursivec                 S   s   g | ]}t j|qS r   )r.   r&   getsize).0fr   r   r   
<listcomp>R   s    z$extract_tarinfos.<locals>.<listcomp>z'No .tar files found at specified path (z).z	Scanning i   z.2fzMB of tar files...)tartrees l       
 _z!Reading tar info from cache file .rbrL   z3Cached tartree len doesn't match number of tarfilesr#   )r%   r'   r*   r   /z. Extracted tarinfos from r+   z children, z	 samples.zWriting tar info to cache file wb)	leaf_onlyc                 W   s<   t jj| t jj}| r|t jjd S |t jjdS )NrF   rN   )r.   r&   r5   stripsepr/   replace)rS   r&   r   r   r   _label_from_paths}   s   &z+extract_tarinfos.<locals>._label_from_pathsc                    sb   d}| d D ](} | d t j|j}s|vrq||| d f | |d7 }q|S )Nr   r)   r&   r   r
   )r.   r&   r=   r:   )infofnaddedslabelrW   build_class_maprB   labelsr)   r   r   _add_samples   s   

z&extract_tarinfos.<locals>._add_samplesz+Collecting samples and building tar states.r(   )rY   r   )r   )keyc                 S      i | ]\}}||qS r   r   )rI   idxcr   r   r   
<dictcomp>       z$extract_tarinfos.<locals>.<dictcomp>z$Mapping targets and sorting samples.c                    s$   g | ]\}}| v r| | fqS r   r   )rI   r[   l)rB   r   r   rK      s   $ c                 S   s   t | d d jS Nr   )r	   r&   )kr   r   r   <lambda>   s    z"extract_tarinfos.<locals>.<lambda>zFinished processing z samples across z tar files.)&r   setr.   r&   r-   r0   r1   r/   rT   rU   r   r5   r9   sumr7   rX   r4   CACHE_FILENAME_SUFFIXexistsr2   pickleloadr,   r>   r   relpathr6   r8   r:   dumpr   r   listsortedr	   zipnparray)rootrB   rC   r!   rD   root_is_tartar_filenames	root_namenum_tars	tar_bytesrX   
cache_pathcache_filenamepfr<   rY   r&   r   r    num_samplesnum_childrentarfilesr`   tar_name	tar_stateparent_addedrA   child_addedsorted_labelssamples_and_targetstargetsr   r]   r   extract_tarinfos?   s   
  




 r   c                       s<   e Zd ZdZd fdd	Zdd Zd	d
 ZdddZ  ZS )ReaderImageInTarzI Multi-tarfile dataset reader where there is one .tar file per class
    rM   TNc                    s   t    d }|rt||}|| _t| j||d\| _| _| _}dd | j D | _	t
|dkrD|d d d u rDd| _|d d | _nd| _t|| _|| _d S )N)rB   rC   c                 S   rb   r   r   )rI   ri   vr   r   r   re      rf   z-ReaderImageInTar.__init__.<locals>.<dictcomp>r
   r   TF)superr   r   rx   r   r)   r   rB   itemsclass_idx_to_namer9   ry   r   r4   cache_tarfiles)r   rx   	class_mapr   rC   rB   r   	__class__r   r   r      s"   



zReaderImageInTar.__init__c                 C   s
   t | jS r   )r9   r)   r   r   r   r   __len__   r   zReaderImageInTar.__len__c                 C   s   | j | }| j| }|\}}}|rtj| j|n| j}d }d }	| jr1| jr)| jn| j| }	|	j	}|d u r@t
|}| jr@||	_	|d uri| jrN|	j|j j	nd }
|
d u rgt
j||d}
| jrg|
|	j|j _	|
}|||fS )N)r$   )r)   r   r.   r&   r5   rx   r   ry   r   r   r   r2   r   r(   r3   )r   indexsampletarget	sample_ti	parent_fnchild_ti
parent_absr   cache_stater@   r   r   r   __getitem__   s*   



zReaderImageInTar.__getitem__Fc                 C   s$   | j | d j}|rtj|}|S rh   )r)   r(   r.   r&   r>   )r   r   r>   absolutefilenamer   r   r   	_filename   s   zReaderImageInTar._filename)rM   TN)FF)	r   r   r   __doc__r   r   r   r   __classcell__r   r   r   r   r      s    r   )NNNT)"r   loggingr.   ro   r   r   typingr   r   r   r   r   r   numpyrv   timm.utils.miscr	   r   r   img_extensionsr   readerr   	getLoggerr   r7   rm   r   r   strr6   boolr   r   r   r   r   r   <module>   s<     

m