o
    پiPB                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZmZmZ ddlZddlmZ ddlZddlmZ ddlmZmZmZ z ddl Z!dd	l"m#Z#m$Z$ dd
l%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+ W n e,y   dZ!dZ&Y nw ddl-m.Z. ddl/m0Z0 ddl1m2Z2 e3e4Z5e6ej78ddZ9e6ej78ddZ:d-ddZ;e	G dd dZ<de=defddZ>dd Z?		 	!	"d.d#d$Z@d%d& ZAe!durG d'd( d(e!jBZCndZCG d)d* d*eZDG d+d, d,e0ZEdS )/zR Dataset reader for webdataset

Hacked together by / Copyright 2022 Ross Wightman
    N)	dataclass)partial)islice)AnyCallableDictListOptionalTuple)Image)DatasetIterableDatasetget_worker_info)_shufflegetfirst)expand_urls)base_plus_ext
url_openertar_file_expandervalid_sample   )load_class_map)Reader)SharedCountWDS_SHUFFLE_SIZEi    WDS_INITIAL_SIZEi   z
_info.jsonz	info.jsonc           	      C   s   t |tr|f}g }d}|D ]N}tj| |}z/|| t|}|dr.t	
|}nt|}W d    n1 s=w   Y  |W   S  ty\ } z
t|}W Y d }~qd }~ww td| d| d i S )N z.jsonzDataset info file not found at z	. Error: z.. Falling back to provided split and size arg.)
isinstancestrospathjoinappendwdsgopenendswithjsonloadyaml	safe_load	Exception_loggerwarning)	rootnamestriederr_strn	full_pathf	info_dicte r7   P/home/ubuntu/.local/lib/python3.10/site-packages/timm/data/readers/reader_wds.py
_load_info*   s.   




r9   c                   @   sJ   e Zd ZU eed< ee ed< dZee ed< dZeed< dZ	eed< dS )		SplitInfonum_samples	filenamesr7   shard_lengthsr   	alt_labelnameN)
__name__
__module____qualname__int__annotations__r
   r   r=   r>   r?   r7   r7   r7   r8   r:   B   s   
 r:   splitinfoc           	         st  dd }d| v sd| v r|  d} d}d}t| dkr!t| d }| d } d	| vrG|  d
d}t|d }|rGd|v rG|| |d v rG|| }t| }|r|d | }|sdd t|d |d D  t fdd|D }t  |d< t  |d< ||d< ||}|S t	|||d}|S d|vs| |d vrt
d|  d|di   d| } |d |  }||}|S )Nc                 S   s2   t | d t| d t| d | dd| d dS )Nr;   r<   r=   r>   r   r?   )r;   r<   r=   r>   r?   )r:   tupleget)	dict_infor7   r7   r8   _info_convertL   s   


z(_parse_split_info.<locals>._info_converttarz..|r   r   r   z::-   splitsc                 S   s   i | ]\}}||qS r7   r7   ).0r4   cr7   r7   r8   
<dictcomp>h   s    z%_parse_split_info.<locals>.<dictcomp>r<   r=   c                 3   s    | ]} | V  qd S Nr7   rP   r4   _fcr7   r8   	<genexpr>i   s    z$_parse_split_info.<locals>.<genexpr>r;   )r?   r;   r<   zsplit z not found in info ())rE   lenrC   r   zipsumrG   keysvaluesr:   RuntimeErrorrH   )	rE   rF   rJ   r;   
split_namesplit_parts	split_idxsplit_filenames
split_infor7   rU   r8   _parse_split_infoK   sH   	
"rd   c                 C   s(   t dt|  d t| tr| dS )zQCall in an exception handler to ignore exceptions, issue a warning, and continue.zHandling webdataset error (z). Ignoring.T)r,   r-   reprr   	TypeError)exnr7   r7   r8   log_and_continue~   s   
rh   jpgRGBclsr   c           
      C   s   |rt | d }t|| }|dk rdS nt| | }t| |}t|}t|}|  W d   n1 s:w   Y  |rF|	|}t
||| ddd}	|	S )z Custom sample decode
    * decode and convert PIL Image
    * cls byte string label to int
    * pass through JSON byte string (if it exists) without parse
    r'   r   N)ri   rk   r'   )r'   loadsrC   r   ioBytesIOr   openr(   convertdictrH   )
sample	image_key
image_mode
target_keyr>   metaclass_labelimgbdecodedr7   r7   r8   _decode   s    



r{   c                  C   s   t  } | dur
| jS tj S )z'get dataloader worker seed from pytorchN)r   seedr$   utilspytorch_worker_seed)worker_infor7   r7   r8   r~      s   
r~   c                   @   s&   e Zd Z				d
ddZdd Zd	S )detshuffle2  d   r   c                 C   s   || _ || _|| _|| _d S rS   )bufsizeinitialr|   epoch)selfr   r   r|   r   r7   r7   r8   __init__   s   
zdetshuffle2.__init__c                 C   sf   t | jtr| jj}n
|  jd7  _| j}| jdk r t | }n| j| }t|}t|| j	| j
|S )Nr   r   )r   r   r   valuer|   r~   randomRandomr   r   r   )r   srcr   r|   rngr7   r7   r8   run   s   



zdetshuffle2.runN)r   r   r   r   )r@   rA   rB   r   r   r7   r7   r7   r8   r      s    
r   c                       s4   e Zd ZdZejdddf fdd	Zdd Z  ZS )	ResampledShards2z,An iterable dataset yielding a list of urls.NTr   c                    sb   t    tj|}|| _t| jd tsJ || _t	
 | _|du r&tn|| _|| _|| _dS )zSample shards from the shard list with replacement.

        :param urls: a list of URLs as a Python list or brace notation string
        r   N)superr   r$   
shardlistsr   urlsr   r   nshardsr   r   r   r~   worker_seeddeterministicr   )r   r   r   r   r   r   	__class__r7   r8   r      s   


zResampledShards2.__init__c                 c   s    t | jtr| jj}n
|  jd7  _| j}| jr#t|  | | _t	| j
D ]}| jdt| jd }t| j| dV  q(dS )z#Return an iterator over the shards.r   r   )urlN)r   r   r   r   r   r   r   r   r   ranger   randintrY   r   rq   )r   r   _indexr7   r7   r8   __iter__   s   
zResampledShards2.__iter__)	r@   rA   rB   __doc__sysmaxsizer   r   __classcell__r7   r7   r   r8   r      s    r   c                "       s   e Zd Z													
			d1dedee dededee dedededee dedededededee dee f  fddZdd Z		d2d ee fd!d"Z
d#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd3d-d.Zd3d/d0Z  ZS )4	ReaderWdsNtrainFr   r   *   jpg;png;webprj   rk   r   filenamer.   r?   rE   is_trainingr;   
batch_sizerepeatsr|   	class_map	input_keyinput_img_moderu   target_img_modefilename_keysample_shuffle_sizesample_initial_sizec                    sP  t    td u rtd|| _|| _|| _|| _|| _d| _	|p"t
| _|p't| _|
| _|| _|| _|| _d| _t| j| _t|| j| _|d urM|| _n| jj| _|r[| js[tdd| _|	rit|	| _d| _ni | _d| _d| _t rt rt  dkrt! | _t  | _d | _"d| _#|| _$d| _%d| _&d| _'d| _(t) | _*d | _+d S )	NzcPlease install webdataset 0.2.x package `pip install git+https://github.com/webdataset/webdataset`.i  z.JPEGzBInvalid split definition, num_samples not specified in train mode.FTr   r   ),r   r   r$   r^   r.   r   r   r   common_seedshard_shuffle_sizeSAMPLE_SHUFFLE_SIZEr   SAMPLE_INITIAL_SIZEr   r   r   ru   r   key_extr9   rF   rd   rc   r;   remap_classr   class_to_idx	dist_rankdist_num_replicasdistis_availableis_initializedget_world_sizeget_rankr   	worker_idr   num_workersglobal_worker_idglobal_num_workers
init_countr   epoch_countds)r   r.   r?   rE   r   r;   r   r   r|   r   r   r   ru   r   r   r   r   r   r7   r8   r     sX   








zReaderWds.__init__c                 C   s   || j _d S rS   )r   r   )r   countr7   r7   r8   	set_epochQ  s   zReaderWds.set_epochr   c                 C   s2   | j d urd S |d ur|| _| j| j | _d S d S rS   )r   r   r   r   )r   r   r7   r7   r8   set_loader_cfgT  s   
zReaderWds.set_loader_cfgc                    s.   j du r/tjj }|dur| _ |j _|j _|j	 _	 j
 j	  _ j j	  j  _ fdd jjD }t|g} jrf|t j j jd jtjtdtj j jt jdg n| jtjtdg |tj t!t" j# j$ jj%dtdtj& j# j'dg tj(|  _)dS )	z8 Lazily initialize worker (in worker processes)
        Nc                    s   g | ]
}t j j|qS r7   )r    r!   r"   r.   rT   r   r7   r8   
<listcomp>l  s    z(ReaderWds._lazy_init.<locals>.<listcomp>)r|   r   )handler)r   r   r   )rs   rt   r>   )imagetarget)*r   torchr}   datar   idr   r|   r   r   r   r   r   r   rc   r<   r$   SimpleShardListr   extendr   r   r   r   _split_by_node_and_workertarfile_to_samplesrh   shuffler   r   r   r   mapr   r{   r   r   r>   renameru   DataPipeliner   )r   r   abs_shard_filenamespipeliner7   r   r8   
_lazy_init^  sV   



	zReaderWds._lazy_initc                 c   sB    | j dkrt|| jd | j D ]}|V  qd S |D ]}|V  qd S Nr   )r   r   r   )r   r   sr7   r7   r8   r     s   
z#ReaderWds._split_by_node_and_workerc                 C   sR   | j t| j| j }| js| jdkrt|}| jr%t|| j | j }t|S r   )	r;   maxr   r   r   mathceilr   rC   )r   num_worker_samplesr7   r7   r8   _num_samples_per_worker  s   
z!ReaderWds._num_samples_per_workerc                 c   s    | j d u r
|   |  }| js| jdkr| j |}n| j }d}|D ]}|d }| jr2| j| }|d |fV  |d7 }q$d S )Nr   r   r   r   )r   r   r   r   r   
with_epochr   r   )r   r   r   irr   r   r7   r7   r8   r     s   


zReaderWds.__iter__c                 C   s   |   | j }|S rS   )r   r   )r   r;   r7   r7   r8   __len__  s   zReaderWds.__len__c                 C   s   J d)NFzNot supportedr7   )r   r   basenameabsoluter7   r7   r8   	_filename  s   zReaderWds._filenamec                 C   sz   | j du r	|   g }| j D ],}| j|v r|| j }nd|v r'|d | j }nJ d|| t|| jkr: |S q|S )z0 Return all filenames in dataset, overrides baseN__key__FzNo supported name field present)r   r   r   r   r#   rY   r;   )r   r   r   r/   rr   r?   r7   r7   r8   r<     s   



zReaderWds.filenames)Nr   FNr   r   r   Nr   rj   rk   r   r   NNrS   )FF)r@   rA   rB   r   r	   boolrC   rq   r   r   r   r   r   r   r   r   r   r<   r   r7   r7   r   r8   r     sz    	
J

5
r   )r   )ri   rj   rk   r   )Fr   rm   r'   loggingr   r    r   r   dataclassesr   	functoolsr   	itertoolsr   typingr   r   r   r   r	   r
   r   torch.distributeddistributedr   r)   PILr   torch.utils.datar   r   r   
webdatasetr$   webdataset.filtersr   r   webdataset.shardlistsr   webdataset.tariteratorsr   r   r   r   ImportErrorr   r   readerr   shared_countr   	getLoggerr@   r,   rC   environrH   r   r   r9   r:   r   rd   rh   r{   r~   PipelineStager   r   r   r7   r7   r7   r8   <module>   s`     

3
$
,