o
    ϯiFN                     @   s  d Z ddlZddlZddlZddlmZ ddlZddlZddlZddl	m
  mZ ddlmZ ddlZzddlZW n eyD   dZY nw zddlm  mZ W n ey[   dZY nw zddlmZ W n eyo   dZY nw ddlmZmZmZ ddlmZ ddlmZ ddlmZm Z m!Z! dd	l"m#Z# dd
l$m%Z% ddl&m'Z' ddl(m)Z)m*Z* ddl+m,Z,m-Z- ddl+m.Z.m/Z/ ddl0m1Z1 dd Z2	d#ddZ3dd Z4d$ddZ5dd Z6dd Z7dd Z8d d! Z9e:d"kre6  dS dS )%z?
Evalute the linear probe performance on different checkpoints
    N)datetime)
GradScaler)create_model_and_transformstrace_modelcreate_model)get_data)
parse_args)	is_masterinit_distributed_deviceworld_info_from_env)setup_logging)	cosine_lr)config_lp_optimizer)train_one_epochevaluate)get_tar_path_from_dataset_namedataset_split)load_pload_class_label)LinearProbec              
   C   s   t t||D ].}tjtj| jd| dr5ttj| jd| dtj| jd|d  d qtjtj| jd| drUttj| jd| d d S )N
epoch_top_.pt   )	reversedrangeospathexistsjoincheckpoint_pathrenameremove)argsstartidxall_idx_leni r&   Y/home/ubuntu/.local/lib/python3.10/site-packages/laion_clap/evaluate/eval_linear_probe.pymaintain_ckpts*   s    r(   Tc                 C   s|  t | ttfrt| } t| |||||dS t | tr.tt|  } t| |||||dS t | tt	frdd |
 D }t|
 }t| |d}t|}	||  t||d}|dd }||	krj|| fS tt|D ]}
|||
  ||
 kr||
 |||
 < d|||
 < qptt|D ]&}
||
 rt||
t| t|tj|jd| d	|
 d
  || fS q|| fS dS )z
    Record the top-k performance of the current epoch.
    current_top_k_metrics is a dictionary of the form: {1: top_1_ckpt_measure, 2: top_2_ckpt_measure, ...}
    )r"   ckptbignumbetterpretrain_epochc                 S      i | ]}|d qS )Fr&   ).0kr&   r&   r'   
<dictcomp>T   s    z,update_top_k_performance.<locals>.<dictcomp>)reverseNTpretrain_epoch__lp_epoch_top_r   )
isinstancelisttuplenpmeanupdate_top_k_performancedictvaluesfloatintkeyssortedcopydeepcopyappendr   lenr(   torchsaver   r   r   r   )new_metrics_inputscurrent_top_k_ckpt_metricsr"   r)   r*   r+   update_flagsorted_keyssorted_valuessorted_values_r%   r&   r&   r'   r9   8   sb   



r9   c                 C   s0   |  dp| dv p|  dp|  dp|  dS )Nzclap_model.transformer)zclap_model.positional_embeddingzclap_model.text_projectionzclap_model.token_embeddingzclap_model.ln_finalzclap_model.logit_scale_t)
startswith)nr&   r&   r'   is_pretrained_paramsu   s   
rN   *   c                 C   s0   t | |  tj| |  t| |  d S N)rD   manual_seedr7   randomseed)rS   rankr&   r&   r'   random_seed   s   rU   c                  C   s  t  } | jdd| _tttj| jdtjj	d}| j
d u rFdt dd| j d| j d| j d	| j d
| j g| _
d| _t \| _| _| _| jrt| r| jD ]2}t| D ]+}tjd| d| s{td| d|  td| d| d| d| d	 qbq\d | _t| | jdrtj| j | j
}tj|dd | jrd| j nd}tj||| _d}tj| jr|d7 }|d t!| }tj|dd | jrd| j nd}tj||| _tj| js| j"rt#j$nt#j%| _&t'| j| j& t(| }d| j)v pd| j)v | _*d| j)v pd| j)v | _+t| rU| j+r2tj| j | j
dnd| _,tj| j | j
d| _-| j,| j-fD ]}	|	rRtj|	dd qFnd| _,d| _-| j.rct.|  | jdv skJ | jdkrvt#/d | j0rt#1d| j2 d | j d!| j d"| j d#	 n%| jrt#1d$| j2 d | j d!| j d"| j d#	 n
t#1d%| j2 d# t#1d&tj3| j4  | j o| j 5 d'kot| | _6d }
| j6r| j+rt+d usJ d(t+7| j,}
| j*rt| rt*d usJ d)t#"d* t*j8d+| j9| j9g t:| d, t#"d- t;|D ]R\}}t#1d.|  || _t<j=|d/d0}d}d1|v r<|d1 }t>| ||
||}| j*rlt| rlt*d usVJ d)|? D ]\}}t*@d2| |d1|i qZq| j*r}t| rt*A  d S d S d S )3N/-z*.pt)keyz%Y_%m_%d-%H_%M_%Slinear_probemodel_lr_b_j_p_Fz./json_files/z,aws s3 cp s3://s-laion-audio/webdataset_tar/z/sizes.json ./json_files/z/sizes.json)localT)exist_okzout-zout.logr   r   wandballtensorboard checkpoints)ampfp16fp32rf   zIt is recommended to use AMP mixed-precision instead of FP16. FP16 support needs further verification and tuning, especially for train.zARunning in horovod mode with multiple processes / nodes. Device: z.Process (global: z, local z	), total .z=Running in distributed mode with multiple processes. Device: z&Running with a single process. Device zopenai cache dir: nonezPlease install tensorboard.zPlease install wandb.zStarting wandb.clap)projectnotesnametagsconfigzFinished loading wandb.zpretrained on cpumap_locationepochzval/summary/)Br   amodelreplacer?   globr   r   r   
pretrainedgetmtimerm   r   nowstrftimelr
batch_sizeworkers	precisiondistributedr   
local_rankrT   
world_size
remotedatar	   datasetnamesr   r   makedirssystemlog_path	log_locallogsstrdebugloggingDEBUGINFO	log_levelr   r
   	report_tor`   rb   tensorboard_pathr   copy_codebasewarninghorovodinfodevice
expanduseropenai_model_cache_dirlower	save_logsSummaryWriterinitwandb_notesvars	enumeraterD   loadlp_mainitemslogfinish)r"   pretrained_ckptsdataset_namesplitlog_base_pathlog_filenamepostfixlog_base_path_newr   dirnamewriteridxfr)   r+   best_metricsrm   valr&   r&   r'   main   s   "








"


r   c                 C   s:   |D ]}|| vr|| | |< qt | | || | |< q| S rP   )max)best_metric
new_metricrX   r&   r&   r'   update_metric  s
   r   c                    s  t | j t| j tj| j tj| j tj | j t| j| _	t
| j| j| j| j|| j| jtj| jd| j| jd\}}tt| j	 | _|dkrtd| j  td| j  td| j  td| j  td| j   td	| j!  td
| j"  t#|| j| jd| j| jd}|$|}| j%rt&  |' D ]	}|(|)  qW d    n1 sw   Y  | j*rt+|| j,|d}t-| r6|dkr6td tt.|  td tj/| j0| j1d}	t2|	d-}
t3t4| D ]}t5| |}td| d|  |
6| d| d qW d    n	1 s1w   Y  | j7rb| j%sb| j8rItj9j:;|}i }| j<rSd|d< tj9j=j>|f|gdd|}t?| | t spJ d| j*r}d vs}J dt@| | \}}}| jdkrtA nd }d}| jBd urLtjC| jBrCtjD| jB|d}d|v r|d }|d }| j7stEtF|G d Hdrd d! |G D }|I| | jJr|d ur|G D ]\}}|I||d" d#   q|d ur |I|d#  |d urd$|v r|I|d$  td%| jB d&| d' n|I| td(| jB d&| d' | jKrBtLd) |D ]}d|_Mq;n	td*N| jB dtO_PdtO_Q| jRrt-| r d jSjT| _U| jVd uro d+ jSjT| _W| jXrztRjY|d,d- |dkrtRZ|	 i }d vrt[| || |d.t.| d/}t-| rt\||}d S |dkrd+ v r| j]st[| d| |d.t.| d/}t-| rt\||}| j^rd0d! t_| j^D }t_|| j`D ]}|| jakrtLd1 |D ]}d|_Mqt-| rtd2|  tb| ||||| |d.t.| d/	 |d3 }tc fd4d5d6D rT| j]sTt[| || |d.t.| d/}t-| r>t\||}| j^rT| jd| jefd7d8|G D }| jfrd9d! |G D }||| j1|g d:}|h| |d ur{|g |d$< || j`ks| jidkr|| ji dkrtZ|tj/| jjd;| d<| d= | jkrtZ|tj/| jjd;| d> | j^r| j]stl||| |d|d? q~|S )@NF)r~   r   jitforce_quick_gelur   skip_paramsenable_fusionfusion_typer   zlinear probe using mlp: zlinear probe using freeze: zlinear probe act layer: zlinear probe out ch: z,linear probe learning rate (if applicable): zlinear probe loss func: zlinear probe lp_metrics: i   )mlpfreezein_chout_chact)r|   r   zLinear Probe CLAP Model:zParams:z
params.txtwz  z: 
Tstatic_graph)
device_idsfind_unused_parametersz5At least one train or eval dataset must be specified.trainzCannot train with traced modelre   rq   rs   
state_dictmodulec                 S   s"   i | ]\}}|t d d |qS )zmodule.N)rC   r-   r.   vr&   r&   r'   r/   v  s   " zlp_main.<locals>.<dictcomp>_	optimizerscalerz=> resuming checkpoint 'z	' (epoch )z=> loaded checkpoint 'zFreeze Text!!!!z=> no checkpoint found at '{}'r   ra   )r   z_pe@)extra_suffixc                 S   r,   )r   r&   )r-   r%   r&   r&   r'   r/     s    z8Text pretrained parameters are freezed since this epoch.zStart epoch r   c                 3   s    | ]}| v V  qd S rP   r&   )r-   r   )datar&   r'   	<genexpr>  s    zlp_main.<locals>.<genexpr>)r   zimagenet-valzimagenet-v2c                    s$   g | ]\}}|v r |v r|qS r&   r&   r   )top_k_datasettop_k_metricr&   r'   
<listcomp>  s
    zlp_main.<locals>.<listcomp>c                 S   s"   i | ]\}}|d  d |  qS )r   r   )r   r   r&   r&   r'   r/     s    )rs   r+   rm   r   r2   
_lp_epoch_r   z_lp_epoch_latest.pt)r*   r+   )mrR   rS   rD   rQ   cudamanual_seed_allr7   r   class_label_pathclass_index_dictr   rt   tmodelrw   r~   torchscriptr   r   r   r   r   r   r   rC   r5   r>   	lp_out_chr   r   lp_mlp	lp_freezelp_actlp_lrlp_loss
lp_metricsr   tor   no_grad
parametersset_
contiguoustracer   r|   r	   r   r   r   rm   openr?   r   getattrwriter   use_bn_syncnnSyncBatchNormconvert_sync_batchnormddp_static_graphparallelDistributedDataParallelr   r   r   resumeisfiler   nextiterr   rL   load_state_dict	split_optfreeze_textprintrequires_gradformatcudnn	benchmarkdeterministicr`   
dataloadernum_samplestrain_szval_dataval_szr   watchrE   r   r   no_evalsave_top_performancer   epochsfreeze_text_afterr   anytop_k_checkpoint_select_datasettop_k_checkpoint_select_metricr   r   updatesave_frequencyr   save_most_recentr9   )r"   r   r   r+   r   
clap_modelclap_model_cfgmodelparamparams_filer   rm   r   ddp_argsr   	schedulertext_freeze_parametersr   start_epoch
checkpointsdr.   o_r   metricrG   rs   completed_epochfiltered_metricsopt_dictcheckpoint_dictr&   )r   r   r   r'   r     sr  





















" 



r   c                 C   s   ddl m}m} tj| j| jd}tj|r"t	d| d dS t	d|  tj
t}tdD ]}tj|}q3||||d	d
dd t	d dS )Nr   )copytreeignore_patternscodez$Error. Experiment already exists at z). Use --name to specify a new experiment.r1   zCopying codebase to    r   r   r`   )ignorezDone copying code.r   )shutilr   r!  r   r   r   r   rm   r   r   realpath__file__r   r   )r"   r   r!  new_code_pathcurrent_code_pathr   r&   r&   r'   r     s    
r   __main__)Tr   )rO   r   );__doc__r   r   rR   r   r@   numpyr7   rD   torch.backends.cudnnbackendsr   torch.cuda.ampr   rv   r`   ImportErrortorch.utils.tensorboardutilsrb   horovod.torchhvdclap_moduler   r   r   training.datar   training.paramsr   training.distributedr	   r
   r   training.loggerr   training.schedulerr   training.lp_mainr   training.lp_trainr   r   clap_module.utilsr   r   r   r   clap_module.linear_prober   r(   r9   rN   rU   r   r   r   r   __name__r&   r&   r&   r'   <module>   sf    
=

  W
