o
    `۷i                     @   s  d dl Z d dlZd dlZd dlmZ d dlZd dlmZmZ d dl	m
Z
 d dlmZ dd Zedkre  Zejd	ed
d ejdedd ejdedd ejdedd ejdedd ejded d ejdedd ejdedd ejdedd ejdedd ejdedd ejdedd ejd ed!d ejd"ed#d e Zejd$v rdZnd%Ze
ejd&d'ejejejd(d) d*d) d+d) d,d) d-ed.Zeejd&d'ejejd/d0gd1d2gd3d4gd%d5gd-d6Zeed7Zee e! d8 ee"e!  Z#d9$ej%ej&ej'eej(ejeje_)eej%d:$e#ej'ejeej*ej&eej' d;ej(d<ejej+iejd=ej*d0d ed>eej,-d8d  eej,-d8d; gd<d?d@dAedBd) edCd) edDd) edEd) dFdGZ.e/e.j01 Z2e3 Z4e5ej(D ]Z6e2e6 Z7e7g dH Z7e6e7dI< e8e4e7gj9d<dJZ4qej:rej;<dKej) se=dKej)  e4>dL$ej)eej* dS dS dS )M    N)datetime)runsample_from)PopulationBasedTraining)PB2c                 C   sL   | d | d d k r| d d | d< | d dkrd| d< t | d | d< | S )Ntrain_batch_sizesgd_minibatch_size   lambda   )int)config r   W/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/tune/examples/pb2_ppo_example.pyexplore   s   r   __main__z--maxi@B )typedefaultz--algoPPOz--num_workers   z--num_samplesz	--t_readyiP  z--seedz	--horizoni@  z	--perturbg      ?z
--env_nameBipedalWalker-v2z
--criteriatimesteps_totalz--net32_32z
--filename z--methodpb2z
--save_csvF)r   zBipedalWalker-v3  episode_reward_meanmaxc                   C      t ddS N?      ?randomuniformr   r   r   r   <lambda>A       r%   c                   C   r   N皙?      ?r"   r   r   r   r   r%   B   r&   c                   C   r   NMbP?h㈵>r"   r   r   r   r   r%   C   r&   c                   C   r   Nr   `  r#   randintr   r   r   r   r%   D   r&   )r
   
clip_paramlrr   )	time_attrmetricmodeperturbation_intervalresample_probabilityquantile_fractionhyperparam_mutationscustom_explore_fnr    r!   r(   r)   r,   r+   r.   )r3   r4   r5   r6   r8   hyperparam_bounds)pbtr   _z{}_{}_{}_Size{}_{}_{}z{}_{}_{}_seed{}_{}r   TINFOMeanStdFilter)fcnet_hiddensfree_log_std
      c                 C   r   r   r"   specr   r   r   r%      r&   c                 C   r   r'   r"   rD   r   r   r   r%      r&   c                 C   r   r*   r"   rD   r   r   r   r%      r&   c                 C   r   r-   r/   rD   r   r   r   r%      r&   )env	log_levelseedkl_coeffnum_gpushorizonobservation_filtermodelnum_sgd_iterr   r
   r1   r2   r   )name	schedulerverbosenum_samplesreuse_actorsstopr   )r   episodes_totalr   z(info/learner/default_policy/cur_kl_coeffAgent)dropzdata/zdata/{}/seed{}.csv)?argparseosr#   r   pandaspdray.tuner   r   ray.tune.schedulersr   ray.tune.schedulers.pb2r   r   __name__ArgumentParserparseradd_argumentr   strfloatbool
parse_argsargsenv_namerK   criteriat_readyperturbr<   r   methodsdatenowtimetimelogformatalgofilenamemethodrR   dirrH   r   netsplitanalysislisttrial_dataframesvaluesall_dfs	DataFrameresultsrangeidfconcatreset_indexsave_csvpathexistsmakedirsto_csvr   r   r   r   <module>   s    


&	




"  