o
    `۷im
                     @   s   d Z ddlZddlmZ ddlmZ ddlmZ edkr|dd Z	ed	d
ddd dd g ddd dd dd de	dZ
ejeejddeje
dddddddddddid d!d"eg d#eg d$eg d%d&d'Ze Zed(e j dS dS ))as  Example of using PBT with RLlib.

Note that this requires a cluster with at least 8 GPUs in order for all trials
to run concurrently, otherwise PBT will round-robin train the trials which
is less efficient (or you can set {"gpu": 0} to use CPUs for SGD instead).

Note that Tune in general does not need 8 GPUs, and this is just a more
computationally demanding example.
    N)tune)PPO)PopulationBasedTraining__main__c                 C   s<   | d | d d k r| d d | d< | d dk rd| d< | S )Ntrain_batch_sizesgd_minibatch_size   num_sgd_iter    )configr   r   W/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/tune/examples/pbt_ppo_example.pyexplore   s
   r   time_total_sx   g      ?c                   C      t ddS )Ng?      ?randomuniformr   r   r   r   <lambda>#       r   c                   C   r   )Ng{Gz?g      ?r   r   r   r   r   r   $   r   )gMbP?gMb@?-C6?g-C6
?gh㈵>c                   C   r   )Nr
      r   randintr   r   r   r   r   &   r   c                   C   r   )N   i @  r   r   r   r   r   r   '   r   c                   C   r   )Ni  i q r   r   r   r   r   r   (   r   )lambda
clip_paramlrr	   r   r   )	time_attrperturbation_intervalresample_probabilityhyperparam_mutationscustom_explore_fnpbt_humanoid_test)name   episode_reward_meanmaxT)	schedulernum_samplesmetricmodereuse_actorszHumanoid-v1r   r
   free_log_stdgffffff?g?r   )
      r   )r   i   i   )i'  i N  i@  )envkl_coeffnum_workersnum_gpusmodelr   r   r   r	   r   r   )
run_configtune_configparam_spacezbest hyperparameters: )__doc__r   rayr   ray.rllib.algorithms.ppor   ray.tune.schedulersr   __name__r   pbtTuner	RunConfig
TuneConfigchoicetunerfitresultsprintget_best_resultr   r   r   r   r   <module>   s`   
	