o
    ci                     @   s  d Z ddlmZ ddlZddlmZ ddlmZmZm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ e Zejdddgddd ejdddd ejdeddd ejdeddd ejdeddd e Z dZ!d d! Z"ed"d#d$  e j#d"d%d&id'd(j$e j%sd)nd*d+d,d-d.&dj'd/d0d0d1d2d&d3d4d5e j%sd6nd7d8d'id9j(e j%sd+ndd:j)d;d<Z*ej+d=e*, ej-ee j.ee j/e d>e e j0id*d?d@1  dS )Aa  
Example of interfacing with an environment that produces 2D observations.

This example shows how turning 2D observations with shape (A, B) into a 3D
observations with shape (C, D, 1) can enable usage of RLlib's default models.
RLlib's default Catalog class does not provide default models for 2D observation
spaces, but it does so for 3D observations.
Therefore, one can either write a custom model or transform the 2D observations into 3D
observations. This enables RLlib to use one of the default CNN filters, even though the
original observation space of the environment does not fit them.

This simple example should reach rewards of 50 within 150k timesteps.
    )float32N)pistonball_v6)normalize_obs_v0dtype_v0color_reduction_v0
reshape_v0	resize_v1)TRAINING_ITERATION)	PPOConfig)PettingZooEnv)ENV_RUNNER_RESULTSEPISODE_RETURN_MEANNUM_ENV_STEPS_SAMPLED_LIFETIME)register_env)tunez--frameworktf2torchzThe DL framework specifier.)choicesdefaulthelpz	--as-test
store_truez8Whether this script should be run as a compilation test.)actionr   z--stop-iters   zNumber of iterations to train.)typer   r   z--stop-timestepsi@B zNumber of timesteps to train.z--stop-reward2   z!Reward at which we stop training.)*   r      c                 C   sR   t jdd}t|td}t|dd}t|}t|td td d}t|td	}|S )
N   )	n_pistons)dtypeR)moder   r   )x_sizey_size)shape)	r   envr   r   r   r   r   TRANSFORMED_OBS_SPACEr   )configr%    r(   Y/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/examples/envs/greyscale_env.pyenv_creatorE   s   r*   
pistonballc                 C   s   t t| S )N)r   r*   )r'   r(   r(   r)   <lambda>V   s    r,   local_ratiog      ?T)
env_configclip_rewards      r   NoFilterauto)num_env_runnersnum_envs_per_env_runnerobservation_filterrollout_fragment_lengthg{Gz?g?g      $@
   g-C6?d   i  i  i  vf_share_layers)entropy_coeffvf_loss_coeff
clip_paramvf_clip_param
num_epochskl_coefflr	grad_clipminibatch_sizetrain_batch_sizemodel)num_gpus   )min_time_s_per_iterationPPO/)stopverbose)param_space
run_config)2__doc__numpyr   argparsepettingzoo.butterflyr   	supersuitr   r   r   r   r   ray.tune.resultr	   ray.rllib.algorithms.ppor
   ray.rllib.envr   ray.rllib.utils.metricsr   r   r   ray.tune.registryr   rayr   ArgumentParserparseradd_argumentintfloat
parse_argsargsr&   r*   environmentenv_runnersas_test	frameworktraining	resources	reportingr'   Tunerto_dict	RunConfig
stop_itersstop_timestepsstop_rewardfitr(   r(   r(   r)   <module>   s   
