"""An example showing how to use the PyFlyt gymnasium environment to train a UAV to
reach waypoints.

For more info about the PyFlyt gymnasium environment, see the GitHub repository:
https://github.com/jjshoots/PyFlyt/tree/master/PyFlyt

This example
    - Runs a single-agent `PyFlyt/QuadX-Waypoints-v1` experiment.
    - Uses a gymnasium reward wrapper for reward scaling.
    - Stops the experiment if either `--stop-iters` (default is 200) or
        `--stop-reward` (default is 90.0) is reached.


How to run this script
----------------------
`python [script file name].py --enable-new-api-stack`

Control the number of environments per `EnvRunner` via `--num-envs-per-env-runner`.
Increasing this value speeds up sampling; see the example invocation below.
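
For instance, to step eight env copies per `EnvRunner` (an illustrative,
untuned value):
`python [script file name].py --enable-new-api-stack --num-envs-per-env-runner=8`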

For debugging, use the following additional command line options
`--no-tune --num-env-runners=0` which should allow you to set breakpoints
anywhere in the RLlib code and have the execution stop there for inspection
and debugging.
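
Putting these options together, a full debug invocation looks like:
`python [script file name].py --enable-new-api-stack --no-tune --num-env-runners=0`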

For logging to your WandB account, use:
`--wandb-key=[your WandB API key] --wandb-project=[some project name]
--wandb-run-name=[optional: WandB run name (within the defined project)]`
    N)add_rllib_example_script_args#run_rllib_example_script_experiment)ENV_RUNNER_RESULTSEPISODE_RETURN_MEANTRAINING_ITERATION_TIMER)get_trainable_clsregister_envi     i g     V@)default_itersdefault_timestepsdefault_rewardz--runPPOz&The RLlib-registered algorithm to use.)typedefaulthelpz
--env-namequadx_waypoints)r   r   z--num-envs-per-env-runner   c                       s$   e Zd Z fddZdd Z  ZS )RewardWrapperc                    s   t  | d S )N)super__init__)selfenv	__class__ V/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/examples/quadx_waypoints.pyr   ;   s   zRewardWrapper.__init__c                 C   s   |dks|dkr|d S |S )Ng     X@g     X
   r   )r   rewardr   r   r   r   >   s   zRewardWrapper.reward)__name__
__module____qualname__r   r   __classcell__r   r   r   r   r   :   s    r   c                 C   s2   dd l }ddl m} td}t|}||ddS )Nr   )FlattenWaypointEnvzPyFlyt/QuadX-Waypoints-v1   )context_length)PyFlyt.gym_envsr"   gymmaker   )
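
# A quick sanity check of the scaling above (hypothetical values; PyFlyt's
# exact bonus/penalty magnitudes are not verified here):
#
#   >>> w = RewardWrapper(gym.make("CartPole-v1"))  # any env works for this
#   >>> w.reward(100.0), w.reward(-100.0), w.reward(0.5)
#   (10.0, -10.0, 0.5)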


def create_quadx_waypoints_env(env_config):
    # Import PyFlyt lazily so only the processes that actually build the env
    # (the EnvRunners) need the package installed.
    import PyFlyt.gym_envs  # noqa: F401 (registers the PyFlyt envs with gym)
    from PyFlyt.gym_envs import FlattenWaypointEnv

    env = gym.make("PyFlyt/QuadX-Waypoints-v1")
    # Wrap the environment to use max +10 and -10 for rewards.
    env = RewardWrapper(env)
    # Flatten the waypoint observations into a plain vector observation
    # (`context_length=1` is assumed here).
    return FlattenWaypointEnv(env, context_length=1)


if __name__ == "__main__":
    args = parser.parse_args()

    # Register the env creator under the name passed via `--env-name`.
    register_env(args.env_name, env_creator=create_quadx_waypoints_env)

    # Build the algorithm config from the registered trainable's defaults.
    algo_cls = get_trainable_cls(args.run)
    config = (
        algo_cls.get_default_config()
        .environment(args.env_name)
        .env_runners(num_envs_per_env_runner=args.num_envs_per_env_runner)
        .reporting(min_time_s_per_iteration=0.1)
    )

    if args.run == "PPO":
        # PPO: small, shared-layer model and small minibatches (the exact
        # hidden size and minibatch size are assumed).
        config.rl_module(
            model_config={
                "fcnet_hiddens": [32],
                "fcnet_activation": "linear",
                "vf_share_layers": True,
            },
        )
        config.training(
            minibatch_size=128,
            train_batch_size_per_learner=10000,
        )
    elif args.run == "IMPALA":
        config.env_runners(num_env_runners=2)
        config.learners(num_gpus_per_learner=0)
        config.training(vf_loss_coeff=0.01)

    # Stop on either the iteration budget or the target mean episode return.
    EPISODE_RETURN_MEAN_KEY = f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}"
    stop = {
        TRAINING_ITERATION_TIMER: args.stop_iters,
        EPISODE_RETURN_MEAN_KEY: args.stop_reward,
    }

    run_rllib_example_script_experiment(
        config,
        args,
        stop=stop,
        success_metric={EPISODE_RETURN_MEAN_KEY: args.stop_reward},
    )
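
# Hypothetical quick checks (assume `pip install PyFlyt`):
#
#   Smoke-test the env factory without starting RLlib:
#     env = create_quadx_waypoints_env({})
#     obs, info = env.reset()
#     print(env.observation_space, env.action_space)
#
#   Then train, e.g.:
#     python quadx_waypoints.py --enable-new-api-stack
#     python quadx_waypoints.py --enable-new-api-stack --run=IMPALA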