o
    ci                     @   s   d Z ddlmZ ddlmZ ddlmZmZ ddlm	Z	m
Z
 edddd	Zejd
dd ejdddd edkrje ZejdksEJ de	ej jedejidjdd djddhdd dZeee dS dS )a  Example of running a multi-agent experiment w/ agents always acting simultaneously.

This example:
    - demonstrates how to write your own (multi-agent) environment using RLlib's
    MultiAgentEnv API.
    - shows how to implement the `reset()` and `step()` methods of the env such that
    the agents act simultaneously.
    - shows how to configure and setup this environment class within an RLlib
    Algorithm config.
    - runs the experiment with the configured algo, trying to solve the environment.


How to run this script
----------------------
`python [script file name].py --enable-new-api-stack --sheldon-cooper-mode`

For debugging, use the following additional command line options
`--no-tune --num-env-runners=0`
which should allow you to set breakpoints anywhere in the RLlib code and
have the execution stop there for inspection and debugging.

For logging to your WandB account, use:
`--wandb-key=[your WandB API key] --wandb-project=[some project name]
--wandb-run-name=[optional: WandB run name (within the defined project)]`


Results to expect
-----------------
You should see results similar to the following in your console output:

+-----------------------------------+----------+--------+------------------+-------+
| Trial name                        | status   |   iter |   total time (s) |    ts |
|-----------------------------------+----------+--------+------------------+-------+
| PPO_RockPaperScissors_8cef7_00000 | RUNNING  |      3 |          16.5348 | 12000 |
+-----------------------------------+----------+--------+------------------+-------+
+-------------------+------------------+------------------+
|   combined return |   return player2 |   return player1 |
|-------------------+------------------+------------------|
|                 0 |            -0.15 |             0.15 |
+-------------------+------------------+------------------+

Note that b/c we are playing a zero-sum game, the overall return remains 0.0 at
all times.
    )RockPaperScissorsFlattenObservations)add_rllib_example_script_args#run_rllib_example_script_experiment)get_trainable_clsregister_envg?2   i )default_rewarddefault_itersdefault_timestepsT   )enable_new_api_stack
num_agentsz--sheldon-cooper-mode
store_truezWhether to add two more actions to the game: Lizard and Spock. Watch here for more details :) https://www.youtube.com/watch?v=x5Q6-wMx-K8)actionhelp__main__z1Must set --num-agents=2 when running this script!sheldon_cooper_mode)
env_configc                 C   s
   t ddS )NT)multi_agentr   )envspacesdevice r   e/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/examples/envs/agents_act_simultaneously.py<lambda>b   s   
 r   )env_to_module_connectorplayer1player2c                 K   s   | S )Nr   )agent_idepisodekwr   r   r   r   j   s    )policiespolicy_mapping_fnN)__doc__?ray.rllib.examples.envs.classes.multi_agent.rock_paper_scissorsr   7ray.rllib.connectors.env_to_module.flatten_observationsr   ray.rllib.utils.test_utilsr   r   ray.tune.registryr   r   parserset_defaultsadd_argument__name__
parse_argsargsr   algoget_default_configenvironmentr   env_runnersr   base_configr   r   r   r   <module>   sH    ,