o
    ciz<                     @   s   d dl mZmZmZ d dlZd dlZd dlZd dl	Z	d dl
mZmZmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZ eeZeG dd	 d	eZdS )
    )BoxMultiDiscreteTupleN)CallableOptionalr   )MultiAgentEnv)
PolicySpec)	PublicAPI)MultiAgentDictPolicyIDAgentIDc                       s   e Zd ZdZdZdZdZ						dd	ed
ee	 de	de
de	de	f fddZdedeeeeeef fddZddddeeef fddZdd Zededeeeegef f fddZ  ZS )
Unity3DEnvap  A MultiAgentEnv representing a single Unity3D game instance.

    For an example on how to use this Env with a running Unity3D editor
    or with a compiled game, see:
    `rllib/examples/unity3d_env_local.py`
    For an example on how to use it inside a Unity game client, which
    connects to an RLlib Policy server, see:
    `rllib/examples/envs/external_envs/unity3d_[client|server].py`

    Supports all Unity3D (MLAgents) examples, multi- or single-agent and
    gets converted automatically into an ExternalMultiAgentEnv, when used
    inside an RLlib PolicyClient for cloud/distributed training of Unity games.
    i  i  r   NF,    	file_nameportseedno_graphicstimeout_waitepisode_horizonc                    s   t    |du rtd ddl}ddlm} d}		 |	dur'tt	dd |p0|r.| j
n| j}	|r6tjnd}
t jd7  _z|||
|	|||d| _td	|	|
  W n |jjy`   Y nw nq| jjd
| _dd | jD | _|| _d| _dS )a
  Initializes a Unity3DEnv object.

        Args:
            file_name (Optional[str]): Name of the Unity game binary.
                If None, will assume a locally running Unity3D editor
                to be used, instead.
            port (Optional[int]): Port number to connect to Unity environment.
            seed: A random seed value to use for the Unity3D game.
            no_graphics: Whether to run the Unity3D simulator in
                no-graphics mode. Default: False.
            timeout_wait: Time (in seconds) to wait for connection from
                the Unity3D instance.
            episode_horizon: A hard horizon to abide to. After at most
                this many steps (per-agent episode `step()` calls), the
                Unity3D game is reset and will start again (finishing the
                multi-agent episode that the game represents).
                Note: The game itself may contain its own episode length
                limits, which are always obeyed (on top of this value here).
        NzNo game binary provided, will use a running Unity editor instead.
Make sure you are pressing the Play (|>) button in your editor to start.r   )UnityEnvironmentT   
   )r   	worker_id	base_portr   r   r   z$Created UnityEnvironment for port {}.c                 S   s   g | ]}t |qS  )int).0sr   r   V/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/env/wrappers/unity3d_env.py
<listcomp>o   s    z'Unity3DEnv.__init__.<locals>.<listcomp>)super__init__printmlagents_envsmlagents_envs.environmentr   timesleeprandomrandint_BASE_PORT_ENVIRONMENT_BASE_PORT_EDITORr   
_WORKER_ID	unity_envformat	exceptionUnityWorkerInUseExceptionAPI_VERSIONsplitapi_versionr   episode_timesteps)selfr   r   r   r   r   r   r%   r   port_
worker_id_	__class__r   r    r#   '   sF   

zUnity3DEnv.__init__action_dictreturnc                 C   s  ddl m} g }| jjD ]}| jd dks#| jd dkrg| jd dkrgg }| j|d jD ]}|d| }|| |||  q.|rf|d j	t
jkrW|t
|d}n|t
|d}| j|| q| j|d j D ]}|d| }|| | j||||  qrq| j  |  \}	}
}}}|  jd7  _| j| jkr|	|
|tdd	ifi d
d |D |fS |	|
|||fS )a  Performs one multi-agent step through the game.

        Args:
            action_dict: Multi-agent action dict with:
                keys=agent identifier consisting of
                [MLagents behavior name, e.g. "Goalie?team=1"] + "_" +
                [Agent index, a unique MLAgent-assigned index per single agent]

        Returns:
            tuple:
                - obs: Multi-agent observation dict.
                    Only those observations for which to get new actions are
                    returned.
                - rewards: Rewards dict matching `obs`.
                - dones: Done dict with only an __all__ multi-agent entry in
                    it. __all__=True, if episode is done for all agents.
                - infos: An (empty) info dict.
        r   )ActionTupler      _{})
continuous)discrete__all__Tc                 S   s   i | ]}|d qS )Tr   )r   agent_idr   r   r    
<dictcomp>   s    z#Unity3DEnv.step.<locals>.<dictcomp>)mlagents_envs.base_envr=   r.   behavior_specsr4   	get_stepsrC   r/   appenddtypenpfloat32arrayset_actionsagent_id_to_indexkeysset_action_for_agentstep_get_step_resultsr5   r   dict)r6   r;   r=   
all_agentsbehavior_nameactionsrC   keyaction_tupleobsrewardsterminateds
truncatedsinfosr   r   r    rQ   v   sL   




	zUnity3DEnv.step)r   optionsc                C   s*   d| _ | j  |  \}}}}}||fS )z?Resets the entire Unity3D scene (a single multi-agent episode).r   )r5   r.   resetrR   )r6   r   r^   rY   _r]   r   r   r    r_      s   
zUnity3DEnv.resetc           
         s,  i }i }i }| j jD ]}| j |\}}|j D ]3\} |d| }t fdd|jD }	t|	dkr:|	d n|	}	|	||< |j	  |j
   ||< q|j D ]7\} |d| }||vr}t fdd|jD }	t|	dkrw|	d n|	 ||< }	|j	  |j
   ||< qRq
||ddiddi|fS )	a  Collects those agents' obs/rewards that have to act in next `step`.

        Returns:
            Tuple:
                obs: Multi-agent observation dict.
                    Only those observations for which to get new actions are
                    returned.
                rewards: Rewards dict matching `obs`.
                dones: Done dict with only an __all__ multi-agent entry in it.
                    __all__=True, if episode is done for all agents.
                infos: An (empty) info dict.
        r?   c                 3       | ]}|  V  qd S Nr   r   oidxr   r    	<genexpr>       z/Unity3DEnv._get_step_results.<locals>.<genexpr>r   r   c                 3   ra   rb   r   rc   re   r   r    rg      rh   rB   F)r.   rF   rG   rN   itemsr/   tuplerY   lenrewardgroup_reward)
r6   rY   rZ   r]   rU   decision_stepsterminal_stepsrC   rW   osr   re   r    rR      s*    zUnity3DEnv._get_step_results	game_namec                    s  t tdtddt tdtddt tdtddtt tdtddt tdtddt tdtddt tdtddgtt dd	d
t dd	dgt tdtddtt tdtddt tdtddgtt tdtddt tdtddt tdtddgt tdtddt tdtddt tdtddtt tdtddt tdtddgd}t dd	dtjdt dd	dtjdtg dtdgtg dtg dtg dtg dt dd	dtdgt dd	dtg dd} dkr#t|d |d d t|d! |d! d d"}d#d$ }||fS  d%krEt|d& |d& d t|d& |d& d d'}d(d$ }||fS  t|  |  d i} fd)d$}||fS )*Nz-infinf)   )-   )(   ru      )8   )r>   g      g      ?)i  )H   )i  )   )?   )      )r   )   )T   r~      )   )1   )3DBall
3DBallHardGridFoodCollectorPyramidsSoccerPlayerGoalieStrikerSorterTennisVisualHallwayWalkerFoodCollector)   )rI   )r   r   r   r      )r   r   r   )r   )'   )r   r   r   r   r   r   r   r   r   r   r   r   SoccerStrikersVsGoalier   )observation_spaceaction_spacer   )r   r   c                 [   s   d| v rdS dS )Nr   r   r   rC   episodeworkerkwargsr   r   r    policy_mapping_fn`     zAUnity3DEnv.get_policy_configs_for_game.<locals>.policy_mapping_fn
SoccerTwosr   )PurplePlayer
BluePlayerc                 [   s   d| v rdS dS )N1_r   r   r   r   r   r   r    r   o  r   c                    s    S rb   r   r   rq   r   r    r   z  s   )r   float
TupleSpacerJ   rK   r   r   )rq   
obs_spacesaction_spacespoliciesr   r   r   r    get_policy_configs_for_game   s   		

>









z&Unity3DEnv.get_policy_configs_for_game)NNr   Fr   r   )__name__
__module____qualname____doc__r,   r+   r-   strr   r   boolr#   r
   r   rQ   r_   rR   staticmethodrS   r   r   r   r   __classcell__r   r   r9   r    r      sT    O
K

	-r   )gymnasium.spacesr   r   r   r   loggingnumpyrJ   r)   r'   typingr   r   ray.rllib.env.multi_agent_envr   ray.rllib.policy.policyr   ray.rllib.utils.annotationsr	   ray.rllib.utils.typingr
   r   r   	getLoggerr   loggerr   r   r   r   r    <module>   s    
