o
    ci0K                     @   s   d dl Zd dlZd dlmZmZmZmZmZm	Z	m
Z
 d dlZd dlmZ d dlmZmZmZ d dlmZmZ d dlmZmZmZmZ e
rJd dlmZ eeZeG dd	 d	eZej d d
G dd dZ!ej d d
G dd dZ"dS )    N)CallableDictListOptionalSetTupleTYPE_CHECKING)log_once)BaseEnv_DUMMY_AGENT_IDASYNC_RESET_RETURN)overrideOldAPIStack)AgentIDEnvIDEnvTypeMultiEnvDict)RolloutWorkerc                   @   s  e Zd ZdZ			d*deegef dedededee	e
jj  d	ed
 defddZeedeeeeeeef fddZeededdfddZee	d+ddddee dee dee deeef fddZeed+dee ddfddZeed,ddZeed-dede	e fdd Zeeedejjfd!d"Zeeedejfd#d$Z d+d%ee fd&d'Z!eede"e# fd(d)Z$dS ).RemoteBaseEnva'  BaseEnv that executes its sub environments as @ray.remote actors.

    This provides dynamic batching of inference as observations are returned
    from the remote simulator actors. Both single and multi-agent child envs
    are supported, and envs can be stepped synchronously or asynchronously.

    NOTE: This class implicitly assumes that the remote envs are gym.Env's

    You shouldn't need to instantiate this class directly. It's automatically
    inserted when you use the `remote_worker_envs=True` option in your
    Algorithm's config.
    NFmake_envnum_envs
multiagentremote_env_batch_wait_msexisting_envsworkerr   restart_failed_sub_environmentsc                    s0  | _ | _| _|d  _| _| _|pg }d _d _d _d _	t
|dkrVt|d tjjrVd _| _	t
 j	 jk rU j	 t
 j	 t
 j	 jk sAn7 fddt jD  _	t
|dkrv|d j _|d j _nt j	d j  j	d j g\ _ _dd	  j	D  _dS )
ae  Initializes a RemoteVectorEnv instance.

        Args:
            make_env: Callable that produces a single (non-vectorized) env,
                given the vector env index as only arg.
            num_envs: The number of sub-environments to create for the
                vectorization.
            multiagent: Whether this is a multiagent env or not.
            remote_env_batch_wait_ms: Time to wait for (ray.remote)
                sub-environments to have new observations available when
                polled. Only when none of the sub-environments is ready,
                repeat the `ray.wait()` call until at least one sub-env
                is ready. Then return only the observations of the ready
                sub-environment(s).
            existing_envs: Optional list of already created sub-environments.
                These will be used as-is and only as many new sub-envs as
                necessary (`num_envs - len(existing_envs)`) will be created.
            worker: An optional RolloutWorker that owns the env. This is only
                used if `remote_worker_envs` is True in your config and the
                `on_sub_environment_created` custom callback needs to be
                called on each created actor.
            restart_failed_sub_environments: If True and any sub-environment (within
                a vectorized env) throws any error during env stepping, the
                Sampler will try to restart the faulty sub-environment. This is done
                without disturbing the other (still intact) sub-environment and without
                the RolloutWorker crashing.
        i  FNr   Tc                    s   g | ]}  |qS  )_make_sub_env).0iselfr   Q/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/env/remote_base_env.py
<listcomp>o   s    z*RemoteBaseEnv.__init__.<locals>.<listcomp>c                 S   s   i | ]}|j  |qS r   )resetremote)r   ar   r   r"   
<dictcomp>   s    z*RemoteBaseEnv.__init__.<locals>.<dictcomp>)r   r   r   poll_timeoutr   r   make_env_creates_actors_observation_space_action_spaceactorslen
isinstancerayactorActorHandleappendr   rangeobservation_spaceaction_spacegetr%   pending)r!   r   r   r   r   r   r   r   r   r    r"   __init__    s@   '
zRemoteBaseEnv.__init__returnc                 C   sv  i i i i i f\}}}}}g }|s#t jt| jt| j| jd\}}|rt }|D ]}	| j|	}
| j	|
}|
| zt |	}W n0 tys } z$| jrgt|jd  | | |i ddiddii f}n|W Y d }~nd }~ww | jrd\}}}}| jrt|trt|dkr|\}}}}}ndt|dkr|d }|d	 }nUtd
tdt|trt|dkrt|d i}t|d	 i}t|d d|d i}t|d d|d i}t|d i}nt|dkrt|d i}t|d	 i}ntd
td|d u rdd | D }ddi}ddi}n|\}}}}}|||< |||< |||< |||< |||< q(td|  |||||i fS )N)num_returnstimeoutr   __all__TF)NNNN         zYour gymnasium.Env seems to NOT return the correct number of return values for `step()` (needs to return 5 values: obs, reward, terminated, truncated and info) or `reset()` (needs to return 2 values: obs and info)!zeYour gymnasium.Env seems to only return a single value upon `reset()`! Must return 2 (obs AND infos).      zeYour gymnasium.Env seems to only return a single value upon `reset()`! Must return 2 (obs and infos).c                 S   s   i | ]}|d qS )r   r   )r   agent_idr   r   r"   r'      s    z&RemoteBaseEnv.poll.<locals>.<dictcomp>zGot obs batch for actors )r/   waitlistr7   r-   r(   setpopr,   indexaddr6   	Exceptionr   logger	exceptionargstry_restartr)   r   r.   tupleAssertionErrorr   keysdebug)r!   obsrewardsterminateds
truncatedsinfosready_env_idsobj_refr0   env_idreterew
terminated	truncatedinfoobr   r   r"   poll   s   



	

	
	

zRemoteBaseEnv.pollaction_dictc                 C   sT   |  D ]#\}}| j| }| js| jr|j|t }n|j|}|| j|< qd S N)itemsr,   r   r)   stepr%   r   r7   )r!   rd   r[   actionsr0   rZ   r   r   r"   send_actions  s   
zRemoteBaseEnv.send_actionsseedoptionsr[   rk   rl   c                C   s,   | j | }|jj||d}|| j|< ttfS )Nrj   )r,   r$   r%   r7   r   )r!   r[   rk   rl   r0   rZ   r   r   r"   	try_reset  s   

zRemoteBaseEnv.try_resetc              
   C   s|   z
| j | j  W n! ty+ } ztdr!td| d|  W Y d }~nd }~ww | j | j  | || j |< d S )Nclose_sub_envzBTrying to close old and replaced sub-environment (at vector index=z"), but closing resulted in error:
)	r,   closer%   rI   r	   rJ   warning__ray_terminate__r   )r!   r[   r]   r   r   r"   rM   /  s   zRemoteBaseEnv.try_restartc                 C   s(   | j d ur| j D ]	}|j  qd S d S re   )r,   rq   r%   )r!   r0   r   r   r"   stopB  s
   

zRemoteBaseEnv.stopas_dictc                 C   s   |r	t t| jS | jS re   )dict	enumerater,   )r!   rs   r   r   r"   get_sub_environmentsH  s   z"RemoteBaseEnv.get_sub_environmentsc                 C      | j S re   )r*   r    r   r   r"   r4   N     zRemoteBaseEnv.observation_spacec                 C   rw   re   )r+   r    r   r   r"   r5   S  rx   zRemoteBaseEnv.action_spaceidxc                    s^    j r# |} jdur! jjj j j|  jjj|dd |S  fdd}||}|S )z.Re-creates a sub-environment at the new index.Nvector_indexr   sub_environmentenv_contextc                    sd   t d|   jrt j| }nt j| } jd ur0 jj	j
 j| jjj| dd |S )Nz Launching env {} in remote actorrz   r|   )rJ   ra   formatr   _RemoteMultiAgentEnvr%   r   _RemoteSingleAgentEnvr   	callbackson_sub_environment_createdr~   copy_with_overrides)r   sub_envr    r   r"   make_remote_envk  s   
z4RemoteBaseEnv._make_sub_env.<locals>.make_remote_env)r)   r   r   r   r   r,   r~   r   )r!   ry   r   r   r   r    r"   r   X  s   

 zRemoteBaseEnv._make_sub_envc                 C   s"   | j rt| jd j S thS )Nr   )r   r/   r6   r,   get_agent_idsr%   r   r    r   r   r"   r     s   zRemoteBaseEnv.get_agent_ids)NNFre   )r9   N)F)%__name__
__module____qualname____doc__r   intr   boolr   r   r/   r0   r1   r8   r   r
   r   r   rc   ri   r   rt   rm   rM   rr   rv   propertygymspacesr   r4   Spacer5   r   r   r   r   r   r   r   r"   r      s    
f 	
)r   )num_cpusc                   @   sd   e Zd ZdZdd Zddddee dee fdd	Zd
d Z	dd Z
dd Zdee fddZdS )r   z:Wrapper class for making a multi-agent env a remote actor.c                 C   s   ||| _ t | _d S re   )envrE   	agent_idsr!   r   r   r   r   r"   r8     s   
z_RemoteMultiAgentEnv.__init__Nrj   rk   rl   c          	      C   sX   | j j||d\}}i }| D ]}| j| d||< qddi}ddi}|||||fS )Nrj           r<   F)r   r$   rP   r   rH   )	r!   rk   rl   rR   ra   r^   rB   r_   r`   r   r   r"   r$     s   
z_RemoteMultiAgentEnv.resetc                 C   s   | j |S re   )r   rg   )r!   rd   r   r   r"   rg     s   z_RemoteMultiAgentEnv.stepc                 C      | j jS re   r   r4   r    r   r   r"   r4        z&_RemoteMultiAgentEnv.observation_spacec                 C   r   re   r   r5   r    r   r   r"   r5     r   z!_RemoteMultiAgentEnv.action_spacer9   c                 C   rw   re   )r   r    r   r   r"   r     s   z"_RemoteMultiAgentEnv.get_agent_ids)r   r   r   r   r8   r   r   rt   r$   rg   r4   r5   r   r   r   r   r   r   r"   r     s    "r   c                   @   sR   e Zd ZdZdd Zddddee dee fdd	Zd
d Z	dd Z
dd ZdS )r   z2Wrapper class for making a gym env a remote actor.c                 C   s   ||| _ d S re   )r   r   r   r   r"   r8     s   z_RemoteSingleAgentEnv.__init__Nrj   rk   rl   c          	      C   sN   | j j||d}t|d i}t|d i}tdi}ddi}ddi}|||||fS )Nrj   r   r?   r   r<   F)r   r$   r   )	r!   rk   rl   obs_and_inforR   ra   r^   r_   r`   r   r   r"   r$     s   z_RemoteSingleAgentEnv.resetc                 C   sN   | j |t }dd |D \}}}}}|t |d< |t |d< |||||fS )Nc                 S   s   g | ]}t |iqS r   )r   )r   xr   r   r"   r#     s    z._RemoteSingleAgentEnv.step.<locals>.<listcomp>r<   )r   rg   r   )r!   actionresultsrR   r^   r_   r`   ra   r   r   r"   rg     s
   z_RemoteSingleAgentEnv.stepc                 C   r   re   r   r    r   r   r"   r4     r   z'_RemoteSingleAgentEnv.observation_spacec                 C   r   re   r   r    r   r   r"   r5     r   z"_RemoteSingleAgentEnv.action_space)r   r   r   r   r8   r   r   rt   r$   rg   r4   r5   r   r   r   r"   r     s    "r   )#	gymnasiumr   loggingtypingr   r   r   r   r   r   r   r/   ray.utilr	   ray.rllib.env.base_envr
   r   r   ray.rllib.utils.annotationsr   r   ray.rllib.utils.typingr   r   r   r   #ray.rllib.evaluation.rollout_workerr   	getLoggerr   rJ   r   r%   r   r   r   r   r   r"   <module>   s&    $
  
y
"