o
    ciJ                     @   s   d dl mZ d dlmZ d dlZd dlZd dlZd dlmZm	Z	m
Z
mZmZmZ d dlZd dlZd dlmZ d dlmZ d dlmZmZmZ d dlmZ d d	lmZmZ d d
lmZm Z  d dl!m"Z" d dl#m$Z$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl8m9Z9m:Z: d dl;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZOmPZP d dlQmRZR d dlSmTZTmUZUmVZVmWZW d dlXmYZYmZZZ d dl[m\Z\ e: \Z]Z^e_dZ`e\ddG dd  d e$e5ZadS )!    )defaultdict)partialN)
CollectionDefaultDictDictListOptionalUnion)AlgorithmConfig)make_callback)!COMPONENT_ENV_TO_MODULE_CONNECTOR!COMPONENT_MODULE_TO_ENV_CONNECTORCOMPONENT_RL_MODULE)Columns)MultiRLModuleMultiRLModuleSpec)INPUT_ENV_SPACESINPUT_ENV_SINGLE_SPACES)
EnvContext)	EnvRunnerENV_STEP_FAILURE)MultiAgentEnv)MultiAgentEpisode)VectorMultiAgentEnv)make_vec)_gym_env_creator)
force_list)override)Checkpointable)
Deprecated)
get_devicetry_import_torch)ENV_TO_MODULE_CONNECTOREPISODE_DURATION_SEC_MEANEPISODE_LEN_MAXEPISODE_LEN_MEANEPISODE_LEN_MINEPISODE_RETURN_MAXEPISODE_RETURN_MEANEPISODE_RETURN_MINMODULE_TO_ENV_CONNECTORNUM_AGENT_STEPS_SAMPLED NUM_AGENT_STEPS_SAMPLED_LIFETIMENUM_ENV_STEPS_SAMPLEDNUM_ENV_STEPS_SAMPLED_LIFETIMENUM_EPISODESNUM_EPISODES_LIFETIMENUM_MODULE_STEPS_SAMPLED!NUM_MODULE_STEPS_SAMPLED_LIFETIMERLMODULE_INFERENCE_TIMERSAMPLE_TIMERTIME_BETWEEN_SAMPLINGWEIGHTS_SEQ_NO)check_multiagent_environments)	EpisodeIDModelWeights
ResultDict	StateDict)ENV_CREATOR_global_registry)	PublicAPIz	ray.rllibalpha)	stabilityc                       s@  e Zd ZdZeedef fddZeeddddddded	ed
e	de	de	de
e fddZddddddee d	ee d
e	de	de	de
e fddZdd Zeedd ZeedefddZee	dJdddeeeee f  deeeee f  defddZeededdfdd Zeed!d" Zeed#d$ Zeed%d& Zeed'd( Zeed)d* Zeed+d, Zeed-d. Z d/d0 Z!dJd1d2Z"d3ed4ed5e
e fd6d7Z#d8d9 Z$			dKd:d;Z%e&d<efd=d>Z'e(d?dd@dJdAdBZ)e(dCdd@		DdLdEe*dFee+ dGeddfdHdIZ,  Z-S )MMultiAgentEnvRunnerz8The genetic environment runner for the multi-agent case.configc                    s,  t  jdd|i| | jjstdt| j d|di | _| 	  dd t
| jjD | _t| j| js9dn| jj| _d| _d| _| jdu sY| jdksY| jjsY| jjdkr]|   | jj| j| j| jd	| _d| _d| _|   | jj| jr~| jjnd| jd
| _d| _d| _d| _ d| _!d| _"dS )zInitializes a MultiAgentEnvRunner instance.

        Args:
            config: An `AlgorithmConfig` object containing all settings needed to
                build this `EnvRunner` class.
        rB   z!Cannot use this EnvRunner class (z), if your setup is not multi-agent! Try adding multi-agent information to your AlgorithmConfig via calling the `config.multi_agent(policies=..., policy_mapping_fn=...)`.spacesc                 S   s   g | ]}| qS  rD   ).0clsrD   rD   X/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/env/multi_agent_env_runner.py
<listcomp>_   s    z0MultiAgentEnvRunner.__init__.<locals>.<listcomp>r   N)envrC   device)rI   rC   TrD   )#super__init__rB   is_multi_agent
ValueErrortype__name__getrC   _setup_metricsr   callbacks_class
_callbacksr    worker_indexnum_gpus_per_env_runner_devicerI   num_envscreate_env_on_local_workernum_env_runnersmake_envbuild_env_to_module_connector_env_to_module_cached_to_modulemodulemake_modulebuild_module_to_env_connector	unwrapped_module_to_env_needs_initial_reset_episode_shared_data_weights_seq_no_time_after_sampling)selfrB   kwargs	__class__rD   rG   rL   G   sD   


zMultiAgentEnvRunner.__init__NFnum_timestepsnum_episodesexplorerandom_actionsforce_resetrn   ro   rp   rq   rr   returnc             
   C   sH  | j du rt|  d|dur|durJ | jdur(| jjtt | j d | jjt| j	dd | j
t] |du rA| jj}|du rY|du rY| jjdkrY| j| j| j }|durg| j||||d}n|durt| j|||d}n	| j| j||d}td	| j| jjt| | j|d
d W d   n1 sw   Y  t | _|S )a  Runs and returns a sample (n timesteps or m episodes) on the env(s).

        Args:
            num_timesteps: The number of timesteps to sample during this call.
                Note that only one of `num_timetseps` or `num_episodes` may be provided.
            num_episodes: The number of episodes to sample during this call.
                Note that only one of `num_timetseps` or `num_episodes` may be provided.
            explore: If True, will use the RLModule's `forward_exploration()`
                method to compute actions. If False, will use the RLModule's
                `forward_inference()` method. If None (default), will use the `explore`
                boolean setting from `self.config` passed into this EnvRunner's
                constructor. You can change this setting in your config via
                `config.env_runners(explore=True|False)`.
            random_actions: If True, actions will be sampled randomly (from the action
                space of the environment). If False (default), actions or action
                distribution parameters are computed by the RLModule.
            force_reset: Whether to force-reset all (vector) environments before
                sampling. Useful if you would like to collect a clean slate of new
                episodes via this call. Note that when sampling n episodes
                (`num_episodes != None`), this is fixed to True.

        Returns:
            A list of `MultiAgentEpisode` instances, carrying the sampled data.
        Nz2 doesn't have an env! Can't call `sample()` on it.)keyvalue   )rt   ru   windowtruncate_episodes)rn   rp   rq   rr   )ro   rp   rq   on_sample_end)
env_runnermetrics_loggersamplescallbacks_objectscallbacks_functionsrj   )rI   rN   rh   metrics	log_valuer5   timeperf_counterr6   rg   log_timer4   rB   rp   
batch_modeget_rollout_fragment_lengthrU   rX   _sampler   rT   callbacks_on_sample_enddict)ri   rn   ro   rp   rq   rr   r|   rD   rD   rG   sample   sp   
"

3zMultiAgentEnvRunner.sample)rn   ro   rq   rr   c          "   
      s  g }|s|d usj r(dd tjD   _i  }_ || d_ nj j}|d ur5d_ d}d}	|d urC||k rn|	|k r|r[tj fddtjD i}
nmj}|d usdJ d _|r|rj	j
tdd| jjpyd }j	t jj||d	}
W d    n1 sw   Y  nj	t j|}
W d    n1 sw   Y  jj|
 ||j	tfd
}
ni }
i |d< |
tjdd  D }|
tj|}|}|tkrj||||ddS |\}}}}}t }tjD ]r}tt}|
 D ] \}}|| }| D ]\}}||| |< j|| t< qqt|}j| j sK | j!|| || d |"| q | j#|| || || || || || |d |$d||  | 7 }qg }tjD ][}||v r%d|  n%d|   | j&r|	d7 }	%d|  ' |  |( |  |( |  |	|kr n | j)}*|  |d +| | j)i q{jd ur|rj,||j|d d j, |j|j	t-fd_jj.r	|D ]}|/  q|d ur||k sHn|	|k sHj01| g } |d urefddjD }!jD ]0}	|	j2dkr;q1|	3  j4|	j) (|	 '|	 jj.r[| (|	/  q1| (|	 q1|!_||  S )Nc                 S   s   g | ]}d qS NrD   rE   _rD   rD   rG   rH         z/MultiAgentEnvRunner._sample.<locals>.<listcomp>FTr   c                    s(   g | ]  fd d    D qS )c                    s&   i | ]}|j j  j| qS rD   )rI   envsrb   get_action_spacer   rE   aid)iri   rD   rG   
<dictcomp>(  s    z:MultiAgentEnvRunner._sample.<locals>.<listcomp>.<dictcomp>)get_agents_to_act)rE   episodesri   )r   rG   rH   '  s    	
defaultrv   )t)	rl_modulebatchr   rp   shared_datar   metrics_prefix_keyvector_env_episodes_mapc                 S   s   g | ]}i qS rD   rD   r   rD   rD   rG   rH   `  r   rm   observationsinfos)r   actionsrewardsr   terminateds
truncatedsextra_model_outputson_episode_starton_episode_stepon_episode_end)r   rp   r   r   r   )r   rp   r   r   r   r   c                    s   g | ]
}|j  jjd qS ))len_lookback_buffer)cutrB   episode_lookback_horizon)rE   epsri   rD   rG   rH         )5rd   rangerX   	_episodesrf   _reset_envsr   ACTIONSr^   r   peekr.   rB   rZ   r   r3   r_   forward_explorationforward_inferencerc   r*   popACTIONS_FOR_ENV_try_env_stepr   r   setr   r   itemsrg   r6   is_resetadd_env_resetaddadd_env_step_increase_sampled_metrics_make_on_episode_callbackis_done_prune_zero_len_sa_episodesappendid__new_episodeupdater]   r"   episodes_to_numpyto_numpy_done_episodes_for_metricsextendenv_tvalidate_ongoing_episodes_for_metrics)"ri   rn   ro   rp   rq   rr   done_episodes_to_returnr   tsr   to_env	to_moduleglobal_env_steps_lifetimer   actions_for_envresultsr   r   r   r   r   call_on_episode_start	env_indexr   colma_dict_listma_dictagent_idval"done_episodes_to_run_env_to_moduleold_episode_idepisodeongoing_episodes_to_returnongoing_episodes_continuationsrD   r   rG   r     s8  

	






  Q




zMultiAgentEnvRunner._samplec                 C   s   t | jD ]}| || q| j  | j| jr| jnd d d\}}t | jD ]}|| j|| || d q'd | _	| j
rL| j| j
|||| jtfd| _	t | jD ]	}| d|| qQd S )N)seedoptionsr   )r   r   rp   r   r   metrics_key_prefixr   )r   rX   r   r   clear_try_env_resetrd   _seedr   r^   r_   r]   r   r"   r   )ri   r   r   rp   r   r   r   rD   rD   rG   r     s2   


zMultiAgentEnvRunner._reset_envsc                    sP    j d u r jS t j j j jft j j j jfi fdd jjj	 D S )Nc                    s"   i | ]\}}|| j j| fqS rD   )r]   action_space)rE   midor   rD   rG   r   L  s    z2MultiAgentEnvRunner.get_spaces.<locals>.<dictcomp>)
rI   rC   r   observation_spacer   r   single_observation_spacesingle_action_spacer]   r   r   rD   r   rG   
get_spaces@  s   

zMultiAgentEnvRunner.get_spacesc              
   C   sR  | j D ]}|js
J t|}ttdd |j D }| }| }tt	dd |j
 D }tt	dd |j
 D }|j| jv r| j|j D ]C}| }	|t|7 }||	7 }|| 7 }|j
 D ]'}
|
 }|t|
j  t|
7  < |t|
j  |7  < ||
j  |7  < qdqI| j|j= | |||||t| q| j   | j S )Nc                 S   s   i | ]\}}t |t|qS rD   )strlen)rE   r   sa_epsrD   rD   rG   r   Z  s    z3MultiAgentEnvRunner.get_metrics.<locals>.<dictcomp>c                 S   s   i | ]
}t |j| qS rD   )r   r   
get_returnrE   r   rD   rD   rG   r   a  r   c                 S   s   i | ]}|j | qS rD   )	module_idr   r   rD   rD   rG   r   h  s    
)r   r   r   r   intagent_episodesr   r   get_duration_sfloatvaluesr   r   r   r   r   _log_episode_metricsr   r   r   reduce)ri   r   episode_lengthagent_stepsepisode_returnepisode_duration_sagent_episode_returnsmodule_episode_returnseps2return_eps2r   	return_sarD   rD   rG   get_metricsR  sX   

	



zMultiAgentEnvRunner.get_metrics)not_components
componentsr  c                K   s   t | jjt ddi}| t||r,| jjd| t|| t|d||t< | j|t	< | t
||r:| j |t
< | t||rH| j |t< |S )Nr   r   )r  r  rD   )r.   r   r   _check_componentr   r_   	get_state_get_subcomponentsrg   r6   r   r]   r   rc   )ri   r  r  rj   staterD   rD   rG   r
    s,   



zMultiAgentEnvRunner.get_stater  c                 C   s   t |v r| j|t   t|v r| j|t  t|v rG|td}|dks+| j|k r@|t }t	|t
jr:t
|}| j| |dkrG|| _t|v rY| jjt|t ddd d S d S )Nr   sumT)rt   ru   r   with_throughput)r   r]   	set_stater   rc   r   rQ   r6   rg   
isinstanceray	ObjectRefr_   r.   r   	set_value)ri   r  weights_seq_norl_module_staterD   rD   rG   r    s*   

zMultiAgentEnvRunner.set_statec                 C   s   dd| j ifS )NrD   rB   )rB   r   rD   rD   rG   get_ctor_args_and_kwargs  s   z,MultiAgentEnvRunner.get_ctor_args_and_kwargsc                 C   s   t | }|i  |S r   )r   get_metadatar   )ri   metadatarD   rD   rG   r    s
   
z MultiAgentEnvRunner.get_metadatac                 C   s   t | jft| jft| jfgS r   )r   r_   r   r]   r   rc   r   rD   rD   rG   get_checkpointable_components  s   z1MultiAgentEnvRunner.get_checkpointable_componentsc                 C   s   | j r| jsJ dS )a  Checks that self.__init__() has been completed properly.

        Ensures that the instances has a `MultiRLModule` and an
        environment defined.

        Raises:
            AssertionError: If the EnvRunner Actor has NOT been properly initialized.
        N)rI   r_   r   rD   rD   rG   assert_healthy  s   z"MultiAgentEnvRunner.assert_healthyc              
   C   sT  | j d ur,z| j   W n ty) } ztd|jd   W Y d }~nd }~ww | ` | jj}t|t	sBt	|| j
| jj| jjd}| jj sJtdt| jj trett| jj rettt| jj |}n	tt| jj |d}tjd|dd | jj}td| jjt|tjjjr|ntjj| d	| _ | j j| _| j| jjksJ | jjsz| j jD ]}t |j! qW n[ ty } zt"|jd  W Y d }~nFd }~ww zt| j t#sJ t| j jd j!t$sJ W n* t%y   t"d
| j  d| j jd j! d Y n t&y   t"d Y nw d| _'t(d| j)| jj*t+| | j,| j j!|dd d S )NzETried closing the existing env (multi-agent), but failed with error: r   )rU   num_workersremotez`config.env` is not provided! You should provide a valid environment to your config through `config.environment([env descriptor e.g. 'CartPole-v1'])`.)env_descriptorenv_contextzrllib-multi-agent-env-v0T)entry_pointdisable_env_checker)rX   vectorization_modezWhen using the `MultiAgentEnvRunner`, the environment must inherit from `ray.rllib.env.vector.vector_multi_agent_env.VectorMultiAgentEnv` (but yours is zJ) and the individual envs must inherit from `MultiAgentEnv` (but yours is z)!z^When using the `MultiAgentEnvRunner`, the env must have a subscriptable `self.envs` attribute!on_environment_created)rz   r{   rI   r  r}   )-rI   close	ExceptionloggerwarningargsrB   
env_configr  r   rU   rZ   remote_worker_envsrN   r   r=   containsr<   r   rQ   r   gymregistergym_env_vectorize_moder   num_envs_per_env_runnerr   registrationVectorizeModelowerrX   disable_env_checkingr7   rb   	exceptionr   r   AssertionError	TypeErrorrd   r   rT    callbacks_on_environment_createdr   r   )ri   eenv_ctxr  vectorize_moderI   rD   rD   rG   r[     s   


	
zMultiAgentEnvRunner.make_envc                    sv    j d ur	 j jnd }z" jj|  dd}|  _tr+ j fdd W d S W d S  t	y:   d  _Y d S w )NT)rI   rC   inference_onlyc                    s   t |tjjr| jS |S r   )r  torchnnModuletorW   )r   modr   rD   rG   <lambda>w  s   z1MultiAgentEnvRunner.make_module.<locals>.<lambda>)
rI   rb   rB   get_multi_rl_module_specr   buildr_   r;  foreach_moduleNotImplementedError)ri   rI   module_specrD   r   rG   r`   h  s   



zMultiAgentEnvRunner.make_modulec                 C   s   | j d ur| j   d S d S r   )rI   r#  r   rD   rD   rG   stop  s   
zMultiAgentEnvRunner.stopc                 C   s   g | _ tt| _d S r   )r   r   listr   r   rD   rD   rG   rR     s   z"MultiAgentEnvRunner._setup_metricsc                    sr   |d ur|nj }t fddjj  jjD  fddjj  jjD jjd| < d | d S )Nc                    "   i | ]}|j j  j|qS rD   )rI   r   rb   get_observation_spacer   r   ri   rD   rG   r         z4MultiAgentEnvRunner._new_episode.<locals>.<dictcomp>c                    rH  rD   )rI   r   rb   r   r   rJ  rD   rG   r     rK  )r   r   agent_to_module_mapping_fnon_episode_created)	r   r   rI   r   rb   possible_agentsrB   policy_mapping_fnr   )ri   r   r   rD   rJ  rG   r     s   
z MultiAgentEnvRunner._new_episodewhichidxr   c                 C   s`   t || | | j| jj| j|d}|dkr| j|| j |d< t|| jt	| j
d| |d d S )N)r   rz   r{   rI   r   r   r   prev_episode_chunks
callbacks_r}   )r   r   rI   rb   r_   r   r   r   rT   getattrrB   )ri   rP  rQ  r   rj   rD   rD   rG   r     s$   
z-MultiAgentEnvRunner._make_on_episode_callbackc                 C   s   | j jt|ddd | j jt|ddd |jr*| j jtdddd | j jtddd |D ]:}| j jtt|fdddd | j jt	t|fddd | j jt
||fdddd | j jt||fddd q,|S )Nr  T)r   clear_on_reduce)r   r  rv   )r   )r   r   r-   r.   r   r/   r0   r+   r   r,   r1   
module_forr2   )ri   	num_stepsnext_obsr   r   rD   rD   rG   r     sH   

z-MultiAgentEnvRunner._increase_sampled_metricsc                 C   s   t dtt| jj| jjpd }| jjt	|t
|t|i|d ur&|||dni |d | jjt|t|id|d | jjt|t|id|d d S )Nrv   )agent_episode_returns_meanmodule_episode_returns_meanr   )rw   min)r   rw   max)r\  r   mathceilrB   "metrics_num_episodes_for_smoothingrZ   r   log_dictr%   r(   r#   r&   r)   r$   r'   )ri   lengthretsecagentsmodulesr   winrD   rD   rG   r     sL   


z(MultiAgentEnvRunner._log_episode_metricsr   c                 C   s(   | j   D ]
\}}|s| j |= qd S r   )r   copyr   )r   r   	agent_epsrD   rD   rG   r     s
   z/MultiAgentEnvRunner._prune_zero_len_sa_episodesz5MultiAgentEnvRunner.get_state(components='rl_module'))newerrorc                 C   s   | j tdt }|S )N)r  )r
  r   )ri   re  r  rD   rD   rG   get_weights"  s   
zMultiAgentEnvRunner.get_weightszMultiAgentEnvRunner.set_state()r   weightsglobal_varsr  c                 C   s   |d u sJ |  t|t|iS r   )r  r   r6   )ri   rl  rm  r  rD   rD   rG   set_weights,  s   zMultiAgentEnvRunner.set_weightsr   )NNN)Nr   ).rP   
__module____qualname____doc__r   r   r
   rL   r   boolr   r   r   r   r   r   r   r:   r  r   r	   r   r   r;   r
  r  r  r  r  r  r[   r`   rF  rR   r   r   r   r   staticmethodr   r   rk  r9   r   rn  __classcell__rD   rD   rk   rG   rA   C   s    Kp
  (
:&!

	


k



/
=
rA   )bcollectionsr   	functoolsr   r]  loggingr   typingr   r   r   r   r   r	   	gymnasiumr+  r  %ray.rllib.algorithms.algorithm_configr
   ray.rllib.callbacks.utilsr   ray.rllib.corer   r   r   ray.rllib.core.columnsr   (ray.rllib.core.rl_module.multi_rl_moduler   r   ray.rllib.envr   r   ray.rllib.env.env_contextr   ray.rllib.env.env_runnerr   r   ray.rllib.env.multi_agent_envr   !ray.rllib.env.multi_agent_episoder   +ray.rllib.env.vector.vector_multi_agent_envr   !ray.rllib.env.vector.registrationr   ray.rllib.env.utilsr   ray.rllib.utilsr   ray.rllib.utils.annotationsr   ray.rllib.utils.checkpointsr   ray.rllib.utils.deprecationr   ray.rllib.utils.frameworkr    r!   ray.rllib.utils.metricsr"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   ray.rllib.utils.pre_checks.envr7   ray.rllib.utils.typingr8   r9   r:   r;   ray.tune.registryr<   r=   ray.util.annotationsr>   r;  r   	getLoggerr%  rA   rD   rD   rD   rG   <module>   sF     \

