o
    ci'                     @   s  d dl Z d dlZd dlmZmZmZmZmZ d dlZ	d dl
Z
d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZm Z  erfd dl!m"Z" e#dZ$e \Z%Z&Z'dZ(dZ)eddG dd dee j*dZ+dS )    N)AnyDictOptionalTupleTYPE_CHECKING)COMPONENT_RL_MODULE)FaultAwareApply)update_global_seed_if_necessary)try_import_tf)ENV_RESET_TIMERENV_STEP_TIMER)MetricsLogger)convert_to_torch_tensor)	StateDict
TensorType)	PublicAPIDeveloperAPI)AlgorithmConfigz	ray.rllibenv_reset_failureenv_step_failurealpha)	stabilityc                	       s   e Zd ZdZd# fddZejdd Zdd	 Zd
d Z	ejde
fddZde
fddZedeejeef fddZejdeeeejejf f fddZd$ddZd$ddZddddee dee dee
e
f fddZdd  Zdefd!d"Z   Z!S )%	EnvRunnera  Base class for distributed RL-style data collection from an environment.

    The EnvRunner API's core functionalities can be summarized as:
    - Gets configured via passing a AlgorithmConfig object to the constructor.
    Normally, subclasses of EnvRunner then construct their own environment (possibly
    vectorized) copies and RLModules/Policies and use the latter to step through the
    environment in order to collect training data.
    - Clients of EnvRunner can use the `sample()` method to collect data for training
    from the environment(s).
    - EnvRunner offers parallelism via creating n remote Ray Actors based on this class.
    Use `ray.remote([resources])(EnvRunner)` method to create the corresponding Ray
    remote class. Then instantiate n Actors using the Ray `[ctor].remote(...)` syntax.
    - EnvRunner clients can get information about the server/node on which the
    individual Actors are running.
    configr   c                   s   |j dd| _|d| _|d| jj| _d| _t | _t	 
  tr5| jjdks-|jr5t s5t  d| _| jjdurPt| jj| jpFd d| jj  | _t| jj| jd	 dS )
zInitializes an EnvRunner instance.

        Args:
            config: The AlgorithmConfig to use to setup this EnvRunner.
            **kwargs: Forward compatibility kwargs.
        F)copy_frozenworker_indexnum_workersNtf2r   g    .A)	frameworkseed)copyr   getr   num_env_runnersr   envr   metricssuper__init__tf1framework_strenable_tf1_exec_eagerlyexecuting_eagerlyenable_eager_execution_seedr   intin_evaluationr	   )selfr   kwargs	__class__ L/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/env/env_runner.pyr&   2   s2   


zEnvRunner.__init__c                 C      dS )aH  Checks that self.__init__() has been completed properly.

        Useful in case an `EnvRunner` is run as @ray.remote (Actor) and the owner
        would like to make sure the Ray Actor has been properly initialized.

        Raises:
            AssertionError: If the EnvRunner Actor has NOT been properly initialized.
        Nr3   r/   r3   r3   r4   assert_healthy`       zEnvRunner.assert_healthyc                 C   r5   )a  Creates the RL environment for this EnvRunner and assigns it to `self.env`.

        Note that users should be able to change the EnvRunner's config (e.g. change
        `self.config.env_config`) and then call this method to create new environments
        with the updated configuration.
        It should also be called after a failure of an earlier env in order to clean up
        the existing env (for example `close()` it), re-create a new one, and then
        continue sampling with that new env.
        Nr3   r6   r3   r3   r4   make_envl   s   
zEnvRunner.make_envc                 C   r5   )a(  Creates the RLModule for this EnvRunner and assigns it to `self.module`.

        Note that users should be able to change the EnvRunner's config (e.g. change
        `self.config.rl_module_spec`) and then call this method to create a new RLModule
        with the updated configuration.
        Nr3   r6   r3   r3   r4   make_moduley   s   zEnvRunner.make_modulereturnc                 K   r5   )a`  Returns experiences (of any form) sampled from this EnvRunner.

        The exact nature and size of collected data are defined via the EnvRunner's
        config and may be overridden by the given arguments.

        Args:
            **kwargs: Forward compatibility kwargs.

        Returns:
            The collected experience in any form.
        Nr3   )r/   r0   r3   r3   r4   sample   r8   zEnvRunner.samplec                 C   r5   )zReturns metrics (in any form) of the thus far collected, completed episodes.

        Returns:
            Metrics of any form.
        Nr3   r6   r3   r3   r4   get_metrics      zEnvRunner.get_metricsc                 C   s,   |   }| jtd}|  }t|||fS )a  Convenience method for fast, async algorithms.

        Use this in Algorithms that need to sample Episode lists as ray.ObjectRef, but
        also require (in the same remote call) the metrics and the EnvRunner states,
        except for the module weights.
        )not_components)r<   	get_stater   r=   rayput)r/   	_episodes_connector_states_metricsr3   r3   r4   sample_get_state_and_metrics   s   
z&EnvRunner.sample_get_state_and_metricsc                 C   r5   )zFReturns a dict mapping ModuleIDs to 2-tuples of obs- and action space.Nr3   r6   r3   r3   r4   
get_spaces   r8   zEnvRunner.get_spacesNc                 C   r5   )zReleases all resources used by this EnvRunner.

        For example, when using a gym.Env in this EnvRunner, you should make sure
        that its `close()` method is called.
        Nr3   r6   r3   r3   r4   stop   r>   zEnvRunner.stopc                 C   r5   )z:If this Actor is deleted, clears all resources used by it.Nr3   r6   r3   r3   r4   __del__   s   zEnvRunner.__del__r   optionsr   rK   c             
   C   s   z%| j t | jj||d\}}W d   n1 sw   Y  ||fW S  tyS } z"| jjrMt	d|j
d   |   | j||dW  Y d}~S |d}~ww )a  Tries resetting the env and - if an error occurs - handles it gracefully.

        Args:
            seed: An optional seed (int) to be passed to the Env.reset() call.
            options: An optional options-dict to be passed to the Env.reset() call.

        Returns:
            The results of calling `Env.reset()`, which is a tuple of observations and
            info dicts.

        Raises:
            Exception: In case `config.restart_failed_sub_environments` is False and
                `Env.reset()` resulted in an error.
        rJ   Nz?Resetting the env resulted in an error! The original error is: r   )r$   log_timer   r#   reset	Exceptionr   restart_failed_sub_environmentslogger	exceptionargsr9   _try_env_reset)r/   r   rK   obsinfoser3   r3   r4   rS      s"   
zEnvRunner._try_env_resetc              
   C   s   z!| j t | j|}W d   |W S 1 sw   Y  |W S  tyJ } z| jjrDt	d|j
d   |   tW  Y d}~S |d}~ww )zHTries stepping the env and - if an error orrurs - handles it gracefully.Nz>Stepping the env resulted in an error! The original error is: r   )r$   rL   r   r#   steprN   r   rO   rP   rQ   rR   r9   ENV_STEP_FAILURE)r/   actionsresultsrV   r3   r3   r4   _try_env_step   s&   
zEnvRunner._try_env_stepc                 C   s"   | j jdkr
t|S ttj|S )z0Converts structs to a framework-specific tensor.torch)r   r(   r   treemap_structuretfconvert_to_tensor)r/   structr3   r3   r4   _convert_to_tensor   s   zEnvRunner._convert_to_tensor)r   r   )r;   N)"__name__
__module____qualname____doc__r&   abcabstractmethodr7   r9   r:   r   r<   r=   r   r   rA   	ObjectRefr   rF   r   strgymSpacerG   rH   rI   r   r-   dictrS   r[   r   rb   __classcell__r3   r3   r1   r4   r       s:    .
	$



)r   )	metaclass),rg   loggingtypingr   r   r   r   r   	gymnasiumrk   r]   rA   ray.rllib.corer   ray.rllib.utils.actor_managerr   ray.rllib.utils.debugr	   ray.rllib.utils.frameworkr
   ray.rllib.utils.metricsr   r   &ray.rllib.utils.metrics.metrics_loggerr   ray.rllib.utils.torch_utilsr   ray.rllib.utils.typingr   r   ray.util.annotationsr   r   %ray.rllib.algorithms.algorithm_configr   	getLoggerrP   r'   r_   _ENV_RESET_FAILURErX   ABCMetar   r3   r3   r3   r4   <module>   s.    
