o
    `Û·in  ã                   @   sn   d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZmZ e	G dd	„ d	ƒƒZd
S )é    )ÚDict)ÚBaseEnv)ÚRolloutWorker)ÚPolicy)ÚOldAPIStack)Ú
TensorType)ÚAgentIDÚPolicyIDc                   @   sF   e Zd ZdZdeeef dededee	e
f deeef f
dd„Zd	S )
ÚObservationFunctiona™  Interceptor function for rewriting observations from the environment.

    These callbacks can be used for preprocessing of observations, especially
    in multi-agent scenarios.

    Observation functions can be specified in the multi-agent config by
    specifying ``{"observation_fn": your_obs_func}``. Note that
    ``your_obs_func`` can be a plain Python function.

    This API is **experimental**.
    Ú	agent_obsÚworkerÚbase_envÚpoliciesÚreturnc                 K   s   |S )aZ  Callback run on each environment step to observe the environment.

        This method takes in the original agent observation dict returned by
        a MultiAgentEnv, and returns a possibly modified one. It can be
        thought of as a "wrapper" around the environment.

        TODO(ekl): allow end-to-end differentiation through the observation
            function and policy losses.

        TODO(ekl): enable batch processing.

        Args:
            agent_obs: Dictionary of default observations from the
                environment. The default implementation of observe() simply
                returns this dict.
            worker: Reference to the current rollout worker.
            base_env: BaseEnv running the episode. The underlying
                sub environment objects (BaseEnvs are vectorized) can be
                retrieved by calling `base_env.get_sub_environments()`.
            policies: Mapping of policy id to policy objects. In single
                agent mode there will only be a single "default" policy.
            episode: Episode state object.
            kwargs: Forward compatibility placeholder.

        Returns:
            new_agent_obs: copy of agent obs with updates. You can
                rewrite or drop data from the dict if needed (e.g., the env
                can have a dummy "global" observation, and the observer can
                merge the global state into individual observations.

        .. testcode::
            :skipif: True

            # Observer that merges global state into individual obs. It is
            # rewriting the discrete obs into a tuple with global state.
            example_obs_fn1({"a": 1, "b": 2, "global_state": 101}, ...)

        .. testoutput::

            {"a": [1, 101], "b": [2, 101]}

        .. testcode::
            :skipif: True

            # Observer for e.g., custom centralized critic model. It is
            # rewriting the discrete obs into a dict with more data.
            example_obs_fn2({"a": 1, "b": 2}, ...)

        .. testoutput::

            {"a": {"self": 1, "other": 2}, "b": {"self": 2, "other": 1}}
        © )Úselfr   r   r   r   ÚepisodeÚkwr   r   ú_/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/rllib/evaluation/observation_function.pyÚ__call__   s   >zObservationFunction.__call__N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r   r	   r   r   r   r   r   r   r
      s    
þýü
û
ør
   N)Útypingr   Úray.rllib.envr   Úray.rllib.evaluationr   Úray.rllib.policyr   Úray.rllib.utils.annotationsr   Úray.rllib.utils.frameworkr   Úray.rllib.utils.typingr   r	   r
   r   r   r   r   Ú<module>   s    