o
    `۷i                     @   sX   d dl mZ d dlZd dlmZ d dlmZ eG dd deZeG dd deZ	dS )	    )OptionalN)MultiAgentEnv)	PublicAPIc                       sf   e Zd ZdZ fddZddddee dee fdd	Zd
d Z	dd Z
dd Zedd Z  ZS )PettingZooEnvaa
  An interface to the PettingZoo MARL environment library.

    See: https://github.com/Farama-Foundation/PettingZoo

    Inherits from MultiAgentEnv and exposes a given AEC
    (actor-environment-cycle) game from the PettingZoo project via the
    MultiAgentEnv public API.

    Note that the wrapper has the following important limitation:

    Environments are positive sum games (-> Agents are expected to cooperate
       to maximize reward). This isn't a hard restriction, it just that
       standard algorithms aren't expected to work well in highly competitive
       games.

    Also note that the earlier existing restriction of all agents having the same
    observation- and action spaces has been lifted. Different agents can now have
    different spaces and the entire environment's e.g. `self.action_space` is a Dict
    mapping agent IDs to individual agents' spaces. Same for `self.observation_space`.

    .. testcode::
        :skipif: True

        from pettingzoo.butterfly import prison_v3
        from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
        env = PettingZooEnv(prison_v3.env())
        obs, infos = env.reset()
        # only returns the observation for the agent which should be stepping
        print(obs)

    .. testoutput::

        {
            'prisoner_0': array([[[0, 0, 0],
                [0, 0, 0],
                [0, 0, 0],
                ...,
                [0, 0, 0],
                [0, 0, 0],
                [0, 0, 0]]], dtype=uint8)
        }

    .. testcode::
        :skipif: True

        obs, rewards, terminateds, truncateds, infos = env.step({
            "prisoner_0": 1
        })
        # only returns the observation, reward, info, etc, for
        # the agent who's turn is next.
        print(obs)

    .. testoutput::

        {
            'prisoner_1': array([[[0, 0, 0],
                [0, 0, 0],
                [0, 0, 0],
                ...,
                [0, 0, 0],
                [0, 0, 0],
                [0, 0, 0]]], dtype=uint8)
        }

    .. testcode::
        :skipif: True

        print(rewards)

    .. testoutput::

        {
            'prisoner_1': 0
        }

    .. testcode::
        :skipif: True

        print(terminateds)

    .. testoutput::

        {
            'prisoner_1': False, '__all__': False
        }

    .. testcode::
        :skipif: True

        print(truncateds)

    .. testoutput::

        {
            'prisoner_1': False, '__all__': False
        }

    .. testcode::
        :skipif: True

        print(infos)

    .. testoutput::

        {
            'prisoner_1': {'map_tuple': (1, 0)}
        }
    c                    s   t    | _|  t jj _ jst j _ js% j	  _ fdd jD  _
 fdd jD  _tj j
 _tj j _d S )Nc                       i | ]	}| j |qS  )envobservation_space.0aidselfr   [/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/rllib/env/wrappers/pettingzoo_env.py
<dictcomp>       z*PettingZooEnv.__init__.<locals>.<dictcomp>c                    r   r   )r   action_spacer
   r   r   r   r      r   )super__init__r   resetsetagents
_agent_idslistpossible_agentscopyobservation_spacesaction_spacesgymspacesDictr	   r   r   r   	__class__r   r   r   x   s    


zPettingZooEnv.__init__Nseedoptionsr%   r&   c                C   s0   | j j||d}| j j| j | j ji|pi fS Nr$   )r   r   agent_selectionobserve)r   r%   r&   infor   r   r   r      s   zPettingZooEnv.resetc                 C   s   | j || j j  i }i }i }i }i }| j jrV| j  \}}}	}
}| j j}|||< |||< |	||< |
||< |||< | j j| j j sJ| j j| j j rQ| j d  nn| j js| j j }|obt| |d< |olt| |d< |||||fS N__all__)	r   stepr(   r   lastterminationstruncationsallvalues)r   actionobs_drew_dterminated_dtruncated_dinfo_dobsrew
terminated	truncatedr*   agent_idall_goner   r   r   r-      s2   
zPettingZooEnv.stepc                 C      | j   d S N)r   closer   r   r   r   rA         zPettingZooEnv.closec                 C      | j | jS r@   )r   renderrender_moder   r   r   r   rD      rB   zPettingZooEnv.renderc                 C      | j jS r@   )r   	unwrappedr   r   r   r   get_sub_environments      z"PettingZooEnv.get_sub_environments)__name__
__module____qualname____doc__r   r   intdictr   r-   rA   rD   propertyrH   __classcell__r   r   r"   r   r   	   s    m"r   c                       sb   e Zd Z fddZddddee dee fddZd	d
 Zdd Z	dd Z
edd Z  ZS )ParallelPettingZooEnvc                    s   t    | _ j  t jj _ jst j _ js& j	  _t
j fdd jD  _t
j fdd jD  _d S )Nc                    r   r   )par_envr	   r
   r   r   r   r          z2ParallelPettingZooEnv.__init__.<locals>.<dictcomp>c                    r   r   )rS   r   r
   r   r   r   r      rT   )r   r   rS   r   r   r   r   r   r   r   r   r   r    r	   r   r!   r"   r   r   r      s   


zParallelPettingZooEnv.__init__Nr$   r%   r&   c                C   s    | j j||d\}}||pi fS r'   )rS   r   )r   r%   r&   r9   r*   r   r   r   r      s   zParallelPettingZooEnv.resetc                 C   sD   | j |\}}}}}t| |d< t| |d< |||||fS r+   )rS   r-   r1   r2   )r   action_dictobssrewsterminateds
truncatedsinfosr   r   r   r-      s   zParallelPettingZooEnv.stepc                 C   r?   r@   )rS   rA   r   r   r   r   rA      rB   zParallelPettingZooEnv.closec                 C   rC   r@   )rS   rD   rE   r   r   r   r   rD      rB   zParallelPettingZooEnv.renderc                 C   rF   r@   )rS   rG   r   r   r   r   rH      rI   z*ParallelPettingZooEnv.get_sub_environments)rJ   rK   rL   r   r   rN   rO   r   r-   rA   rD   rP   rH   rQ   r   r   r"   r   rR      s    "rR   )
typingr   	gymnasiumr   ray.rllib.env.multi_agent_envr   ray.rllib.utils.annotationsr   r   rR   r   r   r   r   <module>   s     6