o
    `۷ib*                     @   s   d Z ddlZddlmZmZ ddlZddlZddl	Z	ddl
mZ ddlmZmZ ddlmZ ddlmZ er<ddlmZ eeZedddZdddZdddZdddZdd Z		dddZdd Zdd ZdS ) z,Common pre-checks for all RLlib experiments.    N)TYPE_CHECKINGSet)DeveloperAPI)ERR_MSG_OLD_GYM_APIUnsupportedSpaceException)get_base_struct_from_space)log_onceMultiAgentEnvenvr
   returnc              
      s  ddl m} t |stdt drt drt ds-tdr+td  d	 d
S z	 jdi d}W n t	yK } z	tt
 d|d
}~ww |\}}t |d  fdd| D }t |d z |}W n t	y } z	tt
 d|d
}~ww |\}}	}
}}t |d t |	d t |
d t |d t |ddd td|	id jd td|
id|id jd td|id jd d
S )zgChecking for common errors in RLlib MultiAgentEnvs.

    Args:
        env: The env to be checked.
    r   r	   z&The passed env is not a MultiAgentEnv.observation_spaceaction_space
_agent_idsma_env_super_ctor_calledzYour MultiAgentEnv z does not have some or all of the needed base-class attributes! Make sure you call `super().__init__()` from within your MutiAgentEnv's constructor. This will raise an error in the future.N*   )seedoptionsz7In particular, the `reset()` method seems to be faulty.zreset()c                    s   i | ]
}|  | qS  )get_action_spacesample).0aidr   r   T/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/rllib/utils/pre_checks/env.py
<dictcomp>9   s    z1check_multiagent_environments.<locals>.<dictcomp>z&get_action_space(agent_id=..).sample()z6In particular, the `step()` method seems to be faulty.zstep, next_obszstep, rewardz
step, donezstep, truncatedz
step, infoT)allow_commondummy_env_id)base_env	agent_ids)ray.rllib.envr
   
isinstance
ValueErrorhasattrr   loggerwarningreset	Exceptionr   format"_check_if_element_multi_agent_dictkeysstep_check_rewardagents_check_done_and_truncated_check_info)r   r
   obs_and_infose	reset_obsreset_infossampled_actionresultsnext_obsrewarddone	truncatedinfor   r   r   check_multiagent_environments   sx   


r;   Fc                 C   s   |rO|   D ]F\}}|  D ]=\}}t|r,t|ts,t|s:t|tjr,|jdks:d| dt| }t	|||v sK|dksKd| }t	|qqd S t| rit| tsit| stt| tjri| jdksvd
t| }t	|d S d S )Nr   zJYour step function must return rewards that are integer or float. reward: z. Instead it was a __all__zrYour reward dictionary must have agent ids that belong to the environment. AgentIDs received from env.agents are: zUYour step function must return a reward that is integer or float. Instead it was a {})itemsnpisrealr!   boolisscalarndarrayshapetyper"   r(   )r7   r   r   _multi_agent_dictagent_idrewerrorr   r   r   r,   Y   sV   




r,   c              	   C   s   dD ]]}|dkr
| n|}|rI|  D ]5\}}|  D ],\}}	t|	ttjfs2td| dt| ||v sF|dksFd| d| }
t|
qqqt|ttjfs_d| d	t| }
t|
qd S )
N)r8   r9   r8   z Your step function must return `z's` that are boolean. But instead was a r<   zYour `zis` dictionary must have agent ids that belong to the environment. AgentIDs received from env.agents are: z"Your step function must return a `z'` that is a boolean. But instead was a )r=   r!   r@   r>   bool_r"   rD   )r8   r9   r   r   whatdatarE   rF   rG   done_rI   r   r   r   r.      s8   r.   c                 C   s   |r<|   D ]3\}}|  D ]*\}}t|ts#tdt| d| ||v s8|dks8|dks8d| }t|qqd S t| tsOdt|  d|  }t|d S )NzDYour step function must return infos that are a dict. instead was a z: element: r<   
__common__zqYour dones dictionary must have agent ids that belong to the environment. AgentIDs received from env.agents are: zDYour step function must return a info that is a dict. element type: z. element: )r=   r!   dictr"   rD   )r:   r   r   rE   rF   rG   infrI   r   r   r   r/      s>   

r/   c                 C   s.   d| d|  d| d| d| d| d}|S )NzThe z collected from z% was not contained within your env's z@ space. Its possible that there was a typemismatch (for example z)s of np.float32 and a space ofnp.float64 zs), or that one of the sub-zs wasout of boundsr   )	func_name_type_errorr   r   r   _not_contained_error   s   rT   c                    s   t |ts#|rd| dt| }t|d| dt| }t|t| j  d |r4 d t fdd|D sj|rUd| dt|	  d	| j }t|d| d
t|	  d	| j d}t|d S )NzThe element returned by zJ contains values that are not MultiAgentDicts. Instead, they are of type: z3 is not a MultiAgentDict. Instead, it is of type:  r<   rN   c                 3   s    | ]}| v V  qd S Nr   )r   kr   r   r   	<genexpr>   s    z5_check_if_element_multi_agent_dict.<locals>.<genexpr>z_ has agent_ids that are not the names of the agents in the env.agent_ids in this
MultiEnvDict: z
AgentIDs in this env: zb has agent_ids that are not the names of the agents in the env. 
AgentIDs in this MultiAgentDict: z. You likely need to add the attribute `agents` to your env, which is a list containing the IDs of agents currently in your env/episode, as well as, `possible_agents`, which is a list of all possible agents that could ever show up in your env.)
r!   rO   rD   r"   setr-   addalllistr*   )r   elementfunction_stringr   r   rI   r   rW   r   r)      sB   






r)   c              
   C   s   t | tjjtjjfsd| | j|t|fS t| }dd }z
t	||| W dS  t
yZ } z'|jd d |jd d } }d|jd d | | j|t|fW  Y d}~S d}~ww )	aC  Returns error, value, and space when offending `space.contains(value)` fails.

    Returns only the offending sub-value/sub-space in case `space` is a complex Tuple
    or Dict space.

    Args:
        space: The gym.Space to check.
        value: The actual (numpy) value to check for matching `space`.

    Returns:
        Tuple consisting of 1) key-sequence of the offending sub-space or the empty
        string if `space` is not complex (Tuple or Dict), 2) the offending sub-space,
        3) the offending sub-space's dtype, 4) the offending sub-value, 5) the offending
        sub-value's dtype.

    .. testcode::
        :skipif: True

        path, space, space_dtype, value, value_dtype = _find_offending_sub_space(
            gym.spaces.Dict({
           -2.0, 1.5, (2, ), np.int8), np.array([-1.5, 3.0])
        )

    Nc                 S   s   | |st| ||fd S rU   )containsr   )psvr   r   r   map_fn  s   
z)_find_offending_sub_space.<locals>.map_fnr         z->)NNNNN)r!   gymspacesDictTupledtype	_get_typer   treemap_structure_with_pathr   argsjoin)spacevaluestructured_spacerc   r1   r   r   r   _find_offending_sub_space   s   0rs   c                 C   s   t | dr| jS t| S )Nrj   )r#   rj   rD   )varr   r   r   rk     s   rk   )r   r
   r   N)FN)FF) __doc__loggingtypingr   r   	gymnasiumrf   numpyr>   rl   ray.rllib.utils.annotationsr   ray.rllib.utils.errorr   r   "ray.rllib.utils.spaces.space_utilsr   ray.utilr   r    r
   	getLogger__name__r$   r;   r,   r.   r/   rT   r)   rs   rk   r   r   r   r   <module>   s0    

D
(

1,