import random
from collections import defaultdict
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple

import numpy as np

from ray.rllib.env.base_env import _DUMMY_AGENT_ID
from ray.rllib.evaluation.collectors.agent_collector import AgentCollector
from ray.rllib.evaluation.collectors.simple_list_collector import (
    _PolicyCollector,
    _PolicyCollectorGroup,
)
from ray.rllib.policy.policy_map import PolicyMap
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.utils.annotations import OldAPIStack
from ray.rllib.utils.typing import AgentID, EnvID, EnvInfoDict, PolicyID, TensorType

if TYPE_CHECKING:
    from ray.rllib.callbacks.callbacks import RLlibCallback
    from ray.rllib.evaluation.rollout_worker import RolloutWorker


@OldAPIStack
class EpisodeV2:
    """Tracks the current state of a (possibly multi-agent) episode."""

    def __init__(
        self,
        env_id: EnvID,
        policies: PolicyMap,
        policy_mapping_fn: Callable[[AgentID, "EpisodeV2", "RolloutWorker"], PolicyID],
        *,
        worker: Optional["RolloutWorker"] = None,
        callbacks: Optional["RLlibCallback"] = None,
    ):
        """Initializes an Episode instance.

        Args:
            env_id: The environment's ID in which this episode runs.
            policies: The PolicyMap object (mapping PolicyIDs to Policy
                objects) to use for determining which policy is used for
                which agent.
            policy_mapping_fn: The mapping function mapping AgentIDs to
                PolicyIDs.
            worker: The RolloutWorker instance, in which this episode runs.
        """
        # Unique id identifying this trajectory.
        self.episode_id: int = random.randrange(int(1e18))
        # ID of the environment this episode is tracking.
        self.env_id = env_id
        # Summed reward across all agents in this episode.
        self.total_reward: float = 0.0
        # Env steps collected since the last `postprocess_episode()` call
        # (which resets this counter).
        self.active_env_steps: int = 0
        # Total env steps over the entire episode.
        self.total_env_steps: int = 0
        # Agent steps collected since the last `postprocess_episode()` call.
        self.active_agent_steps: int = 0
        # Total agent steps over the entire episode.
        self.total_agent_steps: int = 0

        # Dicts that callbacks may use to attach custom metrics, arbitrary
        # user data, histogram data, and media to this episode.
        self.custom_metrics: Dict[str, float] = {}
        self.user_data: Dict[str, Any] = {}
        self.hist_data: Dict[str, List[float]] = {}
        self.media: Dict[str, Any] = {}

        self.worker = worker
        self.callbacks = callbacks

        self.policy_map: PolicyMap = policies
        self.policy_mapping_fn = policy_mapping_fn

        # AgentID -> PolicyID bindings (fixed for the episode's duration).
        self._agent_to_policy: Dict[AgentID, PolicyID] = {}
        # AgentID -> per-agent trajectory collector.
        self._agent_collectors: Dict[AgentID, AgentCollector] = {}
        # Indices handed out to agents in the order they first appear.
        self._next_agent_index: int = 0
        self._agent_to_index: Dict[AgentID, int] = {}

        # Summed rewards, broken down by (agent, policy).
        self.agent_rewards: Dict[Tuple[AgentID, PolicyID], float] = defaultdict(float)
        self._agent_reward_history: Dict[AgentID, List[float]] = defaultdict(list)

        # Per-agent init-obs, terminated/truncated flags, and last info dicts.
        self._has_init_obs: Dict[AgentID, bool] = {}
        self._last_terminateds: Dict[AgentID, bool] = {}
        self._last_truncateds: Dict[AgentID, bool] = {}
        self._last_infos: Dict[AgentID, EnvInfoDict] = {}

    def policy_for(
        self, agent_id: AgentID = _DUMMY_AGENT_ID, refresh: bool = False
    ) -> PolicyID:
        """Returns and stores the policy ID for the specified agent.

        If the agent is new, the policy mapping fn will be called to bind the
        agent to a policy for the duration of the entire episode (even if the
        policy_mapping_fn is changed in the meantime!).

        Args:
            agent_id: The agent ID to lookup the policy ID for.
            refresh: Whether to re-evaluate the policy_mapping_fn, even if
                this agent is already bound to a policy.

        Returns:
            The policy ID for the specified agent.
        """
        # Perform a (new) policy_mapping_fn lookup and bind AgentID to the
        # returned PolicyID for the duration of this episode.
        if agent_id not in self._agent_to_policy or refresh:
            policy_id = self._agent_to_policy[agent_id] = self.policy_mapping_fn(
                agent_id, self, worker=self.worker
            )
        # Use the already determined PolicyID.
        else:
            policy_id = self._agent_to_policy[agent_id]

        # PolicyID not found in the policy map -> Error.
        if policy_id not in self.policy_map:
            raise KeyError(
                f"policy_mapping_fn returned invalid policy id '{policy_id}'!"
            )
        return policy_id

    def get_agents(self) -> List[AgentID]:
        """Returns list of agent IDs that have appeared in this episode.

        Returns:
            The list of all agent IDs that have appeared so far in this
            episode.
        """
        return list(self._agent_to_index.keys())

    def agent_index(self, agent_id: AgentID) -> int:
        """Get the index of an agent among its environment.

        A new index will be created if an agent is seen for the first time.

        Args:
            agent_id: ID of an agent.

        Returns:
            The index of this agent.
        """
        if agent_id not in self._agent_to_index:
            self._agent_to_index[agent_id] = self._next_agent_index
            self._next_agent_index += 1
        return self._agent_to_index[agent_id]

    def step(self) -> None:
        """Advance the episode forward by one step."""
        self.active_env_steps += 1
        self.total_env_steps += 1

    def add_init_obs(
        self,
        *,
        agent_id: AgentID,
        init_obs: TensorType,
        init_infos: Dict[str, TensorType],
        t: int = -1,
    ) -> None:
        """Add initial env obs at the start of a new episode.

        Args:
            agent_id: Agent ID.
            init_obs: Initial observations.
            init_infos: Initial infos dicts.
            t: timestamp.
        """
        policy = self.policy_map[self.policy_for(agent_id)]

        # Add initial obs to this agent's (new) trajectory collector.
        assert agent_id not in self._agent_collectors
        self._agent_collectors[agent_id] = AgentCollector(
            policy.view_requirements,
            max_seq_len=policy.config["model"]["max_seq_len"],
            disable_action_flattening=policy.config.get(
                "_disable_action_flattening", False
            ),
            is_policy_recurrent=policy.is_recurrent(),
            intial_states=policy.get_initial_state(),  # (sic) AgentCollector's arg name
            _enable_new_api_stack=False,
        )
        self._agent_collectors[agent_id].add_init_obs(
            episode_id=self.episode_id,
            agent_index=self.agent_index(agent_id),
            env_id=self.env_id,
            init_obs=init_obs,
            init_infos=init_infos,
            t=t,
        )

        self._has_init_obs[agent_id] = True

    def add_action_reward_done_next_obs(
        self,
        agent_id: AgentID,
        values: Dict[str, TensorType],
    ) -> None:
        """Add action, reward, info, and next_obs as a new step.

        Args:
            agent_id: Agent ID.
            values: Dict of action, reward, info, and next_obs.
        """
        # Make sure this agent already has an initialized collector
        # (via `add_init_obs()`).
        assert agent_id in self._agent_collectors

        self.active_agent_steps += 1
        self.total_agent_steps += 1

        # Include the current agent id for multi-agent algorithms.
        if agent_id != _DUMMY_AGENT_ID:
            values["agent_id"] = agent_id

        # Add action/reward/next-obs (and other data) to the trajectory.
        self._agent_collectors[agent_id].add_action_reward_next_obs(values)

        # Update reward bookkeeping.
        reward = values[SampleBatch.REWARDS]
        self.total_reward += reward
        self.agent_rewards[(agent_id, self.policy_for(agent_id))] += reward
        self._agent_reward_history[agent_id].append(reward)

        # Track per-agent terminated/truncated flags and last info dicts.
        if SampleBatch.TERMINATEDS in values:
            self._last_terminateds[agent_id] = values[SampleBatch.TERMINATEDS]
        if SampleBatch.TRUNCATEDS in values:
            self._last_truncateds[agent_id] = values[SampleBatch.TRUNCATEDS]
        if SampleBatch.INFOS in values:
            self.set_last_info(agent_id, values[SampleBatch.INFOS])
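
    # A multi-agent env can exclude an agent's trajectory from training by
    # putting `"training_enabled": False` into that agent's info dict; see the
    # check on `self._last_infos` in `postprocess_episode()` below.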

    def postprocess_episode(
        self,
        batch_builder: _PolicyCollectorGroup,
        is_done: bool = False,
        check_dones: bool = False,
    ) -> None:
        """Build and return currently collected training samples by policies.

        Clear agent collector states if this episode is done.

        Args:
            batch_builder: _PolicyCollectorGroup for saving the collected per-agent
                sample batches.
            is_done: If this episode is done (terminated or truncated).
            check_dones: Whether to make sure per-agent trajectories are actually done.
        """
        # Build SampleBatches for all agents that have collected any data.
        pre_batches = {}
        for agent_id, collector in self._agent_collectors.items():
            # Build only if this agent has actually collected steps.
            if collector.agent_steps == 0:
                continue
            pid = self.policy_for(agent_id)
            policy = self.policy_map[pid]
            pre_batch = collector.build_for_training(policy.view_requirements)
            pre_batches[agent_id] = (pid, policy, pre_batch)

        for agent_id, (pid, policy, pre_batch) in pre_batches.items():
            # Entire episode is done, but this agent's trajectory is not.
            if is_done and check_dones and not pre_batch.is_terminated_or_truncated():
                raise ValueError(
                    "Episode {} terminated for all agents, but we still don't "
                    "have a last observation for agent {} (policy {}). ".format(
                        self.episode_id, agent_id, pid
                    )
                    + "Please ensure that you include the last observations of "
                    "all live agents when setting done[__all__] to True. "
                )

            # Skip a trajectory's postprocessing (and thus using it for
            # training), if its agent's last info dict carries the
            # `training_enabled=False` flag.
            if not self._last_infos.get(agent_id, {}).get("training_enabled", True):
                continue

            if (
                not pre_batch.is_single_trajectory()
                or len(np.unique(pre_batch[SampleBatch.EPS_ID])) > 1
            ):
                raise ValueError(
                    "Batches sent to postprocessing must only contain steps "
                    "from a single trajectory.",
                    pre_batch,
                )

            # Other agents' batches, handed to this policy's postprocessing fn
            # for inter-agent communication.
            if len(pre_batches) > 1:
                other_batches = pre_batches.copy()
                del other_batches[agent_id]
            else:
                other_batches = {}

            # Call the Policy's Exploration's postprocess method first.
            post_batch = pre_batch
            if getattr(policy, "exploration", None) is not None:
                policy.exploration.postprocess_trajectory(
                    policy, post_batch, policy.get_session()
                )
            post_batch.set_get_interceptor(None)
            post_batch = policy.postprocess_trajectory(post_batch, other_batches, self)

            # Imported here (not at module level) to avoid a circular import.
            from ray.rllib.evaluation.rollout_worker import get_global_worker

            self.callbacks.on_postprocess_trajectory(
                worker=get_global_worker(),
                episode=self,
                agent_id=agent_id,
                policy_id=pid,
                policies=self.policy_map,
                postprocessed_batch=post_batch,
                original_batches=pre_batches,
            )

            # Append the postprocessed batch to the right policy collector.
            if pid not in batch_builder.policy_collectors:
                batch_builder.policy_collectors[pid] = _PolicyCollector(policy)
            batch_builder.policy_collectors[pid].add_postprocessed_batch_for_training(
                post_batch, policy.view_requirements
            )

        # Move the active step counts into the batch builder and reset them.
        batch_builder.agent_steps += self.active_agent_steps
        batch_builder.env_steps += self.active_env_steps
        self.active_agent_steps = 0
        self.active_env_steps = 0

    def has_init_obs(self, agent_id: Optional[AgentID] = None) -> bool:
        """Returns whether this episode has initial obs for an agent.

        If agent_id is None, return whether we have received any initial obs,
        in other words, whether this episode is completely fresh.
        """
        if agent_id is not None:
            return agent_id in self._has_init_obs and self._has_init_obs[agent_id]
        else:
            return any(list(self._has_init_obs.values()))

    def is_done(self, agent_id: AgentID) -> bool:
        return self.is_terminated(agent_id) or self.is_truncated(agent_id)

    def is_terminated(self, agent_id: AgentID) -> bool:
        return self._last_terminateds.get(agent_id, False)

    def is_truncated(self, agent_id: AgentID) -> bool:
        return self._last_truncateds.get(agent_id, False)

    def set_last_info(self, agent_id: AgentID, info: EnvInfoDict) -> None:
        self._last_infos[agent_id] = info

    def last_info_for(
        self, agent_id: AgentID = _DUMMY_AGENT_ID
    ) -> Optional[EnvInfoDict]:
        return self._last_infos.get(agent_id)

    @property
    def length(self) -> int:
        return self.total_env_steps
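

# A minimal usage sketch (illustrative, not part of RLlib): EpisodeV2 is
# normally driven by RLlib's sampler in this per-env-step order:
#   1. add_init_obs()                    (once per agent, after env reset)
#   2. add_action_reward_done_next_obs() (per acting agent, after env.step())
#   3. step()                            (once per env step)
#   4. postprocess_episode()             (on fragment boundaries/episode end)
# The smoke test below exercises only the policy-independent bookkeeping; the
# plain dict standing in for a PolicyMap and the trivial mapping function are
# illustrative assumptions, not RLlib requirements.
if __name__ == "__main__":
    episode = EpisodeV2(
        env_id=0,
        policies={"default_policy": None},  # stand-in for a real PolicyMap
        policy_mapping_fn=lambda aid, eps, worker=None: "default_policy",
    )
    # The first lookup binds agent -> policy for the rest of the episode.
    assert episode.policy_for("agent_0") == "default_policy"
    # Agent indices are handed out in order of first appearance.
    assert episode.agent_index("agent_0") == 0
    episode.step()
    assert episode.length == 1
    assert not episode.is_done("agent_0")
    print("EpisodeV2 bookkeeping smoke test passed.")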