o
    $i%                     @   s  d dl Z d dlZd dlmZmZ d dlmZmZ d dlm	Z	m
Z
mZmZmZmZ d dlmZmZ d dlmZmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ d dl m!Z! d dl"m#Z#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* e	rd dl+m,Z, d dl-m.Z. d dl/m0Z0 e& \Z1Z2Z3e 4e5Z6edg dZ7eee
  Z8G dd deZ9e#G dd deedZ:e#G dd de:Z;dS )    N)ABCMetaabstractmethod)defaultdict
namedtuple)TYPE_CHECKINGAnyListOptionalTypeUnion)DEPRECATED_VALUEdeprecation_warning)BaseEnvconvert_to_base_env)SampleCollector)SimpleListCollector)EnvRunnerV2
_PerfStats)RolloutMetrics)InputReader)concat_samples)OldAPIStackoverride)try_import_tf)SampleBatchType)log_once)RLlibCallback)ObservationFunction)RolloutWorker_PolicyEvalData)env_idagent_idobsinfo	rnn_stateprev_actionprev_rewardc                   @   s   e Zd Zdd ZdS )_NewEpisodeDefaultDictc                 C   s(   | j d u r	t||  | }| |< |S N)default_factoryKeyError)selfr    ret r-   Y/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/ray/rllib/evaluation/sampler.py__missing__-   s   
z"_NewEpisodeDefaultDict.__missing__N)__name__
__module____qualname__r/   r-   r-   r-   r.   r'   ,   s    r'   c                   @   sd   e Zd ZdZeedefddZedefddZ	ede
e fddZede
e fd	d
ZdS )SamplerInputz1Reads input experiences from an existing sampler.returnc                 C   s4   |   g}||   t|dkrtdt|S )Nr   zNo data available from sampler.)get_dataextendget_extra_batcheslenRuntimeErrorr   )r+   batchesr-   r-   r.   next9   s
   
zSamplerInput.nextc                 C      t )zCalled by `self.next()` to return the next batch of data.

        Override this in child classes.

        Returns:
            The next batch of data.
        NotImplementedErrorr+   r-   r-   r.   r5   A   s   	zSamplerInput.get_datac                 C   r<   )a$  Returns list of episode metrics since the last call to this method.

        The list will contain one RolloutMetrics object per completed episode.

        Returns:
            List of RolloutMetrics objects, one per completed episode since
            the last call to this method.
        r=   r?   r-   r-   r.   get_metricsL   s   
zSamplerInput.get_metricsc                 C   r<   )a  Returns list of extra batches since the last call to this method.

        The list will contain all SampleBatches or
        MultiAgentBatches that the user has provided thus-far. Users can
        add these "extra batches" to an episode by calling the episode's
        `add_extra_batch([SampleBatchType])` method. This can be done from
        inside an overridden `Policy.compute_actions_from_input_dict(...,
        episodes)` or from a custom callback's `on_episode_[start|step|end]()`
        methods.

        Returns:
            List of SamplesBatches or MultiAgentBatches provided thus-far by
            the user since the last call to this method.
        r=   r?   r-   r-   r.   r7   X   s   zSamplerInput.get_extra_batchesN)r0   r1   r2   __doc__r   r   r   r;   r   r5   r   r   r@   r7   r-   r-   r-   r.   r3   5   s    
r3   )	metaclassc                   @   s   e Zd ZdZddddddddddddeeedddd	ed
eeef de	de
dddedededed deee  defddZeedefddZeedee fddZeedee fddZdS )SyncSamplerzHSync SamplerInput that collects experiences when `get_data()` is called.	env_stepsFTN)count_steps_bymultiple_episodes_in_batchnormalize_actionsclip_actionsobservation_fnsample_collector_classrenderpoliciespolicy_mapping_fnpreprocessorsobs_filterstf_sesshorizonsoft_horizonno_done_at_endworkerr   envclip_rewardsrollout_fragment_lengthrE   	callbacksr   rF   rG   rH   rI   r   rJ   rK   c             
   C   s(  t drO|durtdd |durtdd |durtdd |dur(tdd |dur1tdd |tkr;td	d
d |tkrEtdd
d |tkrOtdd
d t|| _|| _t | _t	|j
jd| _|sht}||j|||||d| _|| _t|| j||| j||| jd| _| j | _t | _dS )aN  Initializes a SyncSampler instance.

        Args:
            worker: The RolloutWorker that will use this Sampler for sampling.
            env: Any Env object. Will be converted into an RLlib BaseEnv.
            clip_rewards: True for +/-1.0 clipping,
                actual float value for +/- value clipping. False for no
                clipping.
            rollout_fragment_length: The length of a fragment to collect
                before building a SampleBatch from the data and resetting
                the SampleBatchBuilder object.
            count_steps_by: One of "env_steps" (default) or "agent_steps".
                Use "agent_steps", if you want rollout lengths to be counted
                by individual agent steps. In a multi-agent env,
                a single env_step contains one or more agent_steps, depending
                on how many agents are present at any given time in the
                ongoing episode.
            callbacks: The RLlibCallback object to use when episode
                events happen during rollout.
            multiple_episodes_in_batch: Whether to pack multiple
                episodes into each batch. This guarantees batches will be
                exactly `rollout_fragment_length` in size.
            normalize_actions: Whether to normalize actions to the
                action space's bounds.
            clip_actions: Whether to clip actions according to the
                given action_space's bounds.
            observation_fn: Optional multi-agent observation func to use for
                preprocessing observations.
            sample_collector_class: An optional SampleCollector sub-class to
                use to collect, store, and retrieve environment-, model-,
                and sampler data.
            render: Whether to try to render the environment after each step.
        deprecated_sync_sampler_argsNrL   )oldrM   rN   rO   rP   rQ   T)rZ   errorrR   rS   )ema_coef)rE   )rT   base_envrF   rX   
perf_statsrW   rE   rK   )r   r   r   r   r]   rW   queueQueueextra_batchesr   configsampler_perf_stats_ema_coefr^   r   
policy_mapsample_collectorrK   r   _env_runner_objrun_env_runnermetrics_queue)r+   rT   rU   rV   rW   rE   rX   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   r-   r-   r.   __init__o   s\   <







zSyncSampler.__init__r4   c                 C   s*   	 t | j}t|tr| j| n|S qr(   )r;   rh   
isinstancer   ri   put)r+   itemr-   r-   r.   r5      s   

zSyncSampler.get_datac                 C   sB   g }	 z| | j j| j d W n tjy   Y |S w q)NT)r^   )appendri   
get_nowait_replacer^   getr_   Empty)r+   	completedr-   r-   r.   r@      s   
zSyncSampler.get_metricsc                 C   s4   g }	 z
| | j  W n tjy   Y |S w qr(   )rn   ra   ro   r_   rr   )r+   extrar-   r-   r.   r7      s   zSyncSampler.get_extra_batches)r0   r1   r2   rA   r   r   r   boolfloatintstrr	   r
   r   rj   r   r3   r   r5   r   r   r@   r7   r-   r-   r-   r.   rC   k   s`    	
	


orC   )<loggingr_   abcr   r   collectionsr   r   typingr   r   r   r	   r
   r   ray._common.deprecationr   r   ray.rllib.env.base_envr   r   0ray.rllib.evaluation.collectors.sample_collectorr   5ray.rllib.evaluation.collectors.simple_list_collectorr   "ray.rllib.evaluation.env_runner_v2r   r   ray.rllib.evaluation.metricsr   ray.rllib.offliner   ray.rllib.policy.sample_batchr   ray.rllib.utils.annotationsr   r   ray.rllib.utils.frameworkr   ray.rllib.utils.typingr   ray.util.debugr   ray.rllib.callbacks.callbacksr   )ray.rllib.evaluation.observation_functionr   #ray.rllib.evaluation.rollout_workerr   tf1tf_	getLoggerr0   loggerr   
StateBatchr'   r3   rC   r-   r-   r-   r.   <module>   sB     	
	5