o
    $i<                     @   sD  d dl Z d dlmZmZmZmZmZmZmZ d dl	Z	d dl
mZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0m1Z1m2Z2 erd dl3m4Z4 e \Z5Z6dZ7G dd de+eZ8dS )    N)TYPE_CHECKINGAny
CollectionDictIterableOptionalUnion)DataIterator)ALL_MODULESCOMPONENT_RL_MODULE)SelfSupervisedLossAPI)MultiRLModuleSpec)MultiAgentBatch)override)Checkpointable)
get_devicetry_import_torch)	DATASET_NUM_ITERS_EVALUATED$DATASET_NUM_ITERS_EVALUATED_LIFETIMEMODULE_SAMPLE_BATCH_SIZE_MEANNUM_ENV_STEPS_SAMPLEDNUM_ENV_STEPS_SAMPLED_LIFETIMENUM_MODULE_STEPS_SAMPLED!NUM_MODULE_STEPS_SAMPLED_LIFETIMEOFFLINE_SAMPLING_TIMERWEIGHTS_SEQ_NO)MiniBatchRayDataIterator)convert_to_numpy)Runnerconvert_to_torch_tensor)
DeviceTypeModuleID	StateDict
TensorType)AlgorithmConfigtotal_eval_lossc                   @   sF  e Zd Z	dDdddee fddZee		dEd	ed
eddfddZ	de
fddZd	ed
eddfddZeedd Zee	dDdddeeeee f  deeeee f  defddZdefddZeedFddZeedFddZeedd  Zeed!d" Z			dGd#ed$ed%ed&edef
d'd(Zd)eeef d#eeef deeef fd*d+Zd,eddd#eeef d)eeef def
d-d.Z eed/eddfd0d1Z!d#eddfd2d3Z"eed4d5 Z#eed6d7 Z$d8d9 Z%e&de'fd:d;Z(d<d= Z)e&de*fd>d?Z+e&dee,df fd@dAZ-e&defdBdCZ.dS )HOfflineEvaluationRunnerNconfigr%   module_specc                 K   sH   || _ d | _d | _tj| fd|i| t|  t|  | | _	d S )Nr(   )
%_OfflineEvaluationRunner__module_spec*_OfflineEvaluationRunner__dataset_iterator(_OfflineEvaluationRunner__batch_iteratorr   __init__r   types
MethodTypeget_loss_for_module_fn_loss_for_module_fn)selfr(   r)   kwargs r4   h/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/ray/rllib/offline/offline_evaluation_runner.pyr-   *   s   

z OfflineEvaluationRunner.__init__FTexploretrainreturnc                 K   s   | j d u rt|  d| js| jdi | jj| _| jjt	| j
dd | jt |d u r3| jj}| j||dW  d    S 1 sDw   Y  d S )NzM doesn't have a data iterator. Can't call `run` on `OfflineEvaluationRunner`.   keyvaluewindow)r6   r7   r4   )r+   
ValueError_batch_iterator_create_batch_iteratorr(   iter_batches_kwargsr,   metrics	log_valuer   _weights_seq_nolog_timer   r6   	_evaluate)r2   r6   r7   r3   r4   r4   r5   run>   s*   
$zOfflineEvaluationRunner.runc                 K   s$   t d| j| j| jj| jjd|S )N)iteratordeviceminibatch_size	num_itersr4   )r   _dataset_iterator_devicer(   "offline_eval_batch_size_per_runner!dataset_num_iters_per_eval_runner)r2   r3   r4   r4   r5   r@   b   s   z.OfflineEvaluationRunner._create_batch_iteratorc           
      C   s   t | jD ]D\}}t|j t| j  }|r!td| d|r+| j|j}n|r5| j|j}n| j	|j}| j
||jd}| | q| jjttf|d dd | jjttf|d dd t| D ]\}}	| jj|tf|	dd qj| j S )	Nz&Batch contains one or more ModuleIDs (z) that are not in this Learner!)fwd_outbatchr9   sumreducelifetime_sumr:   )	enumerater?   setpolicy_batcheskeysmoduler>   forward_explorationforward_trainforward_inferencecompute_eval_losses_log_steps_evaluated_metricsrB   rC   r
   r   r   r   itemsTOTAL_EVAL_LOSS_KEYrT   )
r2   r6   r7   	iterationtensor_minibatchunknown_module_idsrP   eval_loss_per_modulemidlossr4   r4   r5   rF   m   sH   

z!OfflineEvaluationRunner._evaluatec                 C   s   dd| j ifS )Nr4   r(   )r(   r2   r4   r4   r5   get_ctor_args_and_kwargs   s   z0OfflineEvaluationRunner.get_ctor_args_and_kwargs)not_components
componentsrj   c                K   sL   i }|  t||r$| jjd| t|| t|d||t< | j|t< |S )N)rk   rj   r4   )_check_componentr   rZ   	get_state_get_subcomponentsrD   r   )r2   rk   rj   r3   stater4   r4   r5   rm      s   


z!OfflineEvaluationRunner.get_statec                 C   s   t |S )z0Converts structs to a framework-specific tensor.r   )r2   structr4   r4   r5   _convert_to_tensor   s   z*OfflineEvaluationRunner._convert_to_tensorc                 C      dS )zReleases all resources used by this EnvRunner.

        For example, when using a gym.Env in this EnvRunner, you should make sure
        that its `close()` method is called.
        Nr4   rh   r4   r4   r5   stop   s   zOfflineEvaluationRunner.stopc                 C   rr   )z:If this Actor is deleted, clears all resources used by it.Nr4   rh   r4   r4   r5   __del__   s   zOfflineEvaluationRunner.__del__c                 C   s   | j rt| ds
J dS )a  Checks that self.__init__() has been completed properly.

        Ensures that the instances has a `MultiRLModule` and an
        environment defined.

        Raises:
            AssertionError: If the EnvRunner Actor has NOT been properly initialized.
        rZ   N)rL   hasattrrh   r4   r4   r5   assert_healthy   s   z&OfflineEvaluationRunner.assert_healthyc                 C   s
   | j  S N)rB   rT   rh   r4   r4   r5   get_metrics      
z#OfflineEvaluationRunner.get_metricsrQ   	to_device
pin_memory
use_streamc                 C   sB   t |j|r| jnd ||d}tdd | D }t||d}|S )N)rI   r{   r|   c                 s   s    | ]}t |V  qd S rw   )len).0br4   r4   r5   	<genexpr>   s    z>OfflineEvaluationRunner._convert_batch_type.<locals>.<genexpr>)	env_steps)r    rX   rM   maxvaluesr   )r2   rQ   rz   r{   r|   lengthr4   r4   r5   _convert_batch_type   s   z+OfflineEvaluationRunner._convert_batch_typerP   c          	      C   sz   i }|D ]6}|| }|| }| j |  }t|tr)|j| || j|||d}n| j|| j|||d}|||< q|S )N)learner	module_idr(   rQ   rP   r   r(   rQ   rP   )rZ   	unwrapped
isinstancer   compute_self_supervised_lossr(   get_config_for_modulecompute_eval_loss_for_module)	r2   rP   rQ   loss_per_moduler   module_batchmodule_fwd_outrZ   rg   r4   r4   r5   r^      s*   



z+OfflineEvaluationRunner.compute_eval_lossesr   c                C   s   | j ||||dS )Nr   )r1   )r2   r   r(   rQ   rP   r4   r4   r5   r     s   	z4OfflineEvaluationRunner.compute_eval_loss_for_modulero   c                 C   sj   t |v r1|td}|dks| j|k r(|t  }t|tjr"t|}| j| |dkr3|| _d S d S d S Nr   )	r   getr   rD   r   ray	ObjectRefrZ   	set_state)r2   ro   weights_seq_norl_module_stater4   r4   r5   r   %  s   

z!OfflineEvaluationRunner.set_statec                 C   s   |j  D ]J\}}| jj|tf| jdd t|}| jj|tf|d | jj|tf|dd | jj|t	f|dd | jjt
tf|dd | jjt
t	f|dd q| jjt
tf| dd | jjt
tf| ddd	 d S )
Nr9   )r=   )r;   r<   rR   )r;   r<   rT   rU   rS   T)rT   with_throughput)rX   r`   rB   rC   r   rD   r}   r   r   r   r
   r   r   r   )r2   rQ   rf   r   module_batch_sizer4   r4   r5   r_   :  sT   
z4OfflineEvaluationRunner._log_steps_evaluated_metricsc                 C   s>   zt | j| js	dn| jj| _W d S  ty   d | _Y d S w r   )r   r(   worker_index num_gpus_per_offline_eval_runner _OfflineEvaluationRunner__deviceNotImplementedErrorrh   r4   r4   r5   
set_devicel  s   z"OfflineEvaluationRunner.set_devicec                    s   z2ddl m}  js  jj jj| jj jjfi jjd _	 j
  _ j fdd W d S  ty?   d  _Y d S w )Nr   )INPUT_ENV_SPACES)envspacesinference_onlyc                    s   t |tjjr| jS |S rw   )r   torchnnModuletorM   )rf   modrh   r4   r5   <lambda>  s   z5OfflineEvaluationRunner.make_module.<locals>.<lambda>)ray.rllib.envr   _module_specr(   get_multi_rl_module_specr   observation_spaceaction_space%offline_eval_rl_module_inference_onlyr*   buildrZ   foreach_moduler   )r2   r   r4   rh   r5   make_modulez  s&   

z#OfflineEvaluationRunner.make_modulec                 C   s   | j jp| j  jd S )Ncompute_loss_for_module)r(   offline_loss_for_module_fnget_default_learner_class__dict__rh   r4   r4   r5   r0     s
   
z.OfflineEvaluationRunner.get_loss_for_module_fnc                 C      | j S )zReturns the dataset iterator.r+   rh   r4   r4   r5   rL        z)OfflineEvaluationRunner._dataset_iteratorc                 C   s
   || _ dS )zSets the dataset iterator.Nr   )r2   rH   r4   r4   r5   set_dataset_iterator  ry   z,OfflineEvaluationRunner.set_dataset_iteratorc                 C   r   rw   )r,   rh   r4   r4   r5   r?        z'OfflineEvaluationRunner._batch_iteratorc                 C   r   rw   )r   rh   r4   r4   r5   rM     r   zOfflineEvaluationRunner._devicec                 C   r   )z1Returns the `MultiRLModuleSpec` of this `Runner`.)r*   rh   r4   r4   r5   r     r   z$OfflineEvaluationRunner._module_specrw   )FT)r8   N)TFF)/__name__
__module____qualname__r   r   r-   r   r   boolrG   r   r@   rF   r   ri   r   strr   r#   rm   r$   rq   rs   rt   rv   rx   r   r   r   r   r^   r"   r   r   r_   r   r   r0   propertyr	   rL   r   r   r?   r!   rM   r   r4   r4   r4   r5   r'   )   s    
#
9










2

#
r'   )9r.   typingr   r   r   r   r   r   r   r   ray.data.iteratorr	   ray.rllib.corer
   r   ray.rllib.core.rl_module.apisr   (ray.rllib.core.rl_module.multi_rl_moduler   ray.rllib.policy.sample_batchr   ray.rllib.utils.annotationsr   ray.rllib.utils.checkpointsr   ray.rllib.utils.frameworkr   r   ray.rllib.utils.metricsr   r   r   r   r   r   r   r   r   ray.rllib.utils.minibatch_utilsr   ray.rllib.utils.numpyr   ray.rllib.utils.runners.runnerr   ray.rllib.utils.torch_utilsr    ray.rllib.utils.typingr!   r"   r#   r$   %ray.rllib.algorithms.algorithm_configr%   r   _ra   r'   r4   r4   r4   r5   <module>   s,    $,
