o
    `۷i                     @   s  d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
 ddlZddlmZmZ ddlmZ ddlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3m4Z4m5Z5m6Z6 ddl7m8Z8 e# \Z9Z:Z;e<e=Z>d*ddZ?dd Z@dd ZAdd ZBdd ZCd d! ZDeG d"d# d#eZEd$d% ZFG d&d' d'ZGeddddddddddddddddddeddfd(d)ZHdS )+zrEager mode TF policy built using build_tf_policy().

It supports both traced and non-traced eager execution modes.    N)DictListOptionalTupleUnion)DEPRECATED_VALUEdeprecation_warning)ModelCatalog)RepeatedValues)PolicyPolicyState)#pad_batch_to_sequences_of_same_size)SampleBatch)
add_mixins
force_list)OldAPIStackoverride))ERR_MSG_TF_POLICY_CANNOT_SAVE_KERAS_MODEL)try_import_tf)'DIFF_NUM_GRAD_UPDATES_VS_SAMPLER_POLICYNUM_AGENT_STEPS_TRAINEDNUM_GRAD_UPDATES_LIFETIME)LEARNER_STATS_KEY)convert_to_numpy)normalize_action)get_gpu_devices)	with_lock)LocalOptimizerModelGradientsTensorStructType
TensorType)log_oncec                    s~   t | trdd |  D }tt|S t | tr| S t | tr-ttt| j| j	| j
S | d ur=| t fdd| S | S )Nc                 S   s    i | ]\}}|t jkr||qS  )r   INFOS.0kvr"   r"   V/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/rllib/policy/eager_tf_policy.py
<dictcomp>2        z"_convert_to_tf.<locals>.<dictcomp>c                    s6   t | tr
t|  S | d urt| st|  S | S N)
isinstancer
   _convert_to_tftf	is_tensorconvert_to_tensor)fdr"   r(   <lambda>?   s
   z _convert_to_tf.<locals>.<lambda>)r,   r   itemstreemap_structurer-   r   r
   valueslengthsmax_len)xdtypedict_r"   r2   r(   r-   0   s    



	r-   c                 C   s:   dd }zt j|| W S  ty   tdt| w )Nc                 S   s   t | tjr
|  S | S r+   )r,   r.   Tensornumpy)r;   r"   r"   r(   _mapK   s   z_convert_to_numpy.<locals>._mapz4Object of type {} has no method to convert to numpy.)r.   nestr7   AttributeError	TypeErrorformattype)r;   r@   r"   r"   r(   _convert_to_numpyJ   s   rF   c                       t   fdd}|S )Nc                     sD   t  rdd | D }dd | D } |i |S  | i |S )Nc                 S   s   g | ]}t |qS r"   )r-   )r%   r;   r"   r"   r(   
<listcomp>\       z8_convert_eager_inputs.<locals>._func.<locals>.<listcomp>c                 S   s4   i | ]\}}|d vr|t ||dkrtjnddqS )>   episodes
info_batchtimestepNr<   )r-   r.   int64r$   r"   r"   r(   r)   ^   s
    z8_convert_eager_inputs.<locals>._func.<locals>.<dictcomp>)r.   executing_eagerlyr5   )argskwargs
eager_argseager_kwargsfuncr"   r(   _funcY   s   z$_convert_eager_inputs.<locals>._func	functoolswrapsrU   rV   r"   rT   r(   _convert_eager_inputsX   s   r[   c                    rG   )Nc                     s(    | i |}t  rt jt|}|S r+   )r.   rO   rA   r7   rF   )rP   rQ   outrT   r"   r(   rV   k   s   z%_convert_eager_outputs.<locals>._funcrW   rZ   r"   rT   r(   _convert_eager_outputsj   s   r]   c                 K   s   | di |}t d|j)NzDetected a variable being created during an eager forward pass. Variables should only be created during model initialization: {}r"   )
ValueErrorrD   name)next_creatorkwr'   r"   r"   r(   _disallow_var_creationu   s
   rb   c                    s    fdd}|S )z9Asserts that a given number of re-traces is not breached.c                    s>   | j dd ur| j| j d krtd | g|R i |S )Neager_max_retraceszToo many tf-eager re-traces detected! This could lead to significant slow-downs (even slower than running in tf-eager mode w/ `eager_tracing=False`). To switch off these re-trace counting checks, set `eager_max_retraces` in your config to None.)configget_re_trace_counterRuntimeError)self_rP   rQ   objr"   r(   rV      s   z'_check_too_many_retraces.<locals>._funcr"   )rj   rV   r"   ri   r(   _check_too_many_retraces~   s   rk   c                   @   s   e Zd ZdZdS )EagerTFPolicyzCDummy class to recognize any eagerized TFPolicy by its inheritance.N)__name__
__module____qualname____doc__r"   r"   r"   r(   rl      s    rl   c                    s2   G  fddd j d  _ jd  _ S )zWrapper class that enables tracing for all eager policy methods.

    This is enabled by the `--trace`/`eager_tracing=True` config when
    framework=tf2.
    c                       s   e Zd Z fddZeee			ddeee	f de
dee dee	ee	 eee	f f f fdd	Zee fd
dZeededef fddZeeededdf fddZedd Z  ZS )z/_traced_eager_policy.<locals>.TracedEagerPolicyc                    s2   d| _ d| _d| _d| _t | j|i | d S )NF)_traced_learn_on_batch_helper_traced_compute_actions_helper _traced_compute_gradients_helper_traced_apply_gradients_helpersuper__init__)selfrP   rQ   TracedEagerPolicy	__class__r"   r(   rv      s
   z8_traced_eager_policy.<locals>.TracedEagerPolicy.__init__N
input_dictexplorerL   returnc                    sT   | j du r| jsttjt | jddd| _d| _ t | jd||||d|S )z9Traced version of Policy.compute_actions_from_input_dict.FT	autographreduce_retracingr{   r|   rL   rJ   Nr"   )rr   _no_tracingr[   r.   functionru   _compute_actions_helpercompute_actions_from_input_dict)rw   r{   r|   rL   rJ   rQ   rx   r"   r(   r      s"   
zO_traced_eager_policy.<locals>.TracedEagerPolicy.compute_actions_from_input_dictc                    D   | j du r| jsttjt | jddd| _d| _ t | |S )z(Traced version of Policy.learn_on_batch.FTr~   )rq   r   r[   r.   r   ru   _learn_on_batch_helperlearn_on_batchrw   samplesrx   r"   r(   r         
z>_traced_eager_policy.<locals>.TracedEagerPolicy.learn_on_batchr   c                    r   )z+Traced version of Policy.compute_gradients.FTr~   )rs   r   r[   r.   r   ru   _compute_gradients_helpercompute_gradientsr   rx   r"   r(   r      r   zA_traced_eager_policy.<locals>.TracedEagerPolicy.compute_gradientsgradsc                    r   )z)Traced version of Policy.apply_gradients.FTr~   )rt   r   r[   r.   r   ru   _apply_gradients_helperapply_gradients)rw   r   rx   r"   r(   r      r   z?_traced_eager_policy.<locals>.TracedEagerPolicy.apply_gradientsc                 S   s   | S r+   r"   clsr"   r"   r(   with_tracing  s   z<_traced_eager_policy.<locals>.TracedEagerPolicy.with_tracingNNN)rm   rn   ro   rv   rk   r   r   r   strr    boolr   intr   r   r   r   r   r   r   r   classmethodr   __classcell__r"   ry   eager_policy_clsrz   r(   ry      s6    
ry   _traced)rm   ro   )r   r"   r   r(   _traced_eager_policy   s   jr   c                   @   s   e Zd Zdd Zdd ZdS )_OptimizerWrapperc                 C   s
   || _ d S r+   )tape)rw   r   r"   r"   r(   rv     s   
z_OptimizerWrapper.__init__c                 C   s   t t| j|||S r+   )listzipr   gradient)rw   lossvar_listr"   r"   r(   r     s   z#_OptimizerWrapper.compute_gradientsN)rm   rn   ro   rv   r   r"   r"   r"   r(   r     s    r   c                    s   t t|}|tkrtddd |durtdddd |dur%tdd	dd G  	
fd
dd|}| d |_| d |_|S )a  Build an eager TF policy.

    An eager policy runs all operations in eager mode, which makes debugging
    much simpler, but has lower performance.

    You shouldn't need to call this directly. Rather, prefer to build a TF
    graph policy and use set `.framework("tf2", eager_tracing=False) in your
    AlgorithmConfig to have it automatically be converted to an eager policy.

    This has the same signature as build_tf_policy().obs_include_prev_action_rewardT)olderrorNextra_action_fetches_fnextra_action_out_fn)r   newr   gradients_fncompute_gradients_fnc                       s  e Zd Z
fddZee			dHdeeef de	de
e deeee eeef f fdd	Zee							dId
eee ef de
ee  deee ef deee ef de
eeef  de
e de
e	 de
e deeee eeef f fddZeee				dJfdd	Zee	dKfdd	Zeeedd Zeededeeeeef f fddZeededdfddZeedLd d!Zeed"d# Zeed$d% Zeed&d' Zeed(d) Zeed*d+ Z eede!f fd,d-Z"eed.e!ddf fd/d0Z#eedMd1e
e ddfd2d3Z$d4d5 Z%d6d7 Z&efd8d9Z'dMd:d;Z(d<d= Z)efd>d?Z*fd@dAZ+	fdBdCZ,defdDdEZ-e.dFdG Z/  Z0S )Nz0_build_eager_tf_policy.<locals>.eager_policy_clsc                    sl  t  st   |dd| _t| ||| tjddtj	d| _
tj| jd dtjd| _|  }|dkrDt }tdt| d d| _d| _d| _d urU| _n| jjjd	krb| jj| _nd | _trm| nppd
| _|d d | _
r
| ||| r| ||| || _d | _s rstdnt || jd \| _}r| |||| _!ntj"||||d | jd| _!t#$ | _%| &  | j'(| j!j' | ) | _*| j!+ | _,t| j,dk| _-r| ||| r| |}n	tj.j/0|d }t1|}| j*r| j*2|}|| _3|r|d nd | _4| j5d	d d| _r.| ||| | j
6d d S )N	frameworktf2r   F)	trainabler<   r|   zFound z visible cuda devices.zPolicy.loss   modelmax_seq_lenzT`make_model` is required if `action_sampler_fn` OR `action_distribution_fn` is given)r   lrT)auto_remove_unneeded_view_reqsstats_fn)7tf1rO   enable_eager_executionre   r   rl   rv   r.   VariablerN   global_timesteprd   r   r|   _get_num_gpus_for_policyr   loggerinfolen_is_trainingrf   _loss_initialized_lossr   __func__ro   callablebatch_divisibility_req_max_seq_len
dist_classr^   r	   get_action_distr   get_model_v2	threadingRLock_lock/_update_model_view_requirements_from_init_stateview_requirementsupdate_create_explorationexplorationget_initial_state_state_inputs_is_recurrentkeras
optimizersAdamr   get_exploration_optimizer_optimizers
_optimizer!_initialize_loss_from_dummy_batchassign)rw   observation_spaceaction_spacerd   num_gpusgpu_ids	logit_dimr   )action_distribution_fnaction_sampler_fn
after_initbefore_initbefore_loss_initget_batch_divisibility_reqloss_fn
make_modeloptimizer_fnr   validate_spacesr"   r(   rv   K  s   





z9_build_eager_tf_policy.<locals>.eager_policy_cls.__init__Nr{   r|   rL   r}   c                    s   | j dst st  d| _|d ur|n| j}|d ur |n| j}t|t	j
r/t| }|    d  fdd  D }|| _|g k| _| jj|||  d |  || j d rbd n|||}| jt|d d j d  t|S )Neager_tracingFc                    s$   g | ]}d |dd v r | qS )state_inN   r"   )r%   r&   r{   r"   r(   rH     s    zd_build_eager_tf_policy.<locals>.eager_policy_cls.compute_actions_from_input_dict.<locals>.<listcomp>)rL   r|   tf_sessr   )rd   re   r   rO   r   r   r|   r   r,   r.   r>   r   r?   _lazy_tensor_dictset_trainingkeys	_state_inr   r   before_compute_actionsget_sessionr   
assign_addr6   flattenshapeas_listr   )rw   r{   r|   rL   rJ   rQ   state_batchesretr"   r   r(   r     s4   	




$	zP_build_eager_tf_policy.<locals>.eager_policy_cls.compute_actions_from_input_dict	obs_batchr   prev_action_batchprev_reward_batchrK   rJ   c	                 [   s   t t j|itdd}
|d ur t|D ]\}}||
d| < q|d ur)||
t j< |d ur2||
t j< |d ur;||
t j< | jd|
|||d|	S )NFr   	state_in_r   r"   )	r   CUR_OBSr.   constant	enumeratePREV_ACTIONSPREV_REWARDSr#   r   )rw   r   r   r   r   rK   rJ   r|   rL   rQ   r{   isr"   r"   r(   compute_actions  s,   


z@_build_eager_tf_policy.<locals>.eager_policy_cls.compute_actionsTc                    s   r
 d u r
t dtjt|tjd}ttjt|idd}	|d ur,t||	tj< |d ur8t||	tj	< | j
rB| j
jdd  rR | | j|	ddd\}
}}n| |	||\}
}| j}||
| j}|sq| jd rqt|| j}||}|S )NzfCannot compute log-prob/likelihood w/o an `action_distribution_fn` and a provided `action_sampler_fn`!rM   Fr   )r|   )r|   is_trainingnormalize_actions)r^   r.   onesr   int32r   r  r0   r  r  r   r   r   r   rd   r   action_space_structlogp)rw   actionsr   r   r   r   actions_normalizedrQ   seq_lensinput_batchdist_inputsr   _action_distlog_likelihoods)r   r   r"   r(   compute_log_likelihoods   s<   


zH_build_eager_tf_policy.<locals>.eager_policy_cls.compute_log_likelihoodsc                    s.   t  sJ t| |} r | |||S |S r+   )r.   rO   rl   postprocess_trajectory)rw   sample_batchother_agent_batchesepisode)postprocess_fnr"   r(   r  Y  s
   zG_build_eager_tf_policy.<locals>.eager_policy_cls.postprocess_trajectoryc                 S   s   i }| j j| ||d t|| jd| j| jd d| _| |}|d | 	|}|  j
d7  _
|d|t|jt| j
t| j
d |j
pCd i t|S )N)policytrain_batchresultF)r   shuffler   r   Tr   custom_metricsr   )	callbackson_learn_on_batchr   r   r   r   r   r   r   r   num_grad_updatesr   r   countr   r   r   )rw   postprocessed_batchlearn_statsstatsr"   r"   r(   r   d  s:   


z?_build_eager_tf_policy.<locals>.eager_policy_cls.learn_on_batchr&  c                 S   sN   t |d| j| j| jd d| _| | |d | |\}}}t||fS )NF)r   r   r   r   T)	r   r   r   r   r   r   r   r   r   )rw   r&  grads_and_varsr   r(  r"   r"   r(   r     s   


zB_build_eager_tf_policy.<locals>.eager_policy_cls.compute_gradients	gradientsc                 S   s(   |  ttdd |D | j  d S )Nc                 S   s"   g | ]}|d urt |nd qS r+   )r.   r0   )r%   gr"   r"   r(   rH         zT_build_eager_tf_policy.<locals>.eager_policy_cls.apply_gradients.<locals>.<listcomp>)r   r   r   r   trainable_variables)rw   r*  r"   r"   r(   r     s   z@_build_eager_tf_policy.<locals>.eager_policy_cls.apply_gradientsFc                 S   s(   |   }|rdd |D S dd |D S )Nc                 S   s   i | ]}|j | qS r"   )r_   r?   r%   r'   r"   r"   r(   r)     s    zP_build_eager_tf_policy.<locals>.eager_policy_cls.get_weights.<locals>.<dictcomp>c                 S   s   g | ]}|  qS r"   )r?   r.  r"   r"   r(   rH     rI   zP_build_eager_tf_policy.<locals>.eager_policy_cls.get_weights.<locals>.<listcomp>)	variables)rw   as_dictr/  r"   r"   r(   get_weights  s   z<_build_eager_tf_policy.<locals>.eager_policy_cls.get_weightsc                 S   sN   |   }t|t|ksJ t|t|ft||D ]	\}}|| qd S r+   )r/  r   r   r   )rw   weightsr/  r'   wr"   r"   r(   set_weights  s
   $z<_build_eager_tf_policy.<locals>.eager_policy_cls.set_weightsc                 S   s   t | j S r+   )r   r   	get_staterw   r"   r"   r(   get_exploration_state  s   zF_build_eager_tf_policy.<locals>.eager_policy_cls.get_exploration_statec                 S      | j S r+   )r   r6  r"   r"   r(   is_recurrent  s   z=_build_eager_tf_policy.<locals>.eager_policy_cls.is_recurrentc                 S   s
   t | jS r+   )r   r   r6  r"   r"   r(   num_state_tensors  s   
zB_build_eager_tf_policy.<locals>.eager_policy_cls.num_state_tensorsc                 S   s   t | dr
| j S g S )Nr   )hasattrr   r   r6  r"   r"   r(   r     s   

zB_build_eager_tf_policy.<locals>.eager_policy_cls.get_initial_statec                    sX   t   }|d  |d< | jr t| j dkr | j |d< | jr*| j |d< |S )Nr   r   _optimizer_variables_exploration_state)ru   r5  r?   r   r   r/  r   )rw   stater   r"   r(   r5    s   
z:_build_eager_tf_policy.<locals>.eager_policy_cls.get_stater>  c                    s   | dd }|r0| j r0t| jdstdrtd t	| j |D ]	\}}|
| q&t| drBd|v rB| jj|d d | j
|d  t | d S )	Nr<  r   +set_state_optimizer_vars_tf_eager_policy_v2zCannot restore an optimizer's state for tf eager! Keras is not able to save the v1.x optimizers (from tf.compat.v1.train) since they aren't compatible with checkpoints.r   r=  )r>  r   )re   r   r/  rE   rm   endswithr!   r   warningr   r   r;  r   	set_stater   ru   )rw   r>  optimizer_varsopt_varvaluer   r"   r(   rB    s   z:_build_eager_tf_policy.<locals>.eager_policy_cls.set_stateonnxc              
   S   s   t | dr]t | jdr]t| jjtjjr]|rAzddl}W n ty- } zt	d|d}~ww |j
j| jjtj|dd\}}dS z| jjj|dd	 W dS  ty\   tt Y dS w tt dS )
a  Exports the Policy's Model to local directory for serving.

            Note: Since the TfModelV2 class that EagerTfPolicy uses is-NOT-a
            tf.keras.Model, we need to assume that there is a `base_model` property
            within this TfModelV2 class that is-a tf.keras.Model. This base model
            will be used here for the export.
            TODO (kourosh): This restriction will be resolved once we move Policy and
            ModelV2 to the new Learner/RLModule APIs.

            Args:
                export_dir: Local writable directory.
                onnx: If given, will export model in ONNX format. The
                    value of this parameter set the ONNX OpSet version to use.
            r   
base_modelr   NzmConverting a TensorFlow model to ONNX requires `tf2onnx` to be installed. Install with `pip install tf2onnx`.z
model.onnx)output_pathr.   )save_format)r;  r   r,   rG  r.   r   Modeltf2onnxImportErrorrg   convert
from_kerasospathjoinsave	Exceptionr   rA  r   )rw   
export_dirrF  rK  emodel_protoexternal_tensor_storager"   r"   r(   export_model  s6   
z=_build_eager_tf_policy.<locals>.eager_policy_cls.export_modelc                 S   s"   t | jtjjr| jjS | j S )z9Return the list of all savable variables for this policy.)r,   r   r.   r   rJ  r/  r6  r"   r"   r(   r/  $  s   
z:_build_eager_tf_policy.<locals>.eager_policy_cls.variablesc                 S   r8  r+   )r   r6  r"   r"   r(   loss_initialized+  s   zA_build_eager_tf_policy.<locals>.eager_policy_cls.loss_initializedc                    s@  |  j d7  _ t|tj d jd }|rtj|tjdnd }i }t	t
 rN| | j|tj |||d}	t|	dkrE|	\}
}}}nd }g }|	\}
}n rz | | j|||||dd\}| _}W np ty } z)d|jd v syd	|jd v r | | j|tj ||dd
\}| _}n|W Y d }~n@d }~ww t| jtjjrt||d}|rd|vrt|D ]\}}||d| < q| | | |\}}}n	| |||\}}| || j}| jj|||d\}
}W d    n1 sw   Y  |d urt||tj< ||tj< |d ur||tj< r||  |
||fS )Nr   r   rM   )r|   rL   rJ      F)r{   r   r  r|   rL   r	  zpositional argumentzunexpected keyword argument)r|   rL   r	  )r  
state_in_0r   )action_distributionrL   r|   )rf   r6   r   r   OBSr   r.   r  r  variable_creator_scoperb   r   r  r   r   rC   rP   r,   r   rJ  r  r   r   get_exploration_actionexpACTION_PROBACTION_LOGPACTION_DIST_INPUTSr   )rw   r{   r   rJ   r|   rL   
batch_sizer  extra_fetchesaction_sampler_outputsr  r  r  	state_outrU  r  r  r  )r   r   r   r"   r(   r   .  s   




N



zH_build_eager_tf_policy.<locals>.eager_policy_cls._compute_actions_helperc                 S   sV   |  j d7  _ tt | |\}}}W d    n1 sw   Y  | | |S )Nr   )rf   r.   r^  rb   r   r   )rw   r   _ray_trace_ctxr)  r  r(  r"   r"   r(   r     s   
zG_build_eager_tf_policy.<locals>.eager_policy_cls._learn_on_batch_helperc                 S   s   t | jS r+   )r.   r0   r   r6  r"   r"   r(   _get_is_training_placeholder  s   zM_build_eager_tf_policy.<locals>.eager_policy_cls._get_is_training_placeholderc           
         sV  |  j d7  _ t| jtjjr| jjn| j tjdud | | | j| j	|}W d   n1 s6w   Y  t
|}r`t }| jd rV| |gt| |}n| ||d g}n
 fdd|D }tdr|D ]}|D ]\}}|durtd	|j  qtqp| jd rd
d |D }n|d }dd |D }| | ||}	|||	fS )z,Computes and returns grads as eager tensors.r   N)
persistent%_tf_policy_handles_more_than_one_lossr   c                    s"   g | ]}t t |qS r"   )r   r   r   )r%   r   r   r/  r"   r(   rH     r,  z^_build_eager_tf_policy.<locals>.eager_policy_cls._compute_gradients_helper.<locals>.<listcomp>	grad_varszOptimizing variable c                 S   s   g | ]	}d d |D qS )c                 S      g | ]\}}|qS r"   r"   r%   r+  r  r"   r"   r(   rH     rI   zi_build_eager_tf_policy.<locals>.eager_policy_cls._compute_gradients_helper.<locals>.<listcomp>.<listcomp>r"   )r%   g_and_vr"   r"   r(   rH     s    c                 S   rn  r"   r"   ro  r"   r"   r(   rH     rI   )rf   r,   r   r.   r   rJ  r-  GradientTaper   r   r   r   rd   r   r!   r   r   r_   _stats)
rw   r   losses	optimizerr)  rp  r+  r'   r   r(  )r   rl  r(   r     s>   




zJ_build_eager_tf_policy.<locals>.eager_policy_cls._compute_gradients_helperc                    s   |  j d7  _  r | jd r | | j| d S  | | j| d S | jd r=t| jD ]\}}|dd || D  q*d S | jdd |D  d S )Nr   rk  c                 S       g | ]\}}|d ur||fqS r+   r"   r%   r+  r'   r"   r"   r(   rH     r*   z\_build_eager_tf_policy.<locals>.eager_policy_cls._apply_gradients_helper.<locals>.<listcomp>c                 S   ru  r+   r"   rv  r"   r"   r(   rH     r*   )rf   rd   r   r   r  r   )rw   r)  r  o)apply_gradients_fnr"   r(   r     s   

zH_build_eager_tf_policy.<locals>.eager_policy_cls._apply_gradients_helperc                    sX   i }rt |||t< ni |t<  r|t  |  r*|t | || |S r+   )dictr   r   )rw   outputsr   r   fetches)extra_learn_fetches_fngrad_stats_fnr   r"   r(   rr    s   z7_build_eager_tf_policy.<locals>.eager_policy_cls._statsc                 S   s    t |ts	t|}|t |S r+   )r,   r   set_get_interceptorr-   )rw   r&  r"   r"   r(   r     s   

zB_build_eager_tf_policy.<locals>.eager_policy_cls._lazy_tensor_dictc                 S   s   t | S r+   )r   r   r"   r"   r(   r     s   z=_build_eager_tf_policy.<locals>.eager_policy_cls.with_tracingr   )NNNNNNN)NNNT)NN)Fr+   )1rm   rn   ro   rv   r   r   r   r   r    r   r   r   r   r   r   r   r   r   r  r   r  r  r   r   r   r   r   r1  r4  r7  r9  r:  r   r   r5  rB  rX  r/  rY  r   r   ri  r   r   rr  r   r   r   r   r"   r   r   r   rx  r   r   r   r   r|  r   r}  r   r   r   r  r   r   r   r(   r   J  s     
.
	&7
%




-
o<r   _eager)r   rl   r   r   rm   ro   )r_   r   get_default_configr  r   r   r   rx  r}  r|  r   r   r   r   r   r   r   r   mixinsr   r   r   r   baser   r"   r  r(   _build_eager_tf_policy  s$   
&4     
T
r  r+   )Irp   rX   loggingrO  r   typingr   r   r   r   r   r6   ray._common.deprecationr   r   ray.rllib.models.catalogr	    ray.rllib.models.repeated_valuesr
   ray.rllib.policy.policyr   r   ray.rllib.policy.rnn_sequencingr   ray.rllib.policy.sample_batchr   ray.rllib.utilsr   r   ray.rllib.utils.annotationsr   r   ray.rllib.utils.errorr   ray.rllib.utils.frameworkr   ray.rllib.utils.metricsr   r   r   $ray.rllib.utils.metrics.learner_infor   ray.rllib.utils.numpyr   "ray.rllib.utils.spaces.space_utilsr   ray.rllib.utils.tf_utilsr   ray.rllib.utils.threadingr   ray.rllib.utils.typingr   r   r   r    ray.util.debugr!   r   r.   tfv	getLoggerrm   r   r-   rF   r[   r]   rb   rk   rl   r   r   r  r"   r"   r"   r(   <module>   sx    

	v