o
    $i>                     @   s   d Z ddlZddlZddlmZ ddlZddlmZ ddl	m
Z
 ddlmZ ddlmZmZ ddlmZmZ d	Zd
ZG dd dZede
fddZdS )z
[1] IMPACT: Importance Weighted Asynchronous Architectures with Clipped Target Networks.
Luo et al. 2020
https://arxiv.org/pdf/1912.00167
    N)deque)ModelCatalog)ModelV2)OldAPIStack))DEFAULT_HISTOGRAM_BOUNDARIES_SHORT_EVENTSTimerAndPrometheusLogger)Counter	Histogramfunctarget_funcc                   @   sL   e Zd ZdZdedefddZdd Zdd	 Zed
d Z	defddZ
dS )CircularBufferaJ  A circular batch-wise buffer as described in [1] for APPO.

    The buffer holds at most N batches, which are sampled at random (uniformly).
    If full and a new batch is added, the oldest batch is discarded. Also, each batch
    currently in the buffer can be sampled at most K times (after which it is also
    discarded).
    num_batchesiterations_per_batchc                 C   s   || _ || _| j | j | _d| _tdd t| jD | jd| _t | _| j| _	t
 | _tj | _tddtdd| _| jd	| jji td
ddd| _| jd	| jji tddtdd| _| jd	| jji d S )Nr   c                 S   s   g | ]}d qS N ).0_r   r   \/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/ray/rllib/algorithms/appo/utils.py
<listcomp>+   s    z+CircularBuffer.__init__.<locals>.<listcomp>)maxlen$rllib_utils_circular_buffer_add_timez"Time spent in CircularBuffer.add())rllib)namedescription
boundariestag_keysr   2rllib_utils_circular_buffer_add_ts_dropped_counterz8Total number of env steps dropped by the CircularBuffer.)r   r   r   'rllib_utils_circular_buffer_sample_timez%Time spent in CircularBuffer.sample())r   r   _NxK
_num_addedr   range_bufferset_indices_offset	threadingLock_locknprandomdefault_rng_rngr	   r   !_metrics_circular_buffer_add_timeset_default_tags	__class____name__r   '_metrics_circular_buffer_add_ts_dropped$_metrics_circular_buffer_sample_time)selfr   r   r   r   r   __init__"   sF    



zCircularBuffer.__init__c              	   C   s  t | js | j: | jd }t| jD ] }| j| | j| j	 | j
| j	| j  |  j	d7  _	q|  jd7  _W d    n1 sFw   Y  d}|d urh| }|dkrp| jj|d W d    |S W d    |S W d    |S 1 s{w   Y  |S )Nr      )value)r   r,   r'   r!   r    r   appendr#   addr$   discardr   r   	env_stepsr0   inc)r2   batchdropped_entryr   
dropped_tsr   r   r   r7   P   s2   




zCircularBuffer.addc              	   C   s   t | jk t| dkrtd t| dks| j> | jt| j	}|| j
 | j }| j| }|d usFJ ||| j
| j	dd | jD fd | j|< | j	| W d    n1 s[w   Y  W d    |S W d    |S 1 ssw   Y  |S )Nr   g-C6?c                 S   s   g | ]}|d u qS r   r   )r   br   r   r   r   u   s    z)CircularBuffer.sample.<locals>.<listcomp>)r   r1   lentimesleepr'   r+   choicelistr#   r$   r   r!   r8   )r2   idxactual_buffer_idxr;   r   r   r   samplee   s2   





zCircularBuffer.samplec                 C   s6   | j  | j| jkW  d   S 1 sw   Y  dS )zIWhether the buffer has been filled once with at least `self.num_batches`.N)r'   r   r   r2   r   r   r   filled}   s   
$zCircularBuffer.filledreturnc                 C   s4   | j  t| jW  d   S 1 sw   Y  dS )zIReturns the number of actually valid (non-expired) batches in the buffer.N)r'   r?   r#   rG   r   r   r   __len__   s   $zCircularBuffer.__len__N)r/   
__module____qualname____doc__intr3   r7   rF   propertyrH   rJ   r   r   r   r   r      s    .
r   rI   c                 C   s~   t | j| jd \}}t j| j| j|| jd t| jd| _| j	 | _
t j| j| j|| jd t| jd| _| j	 | _| jS )zBuilds model and target model for APPO.

    Returns:
        ModelV2: The Model for the Policy to use.
            Note: The target model will not be returned, just assigned to
            `policy.target_model`.
    model)r   	framework)r   get_action_distaction_spaceconfigget_model_v2observation_spacePOLICY_SCOPErQ   rP   	variablesmodel_variablesTARGET_POLICY_SCOPEtarget_modeltarget_model_variables)policyr   	logit_dimr   r   r   make_appo_models   s,   
r_   )rM   r%   r@   collectionsr   numpyr(   ray.rllib.models.catalogr   ray.rllib.models.modelv2r   ray.rllib.utils.annotationsr   #ray.rllib.utils.metrics.ray_metricsr   r   ray.util.metricsr   r	   rW   rZ   r   r_   r   r   r   r   <module>   s    p