o
    `۷iY                     @   sv   d dl mZ d dlmZ d dlZd dlZd dlmZ d dl	m
Z
 d dlmZ dZdZe
G d	d
 d
Ze
dd ZdS )    )defaultdict)DictN)DEFAULT_POLICY_ID)OldAPIStack)PolicyIDlearnerlearner_statsc                   @   sP   e Zd ZddefddZefdededdfd	d
ZdeddfddZ	dd Z
dS )LearnerInfoBuilder   num_devicesc                 C   s   || _ tt| _d| _d S )NF)r   r   listresults_all_towersis_finalized)selfr    r   Z/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/rllib/utils/metrics/learner_info.py__init__   s   

zLearnerInfoBuilder.__init__results	policy_idreturnNc                    s   | j rJ dd vr| j|   dS | j| tjdd g fddt| jD R     D ](\}}|tkrS |  D ]\}}|| j| d t |< qBq4|| j| d |< q4dS )	a=  Adds a policy.learn_on_(loaded)?_batch() result to this builder.

        Args:
            results: The results returned by Policy.learn_on_batch or
                Policy.learn_on_loaded_batch.
            policy_id: The policy's ID, whose learn_on_(loaded)_batch method
                returned `results`.
        z7LearnerInfo already finalized! Cannot add more results.tower_0c                 W   s   t | g|R  S )N)_all_tower_reduce)psr   r   r   <lambda>1   s    z?LearnerInfoBuilder.add_learn_on_batch_results.<locals>.<lambda>c                 3   s     | ]}  d |V  qdS )ztower_{}N)popformat).0	tower_numr   r   r   	<genexpr>2   s
    
z@LearnerInfoBuilder.add_learn_on_batch_results.<locals>.<genexpr>N)	r   r   appendtreemap_structure_with_pathranger   itemsLEARNER_STATS_KEY)r   r   r   kvk1v1r   r   r   add_learn_on_batch_results   s2   

	z-LearnerInfoBuilder.add_learn_on_batch_resultsall_policies_resultsc                 C   s,   |  D ]\}}|dkr| j||d qdS )a$  Adds multiple policy.learn_on_(loaded)?_batch() results to this builder.

        Args:
            all_policies_results: The results returned by all Policy.learn_on_batch or
                Policy.learn_on_loaded_batch wrapped as a dict mapping policy ID to
                results.
        batch_count)r   N)r&   r,   )r   r-   pidresultr   r   r   &add_learn_on_batch_results_multi_agentA   s
   z9LearnerInfoBuilder.add_learn_on_batch_results_multi_agentc                 C   s8   d| _ i }| j D ]\}}tjtg|R  ||< q
|S )NT)r   r   r&   r#   r$   r   )r   infor   r   r   r   r   finalizeP   s   zLearnerInfoBuilder.finalize)r
   )__name__
__module____qualname__intr   r   r   r   r,   r1   r3   r   r   r   r   r	      s     
(
r	   c                 G   s   t | dkr| d dkrtj|ddS |d du rdS t| d tr:| d dr.t|S | d dr:t|S t|	 rDtj
S t|S )	z<Reduces stats across towers based on their stats-dict paths.r
   r   td_error)axisNr!   min_max_)lennpconcatenate
isinstancestr
startswithnanminnanmaxisnanallnannanmean)path
tower_datar   r   r   r   ^   s   	


r   )collectionsr   typingr   numpyr=   r#   ray.rllib.policy.sample_batchr   ray.rllib.utils.annotationsr   ray.rllib.utils.typingr   LEARNER_INFOr'   r	   r   r   r   r   r   <module>   s    K