o
    `۷i                     @   s   d dl mZmZ d dlZd dlZd dlmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZmZmZ d d
lmZ d dlmZ d dlmZ e \Z Z!Z"e \Z#Z$eG dd deZ%dS )    )OptionalUnionN)BoxDiscreteMultiDiscreteSpace)ActionDistribution)ModelV2)force_tuple)OldAPIStackoverride)Exploration)
TensorTypetry_import_tftry_import_torch)Simplex)get_base_struct_from_space)zero_logps_from_actionsc                       s   e Zd ZdZdededee f fddZe	e
ddd	ed
eeef defddZdedeeeef  fddZdedefddZ  ZS )RandomzA random action selector (deterministic/greedy for explore=False).

    If explore=True, returns actions randomly from `self.action_space` (via
    Space.sample()).
    If explore=False, returns the greedy/max-likelihood action.
    action_spacemodel	frameworkc                   s*   t  jd|||d| t| j| _dS )zInitialize a Random Exploration object.

        Args:
            action_space: The gym action space used by the environment.
            framework: One of None, "tf", "torch".
        )r   r   r   N )super__init__r   r   action_space_struct)selfr   r   r   kwargs	__class__r   X/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/rllib/utils/exploration/random.pyr      s   	zRandom.__init__T)exploreaction_distributiontimestepr!   c                C   s"   | j dv r| ||S | ||S )N)tf2tf)r   get_tf_exploration_action_opget_torch_exploration_action)r   r"   r#   r!   r   r   r    get_exploration_action-   s   
	zRandom.get_exploration_actionaction_distc                    sT    fdd} fdd}t jt|trt j|t jdn|||d}t|}||fS )Nc                     sf   d t jtjdd } tjjt| d kr$tjd   fdd}t	
|j}|S )N   model_configr   c                    s(   j pd}t trtjjf j   j jdS t tr.tj	 fdd j
D ddS t trv j rj j rj jjdrZtjjf|  jjd  jjd  jd	S tjjf|  j j jd	S tjjf|  jd
S t tsJ d tjtjjf| dd jd	S )N)r*   shapemaxvaldtypec                    s$   g | ]}t jj d f|jdqS )r*   r,   )r%   randomuniformr/   ).0n)
batch_size	componentr   r    
<listcomp>Z   s    zbRandom.get_tf_exploration_action_op.<locals>.true_fn.<locals>.random_component.<locals>.<listcomp>r*   )axisintr   )r-   minvalr.   r/   )r-   r/   z<Unsupported distribution component '{}' for random sampling!g        g      ?)r-   
isinstancer   r%   r0   r1   r3   r/   r   concatnvecr   bounded_aboveallbounded_belowname
startswithlowflathighnormalr   formatnnsoftmax)r5   r-   r4   )r5   r    random_componentM   sX   




	

zNRandom.get_tf_exploration_action_op.<locals>.true_fn.<locals>.random_component)r
   required_model_output_shaper   getattrr   leninputsr-   r%   treemap_structurer   )reqrJ   actionsr)   r   rI   r    true_fn@   s   7z4Random.get_tf_exploration_action_op.<locals>.true_fnc                      s      S )N)deterministic_sampler   )r)   r   r    false_fn   s   z5Random.get_tf_exploration_action_op.<locals>.false_fn)r/   )predrT   rV   )r%   condr:   boolconstantr   )r   r)   r!   rT   rV   actionlogpr   rS   r    r&   ;   s   Gz#Random.get_tf_exploration_action_opc                    s   |r@t | jt jdd }t|jjt|d kr1|jjd }t	 fddt
|D }n j }t| j}n| }tj| d ftj jd}||fS )Nr+   r*   r   c                    s   g | ]} j  qS r   )r   sample)r2   _r   r   r    r6      s    z7Random.get_torch_exploration_action.<locals>.<listcomp>)r/   device)r
   rK   r   rL   r   rM   rN   r-   npstackranger]   torch
from_numpytor`   rU   zerossizefloat32)r   r)   r!   rQ   r4   ar[   r\   r   r_   r    r'      s   
z#Random.get_torch_exploration_action)__name__
__module____qualname____doc__r   r	   r   strr   r   r   r   r   r8   r   rY   r(   r&   r'   __classcell__r   r   r   r    r      s8    

Zr   )&typingr   r   numpyra   rO   gymnasium.spacesr   r   r   r   ray.rllib.models.action_distr   ray.rllib.models.modelv2r	   ray.rllib.utilsr
   ray.rllib.utils.annotationsr   r   'ray.rllib.utils.exploration.explorationr   ray.rllib.utils.frameworkr   r   r   ray.rllib.utils.spaces.simplexr   "ray.rllib.utils.spaces.space_utilsr   ray.rllib.utils.tf_utilsr   tf1r%   tfvrd   r^   r   r   r   r   r    <module>   s"    
