o
    `۷i4                     @   s  d dl Z d dlmZmZmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZm Z  d dl!m"Z" d dl#m$Z$ erzd dl%m&Z& d dl'm(Z( e )e*Z+d+ddZ,d+ddZ-e$de dddefddZ.e$de dddefddZ/e$d,d"d#Z0e$d d!d$ee1ee f defd%d&Z2e$de fd'd(Z3e$d)d* Z4dS )-    N)TYPE_CHECKINGAnyTuple)ClipActionsConnector)ImmutableActionsConnector)ConvertToNumpyConnector)NormalizeActionsConnector)ActionConnectorPipeline)ClipRewardAgentConnector)0ConcurrentMeanStdObservationFilterAgentConnector&MeanStdObservationFilterAgentConnector)ObsPreprocessorConnector)AgentConnectorPipeline)StateBufferConnector)SyncedFilterAgentConnector)ViewRequirementAgentConnector)	ConnectorConnectorContext)get_connector)OldAPIStack)AlgorithmConfig)Policyconfigr   c                 C   s0   | j rdS | jr| jdkrdS | jd u rdS dS )NFdeepmindT)_disable_preprocessor_apiis_ataripreprocessor_prefr    r   O/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/rllib/connectors/util.py__preprocessing_enabled   s   
r    c                 C   s   | j p| jS )N)clip_rewardsr   r   r   r   r   __clip_rewards)   s   r"   ctxreturnc                 C   s   g }t |}|du r|t| dd nt|tu r%|t| t|d t|r0|t|  t| }|r;|| |	t
| t| g t| |S )NT)sign)limit)r"   appendr
   typefloatabsr    r   get_synced_filter_connectorextendr   r   r   )r#   r   
connectorsr!   filter_connectorr   r   r    get_agent_connectors_from_config/   s&   

r/   c                 C   sV   t | g}|ddr|t|  |ddr|t|  |t|  t| |S )zDefault list of action connectors to use for a new policy.

    Args:
        ctx: context used to create connectors.
        config: The AlgorithmConfig object.
    normalize_actionsFclip_actions)r   getr'   r   r   r   r	   )r#   r   r-   r   r   r   !get_action_connectors_from_configQ   s   

r3   policyr   c                 C   st   t | }| jdu r| jdu sJ dt||| _t||| _td t| jjdd t| jjdd dS )zUtil to create agent and action connectors for a Policy.

    Args:
        policy: Policy instance.
        config: Algorithm config dict.
    NzCCan not create connectors for a policy that already has connectors.zUsing connectors:   )indentation)	r   from_policyagent_connectorsaction_connectorsr/   r3   loggerinfo__str__)r4   r   r#   r   r   r   create_connectors_for_policye   s   

r=   connector_configc                 C   s   t | }|\}}t|||S )zUtil to create connector for a Policy based on serialized config.

    Args:
        policy: Policy instance.
        connector_config: Serialized connector config.
    )r   r7   r   )r4   r>   r#   nameparamsr   r   r   restore_connectors_for_policy{   s   

rA   c                 C   sP   | j d}|dkrt| d dS |dkrt| d dS |dkr d S tdt| )Nobservation_filterMeanStdFilter)clipConcurrentMeanStdFilterNoFilterzUnknown observation_filter: )r   r2   r   r   	Exceptionstr)r#   filter_specifierr   r   r   r+      s   r+   c                 C   sN   | j | }|js
d S |jt }|sd S t|dksJ d|d j| j|< d S )N   zcConnectorPipeline has multiple connectors of type SyncedFilterAgentConnector but can only have one.r   )
policy_mapr8   r   lenfilterfilters)rollout_worker	policy_idr4   filter_connectorsr   r   r   maybe_get_filters_for_syncing   s   

rR   )r   r   )r4   r   r   r   )5loggingtypingr   r   r    ray.rllib.connectors.action.clipr   %ray.rllib.connectors.action.immutabler   #ray.rllib.connectors.action.lambdasr   %ray.rllib.connectors.action.normalizer   $ray.rllib.connectors.action.pipeliner	   &ray.rllib.connectors.agent.clip_rewardr
   *ray.rllib.connectors.agent.mean_std_filterr   r   &ray.rllib.connectors.agent.obs_preprocr   #ray.rllib.connectors.agent.pipeliner   'ray.rllib.connectors.agent.state_bufferr   (ray.rllib.connectors.agent.synced_filterr   +ray.rllib.connectors.agent.view_requirementr   ray.rllib.connectors.connectorr   r   ray.rllib.connectors.registryr   ray.rllib.utils.annotationsr   %ray.rllib.algorithms.algorithm_configr   ray.rllib.policy.policyr   	getLogger__name__r:   r    r"   r/   r3   r=   rH   rA   r+   rR   r   r   r   r   <module>   sj    


!