o
    ci                     @   sx   d dl mZ d dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ eG dd	 d	eZeeje dS )
    )AnyN)AgentConnectorConnectorContext)register_connector)SampleBatch)AgentConnectorDataType)OldAPIStackc                       sT   e Zd Zddef fddZdedefdd	Zd
d Zedede	fddZ
  ZS )ClipRewardAgentConnectorFNctxc                    s,   t  | |r|rJ d|| _|| _d S )Nz6should not enable both sign and limit reward clipping.)super__init__signlimit)selfr
   r   r   	__class__ Z/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/connectors/agent/clip_reward.pyr      s   
z!ClipRewardAgentConnector.__init__ac_datareturnc                 C   st   |j }t|tu sJ dtj|vr|S | jr$t|tj |tj< |S | jr8tj|tj | j | jd|tj< |S )Nz=Single agent data must be of type Dict[str, TensorStructType])a_mina_max)	datatypedictr   REWARDSr   npr   clip)r   r   dr   r   r   	transform   s    
z"ClipRewardAgentConnector.transformc                 C   s   t j| j| jdfS )N)r   r   )r	   __name__r   r   )r   r   r   r   to_state-   s   z!ClipRewardAgentConnector.to_stateparamsc                 C   s   t | fi |S )N)r	   )r
   r"   r   r   r   
from_state3   s   z#ClipRewardAgentConnector.from_state)FN)r    
__module____qualname__r   r   r   r   r!   staticmethodr   r#   __classcell__r   r   r   r   r	      s    r	   )typingr   numpyr   ray.rllib.connectors.connectorr   r   ray.rllib.connectors.registryr   ray.rllib.policy.sample_batchr   ray.rllib.utils.typingr   ray.rllib.utils.annotationsr   r	   r    r   r   r   r   <module>   s    (