o
    `Û·iH.  ã                	   @   sH  d dl mZmZmZmZmZmZmZmZm	Z	m
Z
mZmZ d dlZd dlmZ er~d dlmZ d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dl m!Z! d d	l"m#Z# d d
l$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+m,Z, d dl-m.Z. ed Z/	 ee/e0e1f Z2	 ee	e3df ee3 f Z4ed Z5	 ee6ddf Z7	 ed Z8	 ee6ef Z9	 e0Z:	 e0Z;	 e0Z<	 ee	e3ee3e	e3e3f f ee3e	e3e3f f f  Z=	 eee6eee>e6f f e>e6f Z?	 e0Z@	 ee3e6f ZA	 eeejBf ZC	 edgeeC f ZD	 eZE	 e6ZF	 e6ZG	 eeFdf ZH	 ed ZI	 eeFed geJf ZK	 eeEeIgeGf ZL	 eeeG eeGed geJf f ZM	 ee6e2f ZN	 e
ed  ZO	 ee3e6f ZP	 e3ZQ	 eeEef ZR	 eeAeRf ZS	 eZT	 eZU	 e0ZV	 eZW	 ee6df ZX	 eZY	 ed ZZ	 eZZ[	 ed Z\	 eZ]	 ee]e\f Z^	 ee\ Z_	 ee6e\f Z`	 eeaeeee3eaf   ee	e3ee3eaf f  f Zb	 e0Zc	 e0Zd	 eee	e/e/f  ee/ f Ze	 e0Zf	 ee6e/f Zg	 eddee6ef f Zh	 eejijjee6ejijjf e	ejijjdf f Zk	 eee  Zl	 e	e2elef Zm	 eG dd„ dƒƒZneG d d!„ d!ƒƒZoeG d"d#„ d#ƒƒZped$ƒZqdS )%é    )ÚTYPE_CHECKINGÚAnyÚCallableÚDictÚHashableÚListÚOptionalÚSequenceÚTupleÚTypeÚTypeVarÚUnionN)ÚOldAPIStack)ÚNDArray)ÚMultiRLModuleSpec)ÚRLModuleSpec)Ú
EnvContext)ÚMultiAgentEpisode)ÚSingleAgentEpisode)ÚDynamicTFPolicyV2)ÚEagerTFPolicyV2)Ú
PolicySpec)ÚMultiAgentBatchÚSampleBatch)ÚViewRequirement)zNDArray[Any]zjnp.ndarrayz	tf.Tensorútorch.Tensor.)ztorch.nn.Modulezkeras.Modelztorch.deviceÚint)r   r   r   r   )r   r   r   )r   r   r   )ztorch.optim.Optimizerzkeras.optimizers.Optimizer)r   ztf.Variabler   c                   @   s&   e Zd ZdZdededefdd„ZdS )ÚAgentConnectorDataTypeaƒ  Data type that is fed into and yielded from agent connectors.

    Args:
        env_id: ID of the environment.
        agent_id: ID to help identify the agent from which the data is received.
        data: A payload (``data``). With RLlib's default sampler, the payload
            is a dictionary of arbitrary data columns (obs, rewards, terminateds,
            truncateds, etc).
    Úenv_idÚagent_idÚdatac                 C   s   || _ || _|| _d S ©N)r   r   r    )Úselfr   r   r    © r#   úL/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/rllib/utils/typing.pyÚ__init__5  s   
zAgentConnectorDataType.__init__N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__Ústrr   r%   r#   r#   r#   r$   r   )  s    
r   c                   @   s*   e Zd ZdZdedededefdd„ZdS )	ÚActionConnectorDataTypea?  Data type that is fed into and yielded from agent connectors.

    Args:
        env_id: ID of the environment.
        agent_id: ID to help identify the agent from which the data is received.
        input_dict: Input data that was passed into the policy.
            Sometimes output must be adapted based on the input, for example
            action masking. So the entire input data structure is provided here.
        output: An object of PolicyOutputType. It is is composed of the
            action output, the internal state output, and additional data fetches.

    r   r   Ú
input_dictÚoutputc                 C   s   || _ || _|| _|| _d S r!   )r   r   r,   r-   )r"   r   r   r,   r-   r#   r#   r$   r%   N  s   
z ActionConnectorDataType.__init__N)r&   r'   r(   r)   r*   ÚTensorStructTypeÚPolicyOutputTyper%   r#   r#   r#   r$   r+   ?  s    þýüûr+   c                   @   s*   e Zd ZdZdeeef ddfdd„ZdS )ÚAgentConnectorsOutputa=  Final output data type of agent connectors.

    Args are populated depending on the AgentConnector settings.
    The branching happens in ViewRequirementAgentConnector.

    Args:
        raw_dict: The raw input dictionary that sampler can use to
            build episodes and training batches.
            This raw dict also gets passed into ActionConnectors in case
            it contains data useful for action adaptation (e.g. action masks).
        sample_batch: The SampleBatch that can be immediately used for
            querying the policy for next action.
    Úraw_dictÚsample_batchr   c                 C   s   || _ || _d S r!   )r1   r2   )r"   r1   r2   r#   r#   r$   r%   o  s   
zAgentConnectorsOutput.__init__N)r&   r'   r(   r)   r   r*   r.   r%   r#   r#   r#   r$   r0   _  s    
ÿÿr0   ÚT)rÚtypingr   r   r   r   r   r   r   r	   r
   r   r   r   Ú	gymnasiumÚgymÚray.rllib.utils.annotationsr   Ú	jax.numpyÚnumpyÚjnpÚkerasÚ
tensorflowÚtfÚtorchÚnumpy.typingr   Ú(ray.rllib.core.rl_module.multi_rl_moduler   Ú"ray.rllib.core.rl_module.rl_moduler   Úray.rllib.env.env_contextr   Ú!ray.rllib.env.multi_agent_episoder   Ú"ray.rllib.env.single_agent_episoder   Ú%ray.rllib.policy.dynamic_tf_policy_v2r   Ú#ray.rllib.policy.eager_tf_policy_v2r   Úray.rllib.policy.policyr   Úray.rllib.policy.sample_batchr   r   Ú!ray.rllib.policy.view_requirementr   Ú
TensorTypeÚdictÚtupler.   r   ÚTensorShapeÚNetworkTyper*   Ú
DeviceTypeÚRLModuleSpecTypeÚ	StateDictÚAlgorithmConfigDictÚPartialAlgorithmConfigDictÚModelConfigDictÚConvFilterSpecÚtypeÚFromConfigSpecÚEnvConfigDictÚEnvIDÚEnvÚEnvTypeÚ
EnvCreatorÚAgentIDÚPolicyIDÚModuleIDÚMultiAgentPolicyConfigDictÚEpisodeTypeÚboolÚIsPolicyToTrainÚAgentToModuleMappingFnÚShouldModuleBeUpdatedFnÚPolicyStateÚTFPolicyV2TypeÚ	EpisodeIDÚUnrollIDÚMultiAgentDictÚMultiEnvDictÚ
EnvObsTypeÚEnvActionTypeÚEnvInfoDictÚFileTypeÚViewRequirementsDictÚ
ResultDictÚLocalOptimizerÚ	OptimizerÚParamÚParamRefÚ	ParamDictÚ	ParamListÚNamedParamDictÚfloatÚLearningRateOrScheduleÚGradInfoDictÚLearnerStatsDictÚModelGradientsÚModelWeightsÚModelInputDictÚSampleBatchTypeÚspacesÚSpaceÚSpaceStructÚStateBatchesr/   r   r+   r0   r3   r#   r#   r#   r$   Ú<module>   s   8 	,ÿ ÿÿþÿ$ÿ