o
    $ii                     @   sd   d dl mZ d dlmZmZ d dlmZ d dlmZ d dl	m
Z
 G dd deZG dd	 d	eZd
S )    )AlgorithmConfig)MARWILMARWILConfig)RLModuleSpec)override)RLModuleSpecTypec                       sf   e Zd ZdZd fdd	ZeedefddZee	d fdd		Z	ee
d fd
dZ  ZS )BCConfiga]  Defines a configuration class from which a new BC Algorithm can be built

    .. testcode::
        :skipif: True

        from ray.rllib.algorithms.bc import BCConfig
        # Run this from the ray directory root.
        config = BCConfig().training(lr=0.00001, gamma=0.99)
        config = config.offline_data(
            input_="./rllib/offline/tests/data/cartpole/large.json")

        # Build an Algorithm object from the config and run 1 training iteration.
        algo = config.build()
        algo.train()

    .. testcode::
        :skipif: True

        from ray.rllib.algorithms.bc import BCConfig
        from ray import tune
        config = BCConfig()
        # Print out some default values.
        print(config.beta)
        # Update the config object.
        config.training(
            lr=tune.grid_search([0.001, 0.0001]), beta=0.75
        )
        # Set the config object's data path.
        # Run this from the ray directory root.
        config.offline_data(
            input_="./rllib/offline/tests/data/cartpole/large.json"
        )
        # Set the config object's env, used for evaluation.
        config.environment(env="CartPole-v1")
        # Use to_dict() to get the old-style python config dict
        # when running with tune.
        tune.Tuner(
            "BC",
            param_space=config.to_dict(),
        ).fit()
    Nc                    s.   t  j|ptd d| _d| _d| _d| _d S )N)
algo_class        FT)super__init__BCbetapostprocess_inputsmaterialize_datamaterialize_mapped_data)selfr	   	__class__ W/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/ray/rllib/algorithms/bc/bc.pyr   3   s
   
zBCConfig.__init__returnc                 C   s2   | j dkrddlm} t|dS td| j  d)Ntorchr   )DefaultBCTorchRLModule)module_classzThe framework z' is not supported. Use `torch` instead.)framework_str8ray.rllib.algorithms.bc.torch.default_bc_torch_rl_moduler   r   
ValueError)r   r   r   r   r   get_default_rl_module_specE   s   

z#BCConfig.get_default_rl_module_specc                    s*   t  j|||d}|d |d |S )N)input_observation_spaceinput_action_spacedeviceAddOneTsToEpisodesAndTruncateGeneralAdvantageEstimation)r   build_learner_connectorremove)r   r   r    r!   pipeliner   r   r   r$   S   s   

z BCConfig.build_learner_connectorc                    s&   t    | jdkr| d d S d S )Nr
   z5For behavioral cloning, `beta` parameter must be 0.0!)r   validater   _value_error)r   r   r   r   r'   f   s   

zBCConfig.validateN)r   N)__name__
__module____qualname____doc__r   r   r   r   r   r$   r   r'   __classcell__r   r   r   r   r      s    *r   c                   @   s*   e Zd ZdZeeedefddZdS )r   z[Behavioral Cloning (derived from MARWIL).

    Uses MARWIL with beta force-set to 0.0.
    r   c                 C   s   t  S r)   )r   )clsr   r   r   get_default_configu   s   zBC.get_default_configN)	r*   r+   r,   r-   classmethodr   r   r   r0   r   r   r   r   r   o   s
    r   N)%ray.rllib.algorithms.algorithm_configr   "ray.rllib.algorithms.marwil.marwilr   r   "ray.rllib.core.rl_module.rl_moduler   ray.rllib.utils.annotationsr   ray.rllib.utils.typingr   r   r   r   r   r   r   <module>   s    g