o
    ci
                     @   s   d dl Zd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZmZ dZd	Zd
ZdZdZdZdZdZdZdZdZG dd deZdS )    N)Dict)
DQNLearner)Learner)override)LambdaDefaultDict)ModuleID
TensorTypelogpsqf_lossqf_meanqf_maxqf_minqf_predsqf_twin_lossqf_twin_predstd_error_meancritic_targetaction_dist_inputs_nextc                       sD   e Zd Zeed fddZeededdf fddZ  ZS )	
SACLearnerreturnNc                    s@   t fdd_t   fdd t  fdd_d S )Nc                    s(    j t j| jtjgddS )NT)	trainable)_get_tensor_variablenplogconfigget_config_for_moduleinitial_alphaastypefloat32	module_idself X/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/algorithms/sac/sac_learner.py<lambda>    s    z"SACLearner.build.<locals>.<lambda>c                    s:    j | j}|du s|dkrt jj|  jj }|S )zReturns the target entropy to use for the loss.

            Args:
                module_id: Module ID for which the target entropy should be
                    returned.

            Returns:
                Target entropy.
            Nauto)	r   r   target_entropyr   prod_module_specmodule_specsaction_spaceshape)r    r'   r!   r#   r$   get_target_entropy/   s   
z,SACLearner.build.<locals>.get_target_entropyc                    s     | S )N)r   r   r-   r"   r#   r$   r%   A   s    )r   curr_log_alphasuperbuildr'   r!   	__class__r.   r$   r1      s   


zSACLearner.buildr    c                    s,   t  | | j|d | j|d dS )zRemoves the temperature and target entropy.

        Note, this means that we also need to remove the corresponding
        temperature optimizer.
        N)r0   remove_moduler/   popr'   )r"   r    r2   r#   r$   r4   D   s   zSACLearner.remove_module)r   N)	__name__
__module____qualname__r   r   r1   r   r4   __classcell__r#   r#   r2   r$   r      s
    ( r   )numpyr   typingr   $ray.rllib.algorithms.dqn.dqn_learnerr   ray.rllib.core.learner.learnerr   ray.rllib.utils.annotationsr   "ray.rllib.utils.lambda_defaultdictr   ray.rllib.utils.typingr   r   	LOGPS_KEYQF_LOSS_KEYQF_MEAN_KEY
QF_MAX_KEY
QF_MIN_KEYQF_PREDSQF_TWIN_LOSS_KEYQF_TWIN_PREDSTD_ERROR_MEAN_KEYCRITIC_TARGETACTION_DIST_INPUTS_NEXTr   r#   r#   r#   r$   <module>   s&    