o
    ci9                     @   s   d Z ddlZddlmZmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZmZ ddlmZ e \ZZ		d"d
dZdddd	dddZddddedefddZddd	ddededdfddZdd Zdd Zdd Zd d! ZdS )#a  
[1] Mastering Diverse Domains through World Models - 2023
D. Hafner, J. Pasukonis, J. Ba, T. Lillicrap
https://arxiv.org/pdf/2301.04104v1.pdf

[2] Mastering Atari with Discrete World Models - 2021
D. Hafner, T. Lillicrap, M. Norouzi, J. Ba
https://arxiv.org/pdf/2010.02193.pdf
    N)create_cartpole_dream_imagecreate_frozenlake_dream_image)DEFAULT_MODULE_ID)Columns)try_import_torch)LEARNER_RESULTSREPLAY_BUFFER_RESULTS)inverse_symlogtorchc                 C   s   | j }|d }|d }|dkrItt|jj j}|jjt| 	|| df
|t|	|| f|j dd  
|d   }	n|jj| 	|| df|	|| f|j dd  d}	t	|	||f| }
|
S )Returnsr      r
      N)hz)shapenextiterworld_modeldecoder
parametersdevicer
   
from_numpyreshapetodetachcpunumpynp)	h_t0_to_H	z_t0_to_Hdreamer_modelobs_dims_shape	frameworkr   TBr   !reconstructed_obs_distr_means_TxBreconstructed_obs_T_B r(   b/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/algorithms/dreamerv3/utils/summaries.pyreconstruct_obs_from_h_and_z   s.   r*   )r   T)batch_indicesdescinclude_imagesr#   c                 C   s(  |sd S | d }t |d |d |||d}	|drtnt}
|D ]q}g }tt|	d D ]L}||
|	| | |d | | |d | | |d	 |d  | d
| v rY| d
 | | nd |d |d  | | d | | |d | | dd	  q,| d|rd| nd d| t	j
|ddi q d S )N
dream_datah_states_t0_to_H_BxTz_states_prior_t0_to_H_BxTr   r    r!   r"   r#   CartPoler   values_dreamed_t0_to_H_BxT actions_ints_dreamed_t0_to_H_BxTrewards_dreamed_t0_to_H_BxT DISAGREE_intrinsic_rewards_H_BxTcontinues_dreamed_t0_to_H_BxTVALUE_TARGETS_H_BxTT)	dreamed_obs	dreamed_V	dreamed_adreamed_r_tp1dreamed_ri_tp1dreamed_c_tp1value_target	initial_h	as_tensordreamed_trajectories_ _Baxis)r*   
startswithr   r   rangelenappendr   updater   concatenate)resultsenvr!   r"   r+   r,   r-   r#   r.   dreamed_obs_H_Bfuncbimagestr(   r(   r)   report_dreamed_trajectoryH   sN   	
rU   )
symlog_obs	do_reportrV   rW   c           	   	   C   s   t tdf}| j|dgdd }| j|dd d| d}|s'| j|dd dS t| t|d	|f|tj j	d
d  |tj dd	 ||d dS )a  Summarizes sampled data (from the replay buffer) vs world-model predictions.

    World model predictions are based on the posterior states (z computed from actual
    observation encoder input + the current h-states).

    Observations: Computes MSE (sampled vs predicted/recreated) over all features.
    For image observations, also creates direct image comparisons (sampled images
    vs predicted (posterior) ones).
    Rewards: Compute MSE (sampled vs predicted).
    Continues: Compute MSE (sampled vs predicted).

    Args:
        metrics: The MetricsLogger object of the DreamerV3 algo.
        sample: The sampled data (dict) from the replay buffer. Already tf-tensor
            converted.
        batch_size_B: The batch size (B). This is the number of trajectories sampled
            from the buffer.
        batch_length_T: The batch length (T). This is the length of an individual
            trajectory sampled from the buffer.
        do_report: Whether to actually log the report (default). If this is set to
            False, this function serves as a clean-up on the given metrics, making sure
            they do NOT contain anymore any (spacious) data relevant for producing
            the report/videos.
    /WORLD_MODEL_fwd_out_obs_distribution_means_b0xTNdefaultr   F	key_error.WORLD_MODEL_sampled_vs_predicted_posterior_b0x_videosr   r   r   metricscomputed_float_obs_B_T_dimssampled_obs_B_T_dimsmetrics_keyrV   )
r   r   peekdelete_report_obsr   r   r   OBSr   )	r`   samplebatch_size_Bbatch_length_TrV   rW   fwd_output_key*predicted_observation_means_single_examplefinal_result_keyr(   r(   r)   report_predicted_vs_sampled_obs   s2   "

rn   )rV   rW   r#   returnc                 C   sV  | j ttdfi d}| jttddd d| d}	d| d}
d| d}|s>| j|	dd | j|
dd | j|dd d	S t|d
 d |d d ||tj jdd	 |d}|}|| }t| t	
|dddd |tj dd||f |	|d t| |d d |tj d	d	||f |
d t| |d d d|d  d	d	||f |d d	S )a  Logs dreamed observations, rewards, continues and compares them vs sampled data.

    For obs, we'll try to create videos (side-by-side comparison) of the dreamed,
    recreated-from-prior obs vs the sampled ones (over dreamed_T timesteps).

    Args:
        metrics: The MetricsLogger object of the DreamerV3 algo.
        sample: The sampled data (dict) from the replay buffer. Already tf-tensor
            converted.
        burn_in_T: The number of burn-in timesteps (these will be skipped over in the
            reported video comparisons and MSEs).
        dreamed_T: The number of timesteps to produce dreamed data for.
        dreamer_model: The DreamerModel to use to create observation vectors/images
            from dreamed h- and (prior) z-states.
        symlog_obs: Whether to inverse-symlog the computed observations or not. Set this
            to True for environments, in which we should symlog the observations.
        do_report: Whether to actually log the report (default). If this is set to
            False, this function serves as a clean-up on the given metrics, making sure
            they do NOT contain anymore any (spacious) data relevant for producing
            the report/videos.
    r.   rY   Fr[   %EVALUATION_sampled_vs_dreamed_prior_H_obs_rewards_MSE_continues_MSENh_states_t0_to_H_Bx1r   z_states_prior_t0_to_H_Bx1r   r1   r   r_   rewards_dreamed_t0_to_H_Bx1)r`   computed_rewardssampled_rewardsrc   continues_dreamed_t0_to_H_Bx1      ?is_terminated)r`   computed_continuessampled_continuesrc   )rd   r   r   re   r*   r   rg   r   rf   r   swapaxes_report_rewardsREWARDS_report_continues)r`   rh   	burn_in_T	dreamed_Tr!   rV   rW   r#   r.   final_result_key_obsfinal_result_key_rewfinal_result_key_contrP   t0tHr(   r(   r)   )report_dreamed_eval_trajectory_vs_samples   sZ    






r   c                 C   sB   |  }| }| }| }| j|j||||dtdd d S )N)capacitysize_num_episodessize_timestepsreplayed_stepsadded_stepsr   )keywindow)get_num_episodesget_num_timestepsget_sampled_timestepsget_added_timestepslog_dictr   r   )r`   replay_bufferepisodes_in_bufferts_in_bufferr   r   r(   r(   r)   !report_sampling_and_replay_buffer!  s   
r   c                 C   s   t |jdv rit |jdkrdnd}|rt|}|s7|d d }|d d }t|ddtj}t||}t|ddtj}t||}tj||gd	d
}t |jdkr^t	|d	}| j
||ddd dS dS )a  Summarizes computed- vs sampled observations: MSE and (if applicable) images.

    Args:
        metrics: The MetricsLogger object of the DreamerV3 algo.
        computed_float_obs_B_T_dims: Computed float observations
            (not clipped, not cast'd). Shape=(B, T, [dims ...]).
        sampled_obs_B_T_dims: Sampled observations (as-is from the environment, meaning
            this could be uint8, 0-255 clipped images). Shape=(B, T, [dims ...]).
        metrics_key: The metrics key (or key sequence) under which to log ths resulting
            video sequence.
        symlog_obs: Whether to inverse-symlog the computed observations or not. Set this
            to True for environments, in which we should symlog the observations.

    )      r   )r   r   r   r      )r   r   r   r   rz      g        g     o@r   rF   r   Nr   )reducer   )rJ   r   r	   r   clipastypeuint8	transposerM   expand_dims	log_value)r`   ra   rb   rc   rV   transpose_axescomputed_imagessampled_vs_computed_imagesr(   r(   r)   rf   5  s:   
rf   c                 C   s2   t t || }t |}| j||dd d S Nr   )r   )r   meansquarer   )r`   rw   rx   rc   mse_sampled_vs_computed_rewardsr(   r(   r)   r   u  s   

r   c                 C   s0   t t |||j }| j||dd d S r   )r   r   r   r   dtyper   )r`   r|   r}   rc   !mse_sampled_vs_computed_continuesr(   r(   r)   r     s   
r   )r
   )__doc__r   r   .ray.rllib.algorithms.dreamerv3.utils.debuggingr   r   ray.rllib.corer   ray.rllib.core.columnsr   ray.rllib.utils.frameworkr   ray.rllib.utils.metricsr   r   ray.rllib.utils.tf_utilsr	   r
   rC   r*   rU   boolrn   r   r   rf   r   r   r(   r(   r(   r)   <module>   sJ    	

2E
G

Z@