o
    $i1-                     @   s"  d dl Z d dlmZmZmZ d dlZd dlZd dl	Z	d dl
mZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dl m!Z!m"Z" d dl#m$Z$m%Z% d dl&m'Z' e \Z(Z)Z*e +e,Z-eG dd deZ.eG dd de.Z/dS )    N)DictListTuple)DiscreteMultiDiscrete)deprecation_warning)ModelV2)	TFModelV2)add_time_dimension)SampleBatch)ViewRequirement)OldAPIStackoverride)try_import_tf)get_base_struct_from_space)flatten_inputs_to_1d_tensorone_hot)ModelConfigDict
TensorType)log_oncec                   @   s   e Zd ZdZeedeeef de	e dede
ee	e f fddZdede	e dede
ee	e f fd	d
Zde	e fddZdS )RecurrentNetworka  Helper class to simplify implementing RNN models with TFModelV2.

    Instead of implementing forward(), you can implement forward_rnn() which
    takes batches with the time dimension added already.

    Here is an example implementation for a subclass
    ``MyRNNClass(RecurrentNetwork)``::

        def __init__(self, *args, **kwargs):
            super(MyModelClass, self).__init__(*args, **kwargs)
            cell_size = 256

            # Define input layers
            input_layer = tf.keras.layers.Input(
                shape=(None, obs_space.shape[0]))
            state_in_h = tf.keras.layers.Input(shape=(256, ))
            state_in_c = tf.keras.layers.Input(shape=(256, ))
            seq_in = tf.keras.layers.Input(shape=(), dtype=tf.int32)

            # Send to LSTM cell
            lstm_out, state_h, state_c = tf.keras.layers.LSTM(
                cell_size, return_sequences=True, return_state=True,
                name="lstm")(
                    inputs=input_layer,
                    mask=tf.sequence_mask(seq_in),
                    initial_state=[state_in_h, state_in_c])
            output_layer = tf.keras.layers.Dense(...)(lstm_out)

            # Create the RNN model
            self.rnn_model = tf.keras.Model(
                inputs=[input_layer, seq_in, state_in_h, state_in_c],
                outputs=[output_layer, state_h, state_c])
            self.rnn_model.summary()
    
input_dictstateseq_lensreturnc                 C   s\   t dr	tdd |dusJ |d }t||dd}| |||\}}t|d| jg|fS )	zAdds time dimension to batch before sending inputs to forward_rnn().

        You should implement forward_rnn() in your subclass.recurrent_network_tfz2ray.rllib.models.tf.recurrent_net.RecurrentNetwork)oldNobs_flattf)padded_inputsr   	framework)r   r   r
   forward_rnnr   reshapenum_outputs)selfr   r   r   flat_inputsinputsoutput	new_state r*   ^/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/ray/rllib/models/tf/recurrent_net.pyforward?   s   zRecurrentNetwork.forwardr'   c                 C      t d)a  Call the model with the given input tensors and state.

        Args:
            inputs: observation tensor with shape [B, T, obs_size].
            state: list of state tensors, each with shape [B, T, size].
            seq_lens: 1d tensor holding input sequence lengths.

        Returns:
            (outputs, new_state): The model output tensor of shape
                [B, T, num_outputs] and the list of new state tensors each with
                shape [B, size].

        Sample implementation for the ``MyRNNClass`` example::

            def forward_rnn(self, inputs, state, seq_lens):
                model_out, h, c = self.rnn_model([inputs, seq_lens] + state)
                return model_out, [h, c]
        'You must implement this for a RNN modelNotImplementedError)r%   r'   r   r   r*   r*   r+   r"   ]   s   zRecurrentNetwork.forward_rnnc                 C   r-   )a  Get the initial recurrent state values for the model.

        Returns:
            list of np.array objects, if any

        Sample implementation for the ``MyRNNClass`` example::

            def get_initial_state(self):
                return [
                    np.zeros(self.cell_size, np.float32),
                    np.zeros(self.cell_size, np.float32),
                ]
        r.   r/   r%   r*   r*   r+   get_initial_statet   s   z"RecurrentNetwork.get_initial_stateN)__name__
__module____qualname____doc__r   r   r   strr   r   r   r,   r"   r2   r*   r*   r*   r+   r      s,    #

r   c                       s   e Zd ZdZdejjdejjdedede	f
 fddZ
eed	ee	ef d
ee dedeeee f f fddZeeded
ee dedeeee f fddZeedeej fddZeedefddZ  ZS )LSTMWrapperzGAn LSTM wrapper serving as an interface for ModelV2s that set use_lstm.	obs_spaceaction_spacer$   model_confignamec                    sl  t t| ||d || | jd u rtt| jj| _|d | _	|d | _
|d | _t| j| _d| _t| jD ]?}t|trI|  j|j7  _q9t|trZ|  jt|j7  _q9|jd urm|  jtt|j7  _q9|  jtt|7  _q9| j
r|  j| j7  _| jr|  jd7  _tjjjd | jfdd}|| _tjjj| j	fdd}tjjj| j	fd	d}	tjjjd
dtjd}
tjjj| j	dddd|t|
||	gd\}}}tjjj | jtjj!j"dd|}tjjj dd dd|}tjj#||
||	g||||gd| _$t%&t'j(r| j$)  |d r"t*t+j,| jdd| j-t+j.< |d r4t*t+j/dd| j-t+j0< d S d S )Nlstm_cell_sizelstm_use_prev_actionlstm_use_prev_rewardr      r'   )shaper<   hcr*   seq_in)rA   r<   dtypeTlstm)return_sequencesreturn_stater<   )r'   maskinitial_statelogits)
activationr<   values)r'   outputsr!   )spaceshift)rP   )1superr8   __init__r$   intnpprodr9   rA   	cell_sizeuse_prev_actionuse_prev_rewardr   r:   action_space_struct
action_dimtreeflatten
isinstancer   nr   sumnveclenr   keraslayersInputint32LSTMsequence_maskDenseactivationslinearModel
_rnn_modelloggerisEnabledForloggingINFOsummaryr   r   ACTIONSview_requirementsPREV_ACTIONSREWARDSPREV_REWARDS)r%   r9   r:   r$   r;   r<   rO   input_layer
state_in_h
state_in_crD   lstm_outstate_hstate_crK   rM   	__class__r*   r+   rR      sr   










	





zLSTMWrapper.__init__r   r   r   r   c                    s   |d usJ |  |g d \}}g }| jd rK|tj }| jd r,|t|| jdd nt| jt	t
fr:t|| j}|tt|tjd| jg | jd rc|tt|tj tjddg |rotj|g| dd}||d	< t |||S )
Nr>   _disable_action_flatteningF)spaces_struct	time_axisr!   r?   r@   )axisr   )_wrapped_forwardr;   r   rt   appendr   rY   r]   r:   r   r   r   r   r#   castfloat32rZ   rv   concatrQ   r,   )r%   r   r   r   wrapped_out_prev_a_rprev_ar}   r*   r+   r,      s8   




zLSTMWrapper.forwardr'   c                 C   s(   |  ||g| \}| _}}|||gfS N)rl   
_value_out)r%   r'   r   r   	model_outrB   rC   r*   r*   r+   r"     s   zLSTMWrapper.forward_rnnc                 C   s    t | jt jt | jt jgS r   )rT   zerosrV   r   r1   r*   r*   r+   r2     s   zLSTMWrapper.get_initial_statec                 C   s   t | jdgS )Nr!   )r   r#   r   r1   r*   r*   r+   value_function#  s   zLSTMWrapper.value_function)r3   r4   r5   r6   gymspacesSpacerS   r   r7   rR   r   r   r   r   r   r   r,   r"   r   rT   ndarrayr2   r   __classcell__r*   r*   r}   r+   r8      sJ    X
3r8   )0ro   typingr   r   r   	gymnasiumr   numpyrT   r[   gymnasium.spacesr   r   ray._common.deprecationr   ray.rllib.models.modelv2r   ray.rllib.models.tf.tf_modelv2r	   ray.rllib.policy.rnn_sequencingr
   ray.rllib.policy.sample_batchr   !ray.rllib.policy.view_requirementr   ray.rllib.utils.annotationsr   r   ray.rllib.utils.frameworkr   "ray.rllib.utils.spaces.space_utilsr   ray.rllib.utils.tf_utilsr   r   ray.rllib.utils.typingr   r   ray.util.debugr   tf1r   tfv	getLoggerr3   rm   r   r8   r*   r*   r*   r+   <module>   s0    
j