o
    ci/                     @   s"  d dl Zd dlZd dlmZmZ d dlZd dlm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dl m!Z!m"Z" d dl#m$Z$m%Z% d dl&m'Z' d dl(m)Z) e \Z*Z+eG dd deZ,eG dd de,e+j-Z.dS )    N)DiscreteMultiDiscrete)DictListUnionTuple)ModelV2)SlimFC)TorchModelV2)add_time_dimension)SampleBatch)ViewRequirement)OldAPIStackoverride)try_import_torch)get_base_struct_from_space)flatten_inputs_to_1d_tensorone_hot)ModelConfigDict
TensorType)deprecation_warning)log_oncec                   @   st   e Zd ZdZeedeeef de	e dede
ee	e f fddZdede	e dede
ee	e f fd	d
ZdS )RecurrentNetworka  Helper class to simplify implementing RNN models with TorchModelV2.

    Instead of implementing forward(), you can implement forward_rnn() which
    takes batches with the time dimension added already.

    Here is an example implementation for a subclass
    ``MyRNNClass(RecurrentNetwork, nn.Module)``::

        def __init__(self, obs_space, num_outputs):
            nn.Module.__init__(self)
            super().__init__(obs_space, action_space, num_outputs,
                             model_config, name)
            self.obs_size = _get_size(obs_space)
            self.rnn_hidden_dim = model_config["lstm_cell_size"]
            self.fc1 = nn.Linear(self.obs_size, self.rnn_hidden_dim)
            self.rnn = nn.GRUCell(self.rnn_hidden_dim, self.rnn_hidden_dim)
            self.fc2 = nn.Linear(self.rnn_hidden_dim, num_outputs)

            self.value_branch = nn.Linear(self.rnn_hidden_dim, 1)
            self._cur_value = None

        @override(ModelV2)
        def get_initial_state(self):
            # Place hidden states on same device as model.
            h = [self.fc1.weight.new(
                1, self.rnn_hidden_dim).zero_().squeeze(0)]
            return h

        @override(ModelV2)
        def value_function(self):
            assert self._cur_value is not None, "must call forward() first"
            return self._cur_value

        @override(RecurrentNetwork)
        def forward_rnn(self, input_dict, state, seq_lens):
            x = nn.functional.relu(self.fc1(input_dict["obs_flat"].float()))
            h_in = state[0].reshape(-1, self.rnn_hidden_dim)
            h = self.rnn(x, h_in)
            q = self.fc2(h)
            self._cur_value = self.value_branch(h).squeeze(1)
            return q, [h]
    
input_dictstateseq_lensreturnc                 C   sl   t dr	tdd |d  }| jdd| _t||d| jd}| |||\}}t	|d	| j
g}||fS )
zAdds time dimension to batch before sending inputs to forward_rnn().

        You should implement forward_rnn() in your subclass.recurrent_network_tfz5ray.rllib.models.torch.recurrent_net.RecurrentNetwork)oldobs_flat_time_majorFtorch)r   	framework
time_major)r   r   floatmodel_configgetr#   r   forward_rnnr!   reshapenum_outputs)selfr   r   r   flat_inputsinputsoutput	new_state r0   X/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/models/torch/recurrent_net.pyforwardE   s   zRecurrentNetwork.forwardr-   c                 C   s   t d)a  Call the model with the given input tensors and state.

        Args:
            inputs: Observation tensor with shape [B, T, obs_size].
            state: List of state tensors, each with shape [B, size].
            seq_lens: 1D tensor holding input sequence lengths.
                Note: len(seq_lens) == B.

        Returns:
            (outputs, new_state): The model output tensor of shape
                [B, T, num_outputs] and the list of new state tensors each with
                shape [B, size].

        Examples:
            def forward_rnn(self, inputs, state, seq_lens):
                model_out, h, c = self.rnn_model([inputs, seq_lens] + state)
                return model_out, [h, c]
        z(You must implement this for an RNN model)NotImplementedError)r+   r-   r   r   r0   r0   r1   r(   f   s   zRecurrentNetwork.forward_rnnN)__name__
__module____qualname____doc__r   r   r   strr   r   r   r2   r(   r0   r0   r0   r1   r      s*    +
 r   c                       s   e Zd ZdZdejjdejjdedede	f
 fddZ
eed	ee	ef d
ee dedeeee f f fddZeeded
ee dedeeee f fddZeedeeej ee f fddZeedefddZ  ZS )LSTMWrapperzGAn LSTM wrapper serving as an interface for ModelV2s that set use_lstm.	obs_spaceaction_spacer*   r&   namec                    s  t j|  tt| ||d || | jd u r!tt| j	j
| _|d | _|dd| _|d | _|d | _t| j| _d| _t| jD ]?}t|trV|  j|j7  _qFt|trg|  jt|j7  _qF|j
d urz|  jtt|j
7  _qF|  jtt|7  _qF| jr|  j| j7  _| jr|  jd7  _t j| j| j| j d| _|| _t| j| jd t j j!j"d	| _#t| jdd t j j!j"d	| _$|d rt%t&j'| jd
d| j(t&j)< |d rt%t&j*d
d| j(t&j+< d S d S )Nlstm_cell_sizer    Flstm_use_prev_actionlstm_use_prev_rewardr      )batch_first)in_sizeout_sizeactivation_fninitializerr$   )spaceshift)rG   ),nnModule__init__superr9   r*   intnpprodr:   shape	cell_sizer'   r#   use_prev_actionuse_prev_rewardr   r;   action_space_struct
action_dimtreeflatten
isinstancer   nr   sumnveclenLSTMlstmr	   r!   initxavier_uniform__logits_branch_value_branchr   r   ACTIONSview_requirementsPREV_ACTIONSREWARDSPREV_REWARDS)r+   r:   r;   r*   r&   r<   rF   	__class__r0   r1   rJ      sb   








	
zLSTMWrapper.__init__r   r   r   r   c                    s   |d usJ |  |g d \}}g }| jd rM|tj }| jd r,|t|| jdd n!t| jt	t
fr=t| | j}n| }|t|d| jg | jd rb|t|tj  ddg |rntj|g| dd}||d	< t |||S )
Nr>   _disable_action_flatteningF)spaces_struct	time_axisr$   r?   r@   )dimr   )_wrapped_forwardr&   r   rd   appendr   rS   rW   r;   r   r   r   r%   r!   r)   rT   rf   catrK   r2   )r+   r   r   r   wrapped_out_prev_a_rprev_arg   r0   r1   r2      s.   



zLSTMWrapper.forwardr-   c                 C   sZ   |  |t|d dt|d dg\| _\}}| | j}|t|dt|dgfS )Nr   r@   )r]   r!   	unsqueeze	_featuresr`   squeeze)r+   r-   r   r   hc	model_outr0   r0   r1   r(     s
    zLSTMWrapper.forward_rnnc                 C   sH   t | jj }|jd| j d|jd| j dg}|S )Nr@   r   )	nextr`   _modelchildrenweightnewrP   zero_rv   )r+   linearrw   r0   r0   r1   get_initial_state  s
   zLSTMWrapper.get_initial_statec                 C   s(   | j d us	J dt| | j dgS )Nzmust call forward() firstr$   )ru   r!   r)   ra   )r+   r0   r0   r1   value_function$  s   zLSTMWrapper.value_function)r4   r5   r6   r7   gymspacesSpacerL   r   r8   rJ   r   r   r   r   r   r   r2   r(   r   r   rM   ndarrayr   r   __classcell__r0   r0   rg   r1   r9   ~   sJ    P
/"	r9   )/numpyrM   	gymnasiumr   gymnasium.spacesr   r   rU   typingr   r   r   r   ray.rllib.models.modelv2r   ray.rllib.models.torch.miscr	   $ray.rllib.models.torch.torch_modelv2r
   ray.rllib.policy.rnn_sequencingr   ray.rllib.policy.sample_batchr   !ray.rllib.policy.view_requirementr   ray.rllib.utils.annotationsr   r   ray.rllib.utils.frameworkr   "ray.rllib.utils.spaces.space_utilsr   ray.rllib.utils.torch_utilsr   r   ray.rllib.utils.typingr   r   ray.rllib.utils.deprecationr   ray.util.debugr   r!   rH   r   rI   r9   r0   r0   r0   r1   <module>   s.    
e