o
    $i2                     @   s  d dl Z d dlmZmZ d dlmZmZmZmZm	Z	m
Z
mZ d dlZd dlZd dlZd dlmZmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& erd dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0 e 1e2Z3e \Z4Z5Z6eddddde	e7 de8de	e7 fddZ9ede&de&de&fddZ:e		dode%de	e$ de;de&fd d!Z<edee8 fd"d#Z=eddddd$d%d&e	ej> d'e	e d(e	e8 de;d)e;dd*fd+d,Z?ed-e
d. d/ed0e#f de
ed1  fd2d3Z@edpd5e&d6e7de&fd7d8ZAed5e&de&fd9d:ZBe	dqd;e	d< d=e;defd>d?ZCe	@drdAe dBe&dCedD dEe7de!f
dFdGZDed5e&d&ej>de&fdHdIZEedsd5e&dJe	eF de&fdKdLZGe	dqdMee8dNf dOe;dedD fdPdQZHedtdSdTZIedudUdVZJe	W	X	Y	dvd'dRdZeFd[e7d\e7fd]d^ZKed_e"d`e"dae7ddfdbdcZLedde%de&fdedfZMedge
d. dhe&ddfdidjZNdkdl ZOeG dmdn dnZPdS )w    N)OrderedDictdeque)TYPE_CHECKINGAnyCallableListOptionalTypeUnion)DiscreteMultiDiscrete)
force_list)DeveloperAPI	PublicAPI)try_import_tf)SMALL_NUMBER)get_base_struct_from_space)LocalOptimizerModelGradientsNetworkTypePartialAlgorithmConfigDictSpaceStructTensorStructType
TensorType)AlgorithmConfig)	ParamDictEagerTFPolicyEagerTFPolicyV2TFPolicy)	grad_clipgradients_dictr   r"   grad_clip_byreturnc                C   s   |du rdS |dkr!|    D ]\}}t|| || |< qdS |dkr:|    D ]\}}t||| |< q+dS |dks@J tt|  |\}}t|   	 |D ]\}}|| |< qU|S )a?  Performs gradient clipping on a grad-dict based on a clip value and clip mode.

    Changes the provided gradient dict in place.

    Args:
        gradients_dict: The gradients dict, mapping str to gradient tensors.
        grad_clip: The value to clip with. The way gradients are clipped is defined
            by the `grad_clip_by` arg (see below).
        grad_clip_by: One of 'value', 'norm', or 'global_norm'.

    Returns:
        If `grad_clip_by`="global_norm" and `grad_clip` is not None, returns the global
        norm of all tensors, otherwise returns None.
    Nvaluenormglobal_norm)
copyitemstfclip_by_valueclip_by_normclip_by_global_normlistvalueszipkeys)r#   r"   r$   kvclipped_gradsr(    r6   U/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/ray/rllib/utils/tf_utils.pyclip_gradients$   s"   
r8   ypredc                 C   sH   t jj| dgd\}}t jj| | dgd\}}t dd||t   S )a,  Computes the explained variance for a pair of labels and predictions.

    The formula used is:
    max(-1.0, 1.0 - (std(y - pred)^2 / std(y)^2))

    Args:
        y: The labels.
        pred: The predictions.

    Returns:
        The explained variance given a pair of labels and predictions.
    r   )axesg         )r+   nnmomentsmaximumr   )r9   r:   _y_vardiff_varr6   r6   r7   explained_varianceU   s   rC   Finputsspaces_struct	time_axisc                 C   sZ  t | }|durt |ndgt| }d}d}g }t||D ]x\}}	t|}t|}
|du r<|
d }|r<|
d }t|	trZ|rLt	||| g}|
tt||	tj q t|	try|rkt	||| dg}|
tt||	tj q |rt	||| dg}nt	||dg}|
t|tj q tj|dd}|rt	|||dg}|S )aK  Flattens arbitrary input structs according to the given spaces struct.

    Returns a single 1D tensor resulting from the different input
    components' values.

    Thereby:
    - Boxes (any shape) get flattened to (B, [T]?, -1). Note that image boxes
    are not treated differently from other types of Boxes and get
    flattened as well.
    - Discrete (int) values are one-hot'd, e.g. a batch of [1, 0, 3] (B=3 with
    Discrete(4) space) results in [[0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 0, 1]].
    - MultiDiscrete values are multi-one-hot'd, e.g. a batch of
    [[0, 2], [1, 4]] (B=2 with MultiDiscrete([2, 5]) space) results in
    [[1, 0,  0, 0, 1, 0, 0], [0, 1,  0, 0, 0, 0, 1]].

    Args:
        inputs: The inputs to be flattened.
        spaces_struct: The structure of the spaces that behind the input
        time_axis: Whether all inputs have a time-axis (after the batch axis).
            If True, will keep not only the batch axis (0th), but the time axis
            (1st) as-is and flatten everything from the 2nd axis up.

    Returns:
        A single 1D tensor resulting from concatenating all
        flattened/one-hot'd input components. Depending on the time_axis flag,
        the shape is (B, n) or (B, T, n).

    .. testcode::
        :skipif: True

        # B=2
        from ray.rllib.utils.tf_utils import flatten_inputs_to_1d_tensor
        from gymnasium.spaces import Discrete, Box
        out = flatten_inputs_to_1d_tensor(
            {"a": [1, 0], "b": [[[0.0], [0.1]], [1.0], [1.1]]},
            spaces_struct=dict(a=Discrete(2), b=Box(shape=(2, 1)))
        )
        print(out)

        # B=2; T=2
        out = flatten_inputs_to_1d_tensor(
            ([[1, 0], [0, 1]],
             [[[0.0, 0.1], [1.0, 1.1]], [[2.0, 2.1], [3.0, 3.1]]]),
            spaces_struct=tuple([Discrete(2), Box(shape=(2, ))]),
            time_axis=True
        )
        print(out)

    .. testoutput::

        [[0.0, 1.0,  0.0, 0.1], [1.0, 0.0,  1.0, 1.1]]  # B=2 n=4
        [[[0.0, 1.0, 0.0, 0.1], [1.0, 0.0, 1.0, 1.1]],
        [[1.0, 0.0, 2.0, 2.1], [0.0, 1.0, 3.0, 3.1]]]  # B=2 T=2 n=4
    Nr   r<   axis)treeflattenlenr1   r+   convert_to_tensorshape
isinstancer   reshapeappendcastone_hotfloat32r   concat)rD   rE   rF   flat_inputsflat_spacesBToutinput_spacerN   mergedr6   r6   r7   flatten_inputs_to_1d_tensorh   s>   
=




r^   c                  C   sZ   t dkrddlm}  |  }nztj }W n ty%   tjj }Y nw dd |D S )zReturns a list of GPU device names, e.g. ["/gpu:0", "/gpu:1"].

    Supports both tf1.x and tf2.x.

    Returns:
        List of GPU device names (str).
    r<   r   )
device_libc                 S   s   g | ]
}d |j v r|jqS )GPU)device_typename).0dr6   r6   r7   
<listcomp>   s    z#get_gpu_devices.<locals>.<listcomp>)	tfvtensorflow.python.clientr_   list_local_devicesr+   configlist_physical_devices	Exceptionexperimental)r_   devicesr6   r6   r7   get_gpu_devices   s   	
rn   T)r\   r&   rb   rF   rK   r\   r&   rb   rK   ztf1.placeholderc                    s   ddl m} | durGt| tjjtjjfr)|r|| dS t	 fddt
| S tjd|r0dnd | j | jtjkrAtj dS | j dS |dusMJ |jd	d }tjd|r[dnd t|trd|nt|  |jtjkrvtj dS |j dS )
a  Returns a tf1.placeholder object given optional hints, such as a space.

    Note that the returned placeholder will always have a leading batch
    dimension (None).

    Args:
        space: An optional gym.Space to hint the shape and dtype of the
            placeholder.
        value: An optional value to hint the shape and dtype of the
            placeholder.
        name: An optional name for the placeholder.
        time_axis: Whether the placeholder should also receive a time
            dimension (None).
        flatten: Whether to flatten the given space into a plain Box space
            and then create the placeholder from the resulting space.

    Returns:
        The tf1 placeholder.
    r   )ModelCatalogNc                    s$   t | d ddd | D  dS )N.c                 S      g | ]}t |qS r6   strrc   pr6   r6   r7   re         z5get_placeholder.<locals>.<lambda>.<locals>.<listcomp>)r\   rb   )get_placeholderjoin)path	componentrb   r6   r7   <lambda>  s    z!get_placeholder.<locals>.<lambda>Nr6   )rN   dtyperb   r<   )ray.rllib.models.catalogro   rO   gymspacesDictTupleget_action_placeholderrJ   map_structure_with_pathr   tf1placeholderrN   r~   npfloat64r+   rT   tupleas_list)r\   r&   rb   rF   rK   ro   rN   r6   r{   r7   rw      s>   

rw   orig_clsr!   ri   r   )r!   r   r   c                 C   s   | }| dd}|dv rtstd|dkrbt st  t s$J ddlm} ddlm} dd	l	m
} t| d
rEt| |sE|  }nt| |sKntd| | drbt|||frb| }|S )a[  Returns the corresponding tf-eager class for a given TFPolicy class.

    Args:
        orig_cls: The original TFPolicy class to get the corresponding tf-eager
            class for.
        config: The Algorithm config dict or AlgorithmConfig object.

    Returns:
        The tf eager policy class corresponding to the given TFPolicy class.
    	frameworkr+   )tf2r+   zCould not import tensorflow!r   r   r   r   r    as_eagerz0This policy does not support eager execution: {}eager_tracing)getr   ImportErrorexecuting_eagerlyenable_eager_execution ray.rllib.policy.eager_tf_policyr   #ray.rllib.policy.eager_tf_policy_v2r   ray.rllib.policy.tf_policyr!   hasattr
issubclassr   
ValueErrorformatwith_tracing)r   ri   clsr   r   r   r!   r6   r6   r7   get_tf_eager_cls_if_necessary#  s.   

r         ?xdeltac                 C   s6   t t | |k t j| d |t | d|   S )a  Computes the huber loss for a given term and delta parameter.

    Reference: https://en.wikipedia.org/wiki/Huber_loss
    Note that the factor of 0.5 is implicitly included in the calculation.

    Formula:
        L = 0.5 * x^2  for small abs x (delta threshold)
        L = delta * (abs(x) - 0.5*delta)  for larger abs x (delta threshold)

    Args:
        x: The input term, e.g. a TD error.
        delta: The delta parmameter in the above formula.

    Returns:
        The Huber loss resulting from `x` and `delta`.
          ?)r+   whereabsmathsquare)r   r   r6   r6   r7   
huber_lossU  s
   r   c                 C   s   dt t | d S )zComputes half the L2 norm over a tensor's values without the sqrt.

    output = 0.5 * sum(x ** 2)

    Args:
        x: The input tensor.

    Returns:
        0.5 times the L2 norm over the given tensor's values (w/o sqrt).
    r          @)r+   
reduce_sumpowr   r6   r6   r7   l2_lossn  s   r   session_or_noneztf1.Sessiondynamic_shapec                    s4   t  rdu s
J ndusJ  fdd}|S )aL  Returns a function that can be executed in either graph or eager mode.

    The function must take only positional args.

    If eager is enabled, this will act as just a function. Otherwise, it
    will build a function that executes a session run with placeholders
    internally.

    Args:
        session_or_none: tf.Session if in graph mode, else None.
        dynamic_shape: True if the placeholders should have a dynamic
            batch dimension. Otherwise they will be fixed shape.

    Returns:
        A function that can be called in either eager or static-graph mode.
    Nc                    s4   d urg  i d g fdd}|S S )Nc            	         s  g }| D ]}t |tu r|| q|| q|} d d u rjj = fdd}t|| }t|D ]}| q7t||}|	 D ]\}}||< qIi d< W d    n1 sew   Y  t
tt|  t fdd| d  }|S )Nr   c                    sT    rt |jdkrd|jdd   }nd}n|j}tj|j|ddd | D dS )	Nr   r}   r<   r6   rp   c                 S   rq   r6   rr   rt   r6   r6   r7   re     rv   zfmake_tf_callable.<locals>.make_wrapper.<locals>.call.<locals>._create_placeholders.<locals>.<listcomp>)r~   rN   rb   )rL   rN   r   r   r~   rx   )ry   r&   rN   )r   r6   r7   _create_placeholders  s   zRmake_tf_callable.<locals>.make_wrapper.<locals>.call.<locals>._create_placeholdersc                    s     | |S r}   )__setitem__)phr4   	feed_dictr6   r7   r|     s    zFmake_tf_callable.<locals>.make_wrapper.<locals>.call.<locals>.<lambda>)typer/   extendrQ   graph
as_defaultrJ   r   rK   r*   dictr1   map_structurerun)	argskwargs	args_flatar   placeholdersr   r3   ret)args_placeholdersr   fnkwargs_placeholdersr   symbolic_outr   r7   call  s:   

z4make_tf_callable.<locals>.make_wrapper.<locals>.callr6   )r   r   r   r   )r   r   r   r   r7   make_wrapper  s   3z&make_tf_callable.<locals>.make_wrapper)r+   r   )r   r   r   r6   r   r7   make_tf_callable}  s
   Br         $@	optimizer	objectivevar_listztf.Variableclip_valc                    sb    du s dksJ  t  r!| j}ttt||||}n| j||d} fdd|D S )a  Computes, then clips gradients using objective, optimizer and var list.

    Ensures the norm of the gradients for each variable is clipped to
    `clip_val`.

    Args:
        optimizer: Either a shim optimizer (tf eager) containing a
            tf.GradientTape under `self.tape` or a tf1 local optimizer
            object.
        objective: The loss tensor to calculate gradients on.
        var_list: The list of tf.Variables to compute gradients over.
        clip_val: The global norm clip value. Will clip around -clip_val and
            +clip_val.

    Returns:
        The resulting model gradients (list or tuples of grads + vars)
        corresponding to the input `var_list`.
    N        )r   c                    s4   g | ]\}}|d ur d urt | n||fqS r}   )r+   r-   )rc   gr4   r   r6   r7   re      s
    z%minimize_and_clip.<locals>.<listcomp>)r+   r   taper/   r1   gradientcompute_gradients)r   r   r   r   r   grads_and_varsr6   r   r7   minimize_and_clip  s   
r   c                    s   t |trtj |jtjdS t |trBt |jd tj	r/t
|j}t  jd df n|j}tj fddt|D ddS td|)aj  Returns a one-hot tensor, given and int tensor and a space.

    Handles the MultiDiscrete case as well.

    Args:
        x: The input tensor.
        space: The space to use for generating the one-hot tensor.

    Returns:
        The resulting one-hot tensor.

    Raises:
        ValueError: If the given space is not a discrete one.

    .. testcode::
        :skipif: True

        import gymnasium as gym
        import tensorflow as tf
        from ray.rllib.utils.tf_utils import one_hot
        x = tf.Variable([0, 3], dtype=tf.int32)  # batch-dim=2
        # Discrete space with 4 (one-hot) slots per batch item.
        s = gym.spaces.Discrete(4)
        one_hot(x, s)

    .. testoutput::

        <tf.Tensor 'one_hot:0' shape=(2, 4) dtype=float32>

    .. testcode::
        :skipif: True

        x = tf.Variable([[0, 1, 2, 3]], dtype=tf.int32)  # batch-dim=1
        # MultiDiscrete space with 5 + 4 + 4 + 7 = 20 (one-hot) slots
        # per batch item.
        s = gym.spaces.MultiDiscrete([5, 4, 4, 7])
        one_hot(x, s)

    .. testoutput::

        <tf.Tensor 'concat:0' shape=(1, 20) dtype=float32>
    r~   r   rG   c                    s.   g | ]\}}t j d d |f |t jdqS )Nr   )r+   rS   rT   )rc   inr   r6   r7   re   <  s   . zone_hot.<locals>.<listcomp>rH   z#Unsupported space for `one_hot`: {})rO   r   r+   rS   r   rT   r   nvecr   ndarrayravelrP   rN   rU   	enumerater   r   )r   r\   r   r6   r   r7   rS     s   
,
rS   rI   c                 C   sJ   t | t jj}t || t | }t j||t jt |t j| S )zSame as tf.reduce_mean() but ignores -inf values.

    Args:
        x: The input tensor to reduce mean over.
        axis: The axis over which to reduce. None for all axes.

    Returns:
        The mean reduced inputs, ignoring inf values.
    )	r+   	not_equalrT   minr   
zeros_liker   r   rR   )r   rI   maskx_zeroedr6   r6   r7   reduce_mean_ignore_infC  s
   r   scopeztf1.VariableScopetrainable_onlyc                 C   s2   t j|rt jjnt jjt| tr| dS | jdS )a  Get variables inside a given scope.

    Args:
        scope: Scope in which the variables reside.
        trainable_only: Whether or not to return only the variables that were
            marked as trainable.

    Returns:
        The list of variables in the given `scope`.
    )r   )r   get_collection	GraphKeysTRAINABLE_VARIABLES	VARIABLESrO   rs   rb   )r   r   r6   r6   r7   
scope_varsU  s   
r   	tf.Tensorc                 C   s$   t j| t jt j| d  S )zThe symlog function as described in [1]:

    [1] Mastering Diverse Domains through World Models - 2023
    D. Hafner, J. Pasukonis, J. Ba, T. Lillicrap
    https://arxiv.org/pdf/2301.04104v1.pdf
    r<   )r+   r   signlogr   r   r6   r6   r7   symlogk  s   $r   c                 C   s$   t j| t jt j| d  S )zInverse of the `symlog` function as desribed in [1]:

    [1] Mastering Diverse Domains through World Models - 2023
    D. Hafner, J. Pasukonis, J. Ba, T. Lillicrap
    https://arxiv.org/pdf/2301.04104v1.pdf
    r<   )r+   r   r   expr   )r9   r6   r6   r7   inverse_symlogv  s   $r            4      4@num_bucketslower_boundupper_boundc                 C   s6  t | ||} t jt dt | d |pt jd}|| |d  }| |  | }t j|}t j|}	t 	t 
||	|	d |	}	t 	t 
|	||	d |	}	|||  }
||	|  }| | |
|  }d| }t ||gd}t ||	gd}t ||gd}t ||gd}t jt |t j|t | d |fdS )a+  Returns a two-hot vector of dim=num_buckets with two entries that are non-zero.

    See [1] for more details:
    [1] Mastering Diverse Domains through World Models - 2023
    D. Hafner, J. Pasukonis, J. Ba, T. Lillicrap
    https://arxiv.org/pdf/2301.04104v1.pdf

    Entries in the vector represent equally sized buckets within some fixed range
    (`lower_bound` to `upper_bound`).
    Those entries not 0.0 at positions k and k+1 encode the actual `value` and sum
    up to 1.0. They are the weights multiplied by the buckets values at k and k+1 for
    retrieving `value`.

    Example:
        num_buckets=11
        lower_bound=-5
        upper_bound=5
        value=2.5
        -> [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0]
        -> [-5   -4   -3   -2   -1   0    1    2    3    4    5] (0.5*2 + 0.5*3=2.5)

    Example:
        num_buckets=5
        lower_bound=-1
        upper_bound=1
        value=0.1
        -> [0.0, 0.0, 0.8, 0.2, 0.0]
        -> [-1  -0.5   0   0.5   1] (0.2*0.5 + 0.8*0=0.1)

    Args:
        value: The input tensor of shape (B,) to be two-hot encoded.
        num_buckets: The number of buckets to two-hot encode into.
        lower_bound: The lower bound value used for the encoding. If input values are
            lower than this boundary, they will be encoded as `lower_bound`.
        upper_bound: The upper bound value used for the encoding. If input values are
            higher than this boundary, they will be encoded as `upper_bound`.

    Returns:
        The two-hot encoded tensor of shape (B, num_buckets).
    r   r   r<   r   r   rG   )rN   )r+   r,   rR   rangerN   rT   r   floorceilr   equalstackrU   
scatter_ndint32)r&   r   r   r   r~   batch_indicesbucket_deltaidxr3   kp1values_k
values_kp1	weights_kweights_kp1	indices_kindices_kp1indicesupdatesr6   r6   r7   two_hot  s0   1r	  main_net
target_nettauc                 C   s:   t |j| jD ]\}}|| d| |  }|| qdS )a`  Updates a keras.Model target network using Polyak averaging.

    new_target_net_weight = (
        tau * main_net_weight + (1.0 - tau) * current_target_net_weight
    )

    Args:
        main_net: The keras.Model to update from.
        target_net: The target network to update.
        tau: The tau value to use in the Polyak averaging formula.
    r   N)r1   	variablesassign)r
  r  r  old_varcurrent_varupdated_varr6   r6   r7   update_target_network  s   r  actionsc                 C   sN   t | d }tj|tjd}t|jdkr%|dddf }t|jdks|S )a  Helper function useful for returning dummy logp's (0) for some actions.

    Args:
        actions: The input actions. This can be any struct
            of complex action components or a simple tensor of different
            dimensions, e.g. [B], [B, 2], or {"a": [B, 4, 5], "b": [B]}.

    Returns:
        A 1D tensor of 0.0 (dummy logp's) matching the batch
        dim of `actions` (shape=[B]).
    r   r   r<   N)rJ   rK   r+   r   rT   rL   rN   )r  action_componentlogp_r6   r6   r7   zero_logps_from_actions   s   r  policymean_klc                    s<   dd  |   rtjtj|dd  fddd d S d S )Nc                   S   s   t d tdS )Na}  KL divergence is non-finite, this will likely destabilize your model and the training process. Action(s) in a specific state have near-zero probability. This can happen naturally in deterministic environments where the optimal policy has zero mass for a specific action. To fix this issue, consider setting the coefficient for the KL loss term to zero or increasing policy entropy.r   )loggerwarningr+   constantr6   r6   r6   r7   print_warning  s   
z5warn_if_infinite_kl_divergence.<locals>.print_warningc                   S   s
   t dS )Nr   )r+   r  r6   r6   r6   r7   r|   ,     
 z0warn_if_infinite_kl_divergence.<locals>.<lambda>c                      s     S r}   r6   r6   r  r6   r7   r|   -  s    )false_fntrue_fn)loss_initializedr+   condr   is_inf)r  r  r6   r  r7   warn_if_infinite_kl_divergence  s   


r$  c                 C   sb   d}g }|D ]}t j|t jd}| |||  |}|| ||7 }qt| |ks/J d|S )Nr   r   z.Passed weight does not have the correct shape.)r   prodint_rP   rQ   rL   )vectorshapesr   arraysrN   sizearrayr6   r6   r7   
_unflatten1  s   

r,  c                   @   sP   e Zd ZdZdddZdd Zdd Zd	d
 Zdd Zde	fddZ
dd ZdS )TensorFlowVariablesa  A class used to set and get weights for Tensorflow networks.

    Attributes:
        sess (tf.Session): The tensorflow session used to run assignment.
        variables (Dict[str, tf.Variable]): Extracted variables from the loss
            or additional variables that are passed in.
        placeholders (Dict[str, tf.placeholders]): Placeholders for weights.
        assignment_nodes (Dict[str, tf.Tensor]): Nodes that assign weights.
    Nc                    s  || _ t|}t|}g  t|}t|dkri| }|du r qt|dr(|j}|jD ]}||vr;|	| |
| q+|jD ]}||vrO|	| |
| q?d|jjv s\d|jjv rc 	|jj t|dkst | _ fddt D }	|dur|	|7 }	t s|	D ]
}
|
| j|
jjj< qi | _i | _| j D ]#\}}tj| j|  d| d	| j|< || j| | j|< qdS |	D ]}
|
| j|
j< qdS )
aU  Creates TensorFlowVariables containing extracted variables.

        The variables are extracted by performing a BFS search on the
        dependency graph with loss as the root node. After the tree is
        traversed and those variables are collected, we append input_variables
        to the collected variables. For each variable in the list, the
        variable has a placeholder and assignment operation created for it.

        Args:
            output (tf.Operation, List[tf.Operation]): The tensorflow
                operation to extract all variables from.
            sess (Optional[tf.Session]): Optional tf.Session used for running
                the get and set methods in tf graph mode.
                Use None for tf eager.
            input_variables (List[tf.Variables]): Variables to include in the
                list.
        r   NopVariable	VarHandlec                    s   g | ]}|j jj v r|qS r6   )r.  node_defrb   rc   r4   variable_namesr6   r7   re   y  s    z0TensorFlowVariables.__init__.<locals>.<listcomp>Placeholder_r{   )sessr   r   setrL   popleftr   r.  rD   rQ   addcontrol_inputsr1  rb   r   r  r   global_variablesr   r   assignment_nodesr*   r   r&   r~   	get_shaper   r  )selfoutputr6  input_variablesqueueexplored_inputstf_objinput_opcontrolvariable_listr4   r3   varr6   r3  r7   __init__I  s\   








zTensorFlowVariables.__init__c                 C   s   t dd | j D S )zReturns the total length of all of the flattened variables.

        Returns:
            The length of all flattened variables concatenated.
        c                 s   s"    | ]}t |  V  qd S r}   )r   r%  r=  r   r2  r6   r6   r7   	<genexpr>  s     z4TensorFlowVariables.get_flat_size.<locals>.<genexpr>)sumr  r0   r>  r6   r6   r7   get_flat_size  s   z!TensorFlowVariables.get_flat_sizec                    s>    j stdd  j D S t fdd j D S )zGets the weights and returns them as a flat array.

        Returns:
            1D Array containing the flattened weights.
        c                 S      g | ]}|   qS r6   )numpyrK   r2  r6   r6   r7   re         z0TensorFlowVariables.get_flat.<locals>.<listcomp>c                    s   g | ]}|j  jd  qS ))session)evalr6  rK   r2  rK  r6   r7   re     s    )r6  r   concatenater  r0   rK  r6   rK  r7   get_flat  s   zTensorFlowVariables.get_flatc                    s   dd  j  D }t||} js&t j  |D ]	\}}|| qdS  fdd j  D } jjt j	 t
t||d dS )a;  Sets the weights to new_weights, converting from a flat array.

        Note:
            You can only set all weights in the network using this function,
            i.e., the length of the array must match get_flat_size.

        Args:
            new_weights (np.ndarray): Flat array containing weights.
        c                 S   rM  r6   )r=  r   r2  r6   r6   r7   re     rO  z0TensorFlowVariables.set_flat.<locals>.<listcomp>c                    s   g | ]	\}} j | qS r6   )r   )rc   r3   r4   rK  r6   r7   re     s    r   N)r  r0   r,  r6  r1   r  r*   r   r/   r<  r   )r>  new_weightsr(  r)  r4   r   r   r6   rK  r7   set_flat  s   


zTensorFlowVariables.set_flatc                 C   s   | j s| jS | j | jS )zReturns a dictionary containing the weights of the network.

        Returns:
            Dictionary mapping variable names to their weights.
        )r6  r  r   rK  r6   r6   r7   get_weights  s   zTensorFlowVariables.get_weightsrT  c                 C   sR   | j du r| j D ]\}}|||  q
dS | |\}}| j j||d dS )a  Sets the weights to new_weights.

        Note:
            Can set subsets of variables as well, by only passing in the
            variables you want to be set.

        Args:
            new_weights: Dictionary mapping variable names to their
                weights.
        Nr   )r6  r  r*   r  _assign_weightsr   )r>  rT  rb   rG  assign_listr   r6   r6   r7   set_weights  s   
zTensorFlowVariables.set_weightsc                    s   g i t j  dd  fdd}| D ]3\ v r*| qfdd D }fddt| d	d
 dD }|rO||d  qsVJ dtt|ksbJ dfddD fS )a'  Sets weigths using exact or closest assignable variable name

        Args:
            weights: Dictionary mapping variable names to their
                weights.
        Returns:
            Tuple[List, Dict]: assigned variables list, dict of
                placeholders and weights
        c                    s   t  fdd| D S )Nc                    s   g | ]}| v r|qS r6   r6   )rc   el2r6   r7   re     rO  zOTensorFlowVariables._assign_weights.<locals>.nb_common_elem.<locals>.<listcomp>)rL   )l1r\  r6   r[  r7   nb_common_elem  s   z;TensorFlowVariables._assign_weights.<locals>.nb_common_elemc                    s&   |j |  < |   |  d S r}   )r   rQ   remove)rb   r&   )
assignableassignedr   r>  r6   r7   r    s   
z3TensorFlowVariables._assign_weights.<locals>.assignc                    s$   i | ]}|  d | d qS )/)split)rc   rG  )rb   r^  r6   r7   
<dictcomp>  s    z7TensorFlowVariables._assign_weights.<locals>.<dictcomp>c                    s.   g | ]\}}|d krj  j| j kr|qS )r   )rN   r<  )rc   	close_varcn)r>  r&   r6   r7   re     s
    z7TensorFlowVariables._assign_weights.<locals>.<listcomp>c                 S   s
   | d  S )Nr<   r6   )r   r6   r6   r7   r|     r  z5TensorFlowVariables._assign_weights.<locals>.<lambda>)keyr   zNo variables in the input matched those in the network. Possible cause: Two networks were defined in the same TensorFlow graph. To fix this, place each network definition in its own tf.Graph.z^All weights couldn't be assigned because no variable had an exact/close name or had same shapec                    s   g | ]} j | qS r6   )r<  r2  rK  r6   r7   re     s    )r7  r<  r2   r*   sortedrL   )r>  weightsr  commonselectr6   )r`  ra  r   rb   r^  r>  r&   r7   rW    s0   z#TensorFlowVariables._assign_weights)NN)__name__
__module____qualname____doc__rH  rL  rS  rU  rV  r   rY  rW  r6   r6   r6   r7   r-  =  s    

Ir-  )NF)r   )F)r   r}   )r   r   r%   r   )r9   r   r%   r   )r   r   r   N)Qloggingcollectionsr   r   typingr   r   r   r   r   r	   r
   	gymnasiumr   rN  r   rJ   gymnasium.spacesr   r   ray.rllib.utilsr   ray.rllib.utils.annotationsr   r   ray.rllib.utils.frameworkr   ray.rllib.utils.numpyr   "ray.rllib.utils.spaces.space_utilsr   ray.rllib.utils.typingr   r   r   r   r   r   r   %ray.rllib.algorithms.algorithm_configr   ray.rllib.core.learner.learnerr   r   r   r   r   r   r!   	getLoggerrl  r  r   r+   rf   floatrs   r8   rC   boolr^   rn   Spacerw   r   r   r   r   r   rS   intr   r   r   r   r	  r  r  r$  r,  r-  r6   r6   r6   r7   <module>   s6   $$

0h:

1`(;

\