o
    $i                     @   s   d Z ddlZddlZddlZddlmZmZ ddlm	Z	 ddl
mZ eeZe	dejfddZe	dd
ededefddZe	dd ZdS )z7Utils for minibatch SGD across multiple RLlib policies.    N)MultiAgentBatchSampleBatch)OldAPIStack)LearnerInfoBuilderarrayc                 C   s   | |    td|   S )zNormalize the values in an array.

    Args:
        array (np.ndarray): Array of values to normalize.

    Returns:
        array with zero mean and unit standard deviation.
    g-C6?)meanmaxstd)r    r
   P/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/ray/rllib/utils/sgd.pystandardized   s   
r   Tsamplessgd_minibatch_sizeshufflec           
      c   s    |s| V  dS t | trtdd| vrd| vr|   | |}|\}}t|dkrC|r3t| |D ]\}}| || V  q5dS tt||}|rQt| |D ]\\}}\}}	| 	||||	V  qSdS )a  Return a generator yielding minibatches from a sample batch.

    Args:
        samples: SampleBatch to split up.
        sgd_minibatch_size: Size of minibatches to return.
        shuffle: Whether to shuffle the order of the generated minibatches.
            Note that in case of a non-recurrent policy, the incoming batch
            is globally shuffled first regardless of this setting, before
            the minibatches are generated from it!

    Yields:
        SampleBatch: Each of size `sgd_minibatch_size`.
    Nz;Minibatching not implemented for multi-agent in simple mode
state_in_0state_out_0r   )

isinstancer   NotImplementedErrorr   _get_slice_indiceslenrandomlistzipslice)
r   r   r   
all_slicesdata_slicesstate_slicesijsisjr
   r
   r   minibatches   s0   



r!   c                 C   s   |   } tdd}| D ]X\}}|| jvrq| j| }	|D ]
}
t|	|
 |	|
< q| rC|jd d |krCtd||jd d t	|D ]}t
|	|D ]}|t||i|j| }||| qNqGq| }|S )a	  Execute minibatch SGD.

    Args:
        samples: Batch of samples to optimize.
        policies: Dictionary of policies to optimize.
        local_worker: Master rollout worker instance.
        num_sgd_iter: Number of epochs of optimization to take.
        sgd_minibatch_size: Size of minibatches to use for optimization.
        standardize_fields: List of sample field names that should be
            normalized prior to optimization.

    Returns:
        averaged info fetches over the last SGD epoch taken.
       )num_devicesmodelmax_seq_lenzC`sgd_minibatch_size` ({}) cannot be smaller than`max_seq_len` ({}).)as_multi_agentr   itemspolicy_batchesr   is_recurrentconfig
ValueErrorformatranger!   learn_on_batchr   countadd_learn_on_batch_resultsfinalize)r   policieslocal_workernum_sgd_iterr   standardize_fieldslearner_info_builder	policy_idpolicybatchfieldr   	minibatchresultslearner_infor
   r
   r   do_minibatch_sgdH   s:   


	r>   )T)__doc__loggingr   numpynpray.rllib.policy.sample_batchr   r   ray.rllib.utils.annotationsr   $ray.rllib.utils.metrics.learner_infor   	getLogger__name__loggerndarrayr   intboolr!   r>   r
   r
   r
   r   <module>   s    
+