o
    `۷i8                     @   s   d dl Z d dlmZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZ d dlmZ d dlmZ d dlmZ e eZeddG dd deZdS )    N)Path)List)AlgorithmConfig)Columns)	EnvRunner)SingleAgentEnvRunner)SingleAgentEpisode)OverrideToImplementCustomLogic5OverrideToImplementCustomLogic_CallToSuperRecommendedoverride)pack_if_needed)EpisodeType)	PublicAPI)log_oncealpha)	stabilityc                       s   e Zd ZdZeeedef fddZde	fddZ
eeedddd	d	d
dedededededee f fddZeeedddZedee ddfddZ  ZS )OfflineSingleAgentEnvRunnerz7The environment runner to record the single agent case.configc                   s  t  jdd|i| d| _tjj }|jjj	|j
d|j_| jj| _| jj| _| jj| _| jj| _d | _| jj| _| jrD|  | _n| jsY| jjrP| jdksU| jdkrYtdd| j d | _| jdkrwdd l}|jdi | j| _n8| jd	krdd
lm } |j!di | j| _n"| jdkrdd l"}|j#di | j| _n| jd urtd| j d| j$d| ji | jj%| _%| jj&| _&| jj'| _'| j'rd| _(nd| _(| jj)| _*d| _+g | _,d S )Nr   F)cpur   zrTo set up the output path, the environment `env` must be provided when creating the `OfflineSingleAgentEnvRunner`.zrun-   gcss3)fsabszUnknown filesystem: z9. Filesystems can be 'gcs' for GCS, 's3' for S3, or 'abs'
filesystemT )-super__init__episodes_to_numpyraydataDataContextget_currentexecution_optionsresource_limitscopynum_cpus_per_env_runnerr   output_write_methodoutput_write_method_kwargsoutput_filesystemr   output_filesystem_kwargsfilesystem_kwargsfilesystem_objectoutputoutput_pathenv_get_subdir_pathsubdir_pathcreate_env_on_local_workerworker_index
ValueErrorzfillworker_pathgcsfsGCSFileSystempyarrowr   S3FileSystemadlfsAzureBlobFileSystemupdateoutput_write_episodesoutput_compress_columnsoutput_max_rows_per_filewrite_data_this_iteroutput_write_remaining_datawrite_remaining_data_sample_counter_samples)selfr   kwargsdata_contextr7   r   r;   	__class__r   Z/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/rllib/offline/offline_env_runner.pyr      sd   















z$OfflineSingleAgentEnvRunner.__init__returnc                 C   s   t | jtr| j S | jjdkr| jjjd jjj	 S | jjdkr0| jj
dd jj	 S | jjdkr>| jjjj	 S td| jj )zmReturns the subdir path for storing data.

        Returns:
            The subdir path as a string.
        syncr   async	unwrappedvector_entry_pointz"Unknown `gym_env_vectorize_mode`: )
isinstancer/   strlowerr   gym_env_vectorize_moderO   envsrJ   __name__get_attrr4   )rF   r   r   rK   r0      s"   
z,OfflineSingleAgentEnvRunner._get_subdir_pathNFnum_timestepsnum_episodesexplorerandom_actionsforce_resetrY   rZ   r[   r\   r]   c             
      s  t  j|||||d}|  jd7  _| jr@ddlddl tdr&td t	dd |D s1J | j
 fd	d
|D  n| | | jrSt| j
| jkrSd| _| jr| jrd| _t| j
| jkr| j
d| j }| j
| jd | _
tj|}t| j
| jksdntj| j
}z0t| j| j| jd| j d }	t|| j|	 fi | j td|	 d W n ty }
 zt|
 W Y d}
~
nd}
~
ww | jj dt| j
d |S )z2Samples from environments and writes data to disk.rX      r   NmsgpackzPacking episodes with `msgpack` and encode array with `msgpack_numpy` for serialization. This is needed for recording episodes.c                 s   s    | ]}|j d u V  qdS )FN)is_numpy.0epsr   r   rK   	<genexpr>   s    z5OfflineSingleAgentEnvRunner.sample.<locals>.<genexpr>c                    s    g | ]}j |  jd qS ))default)packb	get_stateencodera   mnpr_   r   rK   
<listcomp>   s     z6OfflineSingleAgentEnvRunner.sample.<locals>.<listcomp>TF-r   zWrote samples to storage at .recording_buffer_size)keyvalue)!r   samplerD   r>   r_   msgpack_numpyr   loggerinfoallrE   extend_map_episodes_to_datar@   lenrA   r   r    
from_itemsr   r.   joinpathr1   r6   r5   getattrr'   as_posixr(   	Exceptionerrormetrics	log_value)rF   rY   rZ   r[   r\   r]   samplessamples_to_write
samples_dspatherI   ri   rK   rq      sj   

z"OfflineSingleAgentEnvRunner.samplec              
   C   s   | j r\| jr\tj| j }|  jd7  _z0t| j| j	| j
d| j d }t|| j| fi | j td| d W n ty[ } zt| W Y d}~nd}~ww tdt| j   dS )zWrites the reamining samples to disk

        Note, if the user defined `max_rows_per_file` the
        number of rows for the remaining samples could be
        less than the defined maximum row number by the user.
        r^   rl   r   z"Wrote final samples to storage at zY. Note Note, final samples could be smaller in size than `max_rows_per_file`, if defined.NzExperience buffer length: )rE   rC   r   r    ry   rD   r   r.   rz   r1   r6   r5   r{   r'   r|   r(   rs   rt   r}   r~   debugrx   )rF   r   r   r   r   r   rK   stop  s*   

z OfflineSingleAgentEnvRunner.stopr   c                    s"  |D ]t tD ] tjjtjjtjjtj	tj	j
v r't n tjtjj
v r:t n tj tjtj	j
v rUt d n d tj td k rgdnjtj td k rudnji	 fddj D }j| q
qdS )zConverts list of episodes to list of single dict experiences.

        Note, this method also appends all sampled experiences to the
        buffer.

        Args:
            samples: List of episodes to be converted.
        r^   Fc                    s4   i | ]}||j v rt| n| qS r   )r?   r   get_extra_model_outputs)rb   kirq   rF   r   rK   
<dictcomp>M  s    

zEOfflineSingleAgentEnvRunner._map_episodes_to_data.<locals>.<dictcomp>N)rangerx   r   EPS_IDid_AGENT_IDagent_id	MODULE_ID	module_idOBSr?   r   get_observationsACTIONSget_actionsREWARDSget_rewardsNEXT_OBSTERMINATEDSis_terminated
TRUNCATEDSis_truncatedextra_model_outputskeysrE   append)rF   r   sample_datar   r   rK   rw   (  sD    z1OfflineSingleAgentEnvRunner._map_episodes_to_data)rL   N)rV   
__module____qualname____doc__r   r   r
   r   r   rR   r0   r	   intboolr   r   rq   r   r   r   rw   __classcell__r   r   rI   rK   r      s>    e_% r   ) loggingpathlibr   typingr   r   %ray.rllib.algorithms.algorithm_configr   ray.rllib.core.columnsr   ray.rllib.env.env_runnerr   %ray.rllib.env.single_agent_env_runnerr   "ray.rllib.env.single_agent_episoder   ray.rllib.utils.annotationsr	   r
   r   ray.rllib.utils.compressionr   ray.rllib.utils.typingr   ray.util.annotationsr   ray.util.debugr   Logger__file__rs   r   r   r   r   rK   <module>   s"    
