o
    
۾iK#                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ  ee!Z"ej#Z$ej%&e$dZ'ej%&e$dZ(da)ej*Z+e,e-e-e.B e/B f  Z0g dZ1de-de-e.B e/B ddfddZ2dd Z3de.fddZ4de-fddZ5G dd de-eZ6G dd dZ7e7 Z8dS )    N)Enum)Path)Thread)Any)uuid4)global_http_connection)init_logger)cuda_get_device_properties)cuda_device_count_stateless)__version__zusage_stats.jsondo_not_track)VLLM_USE_MODELSCOPEVLLM_USE_FLASHINFER_SAMPLERVLLM_PP_LAYER_PARTITIONVLLM_USE_TRITON_AWQVLLM_ENABLE_V1_MULTIPROCESSINGkeyvaluereturnc                 C   s   |t | < dS )zCSet global usage data that will be sent with every usage heartbeat.N)_GLOBAL_RUNTIME_DATA)r   r    r   H/home/ubuntu/.local/lib/python3.10/site-packages/vllm/usage/usage_lib.pyset_runtime_usage_data/   s   r   c                  C   s2   t du rtj} tj}tjt}| p|p| a t S )a|  Determine whether or not we can send usage stats to the server.
    The logic is as follows:
    - By default, it should be enabled.
    - Three environment variables can disable it:
        - VLLM_DO_NOT_TRACK=1
        - DO_NOT_TRACK=1
        - VLLM_NO_USAGE_STATS=1
    - A file in the home directory can disable it if it exists:
        - $HOME/.config/vllm/do_not_track
    N)_USAGE_STATS_ENABLEDenvsVLLM_DO_NOT_TRACKVLLM_NO_USAGE_STATSospathexists_USAGE_STATS_DO_NOT_TRACK_PATH)r   no_usage_statsdo_not_track_filer   r   r   is_usage_stats_enabled4   s   r#   c                   C   s   t tjtjj d S )Ng    eA)intdatetimenowtimezoneutc	timestampr   r   r   r   _get_current_timestamp_nsI   s   r*   c            	      C   s   g d} ddddd}| D ]#}t |}| r0|  }| D ]\}}||v r/|    S q!qddi}| D ]\}}tj|rG|  S q9d	S )
N)z!/sys/class/dmi/id/product_versionz/sys/class/dmi/id/bios_vendorz/sys/class/dmi/id/product_namez#/sys/class/dmi/id/chassis_asset_tagz/sys/class/dmi/id/sys_vendorAWSAZUREGCPOCI)amazonzmicrosoft corporationgoogleoraclecloudRUNPOD_DC_IDRUNPODUNKNOWN)r   is_file	read_textloweritemsr   environget)	vendor_filescloud_identifiersvendor_filer   file_content
identifierproviderenv_to_cloud_providerenv_varr   r   r   _detect_cloud_providerM   s,   	rC   c                   @   s$   e Zd ZdZdZdZdZdZdZdS )UsageContextUNKNOWN_CONTEXT	LLM_CLASS
API_SERVEROPENAI_API_SERVEROPENAI_BATCH_RUNNERENGINE_CONTEXTN)	__name__
__module____qualname__rE   rF   rG   rH   rI   rJ   r   r   r   r   rD   q   s    rD   c                	   @   s   e Zd ZdZdddZ	ddededeeef dB ddfd	d
Z	dededeeef ddfddZ
defddZdededeeef ddfddZdd Zdeeef ddfddZdeeef ddfddZdS )UsageMessagezCCollect platform information and send it to the usage stats server.r   Nc                 C   sv   t t | _d | _d | _d | _d | _d | _d | _d | _	d | _
d | _d | _d | _d | _d | _d | _d | _d | _d | _d S N)strr   uuidr@   num_cpucpu_typecpu_family_model_steppingtotal_memoryarchitectureplatformcuda_runtime	gpu_countgpu_typegpu_memory_per_deviceenv_var_jsonmodel_architecturevllm_versioncontextlog_timesource)selfr   r   r   __init__}   s$   
zUsageMessage.__init__r]   usage_context	extra_kvsc                 C   s&   t | j|||pi fdd}|  d S )NT)targetargsdaemon)r   _report_usage_workerstart)rb   r]   rd   re   tr   r   r   report_usage   s   zUsageMessage.report_usagec                 C   s   |  ||| |   d S rO   )_report_usage_once_report_continuous_usage)rb   r]   rd   re   r   r   r   ri      s   z!UsageMessage._report_usage_workerc                 C   sP   zddl m}m} | | _| | _| | _d| _	W dS  t
y'   Y dS w )Nr   )tpu_infoutilstpu_inferenceTF)rq   ro   rp   get_num_chipsrY   get_tpu_typerZ   get_device_hbm_limitr[   rX   	Exception)rb   ro   rp   r   r   r   _report_tpu_inference_usage   s   


z(UsageMessage._report_tpu_inference_usagec              	   C   sD  ddl m} | rt | _tdd\| _| _| r t	j
j| _| r-|  s-td t | _t | _t | _t j| _t }|dd | _|dd| _dt |d	dt |d
dt |ddg| _!|j"| _#t$| _%|| _&t'(dd t)D | _*t+ | _,t-j.| _/t0| }|r|1| | 2| | 3| d S )Nr   )current_platform)namerU   z!Failed to collect TPU informationcount	brand_raw ,familymodelsteppingc                 S   s   i | ]}|t t|qS r   )getattrr   ).0rB   r   r   r   
<dictcomp>   s    z3UsageMessage._report_usage_once.<locals>.<dictcomp>)4vllm.platformsrw   is_cuda_aliker
   rY   r	   rZ   r[   is_cudatorchversioncudarX   is_tpurv   logger	exceptionrC   r@   rW   machinerV   psutilvirtual_memorytotalrU   cpuinfoget_cpu_infor:   rR   rS   joinrP   rT   r   r_   VLLM_VERSIONr^   r]   jsondumps_USAGE_ENV_VARS_TO_COLLECTr\   r*   r`   r   VLLM_USAGE_SOURCEra   varsupdate_write_to_file_send_to_server)rb   r]   rd   re   rw   infodatar   r   r   rm      sJ   



	

zUsageMessage._report_usage_oncec                 C   s:   	 t d | jt d}|t | | | | q)zReport usage every 10 minutes.

        This helps us to collect more data points for uptime of vLLM usages.
        This function can also help send over performance metrics over time.
        TiX  )rQ   r`   )timesleeprQ   r*   r   r   r   r   )rb   r   r   r   r   rn      s   



z%UsageMessage._report_continuous_usager   c                 C   s@   zt  }|jt|d W d S  tjjy   td Y d S w )N)r   z#Failed to send usage data to server)	r   get_sync_clientpost_USAGE_STATS_SERVERrequests
exceptionsRequestExceptionloggingdebug)rb   r   global_http_clientr   r   r   r     s   zUsageMessage._send_to_serverc                 C   sl   t jt jtdd ttjdd ttd}t	|| |
d W d    d S 1 s/w   Y  d S )NT)exist_oka
)r   makedirsr   dirname_USAGE_STATS_JSON_PATHr   touchopenr   dumpwrite)rb   r   fr   r   r   r     s   "zUsageMessage._write_to_file)r   NrO   )rK   rL   rM   __doc__rc   rP   rD   dictr   rl   ri   boolrv   rm   rn   r   r   r   r   r   r   rN   z   sF    
!


	

8rN   )9r%   r   r   r   rW   r   enumr   pathlibr   	threadingr   typingr   rQ   r   r   r   r   r   	vllm.envsr   vllm.connectionsr   vllm.loggerr   vllm.utils.platform_utilsr	   vllm.utils.torch_utilsr
   vllm.versionr   r   rK   r   VLLM_CONFIG_ROOT_config_homer   r   r   r    r   VLLM_USAGE_STATS_SERVERr   r   rP   r$   r   r   r   r   r#   r*   rC   rD   rN   usage_messager   r   r   r   <module>   sJ   	$	 
