o
     i!                     @   s~   d dl mZmZmZmZmZ d dlZd dlZd dlmZ d dl	m
Z
mZ d dlmZmZmZ ddlmZmZ G dd	 d	ZdS )
    )ListOptionalSetTupleUnionN)DcgmFieldGetById)DCGM_GROUP_EMPTYDCGM_OPERATION_MODE_AUTO)DcgmFieldGroup	DcgmGroup
DcgmHandle   )	_Profilerloggerc                	   @   s   e Zd ZdZdejejejejej	ej
ejejfdfdddeeedf  ded	dfd
dZdeee eedf f d	ee fddZd	ee fddZdeeedf  d	ee fddZdddZdddZdddZdS )DCGMProfilerz.Profiler that triggers start of DCGM profiler.Ni  main_profilerr   gpus_to_profile.
updateFreqreturnc                 C   s`   || _ || _tdtd| _|du r"tjg ddjj}| 	|f| _
n| 	|| _
| || _dS )a6  
        Args:
            main_profiler: The main profiler object.
            gpus_to_profile: A tuple of integers representing the GPUs to profile. If `None`,
                then the "default" GPU is used.
            field_ids_to_profile:
                See https://github.com/NVIDIA/DCGM/blob/master/testing/python3/dcgm_fields.py#L436
                for a full list of available fields. Note that not all fields are profilable.
            updateFreq: The interval of two consecutive updates of each field. Defaults to 5000 microseconds.
                This is a good tradeoff between performance and accuracy.
                An even smaller updateFreq is not supported well by A100.
                If the step to profile takes more than 5000 microseconds, then a larger updateFreq could also be used.
        z	127.0.0.1)	ipAddressopModeNcuda)device)r   r   r   r	   
dcgmHandletorchemptyr   indexcreate_dcgm_group	dcgmGroupcreate_profiling_field_groupdcgmFieldGroup)selfr   r   field_ids_to_profiler   default_gpu r$   X/home/ubuntu/.local/lib/python3.10/site-packages/xformers/profiler/profiler_dcgm_impl.py__init__   s   zDCGMProfiler.__init__c                    s   | j d u rd S | j  }|j   fdd|D }t|dk r.td| d  d d S t| j dtd}|D ]}|	| q8|S )	Nc                       g | ]}| v r|qS r$   r$   ).0gpusupportedGPUsr$   r%   
<listcomp>J   s    z2DCGMProfiler.create_dcgm_group.<locals>.<listcomp>r   z=The provided GPUs are not supported on this system: provided z, supported z. No data will be captured.r   )	groupName	groupType)
r   	GetSystem	discoveryGetAllSupportedGpuIdslenr   warningr   r   AddGpu)r!   r   
dcgmSystemvalid_gpus_to_profiler   r)   r$   r*   r%   r   A   s0   



zDCGMProfiler.create_dcgm_groupc                 C   s^   | j d usJ | j j }t }t|jD ]}|j| }|jd |j D ]}|	| q$q|S N)
r   	profilingGetSupportedMetricGroupssetrangenumMetricGroupsmetricGroupsfieldIdsnumFieldIdsadd)r!   dcgmMetricGroupsprofilableFieldIds	group_idxmetric_groupfield_idr$   r$   r%   get_profilable_fields`   s   
z"DCGMProfiler.get_profilable_fieldsfieldIdsToProfilec                    s   | j d u rd S |   |d u rt }g }n fdd|D } fdd|D }|s8td| d  d d S |rFtd| d| d	 t| jd
|d}|S )Nc                    r'   r$   r$   r(   rE   rB   r$   r%   r,   z   
    z=DCGMProfiler.create_profiling_field_group.<locals>.<listcomp>c                    s   g | ]}| vr|qS r$   r$   rH   rI   r$   r%   r,      rJ   z>None of the provided field ids could be profiled.
  Provided: z
  Supported: z
No data will be captured.z,The following field ids cannot be profiled: z. Profiling z only.	Profiling)namer>   )r   rF   listr   r3   r
   r   )r!   rG   validFieldIdsinvalidFieldIdsr    r$   rI   r%   r   k   s>   


z)DCGMProfiler.create_profiling_field_groupc                 C   s   | j d ur@| jd urB| j j| j| jdd | j jd | j| _| j j| j| j | j  | j j| j| j | j  d S d S d S )Ni  r   )r   r    samplesWatchFieldsr   GetAllSinceLastCallprofiling_resultsEmptyValues)r!   r$   r$   r%   	__enter__   s    
zDCGMProfiler.__enter__c                 C   sN   | j d ur#| jd ur%| j j| j | j   | ` d | _ | `d | _d S d S d S r7   )r   r    rP   UnwatchFieldsDeleter   )r!   exc_typeexc_valexc_tbr$   r$   r%   __exit__   s   

zDCGMProfiler.__exit__c                 C   s   | j d urk| jd urm| j j| j| j | jj D ]I}| jj|  D ]>}t|j}d}d}| jj| | D ]}|j	d urM|| |j	 |d  }|d7 }q7| j
jd| d| d| d| f q$q| j  d S d S d S )Ng        r   r   zGPU z, ())r   r    rP   rR   rS   valueskeysr   tagvaluer   summaryappendrT   )r!   gpu_idrE   
field_namefield_avg_valnum_valsgpu_field_timer$   r$   r%   step   s0   


zDCGMProfiler.step)r   N)__name__
__module____qualname____doc__dcgm_fieldsDCGM_FI_PROF_SM_ACTIVEDCGM_FI_PROF_SM_OCCUPANCYDCGM_FI_PROF_PIPE_TENSOR_ACTIVEDCGM_FI_PROF_DRAM_ACTIVEDCGM_FI_PROF_PCIE_TX_BYTESDCGM_FI_PROF_PCIE_RX_BYTESDCGM_FI_PROF_NVLINK_TX_BYTESDCGM_FI_PROF_NVLINK_RX_BYTESr   r   intr&   r   r   r   r   rF   r
   r   rU   r[   ri   r$   r$   r$   r%   r      sH    

,


-
r   )typingr   r   r   r   r   rn   r   r   dcgm_structsr   r	   pydcgmr
   r   r   profilerr   r   r   r$   r$   r$   r%   <module>   s   