o
    iL5                     @   s   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dl	Z
d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ eeZe jG dd dZG dd dZdS )    N)Callable)PicklerAny)CachingAutotuner)trace_structured)VllmBackend)end_monitoring_torch_compile)
VllmConfig)Range)init_loggerc                   @   s6   e Zd ZU eed< dZeed< dZede	f ed< dS )
RangeEntrycompile_rangeFcompiledN.runnable)
__name__
__module____qualname__r   __annotations__r   boolr   r   r    r   r   X/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/compilation/piecewise_backend.pyr      s   
 r   c                   @   s  e Zd Z		d%dejdB dedededee ded	e	d
e
eedef f dB defddZdedef dedef fddZd&ddZde
eef fddZdeedf dee fddZdefddZdedeedf defddZd ededB fd!d"Zdedefd#d$ZdS )'PiecewiseBackendN graphvllm_configpiecewise_compile_indextotal_piecewise_compilessym_shape_indicesvllm_backendreturns_tuplecompiled_runnables.submod_namec
                 C   s  t |dut |duA sJ d|| _|| _|j| _|| _|| _|| _|| _|	| _|dk| _	||d k| _
|dk| _|j| _| j | _| jrad}
| jd }|j|jjksWJ t|j|
d| jd< d| j }t| | jj| _d	| j }t| || _|| _i | _t| j| _| jdur| jD ]1}t|tr|d
ksJ tdt|t sJ t||d}|| jvrt!|d| j|< | j"| q| jD ]
}t!|d| j|< qd| _#ddl$m%} |& | _'dS )ad  
        The backend for piecewise compilation.
        It mainly handles the compilation of static shapes and
        dispatching based on runtime shape.

        We will compile `self.graph` once for the general shape,
        and then compile for different shapes specified in
        `compilation_config.compile_sizes`.

        This class supports two mutually exclusive modes:
        1. Compilation (graph is set, compiled_runnables is None):
           Used during initial compilation when we have the FX graph
           and need to compile it for each shape range.
        2. Precompilation (graph is None, compiled_runnables is set):
           Used when loading from cache/AOT artifacts where we already
           have pre-compiled callables and don't need the original graph.

        Exactly one of graph or compiled_runnables must be provided.
        Nz:exactly one of graph and compiled_runnables should be set.r      istartendz"PiecewiseBackend: compile_ranges: z!PiecewiseBackend: compile_sizes: cudagraph_capture_sizeszmcudagraph_capture_sizes not supported in compile_sizes.This should be handled in `post_init_cudagraph_sizes`.)r   F)!_on_compilation_complete_callback)(r   r   r   compilation_configr   r   r   r!   r"   is_first_graphis_last_graphis_full_graph
is_encoderis_encoder_compilationget_compile_rangescompile_rangesr'   scheduler_configmax_num_batched_tokensr   r&   logger
debug_oncecompile_sizesr   r    range_entriessetto_be_compiled_ranges
isinstancestrNotImplementedErrorintr   add_graph_loggedvllm.compilation.backendsr)   geton_compilation_complete)selfr   r   r   r   r   r   r    r!   r"   	max_int32last_compile_range
log_stringsizeranger)   r   r   r   __init__"   sp   










zPiecewiseBackend.__init__compiled_graphreturnc                    s   dt dt f fdd}|S )NargsrK   c                     s(    |  }j st|ttfs|S |d S )Nr   )r    r:   tuplelist)rL   graph_outputrJ   rC   r   r   compiled_graph_wrapper   s   zKPiecewiseBackend.get_compiled_graph_wrapper.<locals>.compiled_graph_wrapperr   )rC   rJ   rQ   r   rP   r   get_compiled_graph_wrapper   s   
z+PiecewiseBackend.get_compiled_graph_wrapperc                 C   sD   | j r| js| jj  t| j | jd ur |   d S d S d S d S N)r,   r9   r   compiler_managersave_to_filer	   r   rB   rC   r   r   r   check_for_ending_compilation   s   

z-PiecewiseBackend.check_for_ending_compilationc                    s|   G dd dt  dtdtf dtf fdd}i }| j D ]\}}|js,td| qt	|j
d	r;||j
|t|< q|S )
Nc                   @   s   e Zd ZdedefddZdS )zEPiecewiseBackend.to_bytes.<locals>.StandaloneCompiledArtifactsPicklerobjrK   c                 S   s(   t |tr|  tjt|ffS tS rS   )r:   r   prepare_for_picklepickleloadsdumpsNotImplemented)rC   rX   r   r   r   reducer_override   s   
zVPiecewiseBackend.to_bytes.<locals>.StandaloneCompiledArtifactsPickler.reducer_overrideN)r   r   r   objectr   r^   r   r   r   r   "StandaloneCompiledArtifactsPickler   s    r`   fn.rK   c                    sn   t | ds	J dtjjdd |  }t } || |	 }W d    |S 1 s0w   Y  |S )N	serializezfn must have serialize methodbundled_autograd_cacheT)
hasattrtorch
_functorchconfigpatchrb   ioBytesIOdumpgetvalue)ra   entryfresultr`   r   r   rb      s   

z,PiecewiseBackend.to_bytes.<locals>.serializez9entry with range %s not compiled, so cannot get its bytesrb   )r   r   r   bytesr7   itemsr   r4   debugrd   r   r;   )rC   rb   out	range_keyrm   r   rp   r   to_bytes   s   
zPiecewiseBackend.to_bytesrL   c                 C   sV   g }| j d us	J | j j jD ]}|jdkr||jd  q t|t|ks)J |S )Nplaceholderexample_value)r   nodesopappendmetalen)rC   rL   fake_example_inputsnoder   r   r   _fakify_args   s   
zPiecewiseBackend._fakify_argsr   c                    s   j duo
 jj v jjtddd  fddd js@d_jdus/J tdfd	dfd
dd dS dS )z.Log compilation event for TORCH_TRACE/tlparse.Nartifactc                   S   s
   dddS )Nvllm_piecewise_compile_startjson)nameencodingr   r   r   r   r   <lambda>   s   z5PiecewiseBackend._log_compile_start.<locals>.<lambda>c                
      s$   t j j j  dS )N)piecewise_indexr"   r   compile_range_startcompile_range_endis_single_sizeis_cudagraph_capture_size)r   r\   r   r&   r'   r   r   r   is_cudagraph_sizerC   subgraph_indexr"   r   r   r      s    )metadata_fn
payload_fnT
graph_dumpc                      s   dd  iS )Nr   vllm_r   r   )r"   r   r   r     s   
c                      s    j jddS )NF)print_output)r   print_readabler   rV   r   r   r     s    )r6   r&   r   r"   r   r?   r   )rC   r   r   r   r   _log_compile_start   s$   


z#PiecewiseBackend._log_compile_startrange_entryc              
   C   s   |j sg| jd ur| | jt|j |_nB| |j |j s%| |nt	|}t
jjdd | jjj| j|| jj| j|j| j| jd|_W d    n1 sRw   Y  d|_ | j|j |   d S d S )Nrc   T)r   graph_index
num_graphs)r   r!   rR   r;   r   r   r   r   r   rN   re   rf   rg   rh   r   rT   compiler   inductor_configr*   r   r   r9   removerW   )rC   r   rL   	args_listr   r   r   _maybe_compile_for_range_entry  s4   

z/PiecewiseBackend._maybe_compile_for_range_entryruntime_shapec                 C   sP   | j d u rd S || j v r| jt||d S | jD ]}||v r%| j|   S qd S )Nr%   )r6   r7   r   r1   )rC   r   rH   r   r   r   _find_range_for_shape5  s   


z&PiecewiseBackend._find_range_for_shapec                 G   sL   || j d  }| |}|d usJ d| d| j | || |j| S )Nr   zShape: z out of considered ranges: )r   r   r1   r   r   )rC   rL   r   r   r   r   r   __call__D  s   


zPiecewiseBackend.__call__)Nr   )rK   N)r   r   r   fxGraphModuler
   r=   rN   r   r   dictr;   r   r   rI   rR   rW   rq   rv   rM   r   r   r   r   r   r   r   r   r   r   r   r   !   sP    
	

r




$'

'r   ) dataclassesri   r   rZ   collections.abcr   r   typingr   torch._functorch.configre   torch.fxr   )torch._inductor.runtime.triton_heuristicsr   torch._logging._internalr   r@   r   vllm.compilation.monitorr	   vllm.configr
   vllm.config.utilsr   vllm.loggerr   r   r4   	dataclassr   r   r   r   r   r   <module>   s(   