o
    -i-                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dlZ	d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ eeZe jG dd dZG dd dZdS )    N)Callable)PicklerAny)CachingAutotuner)VllmBackend)end_monitoring_torch_compile)
VllmConfig)Range)init_loggerc                   @   s6   e Zd ZU eed< dZeed< dZede	f ed< dS )
RangeEntrycompile_rangeFcompiledN.runnable)
__name__
__module____qualname__r
   __annotations__r   boolr   r   r    r   r   _/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/compilation/piecewise_backend.pyr      s   
 r   c                   @   s   e Zd Z	d dejdB dedededee dede	d	e
eed
ef f dB fddZded
ef ded
ef fddZd!ddZde
eef fddZdeed
f dee fddZdedeed
f defddZdededB fddZdedefddZdS )"PiecewiseBackendNgraphvllm_configpiecewise_compile_indextotal_piecewise_compilessym_shape_indicesvllm_backendreturns_tuplecompiled_runnables.c	                 C   s  t |dut |duA sJ d|| _|| _|j| _|| _|| _|| _|| _|dk| _||d k| _	|dk| _
|j| _| j | _| jr^d}	| jd }
|
j|jjksTJ t|
j|	d| jd< d| j }t| | jj| _d	| j }t| || _|| _i | _t| j| _| jdur| jD ]1}t|tr|d
ksJ tdt|tsJ t||d}|| jvrt |d| j|< | j!| q| jD ]
}t |d| j|< qddl"m#} |$ | _%dS )ad  
        The backend for piecewise compilation.
        It mainly handles the compilation of static shapes and
        dispatching based on runtime shape.

        We will compile `self.graph` once for the general shape,
        and then compile for different shapes specified in
        `compilation_config.compile_sizes`.

        This class supports two mutually exclusive modes:
        1. Compilation (graph is set, compiled_runnables is None):
           Used during initial compilation when we have the FX graph
           and need to compile it for each shape range.
        2. Precompilation (graph is None, compiled_runnables is set):
           Used when loading from cache/AOT artifacts where we already
           have pre-compiled callables and don't need the original graph.

        Exactly one of graph or compiled_runnables must be provided.
        Nz:exactly one of graph and compiled_runnables should be set.r      istartendz"PiecewiseBackend: compile_ranges: z!PiecewiseBackend: compile_sizes: cudagraph_capture_sizeszmcudagraph_capture_sizes not supported in compile_sizes.This should be handled in `post_init_cudagraph_sizes`.)r   )!_on_compilation_complete_callback)&r   r   r   compilation_configr   r   r   r   is_first_graphis_last_graphis_full_graph
is_encoderis_encoder_compilationget_compile_rangescompile_rangesr$   scheduler_configmax_num_batched_tokensr
   r#   logger
debug_oncecompile_sizesr   r   range_entriessetto_be_compiled_ranges
isinstancestrNotImplementedErrorintr   addvllm.compilation.backendsr&   geton_compilation_complete)selfr   r   r   r   r   r   r   r   	max_int32last_compile_range
log_stringsizeranger&   r   r   r   __init__    sl   










zPiecewiseBackend.__init__compiled_graphreturnc                    s   dt dt f fdd}|S )NargsrG   c                     s(    |  }j st|ttfs|S |d S )Nr   )r   r7   tuplelist)rH   graph_outputrF   r?   r   r   compiled_graph_wrapper   s   zKPiecewiseBackend.get_compiled_graph_wrapper.<locals>.compiled_graph_wrapperr   )r?   rF   rM   r   rL   r   get_compiled_graph_wrapper   s   
z+PiecewiseBackend.get_compiled_graph_wrapperc                 C   sD   | j r| js| jj  t| j | jd ur |   d S d S d S d S N)r)   r6   r   compiler_managersave_to_filer   r   r>   )r?   r   r   r   check_for_ending_compilation   s   

z-PiecewiseBackend.check_for_ending_compilationc                    s|   G dd dt  dtdtf dtf fdd}i }| j D ]\}}|js,td| qt	|j
d	r;||j
|t|< q|S )
Nc                   @   s   e Zd ZdedefddZdS )zEPiecewiseBackend.to_bytes.<locals>.StandaloneCompiledArtifactsPicklerobjrG   c                 S   s(   t |tr|  tjt|ffS tS rO   )r7   r   prepare_for_picklepickleloadsdumpsNotImplemented)r?   rS   r   r   r   reducer_override   s   
zVPiecewiseBackend.to_bytes.<locals>.StandaloneCompiledArtifactsPickler.reducer_overrideN)r   r   r   objectr   rY   r   r   r   r   "StandaloneCompiledArtifactsPickler   s    r[   fn.rG   c                    sn   t | ds	J dtjjdd |  }t } || |	 }W d    |S 1 s0w   Y  |S )N	serializezfn must have serialize methodbundled_autograd_cacheT)
hasattrtorch
_functorchconfigpatchr]   ioBytesIOdumpgetvalue)r\   entryfresultr[   r   r   r]      s   

z,PiecewiseBackend.to_bytes.<locals>.serializez9entry with range %s not compiled, so cannot get its bytesr]   )r   r   r   bytesr4   itemsr   r1   debugr_   r   r8   )r?   r]   out	range_keyrh   r   rk   r   to_bytes   s   
zPiecewiseBackend.to_bytesrH   c                 C   sV   g }| j d us	J | j j jD ]}|jdkr||jd  q t|t|ks)J |S )Nplaceholderexample_value)r   nodesopappendmetalen)r?   rH   fake_example_inputsnoder   r   r   _fakify_args   s   
zPiecewiseBackend._fakify_argsrange_entryc              
   C   s   |j sa| jd ur| | jt|j |_n<|j s| |nt|}t	j
jdd | jjj| j|| jj| j|j| j| jd|_W d    n1 sLw   Y  d|_ | j|j |   d S d S )Nr^   T)r   graph_index
num_graphs)r   r   rN   r8   r   r   is_single_sizer{   rJ   r`   ra   rb   rc   r   rP   compiler   inductor_configr'   r   r   r6   removerR   )r?   r|   rH   	args_listr   r   r   _maybe_compile_for_range_entry   s2   


z/PiecewiseBackend._maybe_compile_for_range_entryruntime_shapec                 C   sP   | j d u rd S || j v r| jt||d S | jD ]}||v r%| j|   S qd S )Nr"   )r3   r4   r
   r.   )r?   r   rD   r   r   r   _find_range_for_shape  s   


z&PiecewiseBackend._find_range_for_shapec                 G   sL   || j d  }| |}|d usJ d| d| j | || |j| S )Nr   zShape: z out of considered ranges: )r   r   r.   r   r   )r?   rH   r   r|   r   r   r   __call__  s   


zPiecewiseBackend.__call__rO   )rG   N)r   r   r   fxGraphModuler	   r:   rJ   r   r   dictr8   r   r   rE   rN   rR   rl   rq   rI   r{   r   r   r   r   r   r   r   r   r      sH    
	
m




$

%r   )dataclassesrd   rU   collections.abcr   r   typingr   torch._functorch.configr`   torch.fxr   )torch._inductor.runtime.triton_heuristicsr   r<   r   vllm.compilation.monitorr   vllm.configr	   vllm.config.utilsr
   vllm.loggerr   r   r1   	dataclassr   r   r   r   r   r   <module>   s$   