o
    i :                     @   s   d dl Z d dlZd dlmZ d dlmZ eeZde jd< de jd< dej	j
_dd	 Zd
ee fddZddedefddZdddZdd Ze  edrvd dlmZ d dlmZ d dlmZmZ eeg dej	j
jd< ee_ee_dS dS )    N)init_loggeris_torch_equal1PYTORCH_NVML_BASED_CUDA_CHECKTORCHINDUCTOR_COMPILE_THREADS   c                    s  dd l m  m ddlm}m}m}m}m} ddl	m
  dtt f fdd}t jj|r@ jjjd ur@| jjjj}n j }| jrrt| jd |rr| jd jj|vrr| j  | jrrt| jd |rr| jd jj|vsY| g}g }	tt| jD ]/}
| j|
 }t||r||d | j|
< qt||r||  qt||r|	|  q|	|  t|dksJ d S )Nr   )EnterSubgraphLineExitSubgraphLineMemoryPlanningLineMemoryPlanningStateSubgraphPythonWrapperCodegenVreturnc                    s   dd l }g }|d}|d}| D ]3}t|jr)| jj dt|  qt|jr>| jj dt|  q||	  q|S )Nr   _none_shape)
	itertoolscount
isinstanceNoneAsConstantBufferappendgraphnamenextShapeAsConstantBufferget_name)graph_outputsr   namesshape_counternone_counternoder   ir G/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/env_override.pyget_output_names-   s   

z3memory_plan_reuse_patched.<locals>.get_output_names)torch._inductor.ir	_inductorr#   torch._inductor.codegen.wrapperr	   r
   r   r   r   torch._inductor.virtualizedr   liststrr   r   wrapper_codepartition_signaturesoutput_nodesr&   linesr!   r   poprangelenplanr   )selfr	   r
   r   r   r   r&   	out_namesplanning_statespast_planning_statesiliner$   r"   r%   memory_plan_reuse_patched"   sF   






r<   skip_cudagraphsc                    s  ddl m} ddlm}m m ddlm ddlm	} g }|j
 } dtdtf fdd	tt|t|D ]\}}	| }
|D ]
}|
|j  qI|
|}|jd
d |D }|fdd|j|jB D |
 }|fdd|D }| |D ]}|j qfdd|
 D }|| fdd|D }fdd|D }fdd|D }|| |fdd|D }fdd|D }fdd|D }||}||||||	|}|| ||| }q@|ddd S )z
    Gets signature for each graph partition, including input nodes, output nodes, and
    whether deallocating an input within graph partition.
    r   )dependencies)GraphPartitionSignatureMutationOutput
NoneLayoutr   )
OrderedSetbuf_namer   c                    sV   j | d}|du rdS t|jjr)t|j r'j| d }r'|S dS dS )z
        Checks if buf_name is NoneLayout. Buffers with NoneLayout is not allocated
        so graph partition should not take it as inputs or outputs.
        NFT)name_to_bufgetr   r!   layoutmutation_real_name)rC   buf	real_name)r@   rA   is_none_layoutr6   r$   r%   rJ   w   s   z=get_graph_partition_signature_patched.<locals>.is_none_layoutc                 S   s   g | ]}|j qS r$   )read_writes).0r!   r$   r$   r%   
<listcomp>   s    z9get_graph_partition_signature_patched.<locals>.<listcomp>c                    s   g | ]
} |j s|j qS r$   )r   )rL   x)rJ   r$   r%   rM      s    c                 3       | ]
} j ||V  qd S NrG   rE   rL   r   r6   r$   r%   	<genexpr>       
z8get_graph_partition_signature_patched.<locals>.<genexpr>c                    s   g | ]}| v r|qS r$   r$   rR   name_to_noder$   r%   rM      s
    c                    s   i | ]}| v r| | qS r$   r$   rR   rV   r$   r%   
<dictcomp>   
    z9get_graph_partition_signature_patched.<locals>.<dictcomp>c                    s   i | ]}|v r|| v qS r$   r$   rR   buffer_names_to_freerW   r$   r%   rX      rY   c                    s    g | ]}|v r| vr|qS r$   r$   rR   rZ   r$   r%   rM      s
    c                 3   rO   rP   rQ   rR   rS   r$   r%   rT      rU   c                    s   g | ]
} |s| qS r$   r$   rR   )rJ   rW   r$   r%   rM      s    c                    s   g | ]
}| j jv r|qS r$   )r   	constantsrR   r   r$   r%   rM      s    Nr'   )torch._inductorr>   r(   r?   r@   rA   r+   r   torch.utils._ordered_setrB   r   r&   get_name_to_nodesr-   boolzipreversedupdateoutputs_by_namekeysintersection
ReadWrites
merge_listreadswrites
last_usage!get_graph_partition_symbol_inputsr   union)r6   
partitionsr=   r>   r?   rB   
signaturesunmet_output_names	partitionskip_cudagraphoutput_namesr!   returned_output_namesrK   partition_input_namesextra_input_namesinput_nodesinput_deallocationextra_output_namesr0   constant_namessymbol_inputspartition_signaturer$   )r@   rA   r   r[   rJ   rW   r6   r%   %get_graph_partition_signature_patchedf   s   









	r}   F
should_logr   c                    s  ddl m  m} ddlm}m} ddlm}m}m	} |j
}	t|	tjjjrY|	j }
rY|
 }t|
tjjr>| d|
j n|}|tjjjv sN|tjjjv rYt|
tjjsWJ dS tjjjjsf|jdu rfdS dtd|dB d	dfd
d}|rw|n|}t||rt fdd|jD S |j
dusJ | s|d|d dS t|j
|jr|d|d dS t|j
|jr|d|d dS t|j
ddr|d|d dS ||j
r|d|d dS dS )zBReturn True if we should partition the inductor graph on this noder   N)BaseSchedulerNodeFusedSchedulerNode)&_unstable_customized_partition_wrapperis_cudagraph_unsafe_opmaybe_log_cudagraph_partition.Tmsgr!   r   c                 S   s   d S rP   r$   )r   r!   r$   r$   r%   noop_log9  s   z*should_partition_patched.<locals>.noop_logc                 3   s    | ]}  |V  qd S rP   )should_partition)rL   snoderS   r$   r%   rT   ?  s    z+should_partition_patched.<locals>.<genexpr>znon gpu ops)r!   zDeviceCopy opszConditional opsunbacked_bindingszunbacked binding opszCUDAGraph-unsafe custom opsF)r(   r)   r#   torch._inductor.schedulerr   r   torch._inductor.utilsr   r   r   r!   r   torchFallbackKernelop_overloadr   _ops
OpOverload_overloadnameconfigcustom_should_partition_opstriton
cudagraphswrapperr-   anysnodesis_gpu
DeviceCopyConditionalgetattr)r6   r!   r~   r#   r   r   r   r   r   ir_nodeopop_overload_packet_nameop_overload_namer   log_partition_reasonr$   rS   r%   should_partition_patched  sV   



r   c                 C   sh   ddl m  m} ddlm} t|_t|_|	dd || j
| _W d   dS 1 s-w   Y  dS )z
    (Re)initializes the scheduler member.  When initializing the scheduler, no CUBIN
    files should be generated (to avoid biasing any benchmarks and pessimizing
    fusion decisions).
    r   N)	Schedulerztriton.store_cubinF)torch._inductor.configr)   r   r   r   r   r   r}   get_graph_partition_signaturepatch
operations	scheduler)r6   r   r   r$   r$   r%   _update_scheduler_patched[  s   	"r   c                  C   sN   ddl m}  | ds| dr%ddl}ttjdr#ddlm} ||_dS dS dS )z;Workaround for TorchInductor autotune get_raw_stream() bug.r   r   2.9.0z2.9.1N_cuda_getCurrentRawStream)r   )	vllm.utils.torch_utilsr   builtinshasattrr   _Ctorch._Cr   get_raw_stream)r   r   _get_raw_streamr$   r$   r%   _patch_get_raw_stream_if_neededt  s   
r   r   )PythonWrapperCodegen)GraphLowering)_Config_ConfigEntry)defaultr   )F)r   N)osr   vllm.loggerr   r   r   __name__loggerenvironr)   r   compile_threadsr<   r,   r`   r}   r   r   r   r*   r   torch._inductor.graphr   torch.utils._config_moduler   r   _configmemory_plan_reuse_update_schedulerr$   r$   r$   r%   <module>   s6   
	


D
 #
S
