o
    .i0                 	   @   s@  U d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z	d dl
Z	d dlmZ d dlmZmZmZ e jdddkZdZdee fd	d
Zdee fddZdee fddZdee fddZdee fddZdefddZdefddZe jdddkZdZdZdZ e rdndZ!dZ"e#e$d< eddddZ%ee$d< dZ&ee$d < e Z'ee e$d!< e Z(ee e$d"< ed#d$ddZ)ee$d%< dZ*ee$d&< e Z+ee e$d'< e Z,ee e$d(< ed)d*Z-ee$d+< dZ.ee$d,< i Z/e0e1e1f e$d-< dZ2ee# e$d.< d/Z3ed0 e$d1< e jd2d3dkZ4ee$d4< e jd5d3dkZ5ee$d6< e jd7d3dkZ6ee$d8< e  Z7ee$d9< e jd:ddkZ8dZ9dZ:e jd;ddkZ;e jd<dkZ<e jd=ddkZ=e jd>e rbd3nddkZ>dZ?dZ@dZAe jd?d3dkZBe jd@dkZCdZDe jdAdBZEedC e$dD< dZFdZGe ZHdZIdZJdZKdZLe	jMjNjOe$dE< dZPe	jMjNjOe$dF< dZQe	jMjNjRe$dG< dZSe	jMjNjOe$dH< dZTe	jMjNjOe$dI< dZUeee	jVjWjXgdf  e$dJ< dZYeeeZdK geZdK f  e$dL< dZ[eeeZdK geZdK f  e$dM< dZ\dZ]dZ^dZ_dZ`i Zae0e1e0e1ef f e$dN< i Zbe0e1e0e1ef f e$dO< dZce jdPddkZddZedZfddQdRddSZge0e1ef e$dT< dUZhedV e$dW< dZig dXZjeZee1eeZdK geZdK f f  e$dY< dZkee# e$dZ< dZldZmee$d[< e nd\ Zodu rzdne#eoZpee# e$d]< e nd^ Zodu rdne#eoZqee# e$d_< d`Zreda e$db< dZseee#ge#f  e$dc< d`Ztedd e$de< dZueee#ge#f  e$df< dgZvdZwee$dh< diZxdjZyeddkdldmZzee$dn< e jdodkZ{e jdpdkZ|e jdqdkZ}drZ~ee# e$ds< e jdtddkZe jduddkZe jdve sdnd3dkZee$dw< g ZeZe1 e$dx< edydzddZee$d{< g ZeZe# e$d|< e jd}d~ Ze jdd Ze jdd Zed e$d< e jdd Zed e$d< dZdZdZe jddkZe jddkZdZdZdZe jddkZe jddkZe jddkZe#e jddZe jddZe jddZe jdd3dkZde1defddZde1defddZde1defddZe jddZe	jjsdnd3Ze jdedkZe jdd3dkZe jdddkZe jddkZdZdZdZdZee# e$d< dZdZe jddkZdZe jddkZee$d< e jddkZee$d< e jddZe jdd3dkZee$d< drZe jdddkZdRZdZdZdZdZdZdZdZdZe#e jdd Ze jdd3dkZdZdZdZdZdZdZdRZdRZdZdZdZe jdd3dkZde	jv pde	jv Ze peZe jdddkZdZee1 e$d< dZee1 e$d< de1fddZeσ Ze1e$d< dZe#e$d< eddddZee$d< edddZee$d< dZdjZddgZeZeed e1f  e$d< dZee$d< G ddȄ dȃZdefddʄZde#fdd̄Ze r+dneڃ Zee# e$d< eddddZee$d< e Zee$d< eddddZee$d< e jdd3dkZee$d< ee1 e$d< e rz d dlmZ eree jede jdڡZnedڡZW n eefy   dZY nw dZdrZe jdddkZe jdddkZdZdZdZdZdZdZdZdZdZee$d< e jdd3dkZdZdZdZdZe jddZedkZedkrdneZe jddZee1 e$d< e jddkZ dZe jdd3dkZee$d< dZee$d< dZee$d< dZee$d< dZee$d< dZee$d< dZee$d< e jdd3dkZ	e jdd3dkZ
ee$d< dZee$d< i Ze0e1e0e1ef f e$d< dee1 fddZedddZe1e$d< G dd dZG dd dZG dd dZG dd dZG dd dZdZed  e$d< dZed e$d< G dd dZG dd dZg dZeZe1 e$d	< g d
ZeZe1 e$d< g ZeZee	je	je	jgdf  e$d< G dd dZerd dlT eeje  dS (      N)AnyCallableLiteralOptionalTYPE_CHECKINGUnion)	is_fbcode)Configget_tristate_envinstall_config_moduleTORCHINDUCTOR_INPLACE_PADDING1Freturnc                   C      t dS )N#TORCHINDUCTOR_FX_GRAPH_REMOTE_CACHEr
    r   r   O/home/ubuntu/LTX-2/.venv/lib/python3.10/site-packages/torch/_inductor/config.pyfx_graph_remote_cache_default      r   c                   C   s,   t jddkr
dS t jddkrdS d S )NTORCHINDUCTOR_VEC_ISA_OKr   T0F)osenvirongetr   r   r   r   vec_isa_ok_default   s
   r   c                   C   r   )N#TORCHINDUCTOR_AUTOTUNE_REMOTE_CACHEr   r   r   r   r   autotune_remote_cache_default   r   r   c                   C   r   )N+TORCHINDUCTOR_BUNDLED_AUTOTUNE_REMOTE_CACHEr   r   r   r   r   %bundled_autotune_remote_cache_default   r   r   c                   C   s   t dt sdS d S )N/TORCHINDUCTOR_BUNDLE_TRITON_INTO_FX_GRAPH_CACHET)r
   r   r   r   r   r   )bundle_triton_into_fx_graph_cache_default#   s   r!   c                  C   s<   d} dt jv rt jddkS t rtjd}|| kS dS )N   &TORCHINDUCTOR_USE_STATIC_CUDA_LAUNCHERr   z-pytorch/inductor:static_cuda_launcher_versionTr   r   r   r   torch_utils_internaljustknobs_getval_int)STATIC_CUDA_LAUNCHER_VERSIONversionr   r   r   static_cuda_launcher_default*   s   
r*   c                  C   s@   d} dt jv rt jddkS t rd}tj|}|| kS dS )Nr   TORCHINDUCTOR_PROLOGUE_FUSIONr   z(pytorch/inductor:prologue_fusion_versionTr$   )ENABLE_PROLOGUE_FUSION_VERSIONjk_namer)   r   r   r   prologue_fusion_enabled9   s   
r.   "TORCHDYNAMO_AUTO_FUNCTIONALIZED_V2T-/logs/dedicated_log_torch_compile_worker_ranki  precompilation_timeout_secondsz0pytorch/remote_cache:enable_local_fx_graph_cacheTORCHINDUCTOR_FX_GRAPH_CACHE)justknobenv_name_forcedefaultfx_graph_cacheremote_gemm_autotune_cachefx_graph_remote_cache!bundle_triton_into_fx_graph_cachez>pytorch/remote_cache:enable_non_blocking_remote_cache_write_v2-TORCHINDUCTOR_NON_BLOCKING_REMOTE_CACHE_WRITEnon_blocking_remote_cache_writeautotune_local_cacheautotune_remote_cachebundled_autotune_remote_cachez*torch.compiler.config.force_disable_caches)aliasforce_disable_caches&unsafe_skip_cache_dynamic_shape_guards!unsafe_marked_cacheable_functionssleep_sec_TESTING_ONLYneeds_fixed_stride_order)rD   flexible_layout'triton_kernel_default_layout_constraintTORCHINDUCTOR_CPP_WRAPPERr   cpp_wrapper(TORCHINDUCTOR_CPP_WRAPPER_BUILD_SEPARATEcpp_wrapper_build_separateTORCHINDUCTOR_FX_WRAPPER
fx_wrappercpp_cache_precompile_headersTORCHINDUCTOR_ONLINE_SOFTMAXTORCHINDUCTOR_SIZE_ASSERTSTORCHINDUCTOR_NAN_ASSERTSTORCHINDUCTOR_SCALAR_ASSERTSTORCHINDUCTOR_ALIGNMENT_ASSERTSTORCHINDUCTOR_MEMORY_PLANNINGTORCHINDUCTOR_USE_FAST_MATHTORCHINDUCTOR_MEMORY_POOLintermediates)nonerV   outputscombinedmemory_poolpost_grad_custom_pre_passpost_grad_custom_post_passcustom_partitioner_fnjoint_custom_pre_passjoint_custom_post_passpre_grad_custom_passz+torch._inductor.scheduler.BaseSchedulerNode_pre_fusion_custom_pass_post_fusion_custom_passpre_grad_fusion_optionspost_grad_fusion_options"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCKg-C6?   )pre_grad	precisionnum_iterationsrequires_optimizerfx_passes_numeric_check	heuristic)r5   tritonatenrl   mixed_mm_choice)reorder_compute_for_overlap
sink_waitsraise_comms'reorder_for_compute_comm_overlap_passesreorder_prefetch_limit(reorder_iterative_debug_memory_recompute!PYTORCH_REORDER_COLLECTIVES_LIMIT(reorder_iterative_debug_limit_to_reorderPYTORCH_SINK_WAITS_LIMIT(sink_waits_iterative_debug_limit_to_sinkrW   )rW   all	only_fsdpbucket_all_gathers_fx.bucket_all_gathers_fx_bucket_size_determinator)rW   rz   bucket_reduce_scatters_fx2bucket_reduce_scatters_fx_bucket_size_determinatorr5   !runtime_estimations_mms_benchmarki,     *TORCHINDUCTOR_USE_EXPERIMENTAL_BENCHMARKERz-pytorch/inductor:use_experimental_benchmarker)r5   r4   r3   use_experimental_benchmarkerTORCHINDUCTOR_MAX_AUTOTUNE$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISETORCHINDUCTOR_MAX_AUTOTUNE_GEMM
   autotune_num_choices_displayed/TORCHINDUCTOR_MAX_AUTOTUNE_REPORT_CHOICES_STATS<TORCHINDUCTOR_MAX_AUTOTUNE_PRUNE_CHOICES_BASED_ON_SHARED_MEMTORCHINDUCTOR_GRAPH_PARTITIONgraph_partitioncustom_should_partition_opsz%pytorch/compiler:force_same_precision"TORCHINDUCTOR_FORCE_SAME_PRECISIONforce_same_precisionmulti_kernel_hints(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITON,CPP(TORCHINDUCTOR_MAX_AUTOTUNE_CONV_BACKENDSzATEN,TRITON,TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACEDEFAULT)r   
EXHAUSTIVEmax_autotune_gemm_search_space,TORCHINDUCTOR_MAX_AUTOTUNE_FLEX_SEARCH_SPACEmax_autotune_flex_search_spacei    TORCHINDUCTOR_SAVE_ARGS!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCg      N@g        #TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICE'TORCHINDUCTOR_COORDINATE_DESCENT_TUNING5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONS'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUS#TORCHINDUCTOR_AUTOHEURISTIC_COLLECT TORCHINDUCTOR_AUTOHEURISTIC_USEmixed_mm'TORCHINDUCTOR_RUN_JIT_POST_COMPILE_HOOKnamec                 C   s   t | pt| S )N)collect_autoheuristicuse_autoheuristicr   r   r   r   run_autoheuristic?  s   r   c                 C      | t jjjdv S N,)r%   	_inductorconfigautoheuristic_collectsplitr   r   r   r   r   C     r   c                 C   r   r   )r%   r   r   autoheuristic_user   r   r   r   r   r   G  r   r   $TORCHINDUCTOR_AUTOHEURISTIC_LOG_PATH!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONTORCHINDUCTOR_FORCE_LAYOUT_OPT TORCHINDUCTOR_KEEP_OUTPUT_STRIDETORCHINDUCTOR_WARN_MIX_LAYOUT          realize_acc_reads_size_threshold.TORCHINDUCTOR_ASSUME_UNALIGNED_FALLBACK_OUTPUTTORCHINDUCTOR_DEBUG_FUSIONdebug_fusionTORCHINDUCTOR_BENCHMARK_FUSIONbenchmark_fusion#TORCHINDUCTOR_ENABLED_METRIC_TABLES(TORCHINDUCTOR_LOOP_ORDERING_AFTER_FUSIONloop_ordering_after_fusion'TORCHINDUCTOR_BENCHMARK_EPILOGUE_FUSION@   TORCHINDUCTOR_MIN_NUM_SPLITTORCHINDUCTOR_BENCHMARK_KERNEL%TORCHINDUCTOR_EMULATE_PRECISION_CASTSdevgit0TORCHINDUCTOR_OPTIMIZE_SCATTER_UPON_CONST_TENSORadd_pre_grad_passesremove_pre_grad_passesc                  C   s4   dt jv rt jd } nd} | dv sJ d|  | S )NTORCHINDUCTOR_WORKER_START
subprocess)r   forkspawnzInvalid start method: )r   r   )start_methodr   r   r   decide_worker_start_method  s   

r   worker_start_methodi   small_memory_access_thresholdz(pytorch/compiler:worker_suppress_logging%TORCHINDUCTOR_WORKER_SUPPRESS_LOGGINGworker_suppress_loggingLOG_TLPARSEr4   r5   log_tlparsefuse_ddp_with_concat_opschedule_comm_wait).N_fuse_ddp_communication_passes_micro_pipeline_tpc                   @   s&   e Zd ZU dZeed< dZeed< dS )_collectiveFauto_selecti   #one_shot_all_reduce_threshold_bytesN)__name__
__module____qualname__r   bool__annotations__r   intr   r   r   r   r   7  s   
 r   c                  C   s   d} d}t j|}| |kS )a   
    TODO: Remove when parallel compiled is fully enabled internally. For rollout, use a
    knob to enable / disable. The justknob should not be performed at import, however.
    So for fbcode, we assign compile_threads to 'None' below and initialize lazily in
    async_compile.py.
    rf   z0pytorch/inductor:enable_parallel_compile_version)r%   r&   r'   )ENABLE_PARALLEL_COMPILE_VERSIONr-   r)   r   r   r   #parallel_compile_enabled_internally<  s   r   c                  C   s   ddl } | t}dtjv rttjd }|d| |S tjdkr+d}|d |S t	 r:t
 s:d}|d |S ttd	rFttdnt }|sNJ td
|}|d| |S )a!  
    Here are the precedence to decide compile_threads
    1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
       setting this to 1 to make pdb happy.
    2. Set to 1 if it's win32 platform
    3. decide by the number of CPU cores
    r   NTORCHINDUCTOR_COMPILE_THREADSz!compile_threads set to %d via envwin32rf   z"compile_threads set to 1 for win32z"compile_threads set to 1 in fbcodesched_getaffinity    zcompile_threads set to %d)logging	getLoggerr   r   r   r   infosysplatformr   r   hasattrlenr   	cpu_countmin)r   logcompile_threadsr   r   r   r   decide_compile_threadsJ  s,   





r   r   z+pytorch/inductor:quiesce_async_compile_pool(TORCHINDUCTOR_QUIESCE_ASYNC_COMPILE_POOLquiesce_async_compile_pooluse_static_cuda_launcherz:pytorch/inductor:static_launch_user_defined_triton_kernels7TORCHINDUCTOR_STATIC_LAUNCH_USER_DEFINED_TRITON_KERNELS)static_launch_user_defined_triton_kernels)TORCHINDUCTOR_STRICT_STATIC_CUDA_LAUNCHERstrict_static_cuda_launcherglobal_cache_dir)parutil.zfb/cacheTORCHINDUCTOR_SHAPE_PADDING#TORCHINDUCTOR_COMPREHENSIVE_PADDING   i   force_shape_padTORCHINDUCTOR_PERMUTE_FUSIONTORCHINDUCTOR_PROFILETORCHINDUCTOR_PROFILE_OUTPUTprofile_bandwidth_output3TORCHINDUCTOR_PROFILE_WITH_DO_BENCH_USING_PROFILINGTORCHINDUCTOR_FREEZINGfreezingfreezing_discard_parametersdecompose_mem_bound_mmassume_aligned_inputs.unsafe_ignore_unsupported_triton_autotune_args"check_stack_no_cycles_TESTING_ONLY*always_complex_memory_overlap_TESTING_ONLY*TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDINGTORCHINDUCTOR_ANNOTATE_TRAININGannotate_training)enable_caching_generated_triton_templatesautotune_lookup_tablec                  C   s<   d } t  rtjdd }tjdd}|d urd| } | S )NMAST_HPC_JOB_NAME	ROLE_RANKr   r0   )r   r   r   r   )log_locmast_job_nameglobal_rankr   r   r   get_worker_log_path)  s   
r  TORCHINDUCTOR_WORKER_LOGPATHtorchinductor_worker_logpathc                   @   s  e Zd ZU dZdZejdddkZejdddkZ	dZ
ee ed< eejd	d
Zdejdejdkr8dndfZeed ef ed< ejdddkZejdddkZdZee ed< dZee ed< edZee ed< dZed ed< eejddZejdddkZejdddkZejddZ ejdddkZ!d Z"eejd!dZ#ejd"dZ$ejd#dZ%d$Z&d Z'ejd%ddkZ(d Z)ejd&ddkZ*ejd'ddkZ+dS )(cppzu
    Settings for cpp backend.
    This class provides a centralized location for managing cpp backend settings.
    $TORCHINDUCTOR_CPP_NO_REDUNDANT_LOOPSr   !TORCHINDUCTOR_CPP_DYNAMIC_THREADSr   Nsimdlen TORCHINDUCTOR_CPP_MIN_CHUNK_SIZE512CXXdarwinzclang++zg++cxx'TORCHINDUCTOR_CPP_ENABLE_KERNEL_PROFILE TORCHINDUCTOR_CPP_WEIGHT_PREPACKinject_relu_bug_TESTING_ONLYinject_log1p_bug_TESTING_ONLYr   
vec_isa_okoriginal_atenr%   r0  inductor_nodedescriptive_names,TORCHINDUCTOR_CPP_MAX_HORIZONTAL_FUSION_SIZE16-TORCHINDUCTOR_CPP_FALLBACK_SCATTER_REDUCE_SUM-TORCHINDUCTOR_CPP_ENABLE_UNSAFE_MATH_OPT_FLAG5TORCHINDUCTOR_CPP_ENABLE_FLOATING_POINT_CONTRACT_FLAGoff)TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTICF#TORCHINDUCTOR_CPP_GEMM_MAX_K_SLICES%TORCHINDUCTOR_CPP_GEMM_CACHE_BLOCKING%TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORST$TORCHINDUCTOR_CPP_USE_DECOMPOSE_TANH%TORCHINDUCTOR_CPP_FORCE_INLINE_KERNEL-TORCHINDUCTOR_CPP_USE_CONSTEXPR_FOR_INT_ARRAY),r   r   r   __doc__threadsr   r   r   no_redundant_loopsdynamic_threadsr%  r   r   r   min_chunk_sizer   r   r*  tupler   strenable_kernel_profileweight_prepackr-  r.  r
   r/  r   r3  max_horizontal_fusion_sizefallback_scatter_reduce_sumenable_unsafe_math_opt_flag#enable_floating_point_contract_flagenable_tiling_heuristicsenable_grouped_gemm_templategemm_max_k_slicesgemm_cache_blockinggemm_thread_factorsenable_loop_tail_vecenable_concat_linearuse_decompose_tanhuse_small_dequant_bufferforce_inline_kerneluse_constexpr_for_int_arrayr   r   r   r   r!  <  sX   
 r!  c                   @   s(  e Zd ZU dZejddkZdZdZ	dZ
eeeeeedf f   ed< dZdZe r.dndZd	Zd
Zee ed< dZdZedddZeed< dZdZdZdZdZejde s[dnddkZ eed< dZ!ee ed< dZ"eed< dZ#dZ$dZ%ee ed< dZ&eed< dZ'eed< dZ(dZ)ejdddkZ*ejdddkZ+dZ,e-d ed< ejdddkZ.ejdddkZ/dZ0eejddZ1e-d ed < ejd!ddkZ2d"Z3dZ4d#Z5eed$< dZ6dZ7dZ8ee9 ed%< dZ:ejd&ddkZ;ejd'ddkZ<dZ=eejd(d)Z>eejd*d+Z?dS ),rm   z.
    Config specific to codegen/triton.py
    TORCHINDUCTOR_CUDAGRAPHSr   TFN.cudagraph_capture_sizesr  2   "cudagraph_dynamic_shape_warn_limit TORCHINDUCTOR_CUDAGRAPH_OR_ERRORr   cudagraph_or_error&TORCHINDUCTOR_COALESCE_TILING_ANALYSISr   coalesce_tiling_analysis	max_tilesprefer_nd_tilingautotune_at_compile_timeautotune_with_sample_inputstile_reductions!TORCHINDUCTOR_UNIQUE_KERNEL_NAMES&TORCHINDUCTOR_UNIQUE_USER_KERNEL_NAMESr0  r1  r3  #TORCHINDUCTOR_PERSISTENT_REDUCTIONS$TORCHINDUCTOR_COOPERATIVE_REDUCTIONSTORCHINDUCTOR_MULTI_KERNEL)r   rf   r"      multi_kernelTORCHINDUCTOR_DIVISIBLE_BY_16      spill_thresholdr-  ENABLE_PERSISTENT_TMA_MATMULTORCHINDUCTOR_SKIP_L1$TORCHINDUCTOR_NUM_DECOMPOSE_K_SPLITS10#TORCHINDUCTOR_DECOMPOSE_K_THRESHOLD32)@r   r   r   rA  r   r   r   
cudagraphscudagraph_treescudagraph_skip_dynamic_graphsrZ  r   rF  r   r   r   slow_path_cudagraph_asserts!cudagraph_trees_history_recordingr    cudagraph_support_input_mutation#cudagraph_unexpected_rerecord_limitr\  force_cudagraph_syncforce_cudagraphs_warmupr	   r^  r   fast_path_cudagraph_assertsskip_cudagraph_warmupdebug_sync_graphdebug_sync_kerneldense_indexingr`  ra  rb  autotune_pointwiseautotune_cublasLtrc  rd  re   tiling_prevents_pointwise_fusion tiling_prevents_reduction_fusionunique_kernel_namesunique_user_kernel_namesr3  r   persistent_reductionscooperative_reductionsforce_cooperative_reductionsrl  divisible_by_16min_split_scan_rblockstore_cubinrp  use_block_ptruse_tensor_descriptorr-  rG  codegen_upcast_to_fp32enable_persistent_tma_matmulskip_l1_cache.disallow_failing_autotune_kernels_TESTING_ONLYnum_decompose_k_splitsdecompose_k_thresholdr   r   r   r   rm     s   
 $

	rm   c                   @   s  e Zd ZU dZdZejdddkZejddZ	ejddZ
ed	 ed
< ejddZdZdZdZeed< dZeed< dZdZeed< dZee ed< i Zeeef ed< ejdddkZeed< ejdddkZeed< eejddZeed< i Zeeef ed< dZ eed< dZ!eed< ejdddkZ"eed< dZ#eed < dZ$eed!< e%  Z&eed"< dZ'ee ed#< dZ(ee ed$< dZ)ee ed%< i Z*ee+j,j-e.e f ed&< dZ/ee.e  ed'< dZ0eed(< ejd)ddkZ1dS )*aot_inductorz9
    Settings for Ahead-Of-Time Inductor Compilation
    r   AOT_INDUCTOR_DEBUG_COMPILEr   r   &AOT_INDUCTOR_COMPILE_WRAPPER_OPT_LEVELO1-AOT_INDUCTOR_DEBUG_INTERMEDIATE_VALUE_PRINTER)r   r   23 debug_intermediate_value_printer&AOT_INDUCTOR_FILTERED_KERNELS_TO_PRINTNFuse_runtime_constant_foldingforce_mmap_weightsTpackagepackage_cpp_onlymetadata/AOTINDUCTOR_RAISE_ERROR_ON_IGNORED_OPTIMIZATION#raise_error_on_ignored_optimizationDUMP_AOTI_MINIFIERdump_aoti_minifierAOTINDUCTOR_REPRO_LEVELr"   repro_levelpresetsallow_stack_allocationuse_minimal_arrayref_interface)AOT_INDUCTOR_WEIGHT_USE_CACHING_ALLOCATORweight_use_caching_allocatorpackage_constants_in_sopackage_constants_on_diskprecompile_headersembed_kernel_binaryemit_multi_arch_kernelmodel_name_for_generated_filescustom_ops_to_c_shimscustom_op_libscompile_standaloneAOT_INDUCTOR_ENABLE_LTO)2r   r   r   rA  output_pathr   r   r   debug_compilecompile_wrapper_opt_levelr  r   r   filtered_kernel_namesserialized_in_specserialized_out_specr  r   r  use_consts_asm_buildr  r  r   r  dictrG  r  r  r   r  r  r   r  r  r  r  r  r   r  r  r  r  r  r%   _ops
OpOverloadlistr  r  
enable_ltor   r   r   r   r    sP   
 
	

r  c                
   @   s  e Zd ZU dZdZee ed< dZee ed< dZ	e
d ed< dZdZdZdZejejd	ejejejd
ZdZee ed< g dZee ed< ejdddkZdZdZee ed< dZ eed< ejdddkZ!e"ed< ejdZ#ee ed< ejdZ$ee ed< ejddZ%eed< ejdZ&ee ed< ejdddkZ'e"ed< ejd ddkZ(e"ed!< ejd"d#Z)eed$< d%Z*e"ed&< dZ+e"ed'< dZ,e"ed(< d%Z-e"ed)< dS )*cudaz9Settings for cuda backend, today this consists of cutlassNarchr)   -O1)-O0r  -O2-O3z-OScompile_opt_levelFTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/cutlass_max_profiling_configs)rf   r"   r   r   %cutlass_max_profiling_swizzle_optionsCUTLASS_EPILOGUE_FUSIONr   r   cuda_cxxrf   cutlass_backend_min_gemm_size/INDUCTOR_CUDA_BACKEND_GENERATE_TEST_RUNNER_CODEgenerate_test_runnerTORCHINDUCTOR_CUTLASS_ALLOWLISTcutlass_op_allowlist_regexTORCHINDUCTOR_CUTLASS_DENYLISTcutlass_op_denylist_regex)TORCHINDUCTOR_CUTLASS_INSTANTIATION_LEVELcutlass_instantiation_levelTORCHINDUCTOR_CUTLASS_PRESETScutlass_presets+TORCHINDUCTOR_CUTLASS_HASH_WITH_COMPILE_CMDcutlass_hash_with_compile_cmd"TORCHINDUCTOR_CUTLASS_PRESCREENINGcutlass_prescreening!TORCHINDUCTOR_CUTLASS_ENABLED_OPSrz   cutlass_enabled_opsTuse_binary_remote_cacheupload_to_binary_remote_cachebinary_remote_cache_force_writeenable_caching_codegen).r   r   r   rA  r  r   rG  r   r)   r  r   enable_cuda_ltoenable_ptxas_infoenable_debug_infouse_fast_mathr   pathrealpathr   r   joindirnamer%   __file__cutlass_dirr  r   r  r  cutlass_epilogue_fusion_enabledcutlass_tma_onlyr  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r   r   r   r   r  4  sZ   
 




r  c                   @   s   e Zd ZU g Zee ed< g dZeed  ed< dZ	ed ed< dZ
dZdZdZdZd	Zee ed
< ejdZejdddkZeed< d	Zee ed< d	Zee ed< d	Zee ed< dZeed< d	Zeee  ed< dZeed< dZeed< d	S )rocmr  )gfx90agfx942gfx950ck_supported_archr  )	r  r  r  r  z-Osz-Ozz-Ominz-Ofastz-Omaxr  FTN	rocm_homeTORCHINDUCTOR_CK_DIR-INDUCTOR_CK_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   r  n_max_profiling_configsck_max_profiling_configsck_tile_max_profiling_configsuse_preselected_instanceskBatch_sweepro  split_k_thresholdcontiguous_threshold)r   r   r   r  r  rG  r   r  r   r  is_debug
save_tempsr  flush_denormalsprint_kernel_resource_usager  r   r   r   r   ck_dirr  r   r  r   r  r  r   r  r  r  r   r   r   r   r    s.   
 

r  )r!  rm   halidecpu_backend)rm   r	  cuda_backendc                   @   sB   e Zd ZU dZdZdZed ed< dZed ed< dZ	dZ
dZd	S )
r	  hostz	host-cudaAnderson2021)r  Li2018	Adams2019Mullapudi2016scheduler_cudar  scheduler_cpuFN)r   r   r   
cpu_target
gpu_targetr  r   r   r  assertsdebugscan_kernelsr   r   r   r   r	    s   
 r	  c                   @   s   e Zd ZU ejdddkZejdddkZdZe	e
 ed< dZdZdZdZdZdZdZejd	ddkZejd
ddkZejddZejddZdZdZe	ee
gdf  ed< ejdddkZeejdejddZeed< dS )traceTORCH_COMPILE_DEBUGr   r   TORCH_COMPILE_DEBUG_SAVE_REALN	debug_dirFTINDUCTOR_POST_FUSION_SVGINDUCTOR_ORIG_FX_SVGINDUCTOR_DOT_GRAPH_SHAPE_SVG INDUCTOR_LOG_URL_FOR_GRAPH_XFORM
upload_tarLOG_AUTOTUNE_RESULTSINDUCTOR_PROVENANCEprovenance_tracking_level)r   r   r   r   r   r   enabledsave_real_tensorsr  r   rG  r   	debug_loginfo_logfx_graphfx_graph_transformedir_pre_fusionir_post_fusionoutput_codegraph_diagramdraw_orig_fx_graphdot_graph_shapelog_url_for_graph_xformcompile_profiler   r   log_autotuning_resultsr   r#  r   r   r   r   r  '  s.   
 

r  )
ztrace.upload_tarr^   r_   r`   zaot_inductor.repro_levelzaot_inductor.dump_aoti_minifierr[   r\   r   ra   _save_config_ignore)r  zcuda.cutlass_dirr   r   r\   r[   r^   r_   r   ra   r  r6   r8   r<   r=   _cache_config_ignore_prefixexternal_matmulc                   @   sn   e Zd ZU dZeed< dZee ed< dZ	dZ
dZee ed< dZee ed< dZdZeed  ed< dZdS )	test_configsF%force_extern_kernel_in_multi_templateNmax_mm_configsautotune_choice_name_regexautotune_choice_desc_regex)assertr   track_memory_lifecycle)r   r   r   r7  r   r   r8  r   r   runtime_triton_dtype_assertstatic_cpp_dtype_assertr9  rG  r:  *graphsafe_rng_func_ignores_fallback_randomr<  r   use_libtorchr   r   r   r   r6    s   
 r6  )*(   r   r   typingr   r   r   r   r   r   r%   !torch._inductor.custom_graph_passtorch._environmentr   torch.utils._config_moduler	   r
   r   r   r   inplace_paddingcan_inplace_pad_graph_inputr   r   r   r   r   r!   r*   r.   enable_auto_functionalized_v2r  disable_progressverbose_progressworker_log_pathr1   r   r   r6   r7   r8   r9   r;   r<   r=   r>   r@   rA   rB   r  rG  rC   rF   rH   rJ   rL   rM   online_softmaxdcestatic_weight_shapessize_assertsnan_assertsscalar_assertsalignment_assertspick_loop_ordersinplace_buffersallow_buffer_reusememory_planningr  bfloat16_atomic_adds_enabledrZ   benchmark_harnessepilogue_fusionprologue_fusionepilogue_fusion_firstpattern_matcherb2b_gemm_passr[   r   custom_graph_passCustomGraphPassTyper\   r]   CustomPartitionerFnTyper^   r_   r`   fxgraphGraphra   r  rb   split_cat_fx_passes efficient_conv_bn_eval_fx_passesis_predispatchgroup_fusionbatch_fusionrc   rd   reorder_for_localitydynamic_scale_rblockforce_fuse_int_mm_with_muluse_mixed_mmrk   ro    reorder_for_compute_comm_overlaprs   rt   reorder_for_peak_memoryru   getenvenv_strrw   ry   r|   r}   r~   r   estimate_op_runtimer   intra_node_bwinter_node_bwr   max_autotunemax_autotune_pointwisemax_autotune_gemmr   !max_autotune_report_choices_stats.max_autotune_prune_choices_based_on_shared_memr   r   r   r   uppermax_autotune_gemm_backendsmax_autotune_conv_backendsr   r   autotune_fallback_to_atenunbacked_symint_fallbacksearch_autotune_cache	save_argsautotune_in_subproc+max_autotune_subproc_result_timeout_seconds-max_autotune_subproc_graceful_timeout_seconds.max_autotune_subproc_terminate_timeout_secondsautotune_multi_devicecoordinate_descent_tuning'coordinate_descent_check_all_directions coordinate_descent_search_radiusr   r   run_jit_post_compile_hookr   r   r   autoheuristic_log_pathr)   hiplayout_opt_defaultlayout_optimizationforce_layout_optimizationkeep_output_stridewarn_mix_layoutrealize_reads_thresholdrealize_opcount_thresholdrealize_acc_reads_thresholdr   fallback_randomimplicit_fallbacks assume_unaligned_fallback_outputaggressive_fusionr   r   enabled_metric_tablesr   score_fusion_memory_thresholdbenchmark_epilogue_fusion max_epilogue_benchmarked_choicesmax_fusion_size)max_fusion_buffer_group_pairwise_attemptsmax_pointwise_cat_inputsforce_pointwise_catunroll_reductions_thresholdcomment_originconv_1x1_as_mmsplit_reductionsmin_num_splitbenchmark_kernelconstant_and_index_propagationalways_keep_tensor_constantsassert_indirect_indexingcompute_all_boundscombo_kernelsbenchmark_combo_kernelcombo_kernels_autotunecombo_kernel_allow_mixed_sizes#combo_kernel_foreach_dynamic_shapesjoint_graph_constant_foldingdebug_index_assertsemulate_precision_casts__version__is_nightly_or_sourcedeveloper_warnings"optimize_scatter_upon_const_tensorr   r   r   r   r   r   r   _fuse_ddp_communication_fuse_ddp_bucket_sizer   r   r   r   r   r   r   r   r   r   libfb.pyr  __package__get_dir_pathr  r  replacesepr   
ValueErrorImportErrorkernel_name_max_opsshape_paddingcomprehensive_paddingpad_channels_lastpad_dynamic_shapesdisable_padding_cpu$expand_dimension_for_pointwise_nodespadding_alignment_bytespadding_stride_thresholdpad_outputsbw_outputs_user_visibler  permute_fusionprofiler_mark_wrapper_callgenerate_intermediate_hooksdebug_ir_traceback_raise_error_for_testing_profile_varprofile_bandwidthprofile_bandwidth_regexr
  /profile_bandwidth_with_do_bench_using_profilingdisable_cpp_codegenr  r  r  r  r  r  r  enable_linear_binary_foldingr  r  r  r  r   r!  rm   r  r  r  r
  r  r	  r  r3  r4  r5  Tensorr6  torch.utils._config_typingmodulesr   r   r   r   r   <module>   s  
  

 




	%

  l  JN.