o
    ߗi                  	   @   s	  U d dl Z d dlZd dlmZmZmZmZmZmZm	Z	 d dl
Z
d dlZ
d dlmZ d dlmZmZ dee fddZdee fdd	Zdee fd
dZdee fddZe jdddkZdZdZdZe jde rmdnddkZe Zee ed< e Zee ed< dZ eed< e Z!ee ed< e Z"ee ed< e jddkZ#dZ$ee% ed< dZ&dZ'e jdddkZ(e jddZ)dZ*dZ+e jdddkZ,e jd dkZ-dZ.dZ/dZ0e jd!ddkZ1e jd"d#Z2dZ3dZ4dZ5dZ6dZ7dZ8e
j9j:j;ed$< dZ<e
j9j:j;ed%< dZ=eee
j>j?gdf  ed&< dZ@eee
j>j?gdf  ed'< dZAeee
j>jBj?gdf  ed(< dZCeeed) ged) f  ed*< dZDdZEdZFdZGdZHi ZIeeJeeJef f ed+< i ZKeeJeeJef f ed,< dZLe jd-ddkZMdZNdZOdd.d/dd0ZPeeJef ed1< d2ZQdZRg d3ZSdZTd4ZUd5ZVd6ZWe jd7dkZXe jd8dkZYe jd9dkZZd:Z[e rdne jd;dkZ\e jd<d=] Z^e jd>d?] Z_e jd@dA] Z`e jdBddkZadCZbe jdDdkZce jdEdkZde jdFdkZedGZfdHZgdIZhe jdJdkZie jdKdkZje jdLdkZke%e jdMdZle jdNdOZme jdPdQZndReJdefdSdTZodReJdefdUdVZpdReJdefdWdXZqe jdYdAZre
jsjts^dndZue jdZeudkZve jd[ddkZwe jd\ddkZxe jd]dkZyd^Zzd_Z{d`Z|dZ}dZ~dZe jdadkZe jdbdkZe jdcdOZe jddddkZd:Ze jdeddkZd/ZdfZd`ZdZd`ZdZdZdZe jdgddkZdZdZdZdZdZdZd/Zd/ZdZdZdZdZdhe
jv pdie
jv Ze peZe jdjddkZdZeeJ edk< dZd6ZdldmgZee	edn eJf  edo< dZeedp< G dqdr drZdefdsdtZde%fdudvZe rBdne Zee% edw< e r~z d dxlmZ eriee jedye jdzZnedzZW n eefy}   dZY nw dZd:Ze jd{ddkZe jd|ddkZdZdZd}Zd~ZdZdZdZeed< e jdddkZdZdZdZdZe jddOZedOkZedkrdOneZe jddZe jddkZdZe jdddkZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< e jdddkZe jdddkZeed< G dd dZG dd dZG dd dZG dd dZG dd dZdZdZG dd dZG dd dZg dZg dZg Zeee
je
je
jgdf  ed< G dd dZerd dlT eeje  dS )    N)AnyCallableDictListOptionalTYPE_CHECKINGUnion)	is_fbcode)get_tristate_envinstall_config_modulereturnc                   C      t dS )N#TORCHINDUCTOR_FX_GRAPH_REMOTE_CACHEr
    r   r   T/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/torch/_inductor/config.pyfx_graph_remote_cache_default      r   c                   C   r   )N#TORCHINDUCTOR_AUTOTUNE_REMOTE_CACHEr   r   r   r   r   autotune_remote_cache_default   r   r   c                   C   r   )N+TORCHINDUCTOR_BUNDLED_AUTOTUNE_REMOTE_CACHEr   r   r   r   r   %bundled_autotune_remote_cache_default   r   r   c                   C   s   t dt sdS d S )N/TORCHINDUCTOR_BUNDLE_TRITON_INTO_FX_GRAPH_CACHET)r
   r	   r   r   r   r   )bundle_triton_into_fx_graph_cache_default   s   r   "TORCHDYNAMO_AUTO_FUNCTIONALIZED_V21FTTORCHINDUCTOR_FX_GRAPH_CACHE0fx_graph_remote_cache!bundle_triton_into_fx_graph_cacheautotune_local_cacheautotune_remote_cachebundled_autotune_remote_cache"TORCHINDUCTOR_FORCE_DISABLE_CACHESsleep_sec_TESTING_ONLYneeds_fixed_stride_orderTORCHINDUCTOR_CPP_WRAPPERTORCHINDUCTOR_C_SHIM_VERSION2TORCHINDUCTOR_SIZE_ASSERTSTORCHINDUCTOR_NAN_ASSERTSTORCHINDUCTOR_MEMORY_PLANNINGTORCHINDUCTOR_MEMORY_POOLintermediatespost_grad_custom_pre_passpost_grad_custom_post_passjoint_custom_pre_passjoint_custom_post_passpre_grad_custom_passz+torch._inductor.scheduler.BaseSchedulerNode_pre_fusion_custom_passpre_grad_fusion_optionspost_grad_fusion_options"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCKg-C6?   )pre_grad	precisionnum_iterationsrequires_optimizerfx_passes_numeric_check	heuristic)reorder_compute_for_overlap
sink_waitsraise_commsdefaulti,     TORCHINDUCTOR_MAX_AUTOTUNE$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISETORCHINDUCTOR_MAX_AUTOTUNE_GEMM
   "TORCHINDUCTOR_FORCE_SAME_PRECISION(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITON,CPP(TORCHINDUCTOR_MAX_AUTOTUNE_CONV_BACKENDSzATEN,TRITON,TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACEDEFAULT'TORCHINDUCTOR_AUTOTUNE_FALLBACK_TO_ATENi    #TORCHINDUCTOR_SEARCH_AUTOTUNE_CACHETORCHINDUCTOR_SAVE_ARGS!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCg      N@g      ?g       @#TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICE'TORCHINDUCTOR_COORDINATE_DESCENT_TUNING5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONS'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUS#TORCHINDUCTOR_AUTOHEURISTIC_COLLECT TORCHINDUCTOR_AUTOHEURISTIC_USEmixed_mmnamec                 C   s   t | pt| S )N)collect_autoheuristicuse_autoheuristicrX   r   r   r   run_autoheuristic  s   r\   c                 C      | t jjjdv S N,)torch	_inductorconfigautoheuristic_collectsplitr[   r   r   r   rY        rY   c                 C   r]   r^   )r`   ra   rb   autoheuristic_userd   r[   r   r   r   rZ     re   rZ   $TORCHINDUCTOR_AUTOHEURISTIC_LOG_PATH!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONTORCHINDUCTOR_FORCE_LAYOUT_OPT TORCHINDUCTOR_KEEP_OUTPUT_STRIDETORCHINDUCTOR_WARN_MIX_LAYOUT         TORCHINDUCTOR_DEBUG_FUSIONTORCHINDUCTOR_BENCHMARK_FUSION#TORCHINDUCTOR_ENABLED_METRIC_TABLES(TORCHINDUCTOR_LOOP_ORDERING_AFTER_FUSION'TORCHINDUCTOR_BENCHMARK_EPILOGUE_FUSION@   TORCHINDUCTOR_BENCHMARK_KERNELdevgit0TORCHINDUCTOR_OPTIMIZE_SCATTER_UPON_CONST_TENSORworker_start_methodfuse_ddp_with_concat_opschedule_comm_wait).N_fuse_ddp_communication_passes_micro_pipeline_tpc                   @   s&   e Zd ZU dZeed< dZeed< dS )_collectiveFauto_selecti   #one_shot_all_reduce_threshold_bytesN)__name__
__module____qualname__r   bool__annotations__r   intr   r   r   r   r~   B  s   
 r~   c                  C   s   d} d}t j|}| |kS )a   
    TODO: Remove when parallel compiled is fully enabled internally. For rollout, use a
    knob to enable / disable. The justknob should not be performed at import, however.
    So for fbcode, we assign compile_threads to 'None' below and initialize lazily in
    async_compile.py.
    r7   z0pytorch/inductor:enable_parallel_compile_version)r`   _utils_internaljustknobs_getval_int)ENABLE_PARALLEL_COMPILE_VERSIONjk_nameversionr   r   r   #parallel_compile_enabled_internallyG  s   r   c                  C   s   ddl } | t}dtjv rttjd }|d| |S tjdkr+d}|d |S t	 r:t
 s:d}|d |S ttd	rFttdnt }|sNJ td
|}|d| |S )a!  
    Here are the precedence to decide compile_threads
    1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
       setting this to 1 to make pdb happy.
    2. Set to 1 if it's win32 platform
    3. decide by the number of CPU cores
    r   NTORCHINDUCTOR_COMPILE_THREADSz!compile_threads set to %d via envwin32r7   z"compile_threads set to 1 for win32z"compile_threads set to 1 in fbcodesched_getaffinity    zcompile_threads set to %d)logging	getLoggerr   osenvironr   infosysplatformr	   r   hasattrlenr   	cpu_countmin)r   logcompile_threadsr   r   r   r   decide_compile_threadsU  s,   





r   r   )parutil.zfb/cacheTORCHINDUCTOR_SHAPE_PADDING#TORCHINDUCTOR_COMPREHENSIVE_PADDING   i   force_shape_padTORCHINDUCTOR_PERMUTE_FUSIONTORCHINDUCTOR_PROFILETORCHINDUCTOR_PROFILE_OUTPUT3TORCHINDUCTOR_PROFILE_WITH_DO_BENCH_USING_PROFILINGTORCHINDUCTOR_FREEZINGfreezingfreezing_discard_parametersdecompose_mem_bound_mmassume_aligned_inputs.unsafe_ignore_unsupported_triton_autotune_args"check_stack_no_cycles_TESTING_ONLY*always_complex_memory_overlap_TESTING_ONLY*TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDINGTORCHINDUCTOR_ANNOTATE_TRAININGannotate_trainingc                   @   s`  e Zd ZU dZejdddkZejdddkZdZ	e
e ed< eejdd	Zdejd
ejdkr6dndfZejdddkZejdddkZdZe
e ed< dZe
e ed< dZe
e ed< dZeejddZejdddkZejdddkZejdddkZejdddkZeejddZejddZejddZ dZ!dZ"dS )cpp$TORCHINDUCTOR_CPP_NO_REDUNDANT_LOOPSr   !TORCHINDUCTOR_CPP_DYNAMIC_THREADSr   Nsimdlen TORCHINDUCTOR_CPP_MIN_CHUNK_SIZE4096CXXdarwinzclang++zg++'TORCHINDUCTOR_CPP_ENABLE_KERNEL_PROFILE TORCHINDUCTOR_CPP_WEIGHT_PREPACKinject_relu_bug_TESTING_ONLYinject_log1p_bug_TESTING_ONLY
vec_isa_okoriginal_aten,TORCHINDUCTOR_CPP_MAX_HORIZONTAL_FUSION_SIZE16-TORCHINDUCTOR_CPP_FALLBACK_SCATTER_REDUCE_SUM-TORCHINDUCTOR_CPP_ENABLE_UNSAFE_MATH_OPT_FLAG5TORCHINDUCTOR_CPP_ENABLE_FLOATING_POINT_CONTRACT_FLAG)TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTIC#TORCHINDUCTOR_CPP_GEMM_MAX_K_SLICES%TORCHINDUCTOR_CPP_GEMM_CACHE_BLOCKING%TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORSTF)#r   r   r   threadsr   r   getno_redundant_loopsdynamic_threadsr   r   r   r   min_chunk_sizer   r   cxxenable_kernel_profileweight_prepackr   strr   r   r   descriptive_namesmax_horizontal_fusion_sizefallback_scatter_reduce_sumenable_unsafe_math_opt_flag#enable_floating_point_contract_flagenable_tiling_heuristicsgemm_max_k_slicesgemm_cache_blockinggemm_thread_factorsenable_loop_tail_vecenable_concat_linearr   r   r   r   r     sF   
 	r   c                   @   s.  e Zd ZU ejddkZdZdZdZ	dZ
e rdndZdZdZee ed< dZdZdZdZdZdZdZdZdZeed	< dZdZd
Zee ed< dZdZ ejdddkZ!dZ"ejdddkZ#ejdddkZ$dZ%eejddZ&dZ'dZ(dZ)dZ*eed< dZ+d
Z,ee- ed< dZ.ejdddkZ/d
S )tritonTORCHINDUCTOR_CUDAGRAPHSr   TFr   2   "cudagraph_dynamic_shape_warn_limit   prefer_nd_tilingNautotune_at_compile_time!TORCHINDUCTOR_UNIQUE_KERNEL_NAMESr   #TORCHINDUCTOR_PERSISTENT_REDUCTIONS$TORCHINDUCTOR_COOPERATIVE_REDUCTIONSr   TORCHINDUCTOR_MULTI_KERNEL      spill_thresholdr   ENABLE_PERSISTENT_TMA_MATMUL)0r   r   r   r   r   r   
cudagraphscudagraph_treescudagraph_skip_dynamic_graphsslow_path_cudagraph_asserts!cudagraph_trees_history_recordingr	    cudagraph_support_input_mutation#cudagraph_unexpected_rerecord_limitr   r   r   r   force_cudagraph_syncforce_cudagraphs_warmupfast_path_cudagraph_assertsskip_cudagraph_warmupdebug_sync_graphdebug_sync_kerneldense_indexing	max_tilesr   r   autotune_pointwiseautotune_cublasLtr    tiling_prevents_pointwise_fusion tiling_prevents_reduction_fusionunique_kernel_namesr   persistent_reductionscooperative_reductionsforce_cooperative_reductionsmulti_kerneldivisible_by_16min_split_scan_rblockstore_cubinr   use_block_ptrr   r   codegen_upcast_to_fp32enable_persistent_tma_matmulr   r   r   r   r   w  sR   
 	r   c                   @   s   e Zd ZU dZejdddkZejddZejddZ	dZ
dZdZeed	< dZeed
< dZeed< dZeed< i Zeeef ed< ejdddkZeed< ejdddkZeed< i Zeeef ed< dZeed< dZeed< dZeed< dS )aot_inductorrU   AOT_INDUCTOR_DEBUG_COMPILEr   r   -AOT_INDUCTOR_DEBUG_INTERMEDIATE_VALUE_PRINTER&AOT_INDUCTOR_FILTERED_KERNELS_TO_PRINTNFuse_runtime_constant_foldingforce_mmap_weightspackagepackage_cpp_onlymetadata/AOTINDUCTOR_RAISE_ERROR_ON_IGNORED_OPTIMIZATION#raise_error_on_ignored_optimizationDUMP_AOTI_MINIFIERdump_aoti_minifierpresetsallow_stack_allocationuse_minimal_arrayref_interfaceTpackage_constants_in_so)r   r   r   output_pathr   r   r   debug_compile debug_intermediate_value_printerfiltered_kernel_namesserialized_in_specserialized_out_specr  r   r   r  r  r  r  r   r   r  r  r  r   r  r  r  r   r   r   r   r    s.   
 
r  c                
   @   s   e Zd ZU dZee ed< dZee ed< dZdZ	dZ
dZdZejdejejejejdZdZee ed< dZee ed	< d
Zeed< ejdddkZeed< dZee ed< dZee ed< dS )cudaNarchr   z-O1FTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/cutlass_max_profiling_configscuda_cxxr7   cutlass_backend_min_gemm_size/INDUCTOR_CUDA_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   generate_test_runnercutlass_op_allowlist_regexpingpongcutlass_op_denylist_regex)r   r   r   r%  r   r   r   r   compile_opt_levelenable_cuda_ltoenable_ptxas_infoenable_debug_infouse_fast_mathr   r   r   pathabspathjoindirnamer`   __file__cutlass_dirr'  r   r(  r)  r+  r   r,  r.  r   r   r   r   r$  ^  s*   
 

r$  c                   @   s   e Zd ZU g Zee ed< g dZee ed< dZdZ	dZ
dZdZdZdZee ed< ejd	Zejd
ddkZeed< dZee ed< dZeed< dS )rocmr%  )gfx90agfx940gfx941gfx942ck_supported_archz-O2FTN	rocm_homeTORCHINDUCTOR_CK_DIR-INDUCTOR_CK_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   r+  n_max_profiling_configsuse_preselected_instances)r   r   r   r%  r   r   r   r?  r/  is_debug
save_tempsr3  flush_denormalsprint_kernel_resource_usager@  r   r   r   r   ck_dirr+  r   rC  r   rD  r   r   r   r   r:    s   
 
r:  c                   @   s(   e Zd ZdZdZdZdZdZdZdZ	dS )halidehostz	host-cudaAnderson2021	Adams2019FN)
r   r   r   
cpu_target
gpu_targetscheduler_cudascheduler_cpuassertsdebugscan_kernelsr   r   r   r   rJ    s    rJ  c                   @   s   e Zd ZU ejdddkZejdddkZdZe	e
 ed< dZdZdZdZdZdZdZejd	ddkZejd
ddkZejddZejddZdZdZe	ee
gdf  ed< dZeed< dS )traceTORCH_COMPILE_DEBUGr   r   TORCH_COMPILE_DEBUG_SAVE_REALN	debug_dirFTINDUCTOR_POST_FUSION_SVGINDUCTOR_ORIG_FX_SVGINDUCTOR_DOT_GRAPH_SHAPE_SVG INDUCTOR_LOG_URL_FOR_GRAPH_XFORM
upload_tarlog_autotuning_results)r   r   r   r   r   r   enabledsave_real_tensorsrX  r   r   r   	debug_loginfo_logfx_graphfx_graph_transformedir_pre_fusionir_post_fusionoutput_codegraph_diagramdraw_orig_fx_graphdot_graph_shapelog_url_for_graph_xformcompile_profiler]  r   r^  r   r   r   r   r   rU    s$   
 
rU  )ztrace.upload_tarr0   r1   r2   )rU  zcuda.cutlass_dirry   r   r/   r.   r   external_matmulc                   @   s   e Zd ZdZdZdS )test_configsFN)r   r   r   %force_extern_kernel_in_multi_templateruntime_triton_dtype_assertr   r   r   r   rn  Q  s    rn  )*)r   r   typingr   r   r   r   r   r   r   r`   !torch._inductor.custom_graph_passtorch._environmentr	   torch.utils._config_moduler
   r   r   r   r   r   r   r   r   enable_auto_functionalized_v2rS  disable_progressverbose_progressfx_graph_cacher   r   r   r    r!   r"   force_disable_cachesr$   r   #custom_op_default_layout_constraint'triton_kernel_default_layout_constraintcpp_wrapperc_shim_versiondcestatic_weight_shapessize_assertsnan_assertspick_loop_ordersinplace_buffersallow_buffer_reusememory_planningmemory_poolbenchmark_harnessepilogue_fusionepilogue_fusion_firstpattern_matcherb2b_gemm_passr.   ra   custom_graph_passCustomGraphPassTyper/   r0   fxGraphr1   r2   graphr3   split_cat_fx_passes efficient_conv_bn_eval_fx_passesis_predispatchgroup_fusionbatch_fusionr4   r   r5   reorder_for_localitydynamic_scale_rblockforce_fuse_int_mm_with_muluse_mixed_mmr<   mixed_mm_choice reorder_for_compute_comm_overlap'reorder_for_compute_comm_overlap_passesreorder_for_peak_memoryestimate_op_runtimeintra_node_bwinter_node_bwmax_autotunemax_autotune_pointwisemax_autotune_gemmautotune_num_choices_displayedforce_same_precisionuppermax_autotune_gemm_backendsmax_autotune_conv_backendsmax_autotune_gemm_search_spaceautotune_fallback_to_atenunbacked_symint_fallbacksearch_autotune_cache	save_argsautotune_in_subproc+max_autotune_subproc_result_timeout_seconds-max_autotune_subproc_graceful_timeout_seconds.max_autotune_subproc_terminate_timeout_secondsautotune_multi_devicecoordinate_descent_tuning'coordinate_descent_check_all_directions coordinate_descent_search_radiusrc   rf   r\   rY   rZ   autoheuristic_log_pathr   hiplayout_opt_defaultlayout_optimizationforce_layout_optimizationkeep_output_stridewarn_mix_layoutrealize_reads_thresholdrealize_opcount_thresholdrealize_acc_reads_thresholdfallback_randomimplicit_fallbacksaggressive_fusiondebug_fusionbenchmark_fusionenabled_metric_tablesloop_ordering_after_fusionscore_fusion_memory_thresholdbenchmark_epilogue_fusion max_epilogue_benchmarked_choicesmax_fusion_sizemax_pointwise_cat_inputsforce_pointwise_catunroll_reductions_thresholdcomment_originconv_1x1_as_mmsplit_reductionsbenchmark_kernelconstant_and_index_propagationalways_keep_tensor_constantsassert_indirect_indexingcompute_all_boundscombo_kernelsbenchmark_combo_kernelcombo_kernels_autotunecombo_kernel_allow_mixed_sizes#combo_kernel_foreach_dynamic_shapesjoint_graph_constant_foldingdebug_index_assertsemulate_precision_casts__version__is_nightly_or_sourcedeveloper_warnings"optimize_scatter_upon_const_tensorry   _fuse_ddp_communication_fuse_ddp_bucket_sizer|   r}   r~   r   r   r   libfb.pyr   __package__get_dir_pathr4  r6  replacesepglobal_cache_dir
ValueErrorImportErrorkernel_name_max_opsshape_paddingcomprehensive_paddingpad_channels_lastdisable_padding_cpupadding_alignment_bytespadding_stride_thresholdpad_outputsbw_outputs_user_visibler   permute_fusionprofiler_mark_wrapper_callgenerate_intermediate_hooksdebug_ir_traceback_raise_error_for_testing_profile_varprofile_bandwidthprofile_bandwidth_regexprofile_bandwidth_output/profile_bandwidth_with_do_bench_using_profilingdisable_cpp_codegenr   r   r   r   r   r   r   enable_linear_binary_foldingr   r   r   r  r$  r:  cpu_backendcuda_backendrJ  rU  _save_config_ignore_cache_config_ignore_prefixrm  Tensorrn  torch.utils._config_typingmodulesr   r   r   r   r   <module>   s  
 $	 
	%
j MI1@$