"""
This module provides TVM backend integration for TorchDynamo.

Apache TVM is a deep learning compiler framework that can optimize and execute
models on various hardware backends. This module enables:

- Compilation of PyTorch models to TVM's computation graphs
- Multiple scheduling options:
  - Default scheduler
  - Auto-scheduler for automatic optimization
  - Meta-schedule for evolutionary search-based tuning
- Hardware-specific optimizations:
  - CUDA GPU support
  - CPU support with LLVM targeting and architecture-specific tuning
  - Automatic detection of CPU capabilities (AVX2, AVX512)
- Tensor conversion utilities between PyTorch and TVM formats
- Configurable optimization levels and tuning trials

The backend can be used with torch.compile():
    model = torch.compile(model, backend="tvm")
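
Scheduler and tuning parameters can also be forwarded through
torch.compile()'s ``options`` argument (a sketch; the keys mirror this
module's defaults and the values shown are only illustrative):

    model = torch.compile(
        model,
        backend="tvm",
        options={"scheduler": "meta_schedule", "trials": 20000, "opt_level": 3},
    )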
    N)MappingProxyType)AnyCallableOptional)fx   )device_from_inputsfake_tensor_unsupported)register_backend)optionsgmexample_inputsr   return.c                   s  |d u rt d ddd}|d usJ dd lddlm} ddlm} tj| |}t|}dd t	|D }| | }t
|dkrHtd	 | jS |j||\}	}
|jd
krb|j}j }nd}jt }|dd }|d u rtjdd }|dd}|dd}|dkr$ddlm} t }tj|s||	d |
            if len(tasks) != 0:
                tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
                if not os.path.exists(log_file):
                    assert trials > 0
                    tune_option = auto_scheduler.TuningOptions(
                        num_measure_trials=trials,
                        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
                        early_stopping=2000,
                    )
                    try:
                        tuner.tune(tune_option)
                    except Exception:
                        if os.path.exists(log_file):
                            os.unlink(log_file)
                        raise

        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                opt_level=opt_level,
                config={"relay.backend.use_auto_scheduler": True},
            ):
                lib = relay.build(mod, target=target, params=params)
    elif scheduler == "meta_schedule":
        from tvm import meta_schedule as ms

        with tempfile.TemporaryDirectory() as work_dir:
            if device.type != "cuda":
krKjt  d|j,j-dd }|dksRJ |j.j/|	|||d|
d|d}|j.j0||	||
    elif scheduler == "default" or not scheduler:
        # no autotuning
        with tvm.transform.PassContext(opt_level=opt_level):
            lib = relay.build(mod, target=target, params=params)
    else:
        raise NotImplementedError(
            "This tuning option is invalid/not implemented for torchdynamo's TVM-related backend. "
            "There are three available options: default, auto_scheduler and meta_schedule."
        )
    m = graph_executor.GraphModule(lib["default"](dev))
    def to_torch_tensor(nd_tensor):
        """A helper function to transfer a NDArray to torch.tensor."""
        if nd_tensor.dtype == "bool":
            # DLPack does not support bool, so go through numpy here even
            # though it costs an extra copy.
            return torch.from_numpy(nd_tensor.numpy())
        return torch.utils.dlpack.from_dlpack(nd_tensor.to_dlpack())
ztvm.<locals>.to_torch_tensortorch_tensorc                    s,   | j tjkr j|   S  j| S )z8A helper function to transfer a torch.tensor to NDArray.)r4   r5   r3   ndarraycpur7   r:   )r=   )tvmr   r   to_tvm_tensor   s   ztvm.<locals>.to_tvm_tensori_argsc                     s   dd | D }   \}}dd | D }t|dD ])\}}| dkrD|jr,| }d| }||vr<td| q || q 	   fddt
  D S )	Nc                 S   s   g | ]}|  qS r   )
contiguous)r   ar   r   r   r          z)tvm.<locals>.exec_tvm.<locals>.<listcomp>c                 S   s   h | ]\}}|qS r   r   )r   name_r   r   r   	<setcomp>   rF   z(tvm.<locals>.exec_tvm.<locals>.<setcomp>r   r   z6input %s skipped as not found in tvm's runtime libraryc                    s   g | ]	}  |qS r   )
get_output)r   r   )mr<   r   r   r      s    )get_input_infoitems	enumeratedimrequires_graddetachlogwarning	set_inputrunrangeget_num_outputs)rC   args
shape_inforH   active_inputsr   arginp_name)rK   r<   rB   r   r   exec_tvm   s*   
ztvm.<locals>.exec_tvm)7r   rA   r   tvm.contribr   r5   jittracer   rN   lenrR   rS   forwardfrontendfrom_pytorchtyper   indexr%   r@   Targetllvm_targetgetosenvironr   tempfileNamedTemporaryFilepathexistsextract_tasksTaskSchedulerTuningOptionsRecordToFiletune	ExceptionunlinkApplyHistoryBest	transformPassContextbuildr'   TemporaryDirectoryr8   	cpu_countrelay_integration
tune_relaycompile_relayNotImplementedErrorGraphModuler>   r?   Tensorlist)r   r   r   r   r   jit_moddevice
shape_listexample_outputsr+   r&   devr%   r   r   r   r   log_filetaskstask_weightstunertune_optionlibmsr,   r0   r]   r   )rK   r<   rB   rA   r   rA   *   s   
	"rA   r'   )r   r   c                   C   s&   zt d W dS  ty   Y dS w )NrA   TF)	importlibimport_moduleImportErrorr   r   r   r   has_tvm   s   


@functools.cache
def llvm_target() -> str:
    if sys.platform == "linux":
        cpuinfo = open("/proc/cpuinfo").read()
        if "avx512" in cpuinfo:
            return "llvm -mcpu=skylake-avx512"
        if "avx2" in cpuinfo:
            return "llvm -mcpu=core-avx2"
    return "llvm"
rh   )#__doc__	functoolsr   loggingrj   r   rl   typesr   typingr   r   r   r5   r   commonr   r	   registryr
   	getLogger__name__rR   r   r   r   strrA   partialtvm_meta_scheduletvm_auto_schedulerr3   r   cacherh   r   r   r   r   <module>   s@    

 
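
# Usage sketch (illustrative only, not part of this module's API; assumes TVM
# is installed, and that `model` / `example_inputs` are defined by the caller):
#
#     import torch
#     from torch._dynamo.backends.tvm import has_tvm
#
#     if has_tvm():
#         compiled = torch.compile(model, backend="tvm")
#         out = compiled(*example_inputs)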