o
    ڗiK?                     @   s   d dl mZmZmZmZ d dlmZmZmZm	Z	 d dl
mZ d dlmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ defd	d
ZeddG dd dZeG dd deZG dd deZdS )    )BaseBackend	GPUTargetAttrsDescriptorregister_descriptor)irpassesllvmamd)	dataclass)AnyDictTuple)
ModuleTypeN)Pathtargetc                 C   s.   | j }d|v rdd S d|v rdd S dd S )Ngfx94c                 S   s   |   s|  r
dS dS )N   r   r   r   r      )is_int8lhsTyperhsType r   Z/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/triton/backends/amd/compiler.py<lambda>       zmin_dot_size.<locals>.<lambda>gfx9c                 S      dS )Nr   r   r   r   r   r   r          c                 S   r   )Nr   r   r   r   r   r   r      r    )arch)r   arch_strr   r   r   min_dot_size   s   r#   T)frozenc                   @   sJ  e Zd ZU dZeed< dZeed< dZeed< dZeed< dZ	eed	< dZ
eed
< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZee ed< dZee ed< dZeed< dZee ed< dZeed< dZeed< dZeed < dZeed!< dZeed"< d#Z eed$< d%Z!eed&< d'd( Z"d)d* Z#dS )+
HIPOptions   	num_warps   waves_per_eu   
num_stagesnum_ctasr   num_buffers_warp_specnum_consumer_groupsreg_dec_producerreg_inc_consumerNextern_libs)r(   r(   r(   cluster_dimsFdebugTsanitize_overflowr!   )fp8e5supported_fp8_dtypesr   deprecated_fp8_dtypesieeedefault_dot_input_precision)r8   allowed_dot_input_precisionsenable_fp_fusionmatrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_namedefaultinstruction_sched_variantc                 C   s   t tjd }| jd u ri nt| j}d| jv s"d| jv s"d| jv r$dnd}t| d| dd	g}|D ]}t|| d
 ||< q3t| dt	|
  | jdkr[| j| jd @ dks_J dd S )Nlibgfx10gfx11gfx12    @   	warp_sizeocmlocklz.bcr1   r   r(   znum_warps must be a power of 2)r   __file__parentr1   dictr!   object__setattr__strtupleitemsr'   )selfdefault_libdirr1   rJ   libsrD   r   r   r   __post_init__=   s   & zHIPOptions.__post_init__c                 C   s.   d dd | j D }t|d S )N_c                 S   s   g | ]\}}| d | qS )-r   ).0namevalr   r   r   
<listcomp>K       z#HIPOptions.hash.<locals>.<listcomp>utf-8)join__dict__rT   hashlibsha256encode	hexdigest)rU   keyr   r   r   hashJ   s   zHIPOptions.hash)$__name__
__module____qualname__r'   int__annotations__r)   r+   r,   r-   r.   r/   r0   r1   rO   r2   rS   r3   boolr4   r!   rR   r6   r   r7   r9   r:   r;   r<   r=   r>   r?   rA   rC   rX   rh   r   r   r   r   r%      s6   
 r%   c                   @   s2   e Zd ZdZd	ddZedd Zedd ZdS )
HIPAttrsDescriptorpointer_range_32Nc                 C   s<   d| j d< |d u s|d u rd S dd t||D | jd< d S )NrH   ztt.pointer_rangec                 S   s,   g | ]\}}t |r|js|js|jqS r   )ro   is_within2gbdo_not_specializedo_not_specialize_on_alignmentnum)r[   paramargr   r   r   r^   _   s    z>HIPAttrsDescriptor._add_backend_properties.<locals>.<listcomp>)property_valuesziparg_properties)rU   paramsvaluesr   r   r   _add_backend_propertiesZ   s   
z*HIPAttrsDescriptor._add_backend_propertiesc                 C   sD   t | dr|  dkS dtt| v r t | dr |   dkS dS )N	ptr_rangeiztorch.Tensoruntyped_storageF)hasattrr}   rR   typer~   size)rv   r   r   r   rq   d   s
   
zHIPAttrsDescriptor.is_within2gbc                 C   s:   t | |}t| rdnd}|| dd}|r|S dS )NSN )r   get_property_keyro   rq   replace)r]   aligngeneric_keyhip_keyrg   r   r   r   r   m   s   z#HIPAttrsDescriptor.get_property_key)NN)ri   rj   rk   	__slots__r|   staticmethodrq   r   r   r   r   r   ro   O   s    	


ro   c                       s   e Zd ZedefddZdeddf fddZdefdd	Zd
d Z	dd Z
deeef fddZdd Zdd Zedd Zedd Zedd Zedd Zedd Zedd Zed d! Zd"d# Ze d$d% Z  ZS )&
HIPBackendr   c                 C   s
   | j dkS )Nr@   )backend)r   r   r   r   supports_targetw   s   
zHIPBackend.supports_targetreturnNc                    s&   t  | t|jtsJ d| _d S )Nhsaco)super__init__
isinstancer!   rR   
binary_ext)rU   r   	__class__r   r   r   {   s   
zHIPBackend.__init__c                    s   d| j ji}d vr$ttj}| j jdv r|ddh tt||d< d vr2t	dddk|d< | fd	d
tj
 D  tdi |S )Nr!   r6   )gfx940gfx941gfx942fp8e4b8fp8e5b16r;   TRITON_DEFAULT_FP_FUSION1c                    s   i | ]}| v r| | qS r   r   )r[   koptsr   r   
<dictcomp>   r_   z,HIPBackend.parse_options.<locals>.<dictcomp>r   )r   r!   setr%   r6   updaterS   sortedosgetenv__dataclass_fields__keys)rU   r   argsr6   r   r   r   parse_options   s   
zHIPBackend.parse_optionsc                 C   s(   |j |j|j|jd |jd |jd fS )Nr   r(   r*   )r'   r,   sharedr2   )rU   metadatar   r   r   pack_metadata   s   zHIPBackend.pack_metadatac                 C   s   dt | ji}|S )Nr#   )r#   r   )rU   codegen_fnsr   r   r   get_codegen_implementation   s   z%HIPBackend.get_codegen_implementationc                 C   s   ddl m} d|iS )Nr   )	libdeviceztriton.language.extra.libdevice)triton.language.extra.hipr   )rU   r   r   r   r   get_module_map   s   zHIPBackend.get_module_mapc                 C   s   t | d S N)r	   load_dialects)rU   ctxr   r   r   r      s   zHIPBackend.load_dialectsc                 C   s
   t ||S r   )ro   )rU   rz   r   r   r   r   get_attrs_descriptor   s   
zHIPBackend.get_attrs_descriptorc                 C   s   t | |S r   )ro   r   )rv   r   r   r   r   compute_spec_key   s   zHIPBackend.compute_spec_keyc                  C   sp   t d} | d urt| }| r|S ttjd }| r |S td}| r*|S td}| r4|S td)NTRITON_HIP_LLD_PATHzllvm/bin/ld.lldz/opt/rocm/llvm/bin/ld.lldz/usr/bin/ld.lldzWROCm linker /opt/rocm/llvm/bin/ld.lld not found. Set 'TRITON_HIP_LLD_PATH' to its path.)r   r   r   is_filerM   rN   	Exception)lld_env_pathlldr   r   r   path_to_rocm_lld   s   
zHIPBackend.path_to_rocm_lldc                 C   s   t | j}|  tj| tj| tj	| tj
| tj| tj| tj| tj| tj| ||  | S r   )r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointeradd_combineadd_canonicalizeradd_reorder_broadcastadd_cseadd_licmadd_symbol_dceadd_loop_unrollrunmodr   optionspmr   r   r   	make_ttir   s   
zHIPBackend.make_ttirc                 C   s  t | j}|  tj|d|j |j|j	|j
 ||  t | j}|  tj| tj| tj| tjj||j|j|j tj| tjj| tj|d t|jr{|jdkslJ dtjj||j tj| tjj| tj|d tj| tj| t|jrtjj| tj dddkrtjj!| tj| tjj"| tj| tj#| tj$| ||  | S )Nzhip:Tr   zTriton AMD backend pipeliner has been updated. We used to trigger software pipelining with num_stages == 0. Now it will not happen anymore; please update to use num_stages == 2 for equivalent behavior in the past.AMDGCN_USE_BUFFER_OPS0r   )%r   r   r   r   r   r   add_convert_to_ttgpuirr!   r'   rJ   r,   r   ttgpuiradd_coalesceadd_remove_layout_conversionsadd_optimize_thread_localityr	   add_accelerate_matmulr<   r=   add_optimize_epilogueadd_optimize_dot_operandshas_matrix_core_featurer+   add_stream_pipelinev2r   r   insert_instruction_sched_hintsadd_reduce_data_duplicationadd_reorder_instructionsr   environgetadd_canonicalize_pointersadd_convert_to_buffer_opsr   r   r   r   r   r   
make_ttgir   sD   

zHIPBackend.make_ttgirc                    s  | }t |j}|  tjj||j d}tjj	||j| tj
| tj
| tj| d}tjj||j| tj| tj| tj
| tj
| tj| tj| tj| tjj||j tjdddkrtj| tjj|| || t  t }t || t!  t" tj#|jd t$ |j t% d t& dd t& d	d t& d
d t& d|j'dk dd  ( D }|d )tj* |d +dd|j,|j'   |d +d|j-  |j.rdnd}	|d +d|	 t/|d  |j0r, fdd|j0D }
t1 |
 t2 tj3|jdg |j4 | 5d|d< t6  t7 S )Nr   TTRITON_DISABLE_LINE_INFOr   r   i  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64rI   c                 S   s   g | ]}|  s|qS r   )is_declaration)r[   fnr   r   r   r^   0  r   z(HIPBackend.make_llir.<locals>.<listcomp>zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr8   zdenormal-fp-math-f32c                    s    g | ]\}}t  |r|qS r   )r	   need_extern_lib)r[   r\   pathllvm_modr   r   r^   >  s     ztriton_gpu.sharedr   )8r   r   r   r   r	   r   r   %add_decompose_unsupported_conversionsr!   add_optimize_lds_usageconvertadd_scf_to_cfadd_index_to_llvmiradd_allocate_shared_memoryadd_to_llvmirr   r   r   add_cf_to_llvmiradd_arith_to_llvmirr   lower_instruction_sched_hintsrC   r   r   r   llvmiradd_di_scopeadd_builtin_func_to_llvmirr   r   init_targets	to_moduleattach_target_tripleattach_datalayoutTARGET_TRIPLEset_isa_versionset_abi_versionset_bool_control_constantrJ   get_functionsset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr'   r)   r>   set_all_fn_arg_inregr1   link_extern_libsoptimize_moduleOPTIMIZE_O3r;   get_int_attrcleanup_bitcode_metadatarR   )srcr   r   r   r   custom_lds_size_HIPBackend__HIP_FTZr   fnsdenormal_modepathsr   r   r   	make_llir   s`   


zHIPBackend.make_llirc              	   C   sj   t d| }t|dksJ |d |d< t| tj|jdg |jd}t	j
ddd	kr3td
 t| |S )Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r(   r   r\   r   FAMDGCN_ENABLE_DUMPr   r   z!// -----// AMDGCN Dump //----- //)refindalllenr   translate_to_asmr	   r  r!   r;   r   r   r   print)r  r   r   namesamdgcnr   r   r   make_amdgcnI  s   zHIPBackend.make_amdgcnc           
      C   s  t | |jd}t }t h}t 1}t|jd}|	| W d    n1 s,w   Y  t
|ddd|jd|jg W d    n1 sIw   Y  t|jd}| }	W d    n1 scw   Y  W d    |	S W d    |	S 1 s{w   Y  |	S )Nr   wbz-flavorgnuz-sharedz-orb)r	   assemble_amdgcnr!   r   r   tempfileNamedTemporaryFileopenr\   write
subprocess
check_callread)
r  r   r   r   	rocm_pathtmp_outtmp_infd_infd_outretr   r   r   
make_hsacoX  s&   




zHIPBackend.make_hsacoc                    s^    fdd|d<  fdd|d<  fdd|d<  fdd|d	<  fd
d|d< d S )Nc                        | | S r   )r   r  r   r   rU   r   r   r   g      z'HIPBackend.add_stages.<locals>.<lambda>r   c                    r2  r   )r   r3  r4  r   r   r   h  r5  ttgirc                    r2  r   )r  r3  r4  r   r   r   i  r5  llirc                    r2  r   )r  r3  r4  r   r   r   j  r5  r  c                    r2  r   )r1  r3  r4  r   r   r   k  r5  r   r   )rU   stagesr   r   r4  r   
add_stagesf  s
   zHIPBackend.add_stagesc                 C   s&   t jt dgdd}| d| j S )Nz	--versionr`   )encodingrZ   )r(  check_outputr   r   r   )rU   versionr   r   r   rh   m  s   zHIPBackend.hash)ri   rj   rk   r   r   r   r   r   r   r   r   r   rR   r   r   r   r   r   r   r   r   r  r  r1  r9  	functools	lru_cacherh   __classcell__r   r   r   r   r   u   s6    




'
R

r   )triton.backends.compilerr   r   r   r   triton._C.libtritonr   r   r   r	   dataclassesr
   typingr   r   r   typesr   rc   r$  r   r  r(  r=  pathlibr   r#   r%   ro   r   r   r   r   r   <module>   s$    2%