o
    –ƒ½iÅY  ã                   @   sâ   d dl mZmZmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlmZ d	efd
d„Zdd„ Zdd„ ZeddG dd„ dƒƒZG dd„ deƒZdS )é    )ÚBaseBackendÚ	GPUTargetÚLanguage)ÚirÚpassesÚllvmÚamd)Úknobs)Ú	dataclass)ÚAnyÚDictÚTuple)Ú
ModuleTypeN)ÚPathÚtargetc                 C   s   dd„ S )Nc                 S   s   dS )N)é   r   r   © )Úlhs_typeÚrhs_typer   r   ú[/home/ubuntu/Irodori-TTS/.venv/lib/python3.10/site-packages/triton/backends/amd/compiler.pyÚ<lambda>   s    z"get_min_dot_size.<locals>.<lambda>r   ©r   r   r   r   Úget_min_dot_size   s   r   c                 C   s,   t jjd u r| dkp| dko|du S t jjS )NÚgfx942Úgfx950T)r	   r   Úuse_block_pingpong)ÚarchÚuse_async_copyr   r   r   Úis_pingpong_schedule_enabled   s   ÿÿr   c                 C   s   t jjd u r
| dkS t jjS )Nr   )r	   r   Úuse_in_thread_transpose©r   r   r   r   Úis_in_thread_transpose_enabled   s   r!   T)Úfrozenc                   @   s&  e Zd ZU dZeed< dZeed< dZeed< dZeed< d	Z	e
ed
< dZeed< dZeed< d	Zeed< dZee ed< dZee ed< dZeed< dZee ed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< d Zeed!< d"Zeed#< d$d%„ Zd&d'„ Zd	S )(Ú
HIPOptionsé   Ú	num_warpsr   Úwaves_per_eué   Ú
num_stagesr   Únum_ctasNÚextern_libsFÚdebugTÚsanitize_overflowr   )Úfp8e4nvÚfp8e5Úfp8e5b16Úfp8e4b8Úsupported_fp8_dtypesr   Ú!deprecated_fp8_dot_operand_dtypesÚieeeÚdefault_dot_input_precision)r3   Úbf16x3Úbf16x6Úallowed_dot_input_precisionsÚenable_fp_fusionÚlaunch_cooperative_gridÚmatrix_instr_nonkdimÚkpackÚallow_flush_denormÚmax_num_imprecise_acc_defaultÚhipÚbackend_nameÚ Úinstrumentation_modeÚnoneÚschedule_hintc                 C   sì   t | jdd… ƒ}|dkrdnd}t | d|¡ | jdkr'| j| jd @ dks+J d	ƒ‚| jd
krF| jdkrFt d| j› d¡ t | dd¡ tt	ƒj
d }| jd u rTi nt| jƒ}dD ]}t||› d ƒ||< q[t | dt| ¡ ƒ¡ d S )Né   éþÿÿÿé
   é    é@   Ú	warp_sizer   r   znum_warps must be a power of 2r   zckpack is deprecated starting from gfx950 and will be removed in later releases. So for now kpack = z7 will be overwritten to 1 to make transitioning easier.r;   Úlib)ÚocmlÚocklz.bcr*   )Úintr   ÚobjectÚ__setattr__r%   r;   ÚwarningsÚwarnr   Ú__file__Úparentr*   ÚdictÚstrÚtupleÚitems)ÚselfÚ	gfx_majorrI   Údefault_libdirr*   rJ   r   r   r   Ú__post_init__N   s     ÿÿzHIPOptions.__post_init__c                 C   s.   d  dd„ | j ¡ D ƒ¡}t | d¡¡ ¡ S )NÚ_c                 S   s   g | ]\}}|› d |› ‘qS )Ú-r   )Ú.0ÚnameÚvalr   r   r   Ú
<listcomp>b   s    z#HIPOptions.hash.<locals>.<listcomp>úutf-8)ÚjoinÚ__dict__rW   ÚhashlibÚsha256ÚencodeÚ	hexdigest)rX   Úkeyr   r   r   Úhasha   s   zHIPOptions.hash) Ú__name__Ú
__module__Ú__qualname__r%   rM   Ú__annotations__r&   r(   r)   r*   rT   r+   Úboolr,   r   rU   r1   r   r2   r4   r7   r8   r9   r:   r;   r<   r=   r?   rA   rC   r[   rj   r   r   r   r   r#      s0   
 r#   c                       s  e Zd ZdZdZedefdd„ƒZdeddf‡ fdd„Zde	fd	d
„Z
defdd„Zdd„ Zdd„ Zdee	ef fdd„Zdd„ Zedd„ ƒZedd„ ƒZedd„ ƒZedd„ ƒZedd„ ƒZedd „ ƒZed!d"„ ƒZed#d$„ ƒZed%d&„ ƒZd'd(„ Ze ¡ d)d*„ ƒZ‡  Z S )+Ú
HIPBackendNFr   c                 C   s
   | j dkS )Nr>   )Úbackendr   r   r   r   Úsupports_targetj   s   
zHIPBackend.supports_targetÚreturnc                    s&   t ƒ  |¡ t|jtƒsJ ‚d| _d S )NÚhsaco)ÚsuperÚ__init__Ú
isinstancer   rU   Ú
binary_ext)rX   r   ©Ú	__class__r   r   rv   n   s   
zHIPBackend.__init__c                 C   s   d|j › S )Núhip:r    ©rX   Úoptionsr   r   r   Úget_target_names   s   zHIPBackend.get_target_namec                    s  dt jjp| jji}ˆ  dd¡dkr"t | jj¡s"td| jj› ƒ‚| jjdkr;t	t
jƒ}| dh¡ tt|ƒƒ|d< dˆ vrHttt
jƒƒ|d< | jjd	krbt	t
jƒ}| d
dh¡ tt|ƒƒ|d< dˆ vrlt jj|d< | ‡ fdd„t
j ¡ D ƒ¡ t
di |¤ŽS )Nr   r)   r   znum_ctas > 1 not supported on r   Útf32r7   r1   r   r/   r0   r2   r8   c                    s*   i | ]}|ˆ v rˆ | d ur|ˆ | “qS ©Nr   )r^   Úk©Úoptsr   r   Ú
<dictcomp>Œ   s   * z,HIPBackend.parse_options.<locals>.<dictcomp>r   )r	   ÚruntimeÚoverride_archr   r   Úgetr   Úsupports_multi_cta_launchÚ
ValueErrorÚsetr#   r7   ÚupdaterV   Úsortedr1   r2   ÚlanguageÚdefault_fp_fusionÚ__dataclass_fields__Úkeys)rX   rƒ   Úargsr7   r2   r   r‚   r   Úparse_optionsv   s"   

zHIPBackend.parse_optionsc                 C   s   |j |j|jfS r€   )r%   r)   Úshared)rX   Úmetadatar   r   r   Úpack_metadata   s   ýzHIPBackend.pack_metadatac                 C   s   dt | jƒiS )NÚmin_dot_size)r   r   r|   r   r   r   Úget_codegen_implementation–   s   z%HIPBackend.get_codegen_implementationc                 C   s   ddl m} d|iS )Nr   )Ú	libdeviceztriton.language.extra.libdevice)Útriton.language.extra.hipr˜   )rX   r˜   r   r   r   Úget_module_map™   s   zHIPBackend.get_module_mapc                 C   s$   t  |¡ tjrtj |¡ d S d S r€   )r   Úload_dialectsrp   Úinstrumentation)rX   Úctxr   r   r   r›   ž   s   
ÿzHIPBackend.load_dialectsc                 C   sL   dd l }d}t| dƒr|  ¡ |kS t| |jƒr$t| dƒr$|  ¡  ¡ |kS dS )Nr   iÿÿÿÚ	ptr_rangeÚuntyped_storageF)ÚtorchÚhasattrrž   rw   ÚTensorrŸ   Úsize)Úargr    Ú
MAX_INT_32r   r   r   Úis_within_2gb£   s   
zHIPBackend.is_within_2gbc                 C   s$   t  | ¡}d| v r|ddgg7 }|S )NÚSztt.pointer_rangerG   )r   Ú
parse_attr)ÚdescÚretr   r   r   r¨   ®   s   
zHIPBackend.parse_attrc                 K   s0   t j| fi |¤Ž}tjjrt | ¡r|d7 }|S )Nr§   )r   Úget_tensor_specializationr	   r   Úuse_buffer_opsrp   r¦   )r¤   Úkwargsrª   r   r   r   r«   µ   s   z$HIPBackend.get_tensor_specializationc                 C   sœ   t  | j¡}| ¡  tj |¡ tj |¡ tj 	|¡ tj 
|¡ tj |¡ tj |¡ tj |¡ tj |¡ tj |¡ tj |¡ | | d¡ | S )NÚ	make_ttir)r   Úpass_managerÚcontextÚenable_debugr   ÚcommonÚadd_inlinerÚttirÚadd_rewrite_tensor_pointerÚ(add_rewrite_tensor_descriptor_to_pointerÚadd_canonicalizerÚadd_combineÚadd_reorder_broadcastÚadd_cseÚadd_triton_licmÚadd_symbol_dceÚadd_loop_unrollÚrun)Úmodr”   r}   Úpmr   r   r   r®   ¼   s   zHIPBackend.make_ttirc                 C   s˜  t  | j¡}| ¡  tj |d|j› |j|j	|j
¡ | | d¡ t  | j¡}| ¡  d}tj |¡ tj ||¡ tj |¡ tj |¡ tjj ||j|j|j¡ tj |¡ tjj |¡ tjj ||j¡ tjj |¡ tj |¡ tj |¡ tj |¡ tj |¡ tjj}t|j|ƒ}tjj ||j ¡ tjj !|||¡ |r¯tjj "||j¡ tj |¡ |j# $¡ dkrÍ|j# %d¡D ]
}tjj &||¡ qÂtj |¡ tj '|¡ t(|jƒrëtjj )|¡ tj |¡ tjj *|¡ |r|j dkrtjj +||j ¡ tjj,r%tjj -|¡ tj |¡ tjj .||jtjj/tjj0¡ tjj 1|¡ tj |¡ tj 2|¡ tj 3|¡ | | d¡ |  4¡ |d< | S )	Nr{   Úmake_ttgir_earlyFrB   ú,r   Ú
make_ttgirÚtensordesc_meta)5r   r¯   r°   r±   r   r´   Úadd_convert_to_ttgpuirr   r%   rI   r)   r¾   ÚttgpuirÚadd_coalesceÚadd_f32_dot_tcÚadd_remove_layout_conversionsÚadd_optimize_thread_localityr   Úadd_accelerate_matmulr:   r;   Úadd_optimize_epilogueÚadd_optimize_dot_operandsÚadd_hoist_layout_conversionsÚadd_fuse_nested_loopsr²   r·   r»   r	   r   r   Úadd_schedule_loopsr(   Úadd_pipelineÚadd_coalesce_async_copyrC   ÚlowerÚsplitÚinsert_instruction_sched_hintsÚadd_reduce_data_duplicationr!   Úadd_in_thread_transposeÚadd_reorder_instructionsÚadd_block_pingpongr¬   Úadd_canonicalize_pointersÚadd_convert_to_buffer_opsÚuse_buffer_atomicsÚ%buffer_ops_analyze_small_tensor_rangeÚadd_fold_true_cmpirº   r¼   Úget_tensordesc_metadata)r¿   r”   r}   rÀ   ÚemuTF32r   r   Úhintr   r   r   rÃ   Í   sp   ÿ

üzHIPBackend.make_ttgirc                 C   s|   | }t  |j¡}| ¡  tj |¡ tj |¡ tj 	|¡ tj
 |¡ tj |¡ tj |¡ | |d¡ | ¡ |d< |S )NÚgluon_to_ttgirrÄ   )r   r¯   r°   r±   r   Úgluonr³   Úadd_resolve_auto_encodingsr²   Úadd_sccpr´   Úadd_loop_aware_cser·   rÆ   Ú add_combine_tensor_select_and_ifr¾   rß   )Úsrcr”   r}   r¿   rÀ   r   r   r   râ     s   zHIPBackend.gluon_to_ttgirc                    s”  | }t  |j¡}| ¡  tjj ||j¡ d}tjj 	||j|¡ tj
 |¡ tj |¡ tj
 |¡ tjj |¡ tjrFtj d||j¡ d}tjj ||j|¡ tj |¡ tj |¡ tj
 |¡ tj
 |¡ tj |¡ tj |¡ tj |¡ |j ¡ dkrŽtjj ||j|j¡ tjrštj d||j¡ tjj s¨tjj!s¨tj" #|¡ tjj $||¡ | %|d¡ tjj!rêtjj sÔt  |j¡}| ¡  tj" #|¡ | %|d¡ t  |j¡}| ¡  tj" &|¡ | %|d¡ t' (¡  t' ¡ }t' )||¡‰ t *ˆ ¡ d	}tjj+rd
}t' ,ˆ tj-|j|¡ t .ˆ |j¡ t /ˆ d¡ t 0ˆ dd¡ t 0ˆ dd¡ t 0ˆ dd¡ t 0ˆ d|j1dk¡ dd„ ˆ  2¡ D ƒ}	|	d  3tj4¡ |	d  5dd|j6|j1 › ¡ d|j 7d¡v rm|	d  5dd¡ |	d  5dd¡ |	d  5d|j8› d|j8› ¡ |j9rŠdnd}
|	d  5d |
¡ tjj+r¦|	d  :d
¡ |	d  ;¡  t <|	d ¡ tjj+rÑt=t>ƒj?d! }t@|d" ƒt@|d# ƒt@|d$ ƒg}t' Aˆ |¡ n|jBrì‡ fd%d„|jBD ƒ}tC|ƒdkrìt' Aˆ |¡ t' Dˆ t'jE|jd	g |jF¡ t G|j¡r|	d  Hd&¡ |	d  Hd'¡ |	d  Hd(¡ tjjIr!t J|	d ¡ |  Kd)¡|d*< |  Kd+¡p/d|d,< |  Kd-¡p9d.|d/< t Lˆ ¡ t Mˆ ¡ t@ˆ ƒS )0Nr   Úttgpuir_to_llvmirTrB   Úllvmir_to_llvmÚ	make_llirzmake_llir.disable_line_infoz,make_llir.dump_ir_extract_di_local_variablesr@   ú+xnackiô  Ú__oclc_finite_only_optFÚ__oclc_correctly_rounded_sqrt32Ú__oclc_unsafe_math_optÚ__oclc_wavefrontsize64rH   c                 S   s   g | ]}|  ¡ s|‘qS r   )Úis_declaration)r^   Úfnr   r   r   ra   x  s    z(HIPBackend.make_llir.<locals>.<listcomp>zamdgpu-flat-work-group-sizez1,zmemory-bound-attentionrÂ   zamdgpu-sched-strategyziterative-ilpzuniform-work-group-sizeÚtruezamdgpu-waves-per-euz, zpreserve-signr3   zdenormal-fp-math-f32rJ   z
asanrtl.bczocml.bczockl.bcc                    s    g | ]\}}t  ˆ |¡r|‘qS r   )r   Úneed_extern_lib)r^   r_   Úpath©Úllvm_modr   r   ra     s     zamdgpu-no-workgroup-id-xzamdgpu-no-workgroup-id-yzamdgpu-no-workgroup-id-zz
ttg.sharedr“   zttg.profile_scratch_memory_sizeÚprofile_scratch_sizez$ttg.profile_scratch_memory_alignmentr   Úprofile_scratch_align)Nr   r¯   r°   r±   r   r   rÆ   Úadd_update_async_wait_countr   Úadd_optimize_lds_usageÚconvertÚadd_scf_to_cfrã   r³   Úadd_index_to_llvmirÚadd_allocate_shared_memoryrp   rœ   ÚpatchÚadd_to_llvmirr²   r·   rº   Úadd_cf_to_llvmirÚadd_arith_to_llvmirr¼   rC   rÓ   Úlower_instruction_sched_hintsr(   r	   ÚcompilationÚdisable_line_infoÚ"dump_ir_extract_di_local_variablesÚllvmirÚadd_di_scopeÚadd_builtin_func_to_llvmirr¾   Úadd_di_local_variabler   Úinit_targetsÚ	to_moduleÚattach_target_tripleÚenable_asanÚattach_datalayoutÚTARGET_TRIPLEÚset_isa_versionÚset_abi_versionÚset_bool_control_constantrI   Úget_functionsÚset_calling_convÚCALLING_CONV_AMDGPU_KERNELÚadd_fn_attrr%   rÔ   r&   r<   Úadd_fn_target_featureÚadd_fn_asan_attrÚset_all_fn_arg_inregr   rR   rS   rU   Úlink_extern_libsr*   ÚlenÚoptimize_moduleÚOPTIMIZE_O3r8   Úhas_architected_sgprsÚremove_fn_attrÚscalarize_packed_fopsÚ#add_scalarize_packed_fops_llvm_passÚget_int_attrÚcleanup_bitcode_metadataÚdisable_print_inline)rè   r”   r}   r¿   rÀ   Úcustom_lds_sizeÚ_HIPBackend__HIP_FTZr°   Útarget_featuresÚfnsÚdenormal_moderZ   Úpathsr   rö   r   rë     s´   







ý


zHIPBackend.make_llirc           
   	   C   sÒ   t  d| ¡}t|ƒdksJ ‚|d |d< g }d|jv rdnd}t |  d¡¡ ¡ }|d d	 | }t 	| t
j|j|||j|¡}t | t
j|j|||j|¡ t | t
j|j|||jd
¡}	tj
jrgtdƒ t|	ƒ |	S )Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   r_   Úgfx11z-real-true16r@   rb   r\   Fz!// -----// AMDGCN Dump //----- //)ÚreÚfindallr  r   re   rf   rg   rh   r   Útranslate_to_mirr   r  r8   Údump_sched_dagÚtranslate_to_asmr	   Údump_amdgcnÚprint)
rè   r”   r}   ÚnamesÚflagsÚfeaturesÚir_hashÚdump_file_idr\   Úamdgcnr   r   r   Úmake_amdgcnº  s(   ÿÿÿzHIPBackend.make_amdgcnc           
   
   C   s   d}t jjrd}t | |j|¡}t ¡ b}t ¡ +}t|j	dƒ}| 
|¡ W d   ƒ n1 s0w   Y  t |j	|j	¡ W d   ƒ n1 sGw   Y  t|j	dƒ}| ¡ }	W d   ƒ n1 saw   Y  W d   ƒ |	S W d   ƒ |	S 1 syw   Y  |	S )Nr@   rì   ÚwbÚrb)r	   r  r  r   Úassemble_amdgcnr   ÚtempfileÚNamedTemporaryFileÚopenr_   ÚwriteÚ
link_hsacoÚread)
rè   r”   r}   r)  rt   Útmp_outÚtmp_inÚfd_inÚfd_outrª   r   r   r   Ú
make_hsacoÒ  s*   

ÿý
ÿ
ûþ
ûùzHIPBackend.make_hsacoc                    sª   |t jkr‡ ‡fdd„|d< ‡ ‡fdd„|d< n|t jkr&‡ ‡fdd„|d< ‡ ‡fdd„|d< ‡ ‡fd	d„|d
< ‡ ‡fdd„|d< tjjd urStj ˆ|ˆ |d ¡ d S d S )Nc                    ó   ˆ  | |ˆ ¡S r€   )r®   ©rè   r”   ©r}   rX   r   r   r   ã  ó    z'HIPBackend.add_stages.<locals>.<lambda>r´   c                    rJ  r€   )rÃ   rK  rL  r   r   r   ä  rM  Úttgirc                    rJ  r€   )râ   rK  rL  r   r   r   æ  rM  c                    rJ  r€   )rë   rK  rL  r   r   r   ç  rM  Úllirc                    rJ  r€   )r;  rK  rL  r   r   r   è  rM  r:  c                    rJ  r€   )rI  rK  rL  r   r   r   é  rM  rt   )r   ÚTRITONÚGLUONr	   r…   Úadd_stages_inspection_hook)rX   Ústagesr}   r   r   rL  r   Ú
add_stagesá  s   

ÿzHIPBackend.add_stagesc                 C   s   | j › S r€   r   )rX   r   r   r   rj   í  s   zHIPBackend.hash)!rk   rl   rm   rœ   Ú%supports_native_tensor_specializationÚstaticmethodr   rr   rv   rU   r~   r   r’   r•   r—   r   r   rš   r›   r¦   r¨   r«   r®   rÃ   râ   rë   r;  rI  rT  Ú	functoolsÚ	lru_cacherj   Ú__classcell__r   r   ry   r   rp   f   sD    





>

 

rp   )Útriton.backends.compilerr   r   r   Útriton._C.libtritonr   r   r   r   Útritonr	   Údataclassesr
   Útypingr   r   r   Útypesr   re   r?  r.  rW  rP   Úpathlibr   r   r   r!   r#   rp   r   r   r   r   Ú<module>   s$    G