o
    پi)                     @   s   U d dl mZ d dlZd dlmZ d dlZddlmZmZ eG dd dZ	dd	 Z
d
d Ze aeed< dae	dB ed< deeef fddZdd Zde	fddZG dd deZdd ZdS )    )	dataclassN)get_cdna_version   )opt_flags_amdopt_flags_nvidiac                   @   s   e Zd ZU eed< eed< eed< eed< eed< eed< eed< eed< eed	< eed
< eed< eed< edB ed< eed< eed< dd ZdS )OptFlagsblock_mblock_nblock_k	num_warps
num_stagesgroup_mxcd_swizzlew_cache_modifiersplit_kis_persistentfused_scatteridle_smsNepilogue_subtilearchtarget_kernel_kwargsc                 C   s    | j r| jdkrtdd S d S )Nr   zNot supported)r   r   
ValueError)self r   _/home/ubuntu/.local/lib/python3.10/site-packages/triton_kernels/matmul_ogs_details/opt_flags.py__post_init__   s   zOptFlags.__post_init__)	__name__
__module____qualname__int__annotations__strbooldictr   r   r   r   r   r   
   s"   
 r   c                    s^  g d t  fdd|D rJ | |d u r|}n|jd u r*td||j }n|j}t dk}|dd r=|d }n,|
rF|rCdnd}n#|d	krU|d
krU|rRdnd}n|r^|d	kr^d}ntdtt	|d}|d urt|
||}nt||}d}d}|}t|||||||\}}|dd d ur|d }|dd d ur|d }|dd}|dd d ur|d }n|s|
rd}n||| d |  }tjdj}td|| }|dkrdnd }|d ur|dkrdnd}d}dddd}|dd }|d u rd}t|||||||||||ddd|d |dtfdd| D s-J  d| S )N)r   r	   r
   r   r   r   r   c                       g | ]}| vqS r   r   .0cconstraints_supportedr   r   
<listcomp>1       z.make_default_opt_flags_amd.<locals>.<listcomp>r      r         i   i       @      r
   r	   r   Fr   r   z.cg      )waves_per_eumatrix_instr_nonkdimkpackr   r   )r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   c                 3   *    | ]\}}|d urt  ||kV  qd S Ngetattrr&   ckcvretr   r   	<genexpr>      ( z-make_default_opt_flags_amd.<locals>.<genexpr> != )anykeysexpected_tokens_per_exptmaxn_expts_totr   getmintritonnext_power_of_2n_blockscdivr   compute_block_nktorchcudaget_device_propertiesmulti_processor_countr   allitems)	out_dtype	lhs_dtype	rhs_dtypeprecision_configmnkrouting_datacan_use_persistent_tmacan_use_fused_scatterenforce_bitwise_invarianceepilogue_effective_itemsizeconstraintstokens_per_exptis_cdna4r   grid_mr   num_xcdsr   r	   r
   r   r   	grid_sizen_cur   r   r   r   r   r   r)   r?   r   make_default_opt_flags_amd!   s   "




.ri   c           $         s  g d t  fdd|D rJ | |d u r|}n|jd u r*td||j }n|j}d}d}|dd r<|d }n|
rAd}ntdtt|d}d }t	
|||}t	|||||}tjd	j}|| }|ox|d u pxt|d
d dk}|dd d ur|d }n#|jd u }|o|o|dks|jdko| jdk }|jd us|jd urd}|dd d ur|d }n
t	||||||}|dd d ur|d }n!|s|
s|jd us|jd urd}nt	d ||||}t	|||}|dkrtj} |||||| ||f}|dd d ur|d g}ng d}d}|D ]}t	jg |||R  } | |kr.|| }!}q|dks7J |dd rB|d }|dd d urP|d }"n|	oV|dk}"t	|||}#t||||#||"||d |||!|t |dd	dtfdd| D sJ  d| S )N)r   r
   r   r   r   r   r   r   c                    r$   r   r   r%   r(   r   r   r*      r+   z1make_default_opt_flags_nvidia.<locals>.<listcomp>r   r1   r   r.   r2   r   r3   	   r   g       @r,   Fr
   r   r   )r   r3   r,   r   r   r   )r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   c                 3   r7   r8   r9   r;   r>   r   r   r@      rA   z0make_default_opt_flags_nvidia.<locals>.<genexpr>rB   )rC   rD   rE   rF   rG   rH   rI   rJ   rK   r   compute_block_ncompute_grid_sizerO   rP   rQ   rR   r   max_num_imprecise_accitemsize	act_scale	out_scalecompute_block_kcompute_split_kfloat32compute_num_stagescompute_num_warpsr   r#   rS   rT   )$rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   r   r   r   r   r	   rf   n_smstiles_per_smsupports_persistentr   has_simple_epiloguer
   r   estimated_actual_grid_sizecompute_num_stages_argssubtiles_to_checkr   epnsr   r   r   r   rh   r   make_default_opt_flags_nvidia   s   "

 

$





.r   _opt_flags_constraints
_opt_flagsra   c                 C   s   t |  d S r8   )r   update)ra   r   r   r   update_opt_flags_constraints   s   r   c                   C   s
   t  ad S r8   )r#   r   r   r   r   r   reset_opt_flags_constraints  s   
r   	opt_flagsc                 C   s    t rJ dtrJ d| ad S )Nz>setting constraints is incompatible with manual flags overridez1opt_flags already set; please reset to None first)r   r   )r   r   r   r   set_opt_flags  s   r   c                   @   s   e Zd ZdS )InapplicableConstraintN)r   r   r   r   r   r   r   r     s    r   c                 C   s   t ddr|stdt ddr|	std|j}td ur%t r#J tS | |||||||||	||
t g}tjjj	 j
}|dkrDt| S |dkrLt| S J )Nr   Fz.cannot enforce `is_persistent=True` constraintr   z.cannot enforce `fused_scatter=True` constrainthiprP   )r   rH   r   r_   r   rJ   runtimedriveractiveget_current_targetbackendri   r   )rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r`   r_   argsr   r   r   r   make_opt_flags  s&   r   )dataclassesr   rJ   triton_kernels.target_infor   rO   opt_flags_detailsr   r   r   ri   r   r#   r   r    r   r!   r   r   r   r   	Exceptionr   r   r   r   r   r   <module>   s    bz