o
    	iF                     @   s   d dl mZmZmZmZmZ d dlZd dlmZ ddl	m
Z ddlmZmZmZ d dlmZ d dlmZmZ ed	Zefd
edeg ef fddZG dd dZG dd dee ZdS )    )SequenceListTypeVarTupleCallableN)TritonSemantic   )_core)
AutoLayoutDistributedLayoutSliceLayout)GluonOpBuilder)flatten_values_to_irunflatten_ir_valuesTensorTycondmsg_fnc                 C   s   | s|| d S N )r   r   categoryr   r   b/home/ubuntu/vllm_env/lib/python3.10/site-packages/triton/experimental/gluon/language/_semantic.py_check   s   
r   c                   @   s*   e Zd ZdefddZdd Zdd ZdS )	GluonCallerContext	num_warpsc                 C   
   || _ d S r   r   )selfr   r   r   r   __init__      
zGluonCallerContext.__init__c                 C   s   d| j  S )N_NWr   )r   r   r   r   mangle   s   zGluonCallerContext.manglec                 C   s   | d|| j d S )Nzttg.num-warps)set_attrget_int32_attrr   )r   fnbuilderr   r   r   initialize_callee   s   z$GluonCallerContext.initialize_calleeN)__name__
__module____qualname__intr   r    r%   r   r   r   r   r      s    r   c                
       s   e Zd ZU ejZeZeed< defddZdd Z	dd Z
dee d	ee fd
dZdededefddZdededef fddZdedeeef f fddZdedee def fddZdedee defddZdededef fdd Z fd!d"Zded#ee d$ef fd%d&Zd'd( Zd)d* ZdUd,d-Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Z d8d9 Z!d:d; Z"d<d= Z#d>d? Z$d@dA Z%dBdC Z&e'dDdE Z(dFe)e dedGedeedHf fdIdJZ*dFe)e dedeedHf fdKdLZ+dedMedNedefdOdPZ,dQe)e dRe)e fdSdTZ-  Z.S )VGluonSemanticr$   c                 C   r   r   )r$   )r   r$   r   r   r   r   #   r   zGluonSemantic.__init__c                 C   s0   |g kr|}nt ||| j|}| ||S r   )ttgldistributed_typer$   get_gluon_layout_from_tensortensor)r   handle	scalar_tyshapetyr   r   r   _wrap_handle_infer_layout&   s   z'GluonSemantic._wrap_handle_infer_layoutc                 C   s   |  |j|jj|jS r   )r3   r/   typescalarr1   )r   r.   r   r   r   _wrap_tensor_infer_layout-   s   z'GluonSemantic._wrap_tensor_infer_layout	lhs_shape	rhs_shapec                 C   s   t |t |krtd| d| g }t|D ]3\}}|| }|dkr*|| q|dks2||kr8|| qtdt| d t| d t| |S )N!Cannot broadcast, rank mismatch: , r   z?Cannot make_shape_compatible: incompatible dimensions at index : z and )len
ValueError	enumerateappendstr)r   r7   r8   	ret_shapeileftrightr   r   r   _broadcast_shapes0   s*   zGluonSemantic._broadcast_shapesinputaxisreturnc                    s   dd j D }| d  dk r tj 7  ttjtjfdd jjttt	t
ffdd ttt
pCj k fdd | jj }| |jj|S )	Nc                 S   s   g | ]}t |qS r   )r+   _unwrap_if_constexpr.0xr   r   r   
<listcomp>A       z-GluonSemantic.expand_dims.<locals>.<listcomp>r   r   c                         d j S Nz=expected expand_dims input to be a distributed_type but got: r4   r   rF   r   r   <lambda>H       z+GluonSemantic.expand_dims.<locals>.<lambda>c                      
   d  S )Nz;expected expand_dims input to have a SliceLayout, but got: r   r   )layoutr   r   rS   K      
 c                      s   d  dj  S )Nz7expected expand_dims input layout to be sliced in axis z	 but got )dimr   )rG   rV   r   r   rS   N       )r1   insertr<   r   
isinstancer4   r+   r,   rV   r   r
   rX   r$   create_expand_dimsr/   r3   r5   )r   rF   rG   	dst_shaper/   r   )rG   rF   rV   r   expand_dims@   s"   

zGluonSemantic.expand_dimsabc                    s8   |  ||\}}t|jg kd t ||}| |S )NzCannot join scalars in gluon)broadcast_impl_valuer   r1   superjoinr6   )r   r_   r`   value	__class__r   r   rc   S   s   
zGluonSemantic.joinc                    s$   t  |\}}| || |fS r   )rb   splitr6   )r   r_   lhsrhsre   r   r   rg   Y   s   zGluonSemantic.splitdimsc                    s   t  ||}| |S r   )rb   permuter6   )r   rF   rj   rd   re   r   r   rk   ]   s   
zGluonSemantic.permuter1   c                    s   t t jtj fdd  j t ttkfdd kr) S tD ]#\}}| |krP|dkrPtd|  d| d| d d	 
q-t jj	 jj
}| j j|| j}| ||S )
Nc                      rO   rP   rQ   r   rR   r   r   rS   c   rT   z4GluonSemantic.broadcast_impl_shape.<locals>.<lambda>c                         d d  S )Nr9   r:   r   r   )r1   	src_shaper   r   rS   e       r   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension r;   r:   )r   r[   r4   r+   r,   get_block_shapesr<   r>   r=   r5   rV   r$   create_broadcastr/   to_irr.   )r   rF   r1   rB   itemret_tyr/   r   )rF   r1   rm   r   broadcast_impl_shapea   s,   

 z"GluonSemantic.broadcast_impl_shaperh   ri   c                    s  |j  |j   r st ||S tt tj fdd tttjfdd   } }| 	||}t j
t}tj
t}|rU|sU| |j
}n|ra|sa| | j
}n j
j
krstd j
 dj
 | ||}| ||}||fS )Nc                      
   d S )Nz@expected broadcast left input to be a distributed_type but got: r   r   )lhs_tyr   r   rS   y   rW   z4GluonSemantic.broadcast_impl_value.<locals>.<lambda>c                      ru   )NzAexpected broadcast right input to be a distributed_type but got: r   r   )rhs_tyr   r   rS   {   rW   zLayout mismatch in broadcast: z vs )r4   is_blockrb   ra   r   r[   r+   r,   ro   rE   rV   r
   set_auto_layoutr=   rt   )r   rh   ri   r7   r8   rA   is_lhs_autois_rhs_autore   )rv   rw   r   ra   q   s0   

z"GluonSemantic.broadcast_impl_valuec                    s:   || g}|d u rt  }ttj||}t j|||dS )N)rs   )r
   r+   r,   int32rb   arange)r   startendrV   r1   rs   re   r   r   r}      s
   
zGluonSemantic.aranger]   can_reorderc                    s&   t | d t |||}| |S )Nz%can_reorder is not supported in gluon)r   rb   reshaper6   )r   rF   r]   r   rd   re   r   r   r      s   
zGluonSemantic.reshapec                 C   s4   t |j||}| j|| j|j}t ||S r   )r+   r,   dtyper$   create_splatrq   r/   r.   )r   rd   r1   rV   rs   r/   r   r   r   splat   s   zGluonSemantic.splatc                 C   s(   |  ||}|d u rt }| |||S r   )make_scalarr
   r   )r   r1   rd   r   rV   r5   r   r   r   full   s   zGluonSemantic.fullFc                    s   |j  tt tj fdd t j j|}|| j}|r5| j	||j
s5td j d| d| j||j
}t||S )Nc                      ru   )Nz@expected convert_layout input to be a distributed_type but got: r   r   r2   r   r   rS      rW   z.GluonSemantic.convert_layout.<locals>.<lambda>zlayout conversion from z to z is not trivial)r4   r   r[   r+   r,   
element_tyr1   rq   r$   is_convert_layout_trivialr/   	TypeErrorrV   create_convert_layoutr.   )r   rd   rV   assert_trivialrs   	ret_ty_irr/   r   r   r   convert_layout   s   
zGluonSemantic.convert_layoutc                 C   sX   t ||||}|d ur| j|| j|j}n
| j|| j}t |||||S r   )r+   shared_memory_descriptor_typer$   create_local_allocrq   r/   shared_memory_descriptor)r   r   r1   rV   rd   r2   r/   r   r   r   allocate_shared   s
   zGluonSemantic.allocate_sharedc                 C   s6   t |j|j|}| j|| j|j}t ||S r   )	r+   r,   r   r1   r$   create_local_loadrq   r/   r.   )r   mem_descrV   rs   r/   r   r   r   shared_load   s   zGluonSemantic.shared_loadc                 C   sb   |j |j ksJ d|j  d|j  d|j|jks&J d|j d|j d| j|j|j d S )Nzsource shape z and destination shape z must matchzsource dtype z and destination dtype )r1   r   r$   create_local_storer/   )r   r   rd   r   r   r   shared_store   s   &&zGluonSemantic.shared_storec                 C   s   | j |j d S r   )r$   create_local_deallocr/   )r   r   r   r   r   shared_dealloc   s   zGluonSemantic.shared_deallocc                 C   sr   |j }t|tsJ d| t|jtsJ d|j j | j|| j|j}t	
|j|j|}| ||S )Nz9set_auto_layout must set to a distributed layout but got z4set_auto_layout input must have auto layout but got )r4   r[   r   rV   r
   r$   create_set_auto_layout_to_irr/   r+   r,   r   r1   r.   )r   rd   rV   src_tyr/   res_tyr   r   r   ry      s   zGluonSemantic.set_auto_layoutc                 C   sr   dg|j  }|||< t|j}|||< |j}t|j|||jj}| j	}	|	
||	|j|}
tj|
fi |jS )Nr   )ranklistr1   rV   r+   r   r   r4   alloc_shaper$   create_memdesc_subslicerq   r/   r   __dict__)r   r   r~   lengthrX   offsetsr1   rV   r2   r$   r/   r   r   r   memdesc_slice   s   
zGluonSemantic.memdesc_slicec                 C   sf   |j dd  }| |j}|j}t|j|||jj}| j	}|
|||j|}tj|fi |jS )Nr   )r1   	to_tensorr/   rV   r+   r   r   r4   r   r$   create_memdesc_indexrq   r   r   )r   r   indexr1   rV   r2   r$   r/   r   r   r   memdesc_index   s   zGluonSemantic.memdesc_indexc                    s   t |t jksJ dj dt | dfdd|D }jj  d t  j  }| fdd|D 7 }| jj|}| j|}t	j
|j|||dS )Nzsource rank (z) and order length (z) must matchc                    s   g | ]} j | qS r   r1   rK   rB   )r   r   r   rM      rN   z/GluonSemantic.memdesc_trans.<locals>.<listcomp>c                    s&   g | ]} t  j d  | qS r   )r<   r   r   r   r   r   r   rM      s   & r   r1   r   rV   )r<   r1   r   r4   r   r$   create_memdesc_transr/   get_gluon_layout_from_memdescr+   r   r   )r   r   orderr1   new_alloc_shaper/   rV   r   r   r   memdesc_trans   s   zGluonSemantic.memdesc_transc                    s   t tt jk fdd | j j}| j|} jj	}t
| j }|d | t }tj| j||dS )Nc                      s   d j  d S )Nz)memdesc_reshape total elements mismatch: z -> r   r   r   r1   r   r   rS      s
    z/GluonSemantic.memdesc_reshape.<locals>.<lambda>r   )r   mathprodr1   r$   create_memdesc_reshaper/   r   r4   r   r<   r   r   r+   r   r   )r   r   r1   r/   rV   r   
prefix_lenr   r   r   r   memdesc_reshape   s    zGluonSemantic.memdesc_reshapec                 C   s<   t ||||}| j|| j|j}t j|fi |jS r   )r+   r   r$   create_memdesc_reinterpretrq   r/   r   r   )r   r   r   r1   rV   r2   r/   r   r   r   memdesc_reinterpret  s   z!GluonSemantic.memdesc_reinterpretc                 C   s$   |r
t |||}n|}| ||S r   )r+   r,   r.   )r   rL   r0   rA   rV   r   r   r   r   wrap_tensor  s   zGluonSemantic.wrap_tensorc                    sl   | D ]t tjtjfdd qdd | D d  t t fdddd  D fd	d d S )
Nc                      rO   )Nz#expected distributed_type but got: rQ   r   )rL   r   r   rS     rT   z2GluonSemantic._check_same_layout.<locals>.<lambda>c                 S   s   g | ]}|j jqS r   )r4   rV   rJ   r   r   r   rM         z4GluonSemantic._check_same_layout.<locals>.<listcomp>r   c                 3   s    | ]}| kV  qd S r   r   )rK   l)l0r   r   	<genexpr>  s    z3GluonSemantic._check_same_layout.<locals>.<genexpr>r   c                      rU   )Nz3Expected inputs to have matching layouts, but got: r   r   )layoutsr   r   rS     rW   )r   r[   r4   r+   r,   all)xsr   )r   r   rL   r   _check_same_layout  s   
z GluonSemantic._check_same_layoutinputsreverse.c                    s    d j jt}| |  kr|k s!n J d| d| d|dk r)||7 } D ]}|j jks7J dq+jdd  D |||  sOJ t fdd	tt D S )
Nr   z
scan axis z must be < inputs rank ()z(all scan inputs must have the same shapec                 S      g | ]}|j qS r   r/   rK   tr   r   r   rM   (  rY   z2GluonSemantic.associative_scan.<locals>.<listcomp>c                 3   s,    | ]} | | jjV  qd S r   r3   
get_resultr4   r5   r   r   scan_opr   r1   r   r   r   ,  
    
z1GluonSemantic.associative_scan.<locals>.<genexpr>)r4   r1   r<   r$   create_scanverifytuplerange)r   r   rG   region_builder_fnr   r   r   r   r   r   associative_scan  s   .
zGluonSemantic.associative_scanc                    s   t  d udd  d jjtt d   kok n   fdd   fddtD tfddD sFJ d	jd
d D  | 	 s\J t
fddttD S )Nc                   S      dS )Nz*All-reduce is not yet implemented in gluonr   r   r   r   r   rS   1      z)GluonSemantic.reduction.<locals>.<lambda>r   c                      rl   )Nz/expected reduction axis to be in the range [0, z
) but got r   r   )rG   r   r   r   rS   5  rn   c                    s   g | ]
\}}| kr|qS r   r   )rK   rB   s)rG   r   r   rM   7  s    z+GluonSemantic.reduction.<locals>.<listcomp>c                 3   s    | ]	}|j j kV  qd S r   )r4   r1   r   r   r   r   r   8  s    z*GluonSemantic.reduction.<locals>.<genexpr>z-all reduction inputs must have the same shapec                 S   r   r   r   r   r   r   r   rM   :  rY   c                 3   s,    | ]} | | jjV  qd S r   r   r   )r   	reduce_oprA   r   r   r   r   >  r   )r   r4   r1   r<   r   r>   r   r$   create_reducer   r   r   )r   r   rG   r   r   )rG   r   r   r   rA   r   r1   r   	reduction0  s   (

zGluonSemantic.reductionnum_binsmaskc                 C   s   t t|jdkdd  t |j dd  t |d udd  |d ur9| ||\}}t |jj dd  |j	}|
| j}| j|j	|||}| |tj|g|S )Nr   c                   S   r   )Nz histogram only supports 1D inputr   r   r   r   r   rS   C  r   z)GluonSemantic.histogram.<locals>.<lambda>c                   S   r   )Nz%histogram only supports integer inputr   r   r   r   r   rS   D  r   c                   S   r   )Nz'histogram requires a destination layoutr   r   r   r   r   rS   E  r   c                   S   r   )Nz"Mask must have boolean scalar typer   r   r   r   r   rS   H  r   )r   r<   r1   r   is_intra   r4   r5   is_boolr/   r   r$   create_histogramr   r+   r|   )r   rF   r   r   rV   layout_attrr/   r   r   r   	histogramB  s   zGluonSemantic.histogramworker_num_warpsworker_num_regsc                    s  t |}|t |ksJ d| dt | d|t |ks*J d| dt | d| j}	|	 }
|	 }|	| |j||i d}g }|d urLt|}|	| dd |D }|	|
 t|}|		|||
 | | |	 g  |	|}dd |D }t|D ]8}t|| d	}|	|||  fd
dtt |D }t|dd |D }|j|| |i |d |	  q|	  fddtt |D }|d u rd S tt|dd |D S )Nzwarp specialize got z partitions but z warp countsz register counts)kwargsc                 S      g | ]}|  qS r   get_typerK   rr   r   r   rM   c  r   z1GluonSemantic.warp_specialize.<locals>.<listcomp>c                 S   r   r   r   rK   argr   r   r   rM   o  r   r   c                       g | ]}  |qS r   )get_argument)rK   j)blockr   r   rM   s  rN   c                 S   r   r   rQ   r   r   r   r   rM   t  rY   )r   caller_contextc                    r   r   )r   r   )ws_opr   r   rM   y  rN   c                 S   r   r   rQ   r   r   r   r   rM   |  rY   )r<   r$   get_insertion_point	new_blockset_insertion_point_to_startcall_JitFunctionr   create_warp_yieldrestore_insertion_pointcreate_warp_specializeget_default_region	push_backset_requested_registerscreate_block_with_parentget_partition_op_holder!create_warp_specialize_partitionsr   r   
get_regionr   create_warp_returnset_insertion_point_afterget_operationr   )r   default_argsdefault_partitionworker_argsworker_partitionsr   r   	generatornum_partitionsr$   	insert_ptdefault_blockdefault_resultsmlir_resultsresult_types	mlir_argspartitions_op	arg_typesrB   r   
block_argsr   )r   r   r   warp_specializeN  sR   





zGluonSemantic.warp_specialize)F)/r&   r'   r(   r+   r.   langr   __annotations__r   r3   r6   r   r)   rE   r   r^   rc   r   rg   rk   rt   ra   r}   boolr   r   r   r   r   r   r   r   ry   r   r   r   r   r   r   staticmethodr   r   r   r   r   r  __classcell__r   r   re   r   r*      sX   
 

	


"r*   )typingr   r   r   r   r   r   triton.language.semanticr    r	   r+   _layoutsr
   r   r   triton._C.libtriton.gluon_irr   triton.compiler.code_generatorr   r   r   r=   r  r@   r   r   r*   r   r   r   r   <module>   s    