o
    i"                     @   s   d dl mZ d dlZd dlmZ d dlmZmZ d dlmZm	Z	 d dl
mZ ejdedefdd	Zdedefd
dZe	ddddededefddZG dd dZdS )    )TupleN)Int32Uint32)Tdsl_user_op)llvmxreturnc                 C   s@   t d}d}tdD ]}dd| > | @ r|st |}d}q|S )N    F      T)r   cutlassrange)r   resdonei r   O/home/ubuntu/vllm_env/lib/python3.10/site-packages/flash_attn/cute/fast_math.pyclz   s   r   c                 C   s$   t dt|  }|| | d @ dk S )Nr   r   r   )r   r   )r   ar   r   r   	find_log2   s   r   locipr   bc             
   C   sD   t tjt t| j||dt|j||dgddddtjjdS )Nr   zmul.hi.u32 $0, $1, $2;z=r,r,rF)has_side_effectsis_align_stackasm_dialect)	r   r   
inline_asmr   i32r   ir_value
AsmDialectAD_ATT)r   r   r   r   r   r   r   umulhi!   s   "r#   c                   @   s   e Zd ZddddededefddZeddddedd fd	d
Zej	dedefddZ
dedeeef fddZdd Zdd ZdS )
FastDivmodNr   divisor	multiplershift_rightc                C   s   || _ || _|| _|| _d S N)r%   
multiplierr'   _loc)selfr%   r&   r'   r   r   r   r   r   __init__1   s   
zFastDivmod.__init__r	   c                C   sT   t dt|  }t | }t td|> | d | }t |d }t| ||||dS )zConstruct the FastDivmod object, in host code.
        This precomputes some values based on the divisor and is computationally expensive.
        r   r   r
   r   )r   r   r   Uint64r$   )r%   r   r   pdivisor_u32r)   r'   r   r   r   create:   s
   zFastDivmod.createdividendc                 C   s$   | j dkrtt|| j| j? S |S )Nr   )r%   r   r#   r)   r'   )r+   r1   r   r   r   divE   s
   
zFastDivmod.divc                 C   s    |  |}||| j  }||fS r(   )r2   r%   )r+   r1   quotient	remainderr   r   r   divmodM   s   
zFastDivmod.divmodc                 C   sH   g g }| _ | j| j| jfD ]}t|}||7 }| j t| q|S r(   )_values_posr%   r)   r'   r   extract_mlir_valuesappendlen)r+   valuesobj
obj_valuesr   r   r   __extract_mlir_values__R   s   
z"FastDivmod.__extract_mlir_values__c              	   C   s`   g }t | j| j| jg| jD ]\}}|t||d |  ||d  }qtt	|d| j
iS )Nr   )zipr%   r)   r'   r6   r8   r   new_from_mlir_valuesr$   tupler*   )r+   r:   obj_listr;   n_itemsr   r   r   __new_from_mlir_values__Z   s   z#FastDivmod.__new_from_mlir_values__)__name__
__module____qualname__r   r   r,   staticmethodr0   cutejitr2   r   r5   r=   rC   r   r   r   r   r$   0   s     
	
r$   )typingr   r   cutlass.cuterH   r   r   cutlass.cutlass_dslr   r   cutlass._mlir.dialectsr   rI   r   r   r#   r$   r   r   r   r   <module>   s    