o
    c۷i                  
   @   s  d dl mZmZ d dlmZ d dlmZmZ d dlZzd dl	m
Z
 W n ey-   dZ
Y nw d dlZd dlmZ d dlmZmZmZmZmZ d dlmZ d dlmZ d d	lmZ ejeeeeeedfZ ej!j"j#j$Z%ej&Z'd dl(m  m)Z* e*j+Z,d
d Z-e-e*_+ej.eej/eej0eej1eej2eiZ3edd Z4eddej5deeef fddZ6dd Z7dd Z8dd Z9dd Z:eG dd dZ;eG dd deZ<dS )    )Tuple
get_origin)	lru_cache)	dataclassfieldsN)extract)Int32Int64Float16BFloat16Float32)JitArgument)spec)NumericMetac                 C   sj   |d urt |tju rt|S t| tr.tt| dr.|d u s%t|ds.t	| |t| |S t	| |||S )N_fields)
r   cutlass	Constexprr   	ConstNone
isinstancetuplehasattrtype_original_convert_single_arg)argarg_namearg_typectx r   J/home/ubuntu/vllm_env/lib/python3.10/site-packages/quack/cute_dsl_utils.py_patched_convert_single_arg%   s   
r   c                 C   s   t j j| dS )Ncluster_size)r   utilsHardwareInfoget_max_active_clustersr    r   r   r   r$   ?   s   r$   devicereturnc                 C   s   t j| S N)torchcudaget_device_capability)r%   r   r   r   get_device_capacityD   s   r+   c                    sB    fddt  D }dd | D }dd | D }||fS )zISplit dataclass fields into (constexpr_dict, non_constexpr_dict) by type.c                    s   i | ]
}|j t |j qS r   )namegetattr).0fieldobjr   r   
<dictcomp>K   s    z%_partition_fields.<locals>.<dictcomp>c                 S   s    i | ]\}}t |tr||qS r   r   StaticTypesr.   nfr   r   r   r2   L        c                 S   s    i | ]\}}t |ts||qS r   r3   r5   r   r   r   r2   M   r8   )r   items)r1   
all_fields	constexprnon_constexprr   r0   r   _partition_fieldsI   s   r=   c                 C   sb   t | \}}t| | jD ]\\}}}t||d | ||< ||d  }q| jdi ||S )Nr   )r=   zipr9   _values_posr   new_from_mlir_values	__class__)selfvaluesconstexpr_fieldsnon_constexpr_fieldsr,   r/   n_itemsr   r   r   _new_from_mlir_valuesQ   s
   rG   c              	   C   s|   ddl m} t|}g }| D ]*}|du st|tr|| qt||}|t||d|  ||d }q| j	| S )aC  Generic __new_from_mlir_values__ for NamedTuples.

    Applied to NamedTuple classes via the ``@mlir_namedtuple`` decorator.

    Fields that are None or Constexpr (StaticTypes) are preserved from ``self`` (the compile-time
    template). Only non-static fields consume MLIR values. Multi-value fields (e.g. cute.Tensor)
    consume the correct number of values via ``cutlass.new_from_mlir_values``.

    Constexpr fields (annotated ``cutlass.Constexpr[T]``) are baked into the compiled kernel via
    a converter patch (see above). At call time, pass None for these fields.
    r   )get_mlir_typesN)
cutlass.base_dsl.typingrH   listr   r4   appendlenr   r@   rA   )rB   rC   rH   
new_fields	field_valrF   r   r   r    _namedtuple_new_from_mlir_valuesY   s   
rO   c                 C   s
   t | _| S )zDecorator that adds MLIR value reconstruction to a NamedTuple class.

    Usage::

        @mlir_namedtuple
        class MyArgs(NamedTuple):
            tensor_arg: cute.Tensor
            const_arg: cutlass.Constexpr[int] = 0
    )rO   __new_from_mlir_values__)clsr   r   r   mlir_namedtuples   s   
rR   c                   @   s   e Zd Zdd ZeZdS )
ParamsBasec                 C   sL   t | \}}g g }| _| D ]}t|}||7 }| jt| q|S r'   )r=   r?   rC   r   extract_mlir_valuesrK   rL   )rB   _rE   rC   r1   
obj_valuesr   r   r   __extract_mlir_values__   s   
z"ParamsBase.__extract_mlir_values__N)__name__
__module____qualname__rW   rG   rP   r   r   r   r   rS      s    	rS   c                   @   s    e Zd Zdd Zdd ZeZdS )ArgumentsBasec                 C   s:   t | \}}g }| D ]}t|dr||  q|S )N__c_pointers__)r=   rC   r   extendr\   )rB   rU   rE   c_ptrsr1   r   r   r   r\      s   
zArgumentsBase.__c_pointers__c                 C   sd   t | \}}g g }| _| D ]}t|dr)| }|| | jt| q| jd q|S )N__get_mlir_types__r   )r=   r?   rC   r   r_   r]   rK   rL   )rB   rU   rE   typesr1   	obj_typesr   r   r   r_      s   

z ArgumentsBase.__get_mlir_types__N)rX   rY   rZ   r\   r_   rG   rP   r   r   r   r   r[      s    r[   r'   )=typingr   r   	functoolsr   dataclassesr   r   r(   triton.tools.disasmr   ImportErrorr   cutlass.cutecuter   r	   r
   r   r   rI   r    cutlass.base_dsl.tvm_ffi_builderr   cutlass.cutlass_dslr   r   intboolstrfloatr   r4   base_dslruntimer)   load_cubin_module_dataload_cubin_module_data_ogcompilecute_compile_og)cutlass.cute._tvm_ffi_args_spec_converter_tvm_ffi_args_spec_converter_converter_module_convert_single_argr   r   float16bfloat16float32int32int64torch2cute_dtype_mapr$   r%   r+   r=   rG   rO   rR   rS   r[   r   r   r   r   <module>   sP   	
 