o
    i/                     @   s@   d dl Z d dlmZ dededefddZG dd	 d	e jZdS )
    N)tree_mapsizealignment_valuereturnc                 C   s   d| d |  | S N    )r   r   r   r   R/home/ubuntu/.local/lib/python3.10/site-packages/torchao/swizzle/swizzle_tensor.py_get_min_alignment   s   r
   c                   @   s|   e Zd ZdZ	ddejdefddZdddZd	d
 Z	dd Z
dd Zdd Zedd Zdd ZedddZejjZdS )SwizzleTensorz
    A Python-only swizzled tensor subclass.

    Intended usage of this abstraction:
    Swizzle weight Tensor to avoid LDS use during GEMMs on ROCm hardware.
    Foriginalshallowc                 C   s   t j|dd}t j| |S )Nmeta)device)torch
empty_likeTensor_make_subclass)clsr   r   wrapperr   r   r	   __new__   s   zSwizzleTensor.__new__c                 C   sz  |rd S |j dksJ d|jdks|jdksJ |jdkr dnd}|jdkr)dnd}|j dkr7|j\}}d}|j d	krB|j\}}}t|d}t||}	|| }
|	| }tjj|d|d|
fd
d}|j dkr}||d d|	| d|}|	ddd	dd}|j d	kr|||d d|	| d|}|	ddd	ddd}|
 | _|| _|| _|| _|| _|	| _|
| _|| _|j | _d| _d S )N   z"SwizzleTensor only supports ndim 2r       @         r      constant      F)ndimitemsizeshaper
   r   nn
functionalpadviewpermute
contiguousxBMKalignedMalignedKpaddedMpaddedKoriginal_ndimis_transposed)selfr   r   kdivlastdimr+   r,   r*   r-   r.   r/   r0   r)   r   r   r	   __init__    s@   








zSwizzleTensor.__init__c                 C   s   | j j d|   dS )Nz
(original=))	__class____name__	unswizzler3   r   r   r	   __repr__C      zSwizzleTensor.__repr__c                 C   s   d }| j dkr5| jddddd }|| j| j}|d| jd| jf }|| j| j}| j	r5|j
}| j dkrk| jdddddd }|| j| j| j}|d| jd| jd| jf }|| j| j| j}|S )Nr   r   r   r   r   r   )r1   r)   r'   r(   reshaper-   r.   r+   r,   r2   Tr*   )r3   undoner   r   r	   r:   F   s   

 zSwizzleTensor.unswizzlec                 C   s`   | j dkr| j| j| j}| jr|j}|S | j dkr.| j| j| j| j}| jr,|j}|S d S )Nr   r   )r1   r)   r>   r-   r.   r2   r?   r*   )r3   tmpr   r   r	   	as_tensorV   s   

zSwizzleTensor.as_tensorc                 C   s   | j dkr| j| jfn| j| j| jff}ttj|| j| jddd}| j	|_	| j|_| j|_| j|_| j
|_
| j|_| j|_| j|_| j |_ | j |_|S )Nr   r   )dtypelayoutr   T)r1   r+   r,   r*   r   r   emptyrC   rD   r)   r-   r.   r/   r0   r2   )r3   r"   new_objr   r   r	   shallow_transposec   s"   $
zSwizzleTensor.shallow_transposec                 C   s&   t | jr| j| jfS | j| jfS N)r   Sizer2   r,   r+   r;   r   r   r	   r"   w   s   &zSwizzleTensor.shapec                 C   s   | j rd| jfS | jdfS r   )r2   r,   r;   r   r   r	   stride{   s   zSwizzleTensor.strideNc                 C   sT   ddl m} ||v r|| |||S dd }dd }t||t||i t||S )Nr   )SWIZZLE_OPS_TABLEc                 S   s   t | tr	|  S | S rH   )
isinstancer   r:   er   r   r	   unwrap   s   z0SwizzleTensor.__torch_dispatch__.<locals>.unwrapc                 S   s   t | tjr
t| S | S rH   )rL   r   r   r   rM   r   r   r	   wrap   r=   z.SwizzleTensor.__torch_dispatch__.<locals>.wrap)torchao.swizzle.swizzle_opsrK   r   )r   functypesargskwargsrK   rO   rP   r   r   r	   __torch_dispatch__~   s    z SwizzleTensor.__torch_dispatch__)FrH   )r9   
__module____qualname____doc__r   r   boolr   r6   r<   r:   rB   rG   propertyr"   rJ   classmethodrV   _C_disabled_torch_function_impl__torch_function__r   r   r   r	   r      s&    


#
r   )r   torch.utils._pytreer   intr
   r   r   r   r   r   r	   <module>   s   