o
    i                     @   s|   d dl Z d dlZd dlmZ d dlm  mZ dZdZdd Z	G dd dej
jZG dd dej
jZG d	d
 d
ejZdS )    Ng   Ј Bc                 C   s2   dt dt| |  d   }| d|jS )z,Return the value that is larger than q% of t   g{Gz?)roundfloatnumelviewkthvaluevalues)tqk r   N/home/ubuntu/.local/lib/python3.10/site-packages/torchao/sparsity/supermask.py
percentile   s    r   c                   @   (   e Zd ZdZedd Zedd ZdS )	GetSubnetSupermask STE functionc                 C   s>   |j ttd}t||d }t||k ||j||jS )N)minmaxd   )clamp
SCORES_MIN
SCORES_MAXr   torchwheretodevice)ctxscoreszerosonessparsityclamped_scoresk_valr   r   r   forward   s
   zGetSubnet.forwardc                 C   s   |d d d fS Nr   )r   gr   r   r   backward   s   zGetSubnet.backwardN__name__
__module____qualname____doc__staticmethodr$   r'   r   r   r   r   r      s    
r   c                   @   r   )	ApplyMaskr   c                 C   s   || S r%   r   )r   weightr   r   r   r   r$   &   s   zApplyMask.forwardc                 C   s,   d  }}| j d r|}| j d r|}||fS )Nr   r   )needs_input_grad)r   grad_outputgrad_weightgrad_scoresr   r   r   r'   *   s   

zApplyMask.backwardNr(   r   r   r   r   r.   #   s    
r.   c                       sN   e Zd ZdZ fddZdd Zdd Ze			dd
dZedd Z	  Z
S )SupermaskLinearz Supermask class for Linear layerc                    s   t t| j|i | ddt fdd| j D   }|| _| j|kr?td| j	 d| d| j  d  d || _ | _
d	| _tjt fd
d| j D | d| _tjj| jtdd | | j_d S )Nr   c                    s   g | ]	}t |  qS r   )mathceil).0r   	blocksizer   r   
<listcomp>=   s    z,SupermaskLinear.__init__.<locals>.<listcomp>zreducing sparsity from z to z'(maximum sparsity for layer with shape z and tile size )Fc              	      s$   g | ]}t d tt|  qS )r   )r   intr5   r6   )r7   wnr8   r   r   r:   J   s   $ )requires_grad   )a)superr4   __init__r5   prodr/   sizesparsity_levelprintr!   r9   sparsify_weightsnn	Parameterr   emptyr   initkaiming_uniform_sqrtr>   )selfrE   r9   
fixed_maskfixed_weightargskwargsmax_sparsity_level	__class__r8   r   rB   7   s*    
zSupermaskLinear.__init__c                 C   sh   t | jt| jt| j| j}| jdkr2t| j	j
D ]\}}|j| j|d}t||d|}q|S )Nr   )dimr   )r   applyr   r   
zeros_like	ones_likerE   r9   	enumerater/   shaperepeat_interleavenarrow)rN   subnetir   r   r   r   get_maskT   s   


zSupermaskLinear.get_maskc                 C   s&   |   }t| j|}t||| jS r%   )r`   r.   rW   r/   Flinearbias)rN   xr^   wr   r   r   r$   c   s   zSupermaskLinear.forward        r   c              	   C   sv   t |tjjs	J t||dd|j|j|jdudj|j	j
|j	jd}|j	j|j	j |jdur9|jj|jj |S )zU
        Main entrypoint for creating a SupermaskLinear from a Linear layer.
        FNrc   r   dtype)
isinstancer   rH   Linearr4   in_featuresout_featuresrc   r   r/   r   ri   datacopy_)clsrb   rE   r9   supermask_linearr   r   r   from_linearh   s    
	
zSupermaskLinear.from_linearc                 C   sn   |}t jj|j|j|jdudj|jj|jj	d}|
 }|jj|j|  |jdur5|jj|jj |S )zt
        Convert a SupermaskLinear to a Linear layer.
        Replaces the old sparsify_offline() function.
        Nrg   rh   )r   rH   rk   rl   rm   rc   r   r/   r   ri   r`   rn   ro   )rp   rq   rN   rb   maskr   r   r   	to_linear   s   
zSupermaskLinear.to_linear)rf   r   )r)   r*   r+   r,   rB   r`   r$   classmethodrr   rt   __classcell__r   r   rT   r   r4   4   s    r4   )r5   r   torch.nnrH   torch.nn.functional
functionalra   r   r   r   autogradFunctionr   r.   rk   r4   r   r   r   r   <module>   s   