o
    ॵi                     @   sx   d dl Z d dlmZ d dlmZ G dd de jjZG dd dejZ	dd Z
	dd	d
Zdd Zdd ZdddZdS )    N)	is_masterc                   @   s$   e Zd Zedd Zedd ZdS )SparseBinarizerc                 C   sH   t | | }t|dd | }| d}d|d|< |S )N           )intnumeltorchargsortreshapeclonefill_)ctxmask_scoressparsity	num_pruneprune_indicesmask r   _/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/trainers/hooks/compression/utils.pyforward   s
   zSparseBinarizer.forwardc                 C   s   |d fS Nr   )r   
gradOutputr   r   r   backward   s   zSparseBinarizer.backwardN)__name__
__module____qualname__staticmethodr   r   r   r   r   r   r   	   s
    
r   c                       s>   e Zd ZdZ						d fdd	Zdd Zd	d
 Z  ZS )SparseLinearz>
    Fully Connected layer with on the fly adaptive mask.
    pst         ?c           
         s  t t|   || _| jjjd }| jjjd }	| jj| _d | j_| jjd || _d| _	| jdkr|| _
|| _|| _|| _|| _tt|| j
j| jj| jjd| _tt| j
|	j| jj| jjd| _tt|| jj| jj| jjd| _tt| j|	j| jj| jjd| _tt|j| jj| jjd| _tt|	j| jj| jjd| _d| j_| jjd urd| jj_d S d S d S )Nr   r   weightr   r   )devicedtypeF)superr   __init__moduler"   shape_parameterspoppruning_methodcur_sparsityweight_rankweight_beta	mask_rankmask_alpha1mask_alpha2nn	Parameterr	   randntor#   r$   weight_Uzerosweight_Vmask_scores_Amask_scores_Bmask_scores_Rmask_scores_Crequires_gradbias)
selfr'   r+   r-   r.   r/   r0   r1   out_featuresin_features	__class__r   r   r&      sd   




zSparseLinear.__init__c                 G   s   | j dkrA| j| j| j | j  }| | j| j | j  | j	| j
d| jd   }t|| j}|| }|| j_| j| S | j| S Nr   r   r   )r+   r"   r.   r6   r8   absr0   r9   r:   r1   r;   	unsqueezer<   r   applyr,   r'   )r?   inputsr"   r   r   masked_weightr   r   r   r   T   s   



zSparseLinear.forwardc                 C   s   | j dkrB| j| j| j | j  }| | j| j | j  | j	| j
d| jd   }t|| j}|| }t|j| j_d S d S rD   )r+   r"   r.   r6   r8   rE   r0   r9   r:   r1   r;   rF   r<   r   rG   r,   r2   r3   datar'   )r?   r"   r   r   rI   r   r   r   converte   s   

zSparseLinear.convert)r   r    r!   r    r!   r!   )r   r   r   __doc__r&   r   rK   __classcell__r   r   rB   r   r      s    7r   c                 C   s:   | d}|d d D ]}t| |} qt| |d | d S )N.r   )splitgetattrsetattr)modelnamer'   	name_listr   r   r   _setattrt   s   
rU   c              	   C   s   t jg}zddlm}	 ||	j|	jg W n	 ty   Y nw |  D ]1\}
}t	||v rTt
|||||||}t| |
| t rT|rL|d|
 d q#td|
 d q#d S )Nr   )mpuconvert z to sparse module.)r2   Linearmegatron_utilrV   extendRowParallelLinearColumnParallelLinearImportErrornamed_modulestyper   rU   r   infoprint)rR   r+   r-   r.   r/   r0   r1   loggercompress_modulerV   rS   r'   
new_moduler   r   r   convert_sparse_network{   s6   

re   c                 C   s&   |   D ]\}}t|tr||_qd S r   )r^   
isinstancer   r,   )rR   r   rS   r'   r   r   r   update_network_sparsity   s
   
rg   c                 C   sx   | || kr
|}|S | |||  kr|}|S || }|| | }	| | | | } d| ||	   }
||| |
d   }|S )Nr      r   )step
total_step	frequencyinitial_warmupfinal_warmupinitial_sparsityfinal_sparsityr   spars_warmup_stepsspars_schedu_steps	mul_coeffr   r   r   schedule_sparsity_ratio   s   	
rs   c                 C   s   |   D ]G\}}t|trK|  t| ||j t rK|r5|d| dt	d|jj
dk   d qtd| dt	d|jj
dk   d qd S )NrW   zE weight to sparse weight,                             sparsity ratio=r!   r   rN   z> weight to sparse,                             sparsity ratio=)r^   rf   r   rK   rU   r'   r   r`   r	   meanr"   itemra   )rR   rb   rS   r'   r   r   r   generate_sparse_model   s   


rv   r   )r	   torch.nnr2   modelscope.utils.torch_utilsr   autogradFunctionr   Moduler   rU   re   rg   rs   rv   r   r   r   r   <module>   s   \
(