o
    :iK                     @   s0  d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
mZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ eZ								 d?ded	eejef d
e
ej de
ej de
eeef  de
e de
e de
e de
e deddfddZ				d@ded	e
ej d
e
ej de
ej dedeeef fddZ 	dAdeded	ejdedeeef f
ddZ!dedededed	ejdeeef fd d!Z"dBd"ed#ede	e fd$d%Z#e$d&fd'ed(ed)ej$deeej%f fd*d+Z&	 dBd"ed,edefd-d.Z'	dCded	ejd
ejde
ej de
eeef  f
d/d0Z(	1dDd2eeej%f d3eeej%f d4e)d5e)d6e)deeej%f fd7d8Z*deeej%f d9eejef d
ejddfd:d;Z+							 dEd"ed<ed	eejef d
e
ej de
eeef  de
e de
e de
e de
e defd=d>Z,dS )F    N)Path)AnyDictListOptionalUnion)
CutSampler)DistributedDataParallel)	Optimizer)AttributeDict
GradScalerfilenamemodel	model_avg	model_emaparams	optimizer	schedulerscalersamplerrankreturnc
                 C   s   |	dkrdS t d|   t|tr|j}| |dur!| nd|dur*| nd|dur3| nd|dur<| ndd}
|durN|tj |
d< |dur\|tj |
d< |rq|	 D ]\}}||
vslJ ||
|< qbt
|
|  dS )ah  Save training information to a file.

    Args:
      filename:
        The checkpoint filename.
      model:
        The model to be saved. We only save its `state_dict()`.
      model_avg:
        The stored model averaged from the start of training.
      model_ema:
        The EMA version of model.
      params:
        User defined parameters, e.g., epoch, loss.
      optimizer:
        The optimizer to be saved. We only save its `state_dict()`.
      scheduler:
        The scheduler to be saved. We only save its `state_dict()`.
      scalar:
        The GradScaler to be saved. We only save its `state_dict()`.
      sampler:
        The sampler used in the labeled training dataset. We only
          save its `state_dict()`.
      rank:
        Used in DDP. We save checkpoint only for the node whose
          rank is 0.
    Returns:
      Return None.
    r   NzSaving checkpoint to )r   r   r   grad_scalerr   r   r   )logginginfo
isinstanceDDPmodule
state_dicttotorchfloat32itemssave)r   r   r   r   r   r   r   r   r   r   
checkpointkv r'   0/home/ubuntu/LuxTTS/zipvoice/utils/checkpoint.pysave_checkpoint&   s(   (

r)   Fstrictc           
      C   s6  t d|   tj| ddd}|d urctt|d drPt d | }|d }|	 D ]}d
d	|}	||	||< q0t|d
ksHJ |j||d nt d |j|d |d |d |d ur~d|v r~t d |j|d |d |d |d urd|v rt d |j|d |d |d |S )NLoading checkpoint from cpuFmap_locationweights_onlyr   module.Loading checkpoint saved by DDP{}.{}r   r   r*   Loading checkpointr   zLoading averaged modelr   zLoading ema model)r   r   r    loadnextiter
startswithdebugr   keysformatpoplenload_state_dict)
r   r   r   r   r*   r$   dst_state_dictsrc_state_dictkeysrc_keyr'   r'   r(   load_checkpointk   s0   






rC   Textend_sizec           
      C   s   t d|   tj| ddd}|d urztt|d drIt d | }|d }| D ]}d	d	|}|
|||< q0t|d
ksHJ n	t d |d }| d |d< | d }	|d |	d | d d f< |	|d< |j||d d S d S )Nr+   r,   Fr-   r   r0   r1   r2   r   r   r4   zspk_embed.weightzembed.weightr3   )r   r   r    r5   r6   r7   r8   r   r:   r;   r<   r=   r>   )
r   rD   r   r*   r$   r?   r@   rA   rB   embed_weightr'   r'   r(   !load_checkpoint_extend_vocab_size   s&   

rF   in_proj_keyout_proj_keydimc              
   C   sD  t d|   tj| ddd}|d ur tt|d drFt d t }|d }| D ]}|	|||
d< q0t|dksEJ n	t d	 |d }t| }	|	D ]}||v rd
|v r|	|}
tj|
d d d |f d |
d d d |f d |
d d ||d f |
d d |d d f d |
d d |d d f d gdd||d
d< |
||d
d< d|v r|	|}|||dd< |||dd< ||v rd
|v r|	|}
tj|
|
gdd||d
d< |
||d
d< qWd|v r|	|}tj||gdd||dd< |||dd< qW|j|dd d S d S )Nr+   r,   Fr-   r   r0   r1   r   r4   weight   )rI   z0.weightz1.weightbiasz0.biasz1.biasTr3   )r   r   r    r5   r6   r7   r8   dictr:   r<   lstripr=   listcatreplacer>   )r   rG   rH   rI   r   r$   r?   r@   rA   r:   rJ   rM   r'   r'   r(   -load_checkpoint_copy_proj_three_channel_alter   s^   









rS   out_dir	iterationc                    s   t t|  d}td}g }|D ]}||}|s%td|  q|t|	d|f qt
|ddd d} d	krJ fd
d|D }|S  fdd|D }|S )a  Find all available checkpoints in a directory.

    The checkpoint filenames have the form: `checkpoint-xxx.pt`
    where xxx is a numerical value.

    Assume you have the following checkpoints in the folder `foo`:

        - checkpoint-1.pt
        - checkpoint-20.pt
        - checkpoint-300.pt
        - checkpoint-4000.pt

    Case 1 (Return all checkpoints)::

      find_checkpoints(out_dir='foo')

    Case 2 (Return checkpoints newer than checkpoint-20.pt, i.e.,
    checkpoint-4000.pt, checkpoint-300.pt, and checkpoint-20.pt)

        find_checkpoints(out_dir='foo', iteration=20)

    Case 3 (Return checkpoints older than checkpoint-20.pt, i.e.,
    checkpoint-20.pt, checkpoint-1.pt)::

        find_checkpoints(out_dir='foo', iteration=-20)

    Args:
      out_dir:
        The directory where to search for checkpoints.
      iteration:
        If it is 0, return all available checkpoints.
        If it is positive, return the checkpoints whose iteration number is
        greater than or equal to `iteration`.
        If it is negative, return the checkpoints whose iteration number is
        less than or equal to `-iteration`.
    Returns:
      Return a list of checkpoint filenames, sorted in descending
      order by the numerical value in the filename.
    z/checkpoint-[0-9]*.ptzcheckpoint-([0-9]+).ptzInvalid checkpoint filename    Tc                 S   s   | d S )Nr   r'   )xr'   r'   r(   <lambda>  s    z"find_checkpoints.<locals>.<lambda>)reverserA   r   c                    s    g | ]}|d   kr|d qS r   rV   r'   .0icrU   r'   r(   
<listcomp>   s     z$find_checkpoints.<locals>.<listcomp>c                    s"   g | ]}|d    kr|d qS rZ   r'   r[   r^   r'   r(   r_   "  s   " )rP   globrecompilesearchr   warnappendintgroupsorted)rT   rU   checkpointspatterniter_checkpointscresultansr'   r^   r(   find_checkpoints   s   (

ro   r,   filename_startfilename_enddevicec                 C   s   t j| |dd}t j||dd}|d }|d }|| | }|d }|| | }|| }|dks4J ||| }	d|	 }
|d }|d }|}t||d|
|	 |	d	 |S )
a  Average model parameters over the range with given
    start model (excluded) and end model.

    Let start = batch_idx_train of model-start;
        end = batch_idx_train of model-end;
        interval = end - start.
    Then the average model over range from start (excluded) to end is
    (1) avg = (model_end * end - model_start * start) / interval.
    It can be written as
    (2) avg = model_end * weight_end + model_start * weight_start,
        where weight_end = end / interval,
              weight_start = -start / interval = 1 - weight_end.
    Since the terms `weight_end` and `weight_start` would be large
    if the model has been trained for lots of batches, which would cause
    overflow when multiplying the model parameters.
    To avoid this, we rewrite (2) as:
    (3) avg = (model_end + model_start * (weight_start / weight_end))
              * weight_end

    The model index could be epoch number or iteration number.

    Args:
      filename_start:
        Checkpoint filename of the start model. We assume it
        is saved by :func:`save_checkpoint`.
      filename_end:
        Checkpoint filename of the end model. We assume it
        is saved by :func:`save_checkpoint`.
      device:
        Move checkpoints to this device before averaging.
    Fr-   average_periodbatch_idx_trainr   rV   r         ?)state_dict_1state_dict_2weight_1weight_2scaling_factor)r    r5   average_state_dict)rp   rq   rr   state_dict_startstate_dict_endrs   batch_idx_train_startbatch_idx_train_endinterval
weight_endweight_start	model_endmodel_startavgr'   r'   r(   'average_checkpoints_with_averaged_model'  s0   $r   topkc                 C   sx   |dksJ ||dkrdS t | }t|dkr"td|   dS t||kr*dS ||d }|D ]}t| q2dS )aN  Remove checkpoints from the given directory.

    We assume that checkpoint filename has the form `checkpoint-xxx.pt`
    where xxx is a number, representing the number of processed batches
    when saving that checkpoint. We sort checkpoints by filename and keep
    only the `topk` checkpoints with the highest `xxx`.

    Args:
      out_dir:
        The directory containing checkpoints to be removed.
      topk:
        Number of checkpoints to keep.
      rank:
        If using DDP for training, it is the rank of the current node.
        Use 0 if no DDP is used for training.
    rV   r   NzNo checkpoints found in )ro   r=   r   rd   osremove)rT   r   r   ri   	to_removerl   r'   r'   r(   remove_checkpointsk  s   r   c                 C   sl   | j d| jd  d }| sJ | dt||||dd}| jdkr4g d}|D ]}|| | |< q+|S )a  Load checkpoint from file.

    If params.start_epoch is larger than 1, it will load the checkpoint from
    `params.start_epoch - 1`.

    Apart from loading state dict for `model` and `optimizer` it also updates
    `best_train_epoch`, `best_train_loss`, `best_valid_epoch`,
    and `best_valid_loss` in `params`.

    Args:
      params:
        The return value of :func:`get_params`.
      model:
        The training model.
    Returns:
      Return a dict containing previously saved training info.
    zepoch-rV   .ptz does not exist!T)r   r   r   r*   )best_train_epochbest_valid_epochrt   best_train_lossbest_valid_loss)exp_dirstart_epochis_filerC   )r   r   r   r   r   saved_paramsr:   r%   r'   r'   r(   resume_checkpoint  s   
r   ru   rv   rw   rx   ry   rz   c           
      C   s   t  }|  D ]\}}| }||v rq|||< qt| }	|	D ]"}| | }t|rC||9 }||| j| | jd| 7 }||9 }q!dS )zAverage two state_dict with given weights:
    state_dict_1 = (state_dict_1 * weight_1 + state_dict_2 * weight_2)
      * scaling_factor
    It is an in-place operation on state_dict_1 itself.
    )rr   N)	rN   r"   data_ptrrP   valuesr    is_floating_pointr   rr   )
rv   rw   rx   ry   rz   uniquedr%   r&   
v_data_ptruniqued_namesr'   r'   r(   r{     s   

r{   	model_curc                 C   sH   | j | j }d| }t|tr|j}| }| }t||||d dS )a]  Update the averaged model:
    model_avg = model_cur * (average_period / batch_idx_train)
      + model_avg * ((batch_idx_train - average_period) / batch_idx_train)

    Args:
      params:
        User defined parameters, e.g., epoch, loss.
      model_cur:
        The current model.
      model_avg:
        The averaged model to be updated.
    rV   )rv   rw   rx   ry   N)rs   rt   r   r   r   r   r{   )r   r   r   
weight_cur
weight_avgcurr   r'   r'   r(   update_averaged_model  s   

r   global_batch_idxc
                 C   sD   t | } | jddd | d| d }
t|
||||||||	d	 dS )aZ  Save training info after processing given number of batches.

    Args:
      out_dir:
        The directory to save the checkpoint.
      global_batch_idx:
        The number of batches processed so far from the very start of the
        training. The saved checkpoint will have the following filename:

            f'out_dir / checkpoint-{global_batch_idx}.pt'
      model:
        The neural network model whose `state_dict` will be saved in the
        checkpoint.
      model_avg:
        The stored model averaged from the start of training.
      params:
        A dict of training configurations to be saved.
      optimizer:
        The optimizer used in the training. Its `state_dict` will be saved.
      scheduler:
        The learning rate scheduler used in the training. Its `state_dict` will
        be saved.
      scaler:
        The scaler used for mix precision training. Its `state_dict` will
        be saved.
      sampler:
        The sampler used in the training dataset.
      rank:
        The rank ID used in DDP training of the current node. Set it to 0
        if DDP is not used.
    T)parentsexist_okzcheckpoint-r   )	r   r   r   r   r   r   r   r   r   N)r   mkdirr)   )rT   r   r   r   r   r   r   r   r   r   r   r'   r'   r(   %save_checkpoint_with_global_batch_idx  s   +
r   )NNNNNNNr   )NNNF)T)r   )N)ru   )NNNNNNr   )-r`   r   r   ra   pathlibr   typingr   r   r   r   r   r    torch.nnnnlhotse.dataset.sampling.baser   torch.nn.parallelr	   r   torch.optimr
   zipvoice.utils.commonr   r   objectLRSchedulerTypeModulestrrf   r)   boolrC   rF   rS   ro   rr   Tensorr   r   r   floatr{   r   r   r'   r'   r'   r(   <module>   sP  	

G

)



;B
G
*
6

&	
