o
    :iT                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZmZmZmZmZ d dlZd dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ e ej!dryd dl"m#Z# nd dl$m#Z# ee%ef Z&G dd de'Z(G dd dej	Z)edYddZ*dYddZ+					dZddZ,dd Z-			d[de(de'dej.d e/d!e/d"e/fd#d$Z0d%d& Z1d'eee2  d(e2dej.fd)d*Z3d+eee2  d,e2d-ej4fd.d/Z5d0ee%e2f fd1d2Z6de(d-e7fd3d4Z8d5eej9ef d6e7d-dfd7d8Z:	 d\d9ej4d:ee7e7f d;e2d-ej4fd<d=Z;	 d\d9ej4d:ee7e7f d;e2d-ej4fd>d?Z<d\d@ej4d;e2d-ej4fdAdBZ=dCdD Z>	E	d]dFe&dGe%dHe/d-dfdIdJZ?dKdL Z@dMdN ZAdOdP ZBd-ee%ef fdQdRZCdg g fd5ej9dSe7dTe/dUee% dVee% d-ee' fdWdXZDdS )^    N)defaultdict)contextmanager)datetime)Path)AnyDictListTupleUnion)version)distributed)nn)DistributedDataParallel)SummaryWriter
GradScalerr   c                   @   s4   e Zd Zdd Zdd Zdd Zddefd	d
ZdS )AttributeDictc                 C   s    || v r| | S t d| dNzNo such attribute ''AttributeErrorselfkey r   ,/home/ubuntu/LuxTTS/zipvoice/utils/common.py__getattr__!   s   zAttributeDict.__getattr__c                 C   s   || |< d S Nr   )r   r   valuer   r   r   __setattr__&      zAttributeDict.__setattr__c                 C   s"   || v r	| |= d S t d| dr   r   r   r   r   r   __delattr__)   s   zAttributeDict.__delattr__   indentc                 C   sJ   i }|   D ]\}}t|ttjtjfrt|}|||< qtj||ddS )NT)r#   	sort_keys)	items
isinstancer   torchdevicedtypestrjsondumps)r   r#   tmpkvr   r   r   __str__/   s   
zAttributeDict.__str__N)r"   )__name__
__module____qualname__r   r   r!   intr0   r   r   r   r   r       s
    r   c                       s   e Zd Z fddZdddZdedd fdd	Zdefd
dZde	e
eef  fddZdd ZdedededdfddZ  ZS )MetricsTrackerc                    s   t t| t d S r   )superr5   __init__r4   r   	__class__r   r   r7   :   s   zMetricsTracker.__init__otherreturnc                 C   sR   t  }|  D ]\}}|||< q| D ]\}}|| dkr&|| | ||< q|S )Nr   r5   r%   )r   r;   ansr.   r/   r   r   r   __add__A   s   
zMetricsTracker.__add__alphac                 C   s(   t  }|  D ]
\}}|| ||< q|S r   r=   )r   r@   r>   r.   r/   r   r   r   __mul__J   s   zMetricsTracker.__mul__c                 C   s   d}d}|   D ]?\}}d| }d|vr#|t|d t| d 7 }q|t|d t| 7 }|dkr8|d7 }q|dkrA|d7 }qtd	| d
| d  }|dt| d 7 }|dkrld
| d  }|dt| d 7 }|| S )N z%.4gutt_=z, utt_durationz	 frames, utt_pad_proportionzUnexpected key: z%.2fframeszover z	 frames. 
utterancesz utterances.)
norm_itemsr*   
ValueError)r   
ans_framesans_utterancesr.   r/   
norm_valuerG   rH   r   r   r   r0   P   s$   

zMetricsTracker.__str__c                 C   s   d| v r| d nd}d| v r| d nd}g }|   D ]$\}}|dks&|dkr'qd|vr1t|| nt|| }|||f q|S )zb
        Returns a list of pairs, like:
          [('ctc_loss', 0.1), ('att_loss', 0.07)]
        rG      rH   rC   )r%   floatappend)r   
num_framesnum_utterancesr>   r.   r/   rM   r   r   r   rI   g   s   zMetricsTracker.norm_itemsc                    sb   t   }tj fdd|D |d}tj|tjjd t||	 
 D ]\}}| |< q&dS )zs
        Reduce using torch.distributed, which I believe ensures that
        all processes get the total.
        c                    s   g | ]}t  | qS r   )rO   ).0r.   r8   r   r   
<listcomp>~   s    z)MetricsTracker.reduce.<locals>.<listcomp>r(   )opN)sortedkeysr'   tensordist
all_reduceReduceOpSUMzipcputolist)r   r(   rX   sr.   r/   r   r8   r   reducex   s   
zMetricsTracker.reduce	tb_writerprefix	batch_idxNc                 C   s(   |   D ]\}}||| || qdS )a4  Add logging information to a TensorBoard writer.

        Args:
            tb_writer: a TensorBoard writer
            prefix: a prefix for the name of the loss, e.g. "train/valid_",
                or "train/current_"
            batch_idx: The current batch index, used as the x-axis of the plot.
        N)rI   
add_scalar)r   rc   rd   re   r.   r/   r   r   r   write_summary   s   zMetricsTracker.write_summary)r;   r5   r<   r5   )r1   r2   r3   r7   r?   rO   rA   r*   r0   r   r	   rI   rb   r   r4   rg   __classcell__r   r   r9   r   r5   9   s     
	r5   cudac              	   k   s    t tjt dkr-tjjdd| i| dV  W d   dS 1 s&w   Y  dS t 5 tjdt	d tj
jjdi | dV  W d   n1 sQw   Y  W d   dS W d   dS 1 siw   Y  dS )z
    To fix the following warnings:
    FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated.
    Please use `torch.amp.autocast('cuda', args...)` instead.
      with torch.cuda.amp.autocast(enabled=False):
    2.3.0device_typeNignorecategoryr   )r   parser'   __version__ampautocastwarningscatch_warningssimplefilterFutureWarningri   )rk   kwargsr   r   r   torch_autocast   s   "
"rx   c                 K   s   t tjt dkrddlm} |dd| i|S t  tjdt	d tj
jjdi |W  d   S 1 s:w   Y  dS )	a  
    Creates a GradScaler compatible with both torch < 2.3.0 and >= 2.3.0.
    Accepts all kwargs like: enabled, init_scale, growth_factor, etc.

    FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated.
    Please use `torch.amp.GradScaler('cuda', args...)` instead.
    rj   r   r   r(   rl   rm   Nr   )r   ro   r'   rp   	torch.ampr   rs   rt   ru   rv   ri   rq   )r(   rw   r   r   r   r   create_grad_scaler   s   
$rz   Fc                 C   s~   dt jvr|du rdnt|t jd< dt jvr$|du rdnt|t jd< |du r8tjd| |d tj|  dS td dS )	zG
    rank and world_size are used only if use_ddp_launch is False.
    MASTER_ADDRN	localhostMASTER_PORT12354Fnccl)rank
world_size)osenvironr*   rZ   init_process_groupr'   ri   
set_device)r   r   master_portuse_ddp_launchmaster_addrr   r   r   
setup_dist   s   


r   c                   C   s   t   d S r   )rZ   destroy_process_groupr   r   r   r   cleanup_dist   r    r   Tparamsbatchr(   return_tokensreturn_featurereturn_audioc           	      C   sd   g }|r||d g7 }|r$|d  |}|d  |}||| j |g7 }|r0||d |d g7 }|S )a]  
    Parse the features and targets of the current batch.
    Args:
      params:
        It is returned by :func:`get_params`.
      batch:
        It is the return value from iterating
        `lhotse.dataset.K2SpeechRecognitionDataset`. See its documentation
        for the format of the `batch`.
      device:
        The device of Tensor.
    tokensfeaturesfeatures_lensaudio
audio_lens)to
feat_scale)	r   r   r(   r   r   r   return_listr   r   r   r   r   prepare_input   s   r   c                 C   sB   g }t t| D ]}| | }|||  }||g||   q|S r   )rangelenrP   )r   tokens_lenstokens_durationsirE   avg_token_durationr   r   r   prepare_avg_tokens_durations   s   r   ypad_idc                    sJ   fdd| D } t dd | D   fdd| D } tj| tj|dS )z
    Pad the transcripts to the same length with zeros.

    Args:
      y: the transcripts, which is a list of a list

    Returns:
      Return a Tensor of padded transcripts.
    c                    s   g | ]}| g qS r   r   rS   	token_ids)r   r   r   rT     s    zpad_labels.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   r   r   r   r   r   rT     s    c                    s"   g | ]}|g t |   qS r   r   r   lengthr   r   r   rT     s   " )r)   r(   )maxr'   rY   int64)r   r   r(   r   r   r   
pad_labels  s   
r   	durationsrQ   r<   c           	         s    fdd| D } t | }tj| tjd}t|D ])}| | }d}t|D ]\}}|||||| f< ||7 }q&| ksCJ | fq|S )aL  
    Gets position in the transcript for each frame, i.e. the position
    in the symbol-sequence to look up.

    Args:
      durations:
        Duration of each token in transcripts.
      num_frames:
        The maximum frame length of the current batch.

    Returns:
      Return a Tensor of shape (batch_size, num_frames)
    c                    s   g | ]}| t | g qS r   )sum)rS   xrQ   r   r   rT   #  s    z$get_tokens_index.<locals>.<listcomp>r)   r   )r   r'   zerosr   r   	enumerate)	r   rQ   
batch_sizer>   bthis_dur	cur_framer   dr   r   r   get_tokens_index  s   
r   ra   c                 C   s$   t | tr| fS ttt| dS )N,)r&   r4   tuplemapsplit)ra   r   r   r   to_int_tuple0  s   
r   c                 C   s   | j | j| j  | j S r   )batch_idx_trainmax_durationr   ref_duration)r   r   r   r   get_adjusted_batch_count6  s   
r   modelbatch_countc                 C   sF   t | tr| j} |  D ]\}}t|dr||_t|dr ||_qd S )Nr   name)r&   DDPmodulenamed_moduleshasattrr   r   )r   r   r   r   r   r   r   set_batch_count@  s   


r   r   mask_percentmax_lenc                 C      t j| t jdj| |  t j}t j|t jd| |  t j}|| }t||  }t jd|| j	d}|dddf |dddf k|dddf |dddf k @ }|S )a  
    Apply Time masking.
    Args:
        features_lens:
            input tensor of shape ``(B)``
        mask_size:
            the width size for masking.
        max_len:
            the maximum length of the mask.
    Returns:
        Return a 2-D bool tensor (B, T), where masked positions
        are filled with `True` and non-masked positions are
        filled with `False`.
    r   r   rU   N)
r'   
zeros_likefloat32uniform_r   r   	rand_liker   aranger(   r   r   r   	mask_sizemask_starts	mask_ends	seq_rangemaskr   r   r   condition_time_maskK     r   c                 C   r   )a  
    Apply Time masking, mask from the end time index.
    Args:
        features_lens:
            input tensor of shape ``(B)``
        mask_size:
            the width size for masking.
        max_len:
            the maximum length of the mask.
    Returns:
        Return a 2-D bool tensor (B, T), where masked positions
        are filled with `True` and non-masked positions are
        filled with `False`.
    r   r   rU   N)
r'   r   r   r   r   r   	ones_liker   r   r(   r   r   r   r   condition_time_mask_suffixn  r   r   lengthsc                 C   s^   | j dks
J | j t||  }| d}tjd|| jd}|d||}|| dkS )a'  
    Args:
      lengths:
        A 1-D tensor containing sentence lengths.
      max_len:
        The length of masks.
    Returns:
      Return a 2-D bool tensor, where masked positions
      are filled with `True` and non-masked positions are
      filled with `False`.

    >>> lengths = torch.tensor([1, 3, 2, 5])
    >>> make_pad_mask(lengths)
    tensor([[False,  True,  True,  True,  True],
            [False, False, False,  True,  True],
            [False, False,  True,  True,  True],
            [False, False, False, False, False]])
    rN   r   rU   )ndimr   sizer'   r   r(   	unsqueezeexpand)r   r   nr   expaned_lengthsr   r   r   make_pad_mask  s   
r   c                 C   s8   t | tr| S |  dv rdS |  dv rdS td)a4  Used in argparse.ArgumentParser.add_argument to indicate
    that a type is a bool type and user can enter

        - yes, true, t, y, 1, to represent True
        - no, false, f, n, 0, to represent False

    See https://stackoverflow.com/questions/15008758/parsing-boolean-values-with-argparse  # noqa
    )yestruetr   1T)nofalsefr   0FzBoolean value expected.)r&   boollowerargparseArgumentTypeError)r/   r   r   r   str2bool  s   
	
r   infolog_filename	log_leveluse_consolec           
      C   s  t  }|d}t r-t r-t }t }d| d| d}|  d| d| } n	d}|  d| } tj	tj
| dd tj}|d	krLtj}n|d
krTtj}n|dkr\tj}n|dkrctj}tj| ||ddd |rt }	|	| |	t| td|	 dS dS )a  Setup log level.

    Args:
      log_filename:
        The filename to save the log.
      log_level:
        The log level to use, e.g., "debug", "info", "warning", "error",
        "critical"
      use_console:
        True to also print logs to console.
    z%Y-%m-%d-%H-%M-%Sz5%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] (/z) %(message)s-z?%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)sT)exist_okdebugr   warningcriticalw)filenameformatlevelfilemodeforcerB   N)r   nowstrftimerZ   is_availableis_initializedget_world_sizeget_rankr   makedirspathdirnameloggingERRORDEBUGINFOWARNINGCRITICALbasicConfigStreamHandlersetLevelsetFormatter	Formatter	getLogger
addHandler)
r   r   r   r  	date_timer   r   	formatterr   consoler   r   r   setup_logger  s@   

r  c                  C   s   z:t jg ddt jdj d } tt jg ddt jdj d dk}|r4| d } W | S | d } W | S    Y d S )	N)git	rev-parsez--shortHEADTcheckstdout
)r  diffz--shortstatr   z-dirtyz-clean)
subprocessrunPIPEr   decoderstripstripr   )
git_commitdirty_commitr   r   r   get_git_sha1  s:   

r+  c                  C   :   zt jg ddt jdj d } W | S    Y d S )N)r  logz-1z--format=%adz--date=localTr  r!  r#  r$  r%  r   r&  r'  r(  git_dater   r   r   get_git_date     r1  c                  C   r,  )N)r  r  z--abbrev-refr  Tr  r!  r.  r/  r   r   r   get_git_branch_name'  r2  r3  c                   C   sX   t tjtj tjjtjdd t t t	 t t
t jjt tt d
S )z Get the environment information.N   )
ztorch-versionztorch-cuda-availableztorch-cuda-versionzpython-versionzzipvoice-git-branchzzipvoice-git-sha1zzipvoice-git-datezzipvoice-pathhostnamez
IP address)r*   r'   rp   ri   r  r   sysr3  r+  r1  r   __file__resolveparentsocketgethostnamegethostbynamer   r   r   r   get_env_info9  s   r=  lrinclude_namesfreeze_modulesunfreeze_modulesc                 C   s  t |r
t |r
J tdd }g }|  D ]\}}|| t|dr)|j||< qtt}	|  D ]\}}
|
jsCt	
d| d q2|d}|d }t |dkrx|dkri|d	 }||v rht	
d| d
 q2n:||v rwt	
d| d
 q2n+t |dkr|dkr|d	 }||vrt	
d| d
 q2n||vrt	
d| d
 q2|||  }|dkr||d 9 }|d	d D ]}d||g}||| 9 }q|	| |r||
fn|
 q2|rdd |	 D S dd |	 D S )a  
    This is for use with the ScaledAdam optimizers (more recent versions that accept
    lists of named-parameters; we can, if needed, create a version without the names).

    It provides a way to specify learning-rate scales inside the module, so that if
    any nn.Module in the hierarchy has a floating-point parameter 'lr_scale', it will
    scale the LR of any parameters inside that module or its submodules.  Note: you
    can set module parameters outside the __init__ function, e.g.:
      >>> a = nn.Linear(10, 10)
      >>> a.lr_scale = 0.5

    Returns: a list of dicts, of the following form:
      if include_names == False:
        [  { 'params': [ tensor1, tensor2, ... ], 'lr': 0.01 },
           { 'params': [ tensor3, tensor4, ... ], 'lr': 0.005 },
         ...   ]
      if include_names == true:
        [  { 'named_params': [ (name1, tensor1, (name2, tensor2), ... ], 'lr': 0.01 },
           { 'named_params': [ (name3, tensor3), (name4, tensor4), ... ], 'lr': 0.005 },
         ...   ]

    c                   S   s   dS )Ng      ?r   r   r   r   r   <lambda>l  s    z/get_parameter_groups_with_lrs.<locals>.<lambda>lr_scalezRemove z from parameter.r   r   rN   z from parametersrB   Nc                 S      g | ]	\}}||d qS ))named_paramsr>  r   )rS   r>  pairsr   r   r   rT         z1get_parameter_groups_with_lrs.<locals>.<listcomp>c                 S   rE  ))r   r>  r   )rS   r>  r   r   r   r   rT     rH  )r   r   r   rP   r   rC  listnamed_parametersrequires_gradr
  r   r   joinr%   )r   r>  r?  r@  rA  flat_lr_scalenamesr   mlr_to_params	parameter
split_namerd   module_namecur_lrpartr   r   r   get_parameter_groups_with_lrsI  s\   



rV  )ri   )NNNFN)TTF)r   )r   T)Er   collectionsr+   r
  r   r:  r#  r6  rs   r   
contextlibr   r   pathlibr   typingr   r   r   r	   r
   r'   	packagingr   r   rZ   r   torch.nn.parallelr   r   torch.utils.tensorboardr   r   rq   ry   r   torch.cuda.ampr*   Pathlikedictr   r5   rx   rz   r   r   r(   r   r   r   r4   r   Tensorr   r   rO   r   Moduler   r   r   r   r   r  r+  r1  r3  r=  rV  r   r   r   r   <module>   s    \


$ 	  


&

#
5 