o
    }oi                     @   s   d dl Z d dlmZ d dlZdejdejfddZddee d	ed
edee fddZ	dededefddZ
	ddejdededededejfddZdejdededefddZdS )     N)List	token_idsreturnc                 C   s6   |  d}tj|tj| jd}|d|  }|S )zCreate position ids   dtypedevicer   )sizetorcharangelongr   	unsqueeze	expand_asclone)r   
seq_lengthposition_ids r   e/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/speechlm/data/dataset/data_utils.pybuild_position_ids   s   
r   T	input_idsanswer_start_idxanswer_only_lossc                    s4   |r fddt t| D }|S dgt|  }|S )zCPad input_ids in batch to max batch length while building loss maskc                    s   g | ]}t | kqS r   )float).0idxr   r   r   
<listcomp>"   s    z#build_loss_mask.<locals>.<listcomp>g      ?)rangelen)r   r   r   	loss_maskr   r   r   build_loss_mask   s
   r    nmc                 C   s   | | d | | S )z#Ceil n to the nearest multiple of mr   r   )r!   r"   r   r   r   ceil_to_nearest)   s   r#   r   inputs
max_length	pad_valueceil_toseq_dimc                 C   s  |dkr
|  |d} |dkrt||}| d|k rg|| d }|  dkr<tj| d|| df|| j| jd}n!|  dkrStj| d|f|| j| jd}n
td|   dtj	| |gdd} n| d|krx| d	d	d	|f } |dkr|  |d} | S )
a  
    Pad or trim a tensor to max_length
    Args:
        inputs: tensor to pad or trim, shape=[batch, seq, hid_dim] or [batch, seq]
        max_length: length to pad or trim to
        pad_value: value to pad with
        ceil_to: pad to the nearest multiple of this number
    r      r      r   zUnsupported input dim: z, must be [B,T,D] or [B,T])dimN)
	transposer#   r	   r+   r
   fullr   r   
ValueErrorcat)r$   r%   r&   r'   r(   pad_sizepadr   r   r   pad_or_trim_to_max_length.   s&   
"r2   audio_signalsample_rateframe_lengthc                 C   s   t t| d| | S )a
  
    Estimate the length of the audio signal after encoding
    Args:
        audio_signal: audio signal tensor, shape=[batch, time]
        sample_rate: sample rate of the audio signal, e.g. 16000
        frame_length: frame length in seconds, e.g. 0.08 for FC
    r   )intmathceilr	   )r3   r4   r5   r   r   r   estimate_encoded_max_lengthV   s   r9   )T)r   r   )r7   typingr   r
   Tensorr   r6   boolr   r    r#   r2   r9   r   r   r   r   <module>   s*   $	
 (