o
    
۾i                     @   s  d dl mZ d dlmZ d dlZd dlmZ dZdZddgZ	dej
d	ed
ededej
f
ddZdej
ee B eej
 B dee fddZdej
ee B eej
 B dB dedee fddZdej
dedeeeeef  dej
fddZdej
dedeeeeef  dej
fddZdejdej
dedeeeeef  dej
f
ddZd ej
d!ej
dej
fd"d#Zd$eej
 dee deej
d%f fd&d'Zdej
eej
 B dej
fd(d)Zd*ej
eej
 B dej
ee B dB d+edej
fd,d-ZdS ).    )Sequence)castNi     )      r   )r   r      input_lengthpaddingkernel_sizestridereturnc                 C   s   | d|  | | d S )z6Calculate Conv1d output length using standard formula.r   r    )r   r	   r
   r   r   r   [/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/glmasr_utils.py_calculate_conv_output_length   s   r   chunk_countsc                 C   sT   t | tjr
|  S | r#t | d tjr#tttj | }dd |D S dd | D S )Nr   c                 S   s   g | ]}t | qS r   )intitem.0cr   r   r   
<listcomp>    s    z)_as_list_chunk_counts.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   )r   r   r   r   r   r   !   s    )
isinstancetorchTensortolistr   list)r   tensor_countsr   r   r   _as_list_chunk_counts   s   r   
num_chunksc                 C   s   | d u r	dg| S t | S Nr   )r   )r   r   r   r   r   _normalize_chunk_counts$   s   
r    audio_lengthsmerge_factorconv_paramsc                 C   s.   |D ]\}}}t | |||} q| | | d S r   )r   )r!   r"   r#   r	   r
   r   r   r   r   &_get_audio_output_lengths_from_lengths-   s
   r$   maskc                 C   s   |  d}t|||S )N)sumr$   )r%   r"   r#   r!   r   r   r   #_get_audio_output_lengths_from_mask9   s   
r(   audio_towerc           	      C   sL   t | dr| |\}}n|}|D ]\}}}t||||}q|| | d S )a  
    Calculate the output lengths after audio processing.

    The output length accounts for:
    1. Convolution layers (downsampling)
    2. Merge factor (further downsampling during projection)

    Args:
        audio_tower: The audio encoder module
        audio_lengths: Input feature lengths [batch_size]
        merge_factor: Factor for merging adjacent features
        conv_params: List of (padding, kernel_size, stride) for each conv layer

    Returns:
        Output lengths after all processing [batch_size]
     _get_feat_extract_output_lengthsr   )hasattrr*   r   )	r)   r!   r"   r#   _conv_output_lengthsr	   r
   r   r   r   r   #_get_audio_output_lengths_for_towerD   s   

r.   audio_featuresaudio_output_lengthsc                 C   sD   | j \}}}|d}t||||j|k }| | d|S )Nr   r&   )shape	unsqueezer   arangeexpandtodeviceview)r/   r0   r   max_audio_tokens	embed_dimaudio_features_maskr   r   r   !_flatten_audio_features_by_lengthk   s   
r;   chunk_embeddings.c                 C   sF   g }d}|D ]}| |||  }| tj|dd ||7 }qt|S )Nr   )dim)appendr   cattuple)r<   r   grouped_embeddingscurrent_idxcountaudio_chunksr   r   r   _group_audio_embeddingsz   s   
rE   c                 C   s6   t | tr| rt | d tjrt| S t| S | S )z>Convert mask to tensor, handling both list and tensor formats.r   )r   r   r   r   stacktensor)r%   r   r   r   _normalize_to_tensor   s   

rH   feature_attention_maskitem_idxc                 C   s~   |du r| | }t | tjr|dS t|S t|}t|d| }|||  }t | tjr5| || S | || }t|S )z1Extract attention mask for a specific audio item.Nr   )r   r   r   r2   rH   r   r'   )rI   r   rJ   r%   counts	start_idxend_idx
mask_slicer   r   r   _extract_mask_for_item   s   
rO   )collections.abcr   typingr   r   torch.nnnnDEFAULT_MAX_AUDIO_LEN_SDEFAULT_MERGE_FACTORDEFAULT_CONV_PARAMSr   r   r   r   r   r    r@   r$   r(   Moduler.   r;   rE   rH   rO   r   r   r   r   <module>   s   


	


'

 