o
    iD                     @   s   d dl Z d dlZd dlmZ d dlmZmZ eddG dd de jj	j
ZeddG dd de jj	j
Zedd	G d
d	 d	e jj	j
ZdS )    N)tables)extract_fbankload_audio_text_image_videodataset_classesAudioLLMNARDatasetc                       j   e Zd ZdZ					ddededef fdd	Zd
d Zdd Z	dd Z
dd ZddefddZ  ZS )r   
    AudioLLMDataset
    N        index_dsint_pad_valuefloat_pad_valuec                       t    tj|}||fi || _|dd }	|	r-tj|	}
|
di |di }	|	| _|dd }|rItj|}|di |di }|| _|| _	|d u rUdn|j
| _
d| _|| _|| _|dd| _d	| j| _d
| _|dd| _| j| _d S )Npreprocessor_speechpreprocessor_speech_confpreprocessor_textpreprocessor_text_conf>  soundpromptzPlease copy the following text.USER: 
INSTRUCTION: {}
INPUT:  IGNORE_INDEX super__init__r   index_ds_classesgetr   preprocessor_classesr   r   frontendfs	data_type	tokenizerr   r   format
prompt_pre	prompt_afr   r   selfpathr   r!   r$   r   r   kwargsindex_ds_classr   preprocessor_speech_classr   preprocessor_text_class	__class__r   Y/home/ubuntu/.local/lib/python3.10/site-packages/funasr/datasets/llm_datasets/datasets.pyr      6   


zAudioLLMNARDataset.__init__c                 C      | j | }| j |S Nr   get_source_lenr)   indexitemr   r   r1   r6   6      
z!AudioLLMNARDataset.get_source_lenc                 C   r3   r4   r   get_target_lenr7   r   r   r1   r<   :   r:   z!AudioLLMNARDataset.get_target_lenc                 C   
   t | jS r4   lenr   r)   r   r   r1   __len__>      
zAudioLLMNARDataset.__len__c                 C   s  | j | }|d }t|| jd}| jr| j|| jd}t|| j| jdd\}}|d}|d }| jr8| |}| j	
| j}t|}	| j	
|}
|
d | j	jkrW|
dd  }
t|
}|}||
 | j	jg |
 }tjt|tjd}d	||	|	| < |d	}| j	
|}
|
d | j	jkr|
dd  }
||
 |
 | j	jg }tjt|tjd}d	|d |	< |d}| j|| < dg|	 dg|  dg|  dg }tj|tjd}|
}tj|tjd}tjt|gtjd}tjt|gtjd}||||||||||d

S )Nsourcer"   Tr#   r!   is_finalr   target   dtyper	   )
speechspeech_lengthstexttext_lengths	input_idsattention_mask
labels_ids
label_mask
audio_maskprompt_bos_length)r   r   r"   r   r   r#   r!   squeezer   r$   encoder&   r?   bos_token_idpad_token_idtorchtensorcopydeepcopyint64geeos_token_idr   float32int32)r)   r8   r9   rC   data_srcrK   rL   rG   prompt_ids_preprompt_ids_length
target_idstarget_ids_lengthaudio_lengthrO   rP   rQ   rR   rS   idsrM   rN   rT   r   r   r1   __getitem__A   sp   





"zAudioLLMNARDataset.__getitem__samplesc                 C      i }|D ]}|  D ]}||vrg ||< || ||  q
q| D ]0\}}t|d tjrS|d jtjks?|d jtjkrC| j	}n| j
}tjjjj|d|d||< q#|S Nr   T)batch_firstpadding_valuekeysappenditems
isinstancerY   TensorrJ   r]   ra   r   r   nnutilsrnnpad_sequencer)   rj   outputssamplekey	data_list	pad_valuer   r   r1   collator   "    

zAudioLLMNARDataset.collatorNNNr	   r
   r4   __name__
__module____qualname____doc__strintfloatr   r6   r<   rA   ri   listr   __classcell__r   r   r/   r1   r      s&    (PAudioLLMDatasetc                       r   )r   r   Nr	   r
   r   r   r   c                    r   Nr   r   r   r   r   r   r   zTranscribe speech to text.r   r   r   r   r   r   r(   r/   r   r1   r      r2   zAudioLLMDataset.__init__c                 C   r3   r4   r5   r7   r   r   r1   r6      r:   zAudioLLMDataset.get_source_lenc                 C   r3   r4   r;   r7   r   r   r1   r<      r:   zAudioLLMDataset.get_target_lenc                 C   r=   r4   r>   r@   r   r   r1   rA      rB   zAudioLLMDataset.__len__c              
   C     | j | }|d }t|| jd}| jr| j|| jd}t|| j| jdd\}}|d}|d }| jr8| |}| j	
| j}t|}	d| j|}
| j	
|
}t||	 }|| j	jg }tj|tjd}d	||	d < |d	}d| j|}| j	
|}t||	 }t|| j	jg }tj|tjd}d	|d |	< |d}| j|| < dg|	 d
g|  dg }tj|tjd}| j	
|}tj|tjd}tjt|gtjd}|||||||||d	S NrC   rD   TrE   r   rG   z{}{}rI   r	   rH   )	rK   rL   rM   rN   rO   rP   rQ   rR   rS   r   r   r"   r   r   r#   r!   rU   r   r$   rV   r&   r?   r%   rX   rY   rZ   r]   r^   r[   r\   r_   r   r`   ra   r)   r8   r9   rC   rb   rK   rL   rG   rc   rd   prompt_inputprompt_input_idsrg   rO   rP   prompt_answerprompt_answer_idsanswer_lengthrQ   rR   rS   rh   rM   rN   r   r   r1   ri      X   





zAudioLLMDataset.__getitem__rj   c                 C   rk   rl   ro   ry   r   r   r1   r     r   zAudioLLMDataset.collatorr   r4   r   r   r   r/   r1   r      &    (8AudioLLMARDatasetc                       r   )r   r   Nr	   r
   r   r   r   c                    r   r   r   r(   r/   r   r1   r   4  r2   zAudioLLMARDataset.__init__c                 C   r3   r4   r5   r7   r   r   r1   r6   \  r:   z AudioLLMARDataset.get_source_lenc                 C   r3   r4   r;   r7   r   r   r1   r<   `  r:   z AudioLLMARDataset.get_target_lenc                 C   r=   r4   r>   r@   r   r   r1   rA   d  rB   zAudioLLMARDataset.__len__c              
   C   r   r   r   r   r   r   r1   ri   g  r   zAudioLLMARDataset.__getitem__rj   c                 C   rk   rl   ro   ry   r   r   r1   r     r   zAudioLLMARDataset.collatorr   r4   r   r   r   r/   r1   r   .  r   )rY   r[   funasr.registerr   funasr.utils.load_utilsr   r   registerrv   dataDatasetr   r   r   r   r   r   r1   <module>   s    
 
 
