o
    9wi"                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZmZ d dlmZmZmZ ded	e jfd
dZdS )    N)alignload_align_model)
load_model)
load_audio)DiarizationPipelineassign_word_speakers)AlignedTranscriptionResultTranscriptionResult)	LANGUAGESTO_LANGUAGE_CODE
get_writerargsparserc           3         sJ    d}  d}  d}  d}  d}  d}  d}  d}	  d	}
  d
}tj|dd   d}  d}  d}  d}|dkrSd}  d}  d}  d}  d}  d}  d}  d}  d}  d}  d}  d}  d}|r|std  d d urĈ d   d<  d tvrĈ d tv rt d   d< n	td! d  |	d"r d d#kr d d urt| d$ d  d% d# d<  d d ur d nd#}  d&}  d' }d urt
t|d(|}n|g}d)}  d* } d+kr!t|  | }  d,  d-  d.|  d/  d0  d1d2  d3d4d5   d6d7D   d8d9}!t||}"g d:}#|rr|#D ]}$ |$ rp|d;|$ d< q` d= r d> std?  fd@dA|#D }%g }&g }'t|||	||
 d |!||||dB|||dC}(  dDD ]})t|)}*tdE |(j|*||||dF}+|&|+|)f q~(t  tj  |sH|&}'g }&t|||dG\}},|'D ][\}+})t|'dHkr|)}-n|*}-|d ur5t|+dI d+kr5|+dd#|,d kr#tdJ|+d  dK|,d  dL t|+d |\}},tdM t|+dI ||,|-||||dN}+|&|+|)f q~t  tj  |r|d u rTtdO |&}'tdP tdQ| g }&t|||dR}.|'D ]&\}+}/|.|/|||dS}0|r~|0\}1}2n|0}1d }2t |1|+|2}+|&|+|/f qj|&D ]\}+})||+d< |"|+|)|% qd S )TzTranscription task to be called from CLI.

    Args:
        args: Dictionary of command-line arguments.
        parser: argparse.ArgumentParser object.
    model
batch_size	model_dirmodel_cache_only
output_diroutput_formatdevicedevice_indexcompute_typeverboseT)exist_okalign_modelinterpolate_methodno_aligntask	translatereturn_char_alignmentshf_token
vad_method	vad_onset
vad_offset
chunk_sizediarizemin_speakersmax_speakersdiarize_modelprint_progressspeaker_embeddingsz4--speaker_embeddings has no effect without --diarizelanguageNzUnsupported language: z.enenz( is an English-only model but received 'z'; using English instead.temperature!temperature_increment_on_fallbackgzo ?   threadsr   	beam_sizepatiencelength_penaltycompression_ratio_thresholdlogprob_thresholdno_speech_thresholdFinitial_promptc                 S   s   g | ]}t |qS  )int).0xr8   r8   P/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/whisperx/transcribe.py
<listcomp>j   s    z#transcribe_task.<locals>.<listcomp>suppress_tokens,suppress_numerals)r1   r2   r3   temperaturesr4   log_prob_thresholdr6   condition_on_previous_textr7   r>   r@   )highlight_wordsmax_line_countmax_line_widthz--z not possible with --no_alignrE   rF   z7--max_line_count has no effect without --max_line_widthc                    s   i | ]}|  |qS r8   )pop)r:   argr   r8   r<   
<dictcomp>v   s    z#transcribe_task.<locals>.<dictcomp>)r$   r"   r#   )r   r   download_rootr   r+   asr_optionsr!   vad_optionsr   local_files_onlyr0   audioz>>Performing transcription...)r   r$   r)   r   )
model_name   segmentszNew language found (z)! Previous was (z2), loading new alignment model for new language...z>>Performing alignment...)r   r   r)   zWarning, no --hf_token used, needs to be saved in environment variable, otherwise will throw error loading diarization model...z>>Performing diarization...z>>Using model:)rP   use_auth_tokenr   )r&   r'   return_embeddings)!rG   osmakedirswarningswarnlowerr
   r   
ValueErrorendswithtuplenparangetorchset_num_threadssplitr   errorr   r   print
transcribeappendgccollectcudaempty_cacher   lengetr   r   r   )3r   r   rP   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   diarize_model_namer)   return_speaker_embeddingsalign_languager-   	incrementfaster_whisper_threadsr0   rL   writerword_optionsoptionwriter_argsresultstmp_resultsr   
audio_pathrO   resultalign_metadatainput_audior(   input_audio_pathdiarize_resultdiarize_segmentsr*   r8   rI   r<   transcribe_task   sP  
	




































r~   )argparserf   rU   rW   numpyr]   r_   whisperx.alignmentr   r   whisperx.asrr   whisperx.audior   whisperx.diarizer   r   whisperx.typesr   r	   whisperx.utilsr
   r   r   dictArgumentParserr~   r8   r8   r8   r<   <module>   s    