o
    9wi)!                     @   sZ   d dl Z d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
 dd Zedkr+e  dS dS )    N)	LANGUAGESTO_LANGUAGE_CODEoptional_floatoptional_intstr2boolc               	   C   s  t jt jd} | jddtdd | jdddd	 | jd
tddd | jdtd dd | jdtj r4dnddd	 | jddt	dd | jddt	dd | jddtg ddd | jd d!td"d#d | jd$d%td&g d'd(d) | jd*td+d,d | jd-td.d.d/gd0d) | jd1td t
t t
d2d3 t D  d4d) | jd5d d6d	 | jd7d8g d9d:d; | jd<d=d>d? | jd@d=dAd? | jdBtdCdCdDgdEd) | jdFtdGdHd | jdItdJdKd | jdLt	dMdNd | jdOd=dPd? | jdQd t	dRd | jdSd t	dTd | jdUdVtdWd | jdXd=dYd? | jdZtdd[d | jd\td]d^d | jd_td]d`d | jdatdbdcd | jddtdbded | jdftdgdhd | jdid=djd? | jdktd dld | jdmtddnd | jdotd+dpd | jdqtdrdsd | jdttdudvd | jdwtdxdyd | jdztd{d|d | jd}td d~d | jdtd dd | jdtddd | jdtdddgd~d) | jdtddd | jdtd dd | jdtddd | jddddtjd dd | jddddt  dt  ddd |  j}ddlm} |||  d S )N)formatter_classaudio+zaudio file(s) to transcribe)nargstypehelpz--modelsmallz name of the Whisper model to use)defaultr   z--model_cache_onlyFzZIf True, will not attempt to download models, instead using cached models from --model_dir)r   r   r   z--model_dirz>the path to save model files; uses ~/.cache/whisper by defaultz--devicecudacpuz#device to use for PyTorch inferencez--device_indexr   z/device index to use for FasterWhisper inference)r   r   r   z--batch_size   z&the preferred batch size for inferencez--compute_typefloat16)r   float32int8zcompute type for computation)r   r   choicesr   z--output_dirz-o.zdirectory to save the outputsz--output_formatz-fall)r   srtvtttxttsvjsonaudzSformat of the output file; if not specified, all available formats will be produced)r   r   r   r   z	--verboseTz4whether to print out the progress and debug messagesz--task
transcribe	translatezawhether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')z
--languagec                 S   s   g | ]}|  qS  )title).0kr    r    N/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/whisperx/__main__.py
<listcomp>   s    zcli.<locals>.<listcomp>zHlanguage spoken in the audio, specify None to perform language detectionz--align_modelz/Name of phoneme-level ASR model to do alignmentz--interpolate_methodnearest)r&   linearignorezaFor word .srt, method to assign timestamps to non-aligned words, or merge them into neighbouring.)r   r   r   z
--no_align
store_truez Do not perform phoneme alignment)actionr   z--return_char_alignmentsz9Return character-level alignments in the output json filez--vad_methodpyannotesilerozVAD method to be usedz--vad_onsetg      ?zYOnset threshold for VAD (see pyannote.audio), reduce this if speech is not being detectedz--vad_offsetgZd;?z[Offset threshold for VAD (see pyannote.audio), reduce this if speech is not being detected.z--chunk_size   zYChunk size for merging VAD segments. Default is 30, reduce this if the chunk is too long.z	--diarizez?Apply diarization to assign speaker labels to each segment/wordz--min_speakersz+Minimum number of speakers to in audio filez--max_speakersz+Maximum number of speakers to in audio filez--diarize_modelz pyannote/speaker-diarization-3.1z,Name of the speaker diarization model to usez--speaker_embeddingszEInclude speaker embeddings in JSON output (only works with --diarize)z--temperatureztemperature to use for samplingz	--best_of   z<number of candidates when sampling with non-zero temperaturez--beam_sizezHnumber of beams in beam search, only applicable when temperature is zeroz
--patienceg      ?zoptional patience value to use in beam decoding, as in https://arxiv.org/abs/2204.05424, the default (1.0) is equivalent to conventional beam searchz--length_penaltyzoptional token length penalty coefficient (alpha) as in https://arxiv.org/abs/1609.08144, uses simple length normalization by defaultz--suppress_tokensz-1zcomma-separated list of token ids to suppress during sampling; '-1' will suppress most special characters except common punctuationsz--suppress_numeralsztwhether to suppress numeric symbols and currency symbols during sampling, since wav2vec2 cannot align them correctlyz--initial_promptz:optional text to provide as a prompt for the first window.z--condition_on_previous_textzif True, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loopz--fp16z5whether to perform inference in fp16; True by defaultz#--temperature_increment_on_fallbackg?zhtemperature to increase when falling back when the decoding fails to meet either of the thresholds belowz--compression_ratio_thresholdg333333@zUif the gzip compression ratio is higher than this value, treat the decoding as failedz--logprob_thresholdg      zUif the average log probability is lower than this value, treat the decoding as failedz--no_speech_thresholdg333333?zif the probability of the <|nospeech|> token is higher than this value AND the decoding has failed due to `logprob_threshold`, consider the segment as silencez--max_line_widthzb(not possible with --no_align) the maximum number of characters in a line before breaking the linez--max_line_countzG(not possible with --no_align) the maximum number of lines in a segmentz--highlight_wordszQ(not possible with --no_align) underline each word as it is spoken in srt and vttz--segment_resolutionsentencechunkz	--threadsz]number of threads used by torch for CPU inference; supercedes MKL_NUM_THREADS/OMP_NUM_THREADSz
--hf_tokenz9Hugging Face Access Token to access PyAnnote gated modelsz--print_progresszFif True, progress will be printed in transcribe() and align() methods.z	--versionz-Vversionz	%(prog)s whisperxz*Show whisperx version information and exit)r*   r1   r   z--python-versionz-PzPython z ()z(Show python version information and exit)transcribe_task)argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentstrr   torchr   is_availableintsortedr   keysr   floatr   r   	importlibmetadatar1   platformpython_versionpython_implementation
parse_args__dict__whisperx.transcriber4   )parserargsr4   r    r    r$   cli   sj   2"*
rJ   __main__)r5   importlib.metadatar@   rB   r:   whisperx.utilsr   r   r   r   r   rJ   __name__r    r    r    r$   <module>   s    M
