o
    i&                     @   sv   d dl mZmZ d dlZd dlZd dlZd dlZ	ddl
mZ dd Zdd Zdd
dZG dd dZG dd dZdS )    )enespeakN   )TextPreprocessorc                 C   s   ddl }|d| }|S )zBBasic English tokenizer that splits on whitespace and punctuation.r   Nz\w+|[^\w\s])refindall)textr   tokens r
   H/home/ubuntu/.local/lib/python3.10/site-packages/kittentts/onnx_model.pybasic_english_tokenize   s   r   c                 C   s(   |   } | s| S | d dvr| d } | S )z7Ensure text ends with punctuation. If not, add a comma.z.!?,;:,)strip)r   r
   r
   r   ensure_punctuation   s   r     c           	      C   s   ddl }|d| }g }|D ]Q}| }|sqt||kr%|t| q| }d}|D ]&}t|t| d |krF||rBd| n|7 }q-|rQ|t|  |}q-|r_|t|  q|S )z1Split text into chunks for processing long texts.r   Nz[.!?]+ r    )r   splitr   lenappendr   )	r   max_lenr   	sentenceschunkssentencewords
temp_chunkwordr
   r
   r   
chunk_text   s*   r   c                   @   s   e Zd ZdddZdd ZdS )TextCleanerNc           	      C   sZ   d}d}d}d}|gt | t | t | }i }tt|D ]}|||| < q|| _d S )N$u   ;:,.!?¡¿—…"«»"" 4ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzu   ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ)listranger   word_index_dictionary)	selfdummy_pad_punctuation_letters_letters_ipasymbolsdictsir
   r
   r   __init__8   s   
zTextCleaner.__init__c              	   C   s8   g }|D ]}z
| | j|  W q ty   Y qw |S N)r   r$   KeyError)r%   r   indexescharr
   r
   r   __call__F   s   zTextCleaner.__call__r/   )__name__
__module____qualname__r.   r3   r
   r
   r
   r   r   7   s    
r   c                   @   s   e Zd Zddi i fddZddededed	efd
dZddedededed	e	j
f
ddZddededed	e	j
fddZ		ddedededededed	dfddZdS )KittenTTS_1_Onnxzkitten_tts_nano_preview.onnxz
voices.npzc                 C   sj   || _ t|| _t|| _tjj	dddd| _t
 | _|| _g d| _g d| _|| _tdd| _dS )	zInitialize KittenTTS with model and voice data.
        
        Args:
            model_path: Path to the ONNX model file
            voices_path: Path to the voices NPZ file
        zen-usT)languagepreserve_punctuationwith_stress)zexpr-voice-2-mzexpr-voice-2-fzexpr-voice-3-mzexpr-voice-3-fzexpr-voice-4-mzexpr-voice-4-fexpr-voice-5-mzexpr-voice-5-f)BellaJasperLunaBrunoRosieHugoKikiLeoF)remove_punctuationN)
model_pathnploadvoicesortInferenceSessionsession
phonemizerbackendEspeakBackendr   text_cleanerspeed_priorsavailable_voicesall_voice_namesvoice_aliasesr   preprocessor)r%   rE   voices_pathrP   rS   r
   r
   r   r.   Q   s   

zKittenTTS_1_Onnx.__init__      ?r   voicespeedreturnc           
      C   s   || j v r
| j | }|| jvrtd| d| j || jv r&|| j|  }| j|g}t|d }d|}| |}|	dd |
d |
d tj|gtjd}tt|| j| jd d }| j| ||d  }	||	tj|gtjddS )	z9Prepare ONNX model inputs from text and voice parameters.zVoice 'z' not available. Choose from: r   r   
   )dtyper   )	input_idsstylerX   )rS   rQ   
ValueErrorrP   rL   	phonemizer   joinrO   insertr   rF   arrayint64minr   rH   shapefloat32)
r%   r   rW   rX   phonemes_listphonemesr	   r\   ref_idref_sr
   r
   r   _prepare_inputsl   s(   







z KittenTTS_1_Onnx._prepare_inputsr;   T
clean_textc                 C   sB   g }|r	|  |}t|D ]}|| ||| qtj|ddS )Nr   )axis)rT   r   r   generate_single_chunkrF   concatenate)r%   r   rW   rX   rl   
out_chunks
text_chunkr
   r
   r   generate   s   
zKittenTTS_1_Onnx.generatec                 C   s4   |  |||}| jd|}|d dddf }|S )a  Synthesize speech from text.
        
        Args:
            text: Input text to synthesize
            voice: Voice to use for synthesis
            speed: Speech speed (1.0 = normal)
            
        Returns:
            Audio data as numpy array
        Nr   .ix)rk   rK   run)r%   r   rW   rX   onnx_inputsoutputsaudior
   r
   r   rn      s   z&KittenTTS_1_Onnx.generate_single_chunk]  output_pathsample_rateNc                 C   s2   | j ||||d}t||| td|  dS )a  Synthesize speech and save to file.
        
        Args:
            text: Input text to synthesize
            output_path: Path to save the audio file
            voice: Voice to use for synthesis
            speed: Speech speed (1.0 = normal)
            sample_rate: Audio sample rate
            clean_text: If true, it will cleanup the text. Eg. replace numbers with words.
        )rl   zAudio saved to N)rr   sfwriteprint)r%   r   rx   rW   rX   ry   rl   rv   r
   r
   r   generate_to_file   s   z!KittenTTS_1_Onnx.generate_to_file)rV   )r;   rV   T)r;   rV   )r;   rV   rw   T)r4   r5   r6   r.   strfloatdictrk   boolrF   ndarrayrr   rn   intr}   r
   r
   r
   r   r7   P   s     ""r7   )r   )misakir   r   numpyrF   rL   	soundfilerz   onnxruntimerI   
preprocessr   r   r   r   r   r7   r
   r
   r
   r   <module>   s    

