o
    i                     @   s   d dl Z d dlZd dlZd dlmZ d dlZd dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ edd	Zed
dZdedefddZeddefddZdS )    N)	Generator)FastAPIHTTPException)Response)write)Tensor)
SopranoTTSd   )cache_size_mbzSoprano TTS API)titletensorreturnc                 C   sD   t |  ddd t j}t }t|d| |d |	 S )z>
    Convert a 1D fp32 torch tensor to a WAV byte stream.
    g      g      ?i  i }  r   )
npclipnumpyastypeint16ioBytesIOr   seekread)r   audio_int16wav_io r   B/home/ubuntu/.local/lib/python3.10/site-packages/soprano/server.py_tensor_to_wav_bytes   s
   
r   z/v1/audio/speechpayloadc                    sN   |  d}t|tr| stdddt|}t|}t|dddidS )	z
    Minimal implementation of OpenAI's Speech endpoint.
    Fields:
      - input: string - text to synthesize
      - model, voice, etc. are accepted but ignored.
      - response_format: str - ignored, only support wav.
    inputi  z)`input` field must be a non-empty string.)status_codedetailz	audio/wavzContent-Dispositionz!attachment; filename="speech.wav")content
media_typeheaders)	get
isinstancestrstripr   ttsinferr   r   )r   textaudio_tensor	wav_bytesr   r   r   create_speech    s   
	
r,   )base64r   jsontypingr   r   r   fastapir   r   fastapi.responsesr   scipy.io.wavfiler   torchr   soprano.ttsr   r'   appbytesr   postdictr,   r   r   r   r   <module>   s    

