o
    is                     @   s   d dl Z d dlmZ d dlmZ d dlmZmZ d dlZ	d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ eeZG dd deZdS )    N)AsyncGenerator)cached_property)Literalcast)EngineClient)RequestLogger)OpenAIServing)OpenAIServingModels)
PromptTypeStreamingInput)init_logger)SupportsRealtimec                	       s   e Zd ZdZdddedededB def fd	d
Ze	de
e fddZdeejdf dejee  deedf fddZ  ZS )OpenAIServingRealtimezRealtime audio transcription service via WebSocket streaming.

    Provides streaming audio-to-text transcription by transforming audio chunks
    into StreamingInput objects that can be consumed by the engine.
    F)log_error_stackengine_clientmodelsrequest_loggerNr   c                   s,   t  j||||d d| _td| j d S )N)r   r   r   r   realtimez.OpenAIServingRealtime initialized for task: %s)super__init__	task_typeloggerinfo)selfr   r   r   r   	__class__ ^/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/entrypoints/openai/realtime/serving.pyr      s   zOpenAIServingRealtime.__init__returnc                 C   s$   ddl m} || j}ttt |S )z0Get the model class that supports transcription.r   )get_model_cls) vllm.model_executor.model_loaderr   model_configr   typer   )r   r   	model_clsr   r   r   r#   0   s   
zOpenAIServingRealtime.model_clsaudio_streaminput_streamc                 C  sF   t ttdf | j||| j}|2 z3 dH W }t|dV  q6 dS )a  Transform audio stream into StreamingInput for engine.generate().

        Args:
            audio_stream: Async generator yielding float32 numpy audio arrays
            input_stream: Queue containing context token IDs from previous
                generation outputs. Used for autoregressive multi-turn
                processing where each generation's output becomes the context
                for the next iteration.

        Yields:
            StreamingInput objects containing audio prompts for the engine
        N)prompt)r   r   r
   r#   buffer_realtime_audior!   r   )r   r$   r%   stream_input_iterr&   r   r   r   transcribe_realtime8   s   
z)OpenAIServingRealtime.transcribe_realtime)__name__
__module____qualname____doc__r   r	   r   boolr   r   r"   r   r#   r   npndarrayasyncioQueuelistintr   r)   __classcell__r   r   r   r   r      s*    
r   )r1   collections.abcr   	functoolsr   typingr   r   numpyr/   vllm.engine.protocolr   vllm.entrypoints.loggerr   &vllm.entrypoints.openai.engine.servingr   &vllm.entrypoints.openai.models.servingr	   vllm.inputs.datar
   r   vllm.loggerr   %vllm.model_executor.models.interfacesr   r*   r   r   r   r   r   r   <module>   s   