o
    i%h                  
   @   s  d Z ddlZddlZddlZddlZddlmZmZ ddlm	Z	m
Z
mZmZmZmZ ddlZddlmZ ddlmZmZ ddlmZ ddlmZ dd	lmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z, dd
l-m.Z. ddl/m0Z0m1Z1 ddl2m3Z3 ddl4m5Z5m6Z6 ddl7m8Z8 ddl9m:Z:m;Z; ddl<m=Z=m>Z>m?Z? ddl@mAZA zddlBmCZD W n  eEy ZF zeGdeF  eGd eHdeF dZF[Fww eG dd de>ZIG dd deZJG dd deZKG dd deZLG dd de;ZMdS ) zUltravox Realtime API service implementation.

This module provides real-time conversational AI capabilities using Ultravox's
Realtime API, supporting both text and audio modalities with
voice transcription, streaming responses, and tool usage.
    N)	dataclassfield)AnyDictListLiteralOptionalUnion)logger)	BaseModelField)ToolsSchema)create_stream_resampler)AggregationTypeCancelFrameEndFrameFrameInputAudioRawFrameInputTextRawFrameInterruptionFrameLLMContextFrameLLMFullResponseEndFrameLLMFullResponseStartFrameLLMTextFrame
StartFrameTranscriptionFrameTTSAudioRawFrameTTSStartedFrameTTSStoppedFrameTTSTextFrameUserAudioRawFrameVADUserStoppedSpeakingFrame)
LLMContext)LLMAssistantAggregatorParamsLLMUserAggregatorParams)LLMContextAggregatorPair)OpenAILLMContextOpenAILLMContextFrame)FrameDirection)FunctionCallFromLLM
LLMService)	NOT_GIVENLLMSettings	_NotGiven)time_now_iso8601)clientzException: zIIn order to use Ultravox, you need to `pip install pipecat-ai[ultravox]`.zMissing module: c                   @   s(   e Zd ZU dZeedZeeB e	d< dS )UltravoxRealtimeLLMSettingszSettings for UltravoxRealtimeLLMService.

    Parameters:
        output_medium: The output medium for the model ("voice" or "text").
    )defaultoutput_mediumN)
__name__
__module____qualname____doc__r   r+   r2   strr-   __annotations__ r9   r9   Q/home/ubuntu/.local/lib/python3.10/site-packages/pipecat/services/ultravox/llm.pyr0   G   s   
 r0   c                   @   s   e Zd ZU dZeed< ejed< ee	dZ
eeef ed< ee	dZeeef ed< dZeed  ed	< edejd
dejdddZeej ed< ee	dZeeef ed< dS )AgentInputParamsa  Input parameters for Ultravox Realtime generation using a pre-defined Agent.

    Parameters:
        api_key: Ultravox API key for authentication.
        agent_id: The ID of the Ultravox Realtime agent you'd like to use. Agents
            are pre-configured to handle calls consistently. You can create and edit
            agents in the Ultravox console (https://app.ultravox.ai/agents) or using
            the Ultravox API (https://docs.ultravox.ai/api-reference/agents/agents-post).
        template_context: Context variables to use when instantiating a call with the
            agent. Defaults to an empty dict.
        metadata: Metadata to attach to the call. Default to an empty dict.
        output_medium: The initial output medium for the agent. Use "text" for text
            responses or "voice" for audio responses. Defaults to None, which uses the
            agent's default.
        max_duration: The maximum duration of the call. Defaults to None, which will
            use the agent's default maximum duration.
        extra: Extra parameters to include in the agent call creation request. Defaults
            to an empty dict. See the Ultravox API documentation for valid arguments:
            https://docs.ultravox.ai/api-reference/agents/agents-calls-post
    api_keyagent_iddefault_factorytemplate_contextmetadataNtextvoicer2   
   seconds   hoursr1   gelemax_durationextra)r3   r4   r5   r6   r7   r8   uuidUUIDr   dictr@   r   r   rA   r2   r   r   datetime	timedeltarN   rO   r9   r9   r9   r:   r;   R   s   
 
r;   c                   @   s   e Zd ZU dZeed< dZee ed< eddddZ	e
ed< dZee ed	< dZeej ed
< eedZeeef ed< dZeed  ed< eejddejddejdddZejed< eedZeeef ed< dS )OneShotInputParamsa  Input parameters for Ultravox Realtime generation using a one-off call.

    Parameters:
        api_key: Ultravox API key for authentication.
        system_prompt: System prompt to guide the model's behavior. Defaults to None.
        temperature: Sampling temperature for response generation. Defaults to 0.
        model: Model identifier to use. Defaults to "fixie-ai/ultravox".
        voice: Voice identifier for speech generation. Defaults to None.
        metadata: Metadata to attach to the call. Default to an empty dict.
        output_medium: The initial output medium for the agent. Use "text" for text
            responses or "voice" for audio responses. Defaults to None (voice).
        max_duration: The maximum duration of the call. Defaults to one hour.
        extra: Extra parameters to include in the call creation request. Defaults
            to an empty dict. See the Ultravox API documentation for valid arguments:
            https://docs.ultravox.ai/api-reference/calls/calls-post
    r<   Nsystem_prompt              ?rK   temperaturemodelrD   r>   rA   rB   r2   rH   rI   rE   rF   rN   rO   )r3   r4   r5   r6   r7   r8   rV   r   r   rY   floatrZ   rD   rP   rQ   rR   rA   r   r2   r   rS   rT   rN   rO   r   r9   r9   r9   r:   rU   s   s   
 


rU   c                   @   s   e Zd ZU dZeed< dS )JoinUrlInputParamszInput parameters for joining an existing Ultravox Realtime call via join URL.

    Parameters:
        join_url: The join URL for the existing Ultravox Realtime call.
    join_urlN)r3   r4   r5   r6   r7   r8   r9   r9   r9   r:   r\      s   
 r\   c                	       s$  e Zd ZU dZeZeed< ddddeee	e
f dee dee f fdd	Zd
efddZdef fddZedeed  d
ee fddZded
efddZde	d
efddZded
eeeef  fddZdef fddZdef fddZdd  Z d!ef fd"d#Z!de"d$e#f fd%d&Z$d'e%fd(d)Z&de'fd*d+Z(de)fd,d-Z*d.efd/d0Z+d1efd2d3Z,d4ee-eeef f fd5d6Z.d7d8 Z/d9e-fd:d;Z0d<d= Z1d>ed?ed@eeef fdAdBZ2d.efdCdDZ3ded.ee d!ee dEefdFdGZ4e5 e6 dHd'e7dIe5dJe6d
e8fdKdLZ9  Z:S )MUltravoxRealtimeLLMServicea  Provides access to the Ultravox Realtime API.

    This service enables real-time conversations with Ultravox, supporting both
    text and audio output. It handles voice transcription, streaming audio
    responses, and tool usage.

    Note: Ultravox is an audio-native model, so voice transcriptions are not used
    by the model and may not always align with its understanding of user input.
    	_settingsN)settingsone_shot_selected_toolsparamsr`   ra   c                   s   | j ddddddddddddd}|dur|| t jdd|i| || _|r9t| jts6td n|| _	d| _
d| _d| _d| _d| _d| _t | _dS )a  Initialize the Ultravox Realtime LLM service.

        Args:
            params: Configuration parameters for the model.
            settings: Ultravox Realtime LLM settings. If provided, the ``settings``
                values take precedence over default values.
            one_shot_selected_tools: ToolsSchema for tools to use with this call.
                May only be set with OneShotInputParams.
            **kwargs: Additional arguments passed to parent LLMService.
        NF)rZ   system_instructionrY   
max_tokenstop_ptop_kfrequency_penaltypresence_penaltyseedfilter_incomplete_user_turnsuser_turn_completion_configr2   r`   zPone_shot_selected_tools may only be set when using OneShotInputParams; ignoring.i  r9   )Settingsapply_updatesuper__init___params
isinstancerU   r
   warning_selected_tools_socket_receive_task_disconnecting_bot_responding_last_user_id_sample_rater   
_resampler)selfrb   r`   ra   kwargsdefault_settings	__class__r9   r:   ro      sF   

z#UltravoxRealtimeLLMService.__init__returnc                 C   s   dS )zCheck if the service can generate usage metrics.

        Returns:
            True if metrics generation is supported.
        Tr9   r{   r9   r9   r:   can_generate_metrics   s   z/UltravoxRealtimeLLMService.can_generate_metricsframec              
      s   t  |I dH  zM| j td r  | jj}n#  td r+  | | jI dH }n td r; | | jI dH }n t	
d|  t|I dH | _| |  | _W dS  tyu } z| jd|ddI dH  W Y d}~dS d}~ww )zgStart the service and establish connection.

        Args:
            frame: The start frame.
        Nr9   z(Joining Ultravox Realtime call via URL: zFailed to connect to UltravoxTfatal)rn   startrp   r\   r]   r;   _start_agent_callrU   _start_one_shot_callr
   infowebsocket_clientconnectrt   create_task_receive_messagesru   	Exception
push_error)r{   r   r]   er~   r9   r:   r      s(   

$z UltravoxRealtimeLLMService.startmediumrB   c                 C   s   | dkrdS | dkrdS d S )NrC   MESSAGE_MEDIUM_TEXTrD   MESSAGE_MEDIUM_VOICEr9   )r   r9   r9   r:   _output_medium_to_api  s
   z0UltravoxRealtimeLLMService._output_medium_to_apic              
      sD  |j |jdd| jiid}| |j}|r||d< |jr(|j dd|d< ||jB }t	 4 I d H `}|j
d|j d	d
|ji|d4 I d H 5}|jdkrb| I d H }td|j d| | I d H d W  d   I d H  W  d   I d H  S 1 I d H sw   Y  W d   I d H  d S 1 I d H sw   Y  d S )NserverWebSocketinputSampleRate)templateContextrA   r   initialOutputMedium3fsmaxDurationz#https://api.ultravox.ai/api/agents/z/calls	X-Api-Keyheadersjson   Ultravox API error : joinUrl)r@   rA   ry   r   r2   rN   total_secondsrO   aiohttpClientSessionpostr=   r<   statusrC   r   r   r{   rb   request_bodyinitial_output_mediumsessionresponse
error_textr9   r9   r:   r     s:   	

.z,UltravoxRealtimeLLMService._start_agent_callc              
      s\  |j |j|j|jrt|jnd |j|j dd| jr#| 	| jng dd| j
iid}| |j}|r9||d< ||jB }t 4 I d H [}|jdd|ji|d	4 I d H 5}|jd
krn| I d H }td|j d| | I d H d W  d   I d H  W  d   I d H  S 1 I d H sw   Y  W d   I d H  d S 1 I d H sw   Y  d S )Nr   r   r   r   )systemPromptrY   rZ   rD   rA   r   selectedToolsr   r   z!https://api.ultravox.ai/api/callsr   r   r   r   r   r   )rV   rY   rZ   rD   r7   rA   rN   r   rs   _to_selected_toolsry   r   r2   rO   r   r   r   r<   r   rC   r   r   r   r9   r9   r:   r   0  sD   

.z/UltravoxRealtimeLLMService._start_one_shot_calltoolc              	      sD   g }|j D ] |d j j fdd j D i di q|S )NtemporaryToolc                    s$   g | ]\}}|d || j v dqS )PARAMETER_LOCATION_BODY)namelocationschemarequired)r   ).0kvstandard_toolr9   r:   
<listcomp>X  s    zAUltravoxRealtimeLLMService._to_selected_tools.<locals>.<listcomp>)modelToolNamedescriptiondynamicParametersr/   )standard_toolsappendr   r   
propertiesitems)r{   r   resultr9   r   r:   r   P  s   

	z-UltravoxRealtimeLLMService._to_selected_toolsc                    &   t  |I dH  |  I dH  dS )zaStop the service and close connections.

        Args:
            frame: The end frame.
        N)rn   stop_disconnectr{   r   r~   r9   r:   r   g     zUltravoxRealtimeLLMService.stopc                    r   )zfCancel the service and close connections.

        Args:
            frame: The cancel frame.
        N)rn   cancelr   r   r~   r9   r:   r   p  r   z!UltravoxRealtimeLLMService.cancelc                    sN   d| _ | jr| j I d H  d | _| jr%| j| jddI d H  d | _d S d S )NTrX   )timeout)rv   rt   closeru   cancel_taskr   r9   r9   r:   r   y  s   
z&UltravoxRealtimeLLMService._disconnectdeltac                    sH   t  |I d H }d|v r| | jjI d H  | | dh  |S )Nr2   )rn   _update_settings_update_output_mediumr_   r2    _warn_unhandled_updated_settingskeys)r{   r   changedr~   r9   r:   r     s   z+UltravoxRealtimeLLMService._update_settings	directionc                    s*  t  ||I dH  t|ttfr*t|tr|jnt|j}| |I dH  dS t|t	rA| 
 I dH  | ||I dH  dS t|trZ| |jI dH  | ||I dH  dS t|trr| |I dH  | ||I dH  dS t|tr| |I dH  | ||I dH  dS | ||I dH  dS )zProcess incoming frames for the Ultravox Realtime service.

        Args:
            frame: The frame to process.
            direction: The frame processing direction.
        N)rn   process_framerq   r   r'   contextr"   from_openai_context_handle_contextr   stop_all_metrics
push_framer   _send_user_textrC   r   _send_user_audior!   !_handle_vad_user_stopped_speaking)r{   r   r   r   r~   r9   r:   r     s*   




z(UltravoxRealtimeLLMService.process_framer   c                    st   t |jD ]1}|ddkr d S |d}d|dt|tr#|n	ddd |D d	}| |I d H  qd S )
Nroler   contentclient_tool_resulttool_call_id c                 s   s    | ]}| d V  qdS )rC   N)get)r   tr9   r9   r:   	<genexpr>  s    z=UltravoxRealtimeLLMService._handle_context.<locals>.<genexpr>)typeinvocationIdr   )reversedmessagesr   rq   r7   join_send)r{   r   messager   socket_messager9   r9   r:   r     s   
z*UltravoxRealtimeLLMService._handle_contextc                    s2   |j dkrdS |j|j  }| j|dI dH  dS )a  Handle VAD user stopped speaking frame.

        Calculates the actual speech end time and starts a timeout task to wait
        for the final transcription before reporting TTFB.

        Args:
            frame: The VAD user stopped speaking frame.
        rW   N)
start_time)	stop_secs	timestampstart_ttfb_metrics)r{   r   speech_end_timer9   r9   r:   r     s
   

z<UltravoxRealtimeLLMService._handle_vad_user_stopped_speakingc                    sb   | j sdS t|tr|jnd| _|j}|j| jkr'| j	||j| jI dH }| 
|I dH  dS )z+Send user audio frame to Ultravox Realtime.N)rt   rq   r    user_idrx   audiosample_ratery   rz   resampler   )r{   r   r   r9   r9   r:   r     s   z+UltravoxRealtimeLLMService._send_user_audiorC   c                    s&   | j sdS | d|dI dH  dS )zoSend user text via Ultravox Realtime.

        Args:
            text: The text to send as user input.
        Nuser_text_message)r   rC   )rt   r   r{   rC   r9   r9   r:   r     s   z*UltravoxRealtimeLLMService._send_user_textr2   c                    sP   |  }|dkrd}|  dvrtd|  d S | d|dI d H  d S )Nr   rD   >   rC   rD   z$Unsupported Ultravox output medium: set_output_medium)r   r   )lowerr
   rr   r   )r{   r2   r9   r9   r:   r     s   z0UltravoxRealtimeLLMService._update_output_mediumr   c              
      s   | j s| js	dS z t|tr| j|I dH  W dS | jt|I dH  W dS  tyT } z| j s7| js>W Y d}~dS | jd|ddI dH  W Y d}~dS d}~ww )zSend content via the WebSocket connection.

        Args:
            content: The content to send, either as bytes or a JSON-serializable dict.
        NzUltravox websocket send errorTr   )	rv   rt   rq   bytessendr   dumpsr   r   )r{   r   r   r9   r9   r:   r     s   
$z UltravoxRealtimeLLMService._sendc                    s  | j sdS | j 2 z3 dH W }zt|tr| |I dH  W q	t|}|d dkr? | jr>|ddkr>|  I dH  nk dkrX | 	|d|d|dI dH  nRd	kr|d
 dkr{ |dsot
d n;| |dI dH  n/dkr| |d|d|d|ddI dH  n	 t
d|  n		 t
d|  W q	 ty } z| js| j sW Y d}~ dS | jd|ddI dH  W Y d}~q	d}~ww 6 dS )z6Receive messages from the Ultravox Realtime WebSocket.Nr   statespeakingclient_tool_invocationtoolNamer   
parameters
transcriptr   userfinalzFUnexpected non-final user transcript from Ultravox Realtime; ignoring.rC   agentr   r   Fz>Received transcript with unknown role from Ultravox Realtime: z%Received unhandled Ultravox message: z Ultravox websocket receive errorTr   )rt   rq   r   _handle_audior   loadsr   rw   _handle_response_end_handle_tool_invocationr
   rr   _handle_user_transcript_handle_agent_transcriptdebugr   rv   r   )r{   r   datar   r9   r9   r:   r     s^   






"z,UltravoxRealtimeLLMService._receive_messagesr   c                    st   |sdS | j s+|  I dH  |  I dH  | t I dH  | t I dH  d| _ | t|| jdI dH  dS )z3Handle incoming audio bytes from Ultravox Realtime.NrD   rH   )rw   start_processing_metricsstop_ttfb_metricsr   r   r   r   ry   )r{   r   r9   r9   r:   r	  2  s   z(UltravoxRealtimeLLMService._handle_audioc                    sH   |   I d H  | jdkr| t I d H  | t I d H  d | _d S )NrD   )stop_processing_metricsrw   r   r   r   r   r9   r9   r:   r  >  s   

z/UltravoxRealtimeLLMService._handle_response_end	tool_nameinvocation_idr  c                    s$   |  t|||d dgI d H  d S )N)function_namer   	argumentsr   )run_function_callsr)   )r{   r  r  r  r9   r9   r:   r  E  s   z2UltravoxRealtimeLLMService._handle_tool_invocationc                    s.   |  t| jpdt ||dtjI d H  d S )Nr   )r   r   r   rC   )r   r   rx   r.   r(   UPSTREAMr   r9   r9   r:   r  S  s   z2UltravoxRealtimeLLMService._handle_user_transcriptr  c                    s  |dkr5|s	|r|st |p|d}d|_| |I d H  |r3t|tjd}d|_| |I d H  d S d S |dkr|rP|  I d H  | t I d H  d | _	d S |sT|r| j	sq| 
 I d H  |  I d H  | t I d H  d| _	| t |pw|dI d H  d S d S d S )NrD   )rC   F)rC   aggregated_byTrC   )r   append_to_contextr   r   r   WORDincludes_inter_frame_spacesr  r   rw   r  r  r   )r{   r   rC   r   r  r   	tts_framer9   r9   r:   r  ^  s2   
z3UltravoxRealtimeLLMService._handle_agent_transcriptuser_paramsassistant_paramsr   r!  c                C   s   t |}d|_t|||dS )a  Create an instance of LLMContextAggregatorPair from an OpenAILLMContext.

        Constructor keyword arguments for both the user and assistant aggregators can be provided.

        NOTE: this method exists only for backward compatibility. New code
        should instead do::

            context = LLMContext(...)
            context_aggregator = LLMContextAggregatorPair(context)

        Args:
            context: The LLM context to use.
            user_params: User aggregator parameters. Defaults to LLMUserAggregatorParams().
            assistant_params: Assistant aggregator parameters. Defaults to LLMAssistantAggregatorParams().

        Returns:
            A pair of user and assistant context aggregators.

        .. deprecated:: 0.0.99
            `create_context_aggregator()` is deprecated and will be removed in a future version.
            Use the universal `LLMContext` and `LLMContextAggregatorPair` instead.
            See `OpenAILLMContext` docstring for migration guide.
        Fr  )r"   r   expect_stripped_wordsr%   )r{   r   r   r!  r9   r9   r:   create_context_aggregatory  s
   
z4UltravoxRealtimeLLMService.create_context_aggregator);r3   r4   r5   r6   r0   rl   r8   r	   r;   rU   r\   r   r   ro   boolr   r   r   staticmethodr   r7   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r(   r   r"   r   r!   r   r   r   r   r   r   r   r   r	  r  r  r  r  r$   r#   r&   r%   r#  __classcell__r9   r9   r~   r:   r^      s|   
 
>  			

	.


r^   )Nr6   asynciorS   r   rP   dataclassesr   r   typingr   r   r   r   r   r	   r   logurur
   pydanticr   r   %pipecat.adapters.schemas.tools_schemar   pipecat.audio.utilsr   pipecat.frames.framesr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   *pipecat.processors.aggregators.llm_contextr"   +pipecat.processors.aggregators.llm_responser#   r$   5pipecat.processors.aggregators.llm_response_universalr%   1pipecat.processors.aggregators.openai_llm_contextr&   r'   "pipecat.processors.frame_processorr(   pipecat.services.llm_servicer)   r*   pipecat.services.settingsr+   r,   r-   pipecat.utils.timer.   websockets.asyncior/   r   ModuleNotFoundErrorr   errorr   r0   r;   rU   r\   r^   r9   r9   r9   r:   <module>   sF    T

!!
