o
    i^                     @   sZ  d Z ddlZddlZddlmZmZmZmZmZm	Z	 ddl
mZmZmZ ddlmZ ed ZG dd deZG d	d
 d
eZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZe	eeeeeeef f Zed ZG d d! d!eZ G d"d# d#eZ!G d$d% d%eZ"G d&d' d'eZ#G d(d) d)eZ$G d*d+ d+eZ%G d,d- d-eZ&G d.d/ d/e&Z'G d0d1 d1e&Z(G d2d3 d3e&Z)G d4d5 d5e&Z*G d6d7 d7e&Z+G d8d9 d9e&Z,G d:d; d;e&Z-G d<d= d=eZ.G d>d? d?e.Z/G d@dA dAe.Z0G dBdC dCe.Z1G dDdE dEe.Z2G dFdG dGe.Z3G dHdI dIe.Z4G dJdK dKe.Z5G dLdM dMe.Z6G dNdO dOe.Z7G dPdQ dQe.Z8G dRdS dSe.Z9G dTdU dUe.Z:G dVdW dWe.Z;G dXdY dYe.Z<G dZd[ d[e.Z=G d\d] d]e.Z>G d^d_ d_eZ?G d`da daeZ@G dbdO dOe.Z7G dcdd dde.ZAG dedf dfe.ZBG dgdh dheZCG didj dje.ZDG dkdl dle.ZEG dmdn dne.ZFG dodp dpe.ZGi dqeGdreFdse/dte0due1dve2dwe3dxe4dye5dze6d{e7d|e8d}eBd~eDdeEde9de:e;e<e=e>eAdZHdefddZIdS )zEvent models and data structures for Grok Voice Agent API communication.

Based on xAI's Grok Voice Agent API documentation:
https://docs.x.ai/docs/guides/voice/agent
    N)AnyDictListLiteralOptionalUnion)	BaseModel
ConfigDictField)ToolsSchema)i@  i>  i:R  ]  i }  iD  i  c                   @   s   e Zd ZU dZeed< dS )AudioFormatz*Base class for audio format configuration.typeN)__name__
__module____qualname____doc__str__annotations__ r   r   Y/home/ubuntu/.local/lib/python3.10/site-packages/pipecat/services/grok/realtime/events.pyr      s   
 r   c                   @   s.   e Zd ZU dZdZed ed< dZeed< dS )PCMAudioFormata  PCM audio format configuration with configurable sample rate.

    Grok supports: 8000, 16000, 21050, 24000, 32000, 44100, 48000 Hz

    Parameters:
        type: Audio format type, always "audio/pcm".
        rate: Sample rate in Hz. Defaults to 24000.
    z	audio/pcmr   r   rateN)	r   r   r   r   r   r   r   r   SUPPORTED_SAMPLE_RATESr   r   r   r   r   #   s   
 	r   c                   @   "   e Zd ZU dZdZed ed< dS )PCMUAudioFormatu   PCMU (G.711 μ-law) audio format configuration.

    Fixed at 8000 Hz sample rate.

    Parameters:
        type: Audio format type, always "audio/pcmu".
    z
audio/pcmur   Nr   r   r   r   r   r   r   r   r   r   r   r   1      
 r   c                   @   r   )PCMAAudioFormatzPCMA (G.711 A-law) audio format configuration.

    Fixed at 8000 Hz sample rate.

    Parameters:
        type: Audio format type, always "audio/pcma".
    z
audio/pcmar   Nr   r   r   r   r   r   =   r   r   c                   @   s&   e Zd ZU dZdZeed  ed< dS )TurnDetectionzServer-side voice activity detection configuration.

    Parameters:
        type: Detection type, must be "server_vad" or None for manual.
    
server_vadr   N)r   r   r   r   r   r   r   r   r   r   r   r   r   N   s   
 r   c                   @   ,   e Zd ZU dZdZeeeee	f  e
d< dS )
AudioInputzjAudio input configuration.

    Parameters:
        format: The format configuration for input audio.
    Nformatr   r   r   r   r#   r   r   r   r   r   r   r   r   r   r   r"   ]      
 r"   c                   @   r!   )AudioOutputzlAudio output configuration.

    Parameters:
        format: The format configuration for output audio.
    Nr#   r$   r   r   r   r   r&   g   r%   r&   c                   @   s2   e Zd ZU dZdZee ed< dZee	 ed< dS )AudioConfigurationzAudio configuration for input and output.

    Parameters:
        input: Configuration for input audio.
        output: Configuration for output audio.
    Ninputoutput)
r   r   r   r   r(   r   r"   r   r)   r&   r   r   r   r   r'   q      
 r'   c                   @   r   )WebSearchToolzkWeb search tool configuration.

    Enables the voice agent to search the web for current information.
    
web_searchr   Nr   r   r   r   r   r+      s   
 r+   c                   @   s6   e Zd ZU dZdZed ed< dZee	e
  ed< dS )XSearchToolzX (Twitter) search tool configuration.

    Enables the voice agent to search X for posts and information.

    Parameters:
        type: Tool type, always "x_search".
        allowed_x_handles: Optional list of X handles to filter search results.
    x_searchr   Nallowed_x_handles)r   r   r   r   r   r   r   r/   r   r   r   r   r   r   r   r-      s   
 	r-   c                   @   s>   e Zd ZU dZdZed ed< ee ed< dZ	e
e ed< dS )FileSearchToola7  File/Collection search tool configuration.

    Enables the voice agent to search through uploaded document collections.

    Parameters:
        type: Tool type, always "file_search".
        vector_store_ids: List of collection IDs to search.
        max_num_results: Maximum number of results to return.
    file_searchr   vector_store_ids
   max_num_resultsN)r   r   r   r   r   r   r   r   r   r4   r   intr   r   r   r   r0      s
   
 
r0   c                   @   sB   e Zd ZU dZdZed ed< eed< eed< eee	f ed< dS )FunctionToolzCustom function tool configuration.

    Parameters:
        type: Tool type, always "function".
        name: Name of the function.
        description: Description of what the function does.
        parameters: JSON schema for function parameters.
    functionr   namedescription
parametersN)
r   r   r   r   r   r   r   r   r   r   r   r   r   r   r6      s   
 	r6   )AraRexSalEveLeoc                   @   s   e Zd ZU dZeddZdZee e	d< dZ
eeeB  e	d< edd	 d
Zee e	d< dZee e	d< dZeeee B  e	d< dS )SessionPropertiesa#  Configuration properties for a Grok Voice Agent session.

    Parameters:
        instructions: System instructions for the assistant.
        voice: The voice the model uses to respond. Options: Ara, Rex, Sal, Eve, Leo.
            Defaults to "Ara".
        turn_detection: Configuration for turn detection. Defaults to server-side VAD.
            Set to None for manual turn detection.
        audio: Configuration for input and output audio.
        tools: Available tools for the assistant (web_search, x_search, file_search, function).
    Tarbitrary_types_allowedNinstructionsr;   voicec                   C   s
   t ddS )Nr    )r   )r   r   r   r   r   <lambda>   s   
 zSessionProperties.<lambda>default_factoryturn_detectionaudiotools)r   r   r   r   r	   model_configrC   r   r   r   rD   	GrokVoicer
   rH   r   rI   r'   rJ   r   r   GrokToolr   r   r   r   r@      s   
 
r@   c                   @   sN   e Zd ZU dZed ed< dZee ed< dZ	ee ed< dZ
ee ed< dS )ItemContenta  Content within a conversation item.

    Parameters:
        type: Content type (input_text, input_audio, text, audio).
        text: Text content for text-based items.
        audio: Base64-encoded audio data for audio items.
        transcript: Transcribed text for audio items.
    )textrI   
input_textinput_audiooutput_textoutput_audior   NrO   rI   
transcript)r   r   r   r   r   r   rO   r   r   rI   rT   r   r   r   r   rN      s   
 	rN   c                   @   s   e Zd ZU dZedd dZeed< dZe	e
d  ed< e
d	 ed
< dZe	e
d  ed< dZe	e
d  ed< dZe	ee  ed< dZe	e ed< dZe	e ed< dZe	e ed< dZe	e ed< dS )ConversationItema  A conversation item in the realtime session.

    Parameters:
        id: Unique identifier for the item, auto-generated if not provided.
        object: Object type identifier for the realtime API.
        type: Item type (message, function_call, or function_call_output).
        status: Current status of the item.
        role: Speaker role for message items (user, assistant, or system).
        content: Content list for message items.
        call_id: Function call identifier for function_call items.
        name: Function name for function_call items.
        arguments: Function arguments as JSON string for function_call items.
        output: Function output as JSON string for function_call_output items.
    c                   C   s   t t jS N)r   uuiduuid4hexr   r   r   r   rE   	  s    zConversationItem.<lambda>rF   idNzrealtime.itemobject)messagefunction_callfunction_call_outputr   )	completedin_progress
incompletestatus)user	assistantsystemtoolrolecontentcall_idr8   	argumentsr)   )r   r   r   r   r
   rZ   r   r   r[   r   r   rb   rg   rh   r   rN   ri   r8   rj   r)   r   r   r   r   rU      s   
 rU   c                   @   s&   e Zd ZU dZeed< ed ed< dS )RealtimeConversationzA realtime conversation session.

    Parameters:
        id: Unique identifier for the conversation.
        object: Object type identifier, always "realtime.conversation".
    rZ   zrealtime.conversationr[   N)r   r   r   r   r   r   r   r   r   r   r   rk     s   
 rk   c                   @   s.   e Zd ZU dZddgZeeed   ed< dS )ResponsePropertieszProperties for configuring assistant responses.

    Parameters:
        modalities: Output modalities for the response (text, audio, or both).
    rO   rI   )rO   rI   
modalitiesN)	r   r   r   r   rm   r   r   r   r   r   r   r   r   rl   !  s   
  rl   c                   @   sZ   e Zd ZU dZdZee ed< dZee ed< eed< dZ	ee ed< dZ
ee ed< dS )	RealtimeErrora>  Error information from the realtime API.

    Parameters:
        type: Error type identifier.
        code: Specific error code.
        message: Human-readable error message.
        param: Parameter name that caused the error, if applicable.
        event_id: Event ID associated with the error, if applicable.
    Nr    coder\   paramevent_id)r   r   r   r   r   r   r   r   rp   rq   rr   r   r   r   r   rn   0  s   
 
rn   c                   @   s(   e Zd ZU dZedd dZeed< dS )ClientEventzBase class for client events sent to the realtime API.

    Parameters:
        event_id: Unique identifier for the event, auto-generated if not provided.
    c                   C   s   t t S rV   )r   rW   rX   r   r   r   r   rE   N  s    zClientEvent.<lambda>rF   rr   N)r   r   r   r   r
   rr   r   r   r   r   r   r   rs   G  s   
 rs   c                   @   *   e Zd ZU dZdZed ed< eed< dS )SessionUpdateEventzEvent to update session properties.

    Parameters:
        type: Event type, always "session.update".
        session: Updated session properties.
    zsession.updater   sessionN)r   r   r   r   r   r   r   r@   r   r   r   r   ru   Q     
 ru   c                   @   rt   )InputAudioBufferAppendEventzEvent to append audio data to the input buffer.

    Parameters:
        type: Event type, always "input_audio_buffer.append".
        audio: Base64-encoded audio data to append.
    zinput_audio_buffer.appendr   rI   N)r   r   r   r   r   r   r   r   r   r   r   r   rx   ]  rw   rx   c                   @   r   )InputAudioBufferCommitEventzEvent to commit the current input audio buffer.

    Used when turn_detection is null (manual mode).

    Parameters:
        type: Event type, always "input_audio_buffer.commit".
    zinput_audio_buffer.commitr   Nr   r   r   r   r   ry   i  r   ry   c                   @   r   )InputAudioBufferClearEventzyEvent to clear the input audio buffer.

    Parameters:
        type: Event type, always "input_audio_buffer.clear".
    zinput_audio_buffer.clearr   Nr   r   r   r   r   rz   u     
 rz   c                   @   s:   e Zd ZU dZdZed ed< dZee	 ed< e
ed< dS )ConversationItemCreateEventzEvent to create a new conversation item.

    Parameters:
        type: Event type, always "conversation.item.create".
        previous_item_id: ID of the item to insert after, if any.
        item: The conversation item to create.
    zconversation.item.creater   Nprevious_item_iditem)r   r   r   r   r   r   r   r}   r   r   rU   r   r   r   r   r|     s
   
 r|   c                   @   s2   e Zd ZU dZdZed ed< dZee	 ed< dS )ResponseCreateEventzEvent to create a new assistant response.

    Parameters:
        type: Event type, always "response.create".
        response: Optional response configuration properties.
    zresponse.creater   Nresponse)
r   r   r   r   r   r   r   r   r   rl   r   r   r   r   r     r*   r   c                   @   r   )ResponseCancelEventzyEvent to cancel the current assistant response.

    Parameters:
        type: Event type, always "response.cancel".
    zresponse.cancelr   Nr   r   r   r   r   r     r{   r   c                   @   s,   e Zd ZU dZeddZeed< eed< dS )ServerEventzBase class for server events received from the realtime API.

    Parameters:
        event_id: Unique identifier for the event.
        type: Type of the server event.
    TrA   rr   r   N)r   r   r   r   r	   rK   r   r   r   r   r   r   r     s
   
 
r   c                   @   &   e Zd ZU dZed ed< eed< dS )SessionUpdatedEventzEvent indicating a session has been updated.

    Parameters:
        type: Event type, always "session.updated".
        session: The updated session properties.
    session.updatedr   rv   N)r   r   r   r   r   r   r@   r   r   r   r   r        
 r   c                   @   r   )ConversationCreatedzEvent indicating a conversation has been created.

    This is the first message received after connecting.

    Parameters:
        type: Event type, always "conversation.created".
        conversation: The created conversation.
    conversation.createdr   conversationN)r   r   r   r   r   r   rk   r   r   r   r   r        
 	r   c                   @   s6   e Zd ZU dZed ed< dZee ed< e	ed< dS )ConversationItemAddedzEvent indicating a conversation item has been added.

    Parameters:
        type: Event type, always "conversation.item.added".
        previous_item_id: ID of the previous item, if any.
        item: The added conversation item.
    conversation.item.addedr   Nr}   r~   )
r   r   r   r   r   r   r}   r   r   rU   r   r   r   r   r     
   
 r   c                   @   .   e Zd ZU dZed ed< eed< eed< dS )0ConversationItemInputAudioTranscriptionCompleteda  Event indicating input audio transcription is complete.

    Parameters:
        type: Event type, always "conversation.item.input_audio_transcription.completed".
        item_id: ID of the conversation item that was transcribed.
        transcript: Complete transcription text.
    5conversation.item.input_audio_transcription.completedr   item_idrT   Nr   r   r   r   r   r   r   r   r   r   r   r     
   
 r   c                   @   r   )InputAudioBufferSpeechStarteda  Event indicating speech has started in the input audio buffer.

    Only sent when turn_detection is "server_vad".

    Parameters:
        type: Event type, always "input_audio_buffer.speech_started".
        item_id: ID of the associated conversation item.
    !input_audio_buffer.speech_startedr   r   Nr   r   r   r   r   r     r   r   c                   @   r   )InputAudioBufferSpeechStoppeda  Event indicating speech has stopped in the input audio buffer.

    Only sent when turn_detection is "server_vad".

    Parameters:
        type: Event type, always "input_audio_buffer.speech_stopped".
        item_id: ID of the associated conversation item.
    !input_audio_buffer.speech_stoppedr   r   Nr   r   r   r   r   r     r   r   c                   @   s6   e Zd ZU dZed ed< dZee ed< eed< dS )InputAudioBufferCommitteda  Event indicating the input audio buffer has been committed.

    Parameters:
        type: Event type, always "input_audio_buffer.committed".
        previous_item_id: ID of the previous item, if any.
        item_id: ID of the committed conversation item.
    input_audio_buffer.committedr   Nr}   r   )	r   r   r   r   r   r   r}   r   r   r   r   r   r   r     r   r   c                   @   s   e Zd ZU dZed ed< dS )InputAudioBufferClearedzEvent indicating the input audio buffer has been cleared.

    Parameters:
        type: Event type, always "input_audio_buffer.cleared".
    input_audio_buffer.clearedr   Nr   r   r   r   r   r   r   r   r   r   r     s   
 r   c                   @   s&   e Zd ZU dZed ed< ded< dS )ResponseCreatedEvent indicating an assistant response has been created.

    Parameters:
        type: Event type, always "response.created".
        response: The created response object.
    response.createdr   Responser   Nr   r   r   r   r   r      r   r   c                   @   6   e Zd ZU dZed ed< eed< eed< eed< dS )ResponseOutputItemAddeda  Event indicating an output item has been added to a response.

    Parameters:
        type: Event type, always "response.output_item.added".
        response_id: ID of the response.
        output_index: Index of the output item.
        item: The added conversation item.
    response.output_item.addedr   response_idoutput_indexr~   N	r   r   r   r   r   r   r   r5   rU   r   r   r   r   r   ,     
 	r   c                   @   6   e Zd ZU dZed ed< eed< eed< eed< dS )ResponseAudioTranscriptDeltaa"  Event containing incremental audio transcript from a response.

    Parameters:
        type: Event type, always "response.output_audio_transcript.delta".
        response_id: ID of the response.
        item_id: ID of the conversation item.
        delta: Incremental transcript text.
    &response.output_audio_transcript.deltar   r   r   deltaNr   r   r   r   r   r   <  r   r   c                   @   r   )ResponseAudioTranscriptDonezEvent indicating audio transcript is complete.

    Parameters:
        type: Event type, always "response.output_audio_transcript.done".
        response_id: ID of the response.
        item_id: ID of the conversation item.
    %response.output_audio_transcript.doner   r   r   Nr   r   r   r   r   r   L  r   r   c                   @   sF   e Zd ZU dZed ed< eed< eed< eed< eed< eed< d	S )
ResponseAudioDeltaa}  Event containing incremental audio data from a response.

    Parameters:
        type: Event type, always "response.output_audio.delta".
        response_id: ID of the response.
        item_id: ID of the conversation item.
        output_index: Index of the output item.
        content_index: Index of the content part.
        delta: Base64-encoded incremental audio data.
    response.output_audio.deltar   r   r   r   content_indexr   Nr   r   r   r   r   r   r   r5   r   r   r   r   r   Z     
 r   c                   @   r   )ResponseAudioDonezEvent indicating audio content is complete.

    Parameters:
        type: Event type, always "response.output_audio.done".
        response_id: ID of the response.
        item_id: ID of the conversation item.
    response.output_audio.doner   r   r   Nr   r   r   r   r   r   n  r   r   c                   @   s^   e Zd ZU dZed ed< dZee ed< dZ	ee ed< eed< eed< dZ
ee ed	< dS )
"ResponseFunctionCallArgumentsDeltaa  Event containing incremental function call arguments.

    Parameters:
        type: Event type, always "response.function_call_arguments.delta".
        response_id: ID of the response.
        item_id: ID of the conversation item.
        call_id: ID of the function call.
        delta: Incremental function arguments as JSON.
        previous_item_id: ID of the previous item, if any.
    &response.function_call_arguments.deltar   Nr   r   ri   r   r}   )r   r   r   r   r   r   r   r   r   r   r}   r   r   r   r   r   |  s   
 r   c                   @   r   )!ResponseFunctionCallArgumentsDonea0  Event indicating function call arguments are complete.

    Parameters:
        type: Event type, always "response.function_call_arguments.done".
        call_id: ID of the function call.
        name: Name of the function being called.
        arguments: Complete function arguments as JSON string.
    %response.function_call_arguments.doner   ri   r8   rj   Nr   r   r   r   r   r     r   r   c                   @   sB   e Zd ZU dZdZee ed< dZee ed< dZ	ee ed< dS )Usagea#  Token usage statistics for a response.

    All fields are optional because Grok sends empty usage in some events.

    Parameters:
        total_tokens: Total number of tokens used.
        input_tokens: Number of input tokens used.
        output_tokens: Number of output tokens used.
    Ntotal_tokensinput_tokensoutput_tokens)
r   r   r   r   r   r   r5   r   r   r   r   r   r   r   r     s
   
 
r   c                   @   s^   e Zd ZU dZeed< ed ed< ed ed< dZee	 ed< e
e ed	< dZee ed
< dS )r   aA  A complete assistant response.

    Parameters:
        id: Unique identifier for the response.
        object: Object type, always "realtime.response".
        status: Current status of the response.
        output: List of conversation items in the response.
        usage: Token usage statistics for the response.
    rZ   zrealtime.responser[   )r_   r`   ra   	cancelledfailedrb   Nstatus_detailsr)   usage)r   r   r   r   r   r   r   r   r   r   r   rU   r   r   r   r   r   r   r     s   
 
r   c                   @   r   )r   r   r   r   r   N)r   r   r   r   r   r   r   r   r   r   r   r     r   c                   @   s6   e Zd ZU dZed ed< eed< dZee	 ed< dS )ResponseDonezEvent indicating an assistant response is complete.

    Parameters:
        type: Event type, always "response.done".
        response: The completed response object.
        usage: Token usage (also available at top level in Grok).
    response.doner   r   Nr   )
r   r   r   r   r   r   r   r   r   r   r   r   r   r   r     s
   
 r   c                   @   r   )ResponseOutputItemDonea  Event indicating an output item is complete.

    Parameters:
        type: Event type, always "response.output_item.done".
        response_id: ID of the response.
        output_index: Index of the output item.
        item: The completed conversation item.
    response.output_item.doner   r   r   r~   Nr   r   r   r   r   r     r   r   c                   @   s*   e Zd ZU dZeed< dZee ed< dS )ContentPartzA content part within a response.

    Parameters:
        type: Type of the content part (audio, text).
        transcript: Transcript text if applicable.
    r   NrT   )r   r   r   r   r   r   rT   r   r   r   r   r   r     s   
 r   c                   @   sF   e Zd ZU dZed ed< eed< eed< eed< eed< eed< d	S )
ResponseContentPartAddedar  Event indicating a content part has been added to a response.

    Parameters:
        type: Event type, always "response.content_part.added".
        response_id: ID of the response.
        item_id: ID of the conversation item.
        content_index: Index of the content part.
        output_index: Index of the output item.
        part: The added content part.
    response.content_part.addedr   r   r   r   r   partN)	r   r   r   r   r   r   r   r5   r   r   r   r   r   r     r   r   c                   @   s>   e Zd ZU dZed ed< eed< eed< eed< eed< dS )	ResponseContentPartDonea:  Event indicating a content part is complete.

    Parameters:
        type: Event type, always "response.content_part.done".
        response_id: ID of the response.
        item_id: ID of the conversation item.
        content_index: Index of the content part.
        output_index: Index of the output item.
    response.content_part.doner   r   r   r   r   Nr   r   r   r   r   r     s   
 
r   c                   @   r   )	PingEventzKeep-alive ping event from the server.

    Parameters:
        type: Event type, always "ping".
        timestamp: Server timestamp in milliseconds.
    pingr   	timestampN)r   r   r   r   r   r   r5   r   r   r   r   r     r   r   c                   @   s&   e Zd ZU dZed ed< eed< dS )
ErrorEventzEvent indicating an error occurred.

    Parameters:
        type: Event type, always "error".
        error: Error details.
    errorr   N)r   r   r   r   r   r   rn   r   r   r   r   r   +  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   datac              
   C   sd   zt | }|d }|tvrtd| t| |W S  ty1 } z	t| d|  d}~ww )a  Parse a server event from JSON string.

    Args:
        data: JSON string containing the server event.

    Returns:
        Parsed server event object of the appropriate type.

    Raises:
        Exception: If the event type is unimplemented or parsing fails.
    r   z!Unimplemented server event type: z 

N)jsonloads_server_event_types	Exceptionmodel_validate)r   event
event_typeer   r   r   parse_server_eventU  s   
r   )Jr   r   rW   typingr   r   r   r   r   r   pydanticr   r	   r
   %pipecat.adapters.schemas.tools_schemar   r   r   r   r   r   r   r"   r&   r'   r+   r-   r0   r6   r   rM   rL   r@   rN   rU   rk   rl   rn   rs   ru   rx   ry   rz   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s    

	


	
