o
    i                     @   sr   d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dee	 dee	 d	e	fd
dZ
G dd deZdS )z*Semantic similarity evaluators for LLMObs.    )Callable)Optional)BaseEvaluator)EvaluatorContext)EvaluatorResultvec1vec2returnc           
      C   s  t | t |krtdt |  dt | z(ddl}|j| }|j|}|dks/|dkr2W dS t|| |||  W S  tyG   Y nw d\}}}t| |D ]\}}	|||	 7 }||| 7 }||	|	 7 }qR|d }|d }|dksy|dkr{dS |||  S )a'  Calculate cosine similarity between two vectors.

    Uses numpy if available for optimized performance, otherwise falls back
    to a single-pass pure Python implementation.

    :param vec1: First vector
    :param vec2: Second vector
    :return: Cosine similarity score between -1 and 1
    zVectors must have same length: z != r   N        )r
   r
   r
   g      ?)	len
ValueErrornumpylinalgnormfloatdotImportErrorzip)
r   r   np
magnitude1
magnitude2r   mag1mag2ab r   W/home/ubuntu/.local/lib/python3.10/site-packages/ddtrace/llmobs/_evaluators/semantic.py_cosine_similarity   s,   

r   c                       sX   e Zd ZdZ		ddeegee f dedee f fddZ	d	e
d
efddZ  ZS )SemanticSimilarityEvaluatora	  Evaluator that measures semantic similarity using embeddings.

    Compares the semantic meaning of output_data and expected_output using
    embedding vectors. Useful for evaluating open-ended responses where exact
    matches are too strict but semantic equivalence matters.

    The evaluator requires an embedding function that converts text to vectors.
    This can be OpenAI embeddings, sentence transformers, or any custom model.

    Example with OpenAI::

        from openai import OpenAI
        client = OpenAI()

        def get_embedding(text):
            response = client.embeddings.create(
                input=text,
                model="text-embedding-3-small"
            )
            return response.data[0].embedding

        evaluator = SemanticSimilarityEvaluator(
            embedding_fn=get_embedding,
            threshold=0.8
        )
        result = evaluator.evaluate(context)
        # Returns: 0.92 (similarity score between 0.0 and 1.0)

    Example with sentence-transformers::

        from sentence_transformers import SentenceTransformer
        model = SentenceTransformer('all-MiniLM-L6-v2')

        def get_embedding(text):
            return model.encode(text).tolist()

        evaluator = SemanticSimilarityEvaluator(embedding_fn=get_embedding)

    :param embedding_fn: Function that takes text string and returns embedding vector (list of floats)
    :param threshold: Minimum similarity score (0-1) required to pass (default: 0.7)
    :param name: Optional custom name for the evaluator
    ffffff?Nembedding_fn	thresholdnamec                    sR   t  j|d t|stdd|  krdks!n td| || _|| _dS )aI  Initialize the SemanticSimilarityEvaluator evaluator.

        :param embedding_fn: Function that converts text to embedding vector
        :param threshold: Minimum similarity score (0-1) to pass
        :param name: Optional custom name for the evaluator
        :raises ValueError: If threshold is not between 0 and 1
        )r"   z(embedding_fn must be a callable functionr      z(threshold must be between 0 and 1, got: N)super__init__callable	TypeErrorr   r    r!   )selfr    r!   r"   	__class__r   r   r%   ^   s   
z$SemanticSimilarityEvaluator.__init__contextr	   c           	      C   s   |j }|j}|du r|du rtdddS |du s|du r"tdddS | t|}| t|}t||}|d d }|| jkrBdnd}t||dS )	zPerform semantic similarity evaluation.

        :param context: The evaluation context
        :return: EvaluatorResult with similarity score and pass/fail assessment based on threshold
        Ng      ?pass)value
assessmentr
   failr#      )output_dataexpected_outputr   r    strr   r!   )	r(   r+   outputexpectedoutput_embeddingexpected_embedding
similaritynormalized_similarityr.   r   r   r   evaluatev   s   
z$SemanticSimilarityEvaluator.evaluate)r   N)__name__
__module____qualname____doc__r   r3   listr   r   r%   r   r   r:   __classcell__r   r   r)   r   r   2   s    .r   N)r>   typingr   r   ddtrace.llmobs._experimentr   r   r   r?   r   r   r   r   r   r   r   <module>   s    '