o
    xi<                     @  s   d Z ddlmZ ddlZddlZddlmZmZ ddlm	Z	m
Z
 ddlZddlZddlmZ ddlmZ ejjdd	d
dZedurMejdksMJ dej eeZdddZG dd dejjZdS )u&   DSPy ↔ Weights & Biases integration.    )annotationsN)MappingSequence)AnyLiteral)	telemetry)Rundspyz}To use the W&B DSPy integration you need to have the `dspy` python package installed.  Install it with `uv pip install dspy`.F)namerequiredlazyz3.0.0z+DSPy 3.0.0 or higher is required. You have rowslist[dict[str, Any]]returnc                   s$   	dd fd	d
  fdd| D S )zFlatten a list of nested row dicts into flat key/value dicts.

    Args:
        rows (list[dict[str, Any]]): List of nested dictionaries to flatten.

    Returns:
        list[dict[str, Any]]: List of flattened dictionaries.

     .ddict[str, Any]
parent_keystrsepr   c                   sh   g }|   D ])\}}|r| | | n|}t|tr(| |||d   q|||f qt|S )N)r   )items
isinstancedictextendappend)r   r   r   r   kvnew_key_flatten O/home/ubuntu/.local/lib/python3.10/site-packages/wandb/integration/dspy/dspy.pyr    +   s   
z_flatten_rows.<locals>._flattenc                   s   g | ]} |qS r!   r!   .0rowr   r!   r"   
<listcomp>7   s    z!_flatten_rows.<locals>.<listcomp>Nr   r   )r   r   r   r   r   r   r   r   r!   )r   r!   r   r"   _flatten_rows    s   r(   c                   @  s   e Zd ZdZd@dAd
dZ	dBdCddZdDddZdEddZdFd!d"Z	dGdHd'd(Z	dId+d,Z
dJd.d/Zddd0d1d2d3dKd>d?ZdS )LWandbDSPyCallbacka  W&B callback for tracking DSPy evaluation and optimization.

    This callback logs evaluation scores, per-step predictions (optional), and
    a table capturing the DSPy program signature over time. It can also save
    the best program as a W&B Artifact for reproducibility.

    Examples:
        Basic usage within DSPy settings:

        ```python
        import dspy
        import wandb
        from wandb.integration.dspy import WandbDSPyCallback

        with wandb.init(project="dspy-optimization") as run:
            dspy.settings.callbacks.append(WandbDSPyCallback(run=run))
            # Run your DSPy optimization/evaluation
        ```
    TNlog_resultsboolrun
Run | Noner   Nonec                 C  s~   |du rt jdu rt dt j}|| _tj|d}d|j_W d   n1 s)w   Y  || _d| _	i | _
d| _d| _dS )aJ  Initialize the callback.

        Args:
            log_results (bool): Whether to log per-evaluation prediction tables.
            run (Run | None): Optional W&B run to use. Defaults to the
                current global run if available.

        Raises:
            wandb.Error: If no active run is provided or found.
        NzFYou must call `wandb.init()` before instantiating WandbDSPyCallback().)r,   TFr   )wandbr,   Errorr*   r   contextfeaturedspy_callback_run_did_log_config_program_info_program_table_row_idx)selfr*   r,   telr!   r!   r"   __init__O   s   


zWandbDSPyCallback.__init__r   r   nestedr   r   r   r   r   c                   s$   i d
 fdd  || S )a{  Recursively flatten arbitrarily nested mappings and sequences.

        Args:
            nested (Any): Nested structure of mappings/lists to flatten.
            parent_key (str): Prefix to prepend to keys in the flattened output.
            sep (str): Key separator for nested fields.

        Returns:
            dict[str, Any]: Flattened dictionary representation.
        objr   baser   r   r.   c                   s   t | tr#|  D ]\}}|r|  | nt|} || q	d S t | trNt | tttfsNt| D ]\}}|rB|  | nt|} || q4d S |rR|nd}|r\| |< d S d S )Nr   )r   r   r   r   r   bytes	bytearray	enumerate)r=   r>   r   r   r   idxkey_walkflatr   r!   r"   rE   |   s    

z.WandbDSPyCallback._flatten_dict.<locals>._walkN)r=   r   r>   r   r   r.   r!   )r9   r<   r   r   r!   rD   r"   _flatten_dictm   s   
zWandbDSPyCallback._flatten_dictfieldsr   dict[str, str]c                 C  s   dd |  D S )a  Convert signature fields to a flat mapping of strings.

        Note:
            The input is expected to be a dict-like mapping from field names to
            field metadata. Values are stringified for logging.

        Args:
            fields (list[dict[str, Any]]): Mapping of field name to metadata.

        Returns:
            dict[str, str]: Mapping of field name to string value.
        c                 S  s   i | ]	\}}|t |qS r!   )r   r$   r   r   r!   r!   r"   
<dictcomp>   s    z5WandbDSPyCallback._extract_fields.<locals>.<dictcomp>)r   )r9   rH   r!   r!   r"   _extract_fields   s   z!WandbDSPyCallback._extract_fieldsprogram_objc              
   C  s   i }|du r|S zGt dd | D }t|ddr|j|d< t|ddr*|j|d< t|ddr:|j}| ||d< t|ddrJ|j}| ||d< | |W S  t	yh } zt
d| W Y d}~|S d}~ww )	ao  Extract signature-related info from a DSPy program.

        Attempts to read the program signature, instructions, input and output
        fields from a DSPy `Predict` parameter if available.

        Args:
            program_obj (Any): DSPy program/module instance.

        Returns:
            dict[str, Any]: Flattened dictionary of signature metadata.
        Nc                 s  s&    | ]\}}t |tjr|jV  qd S N)r   r	   Predict	signature)r$   _paramr!   r!   r"   	<genexpr>   s    

z:WandbDSPyCallback._extract_program_info.<locals>.<genexpr>rP   instructionsinput_fieldsoutput_fieldsz9Failed to extract program info from Evaluate instance: %s)nextnamed_parametersgetattrrP   rT   rU   rL   rV   rG   	Exceptionloggerwarning)r9   rM   	info_dictsigrU   rV   er!   r!   r"   _extract_program_info   s2   

z'WandbDSPyCallback._extract_program_infocall_idinstanceinputsc                 C  sr   | j s(t|drt|ni }dd | D }d|v r|d= | jj| d| _ |d }r7| || _	dS dS )a  Handle start of a DSPy evaluation call.

        Logs non-private fields from the evaluator instance to W&B config and
        captures program signature info for later logging.

        Args:
            call_id (str): Unique identifier for the evaluation call.
            instance (Any): The evaluation instance (e.g., `dspy.Evaluate`).
            inputs (dict[str, Any]): Inputs passed to the evaluation (may
                include a `program` key with the DSPy program).
        __dict__c                 S  s    i | ]\}}| d s||qS )rQ   )
startswithrJ   r!   r!   r"   rK      s
    z7WandbDSPyCallback.on_evaluate_start.<locals>.<dictcomp>devsetTprogramN)
r5   hasattrvarsr   r4   configupdategetr`   r6   )r9   ra   rb   rc   instance_varsserializablerM   r!   r!   r"   on_evaluate_start   s   z#WandbDSPyCallback.on_evaluate_startoutputs
Any | None	exceptionException | Nonec           	      C  s4  d}|du r<t |tjjjr0|j}tjdt|i| jd |j	}| j
r/| |}|r/| | ntdt| d n	td| d | jdu rddg| j }t |tr\|d tj|d	d
| _| jdurt| j }t |trz|| | jj| jg|R   | jjd| ji| jd |  jd7  _dS )a  Handle end of a DSPy evaluation call.

        If available, logs a numeric `score` metric and (optionally) per-step
        prediction tables. Always appends a row to the program-signature table.

        Args:
            call_id (str): Unique identifier for the evaluation call.
            outputs (Any | None): Evaluation outputs; supports
                `dspy.evaluate.evaluate.EvaluationResult`.
            exception (Exception | None): Exception raised during evaluation, if any.
        Nscorestepz2on_evaluate_end received unexpected outputs type: z]. Expected dspy.evaluate.evaluate.EvaluationResult; skipping logging score and `log_results`.z$on_evaluate_end received exception: z+. Skipping logging score and `log_results`.rv   INCREMENTAL)columnslog_modeprogram_signature   )r   r	   evaluateEvaluationResultrt   r/   logfloatr8   resultsr*   _parse_results_log_predictions_tabletermwarntyper7   r6   keysr   Tablelistvaluesadd_datar4   )	r9   ra   rp   rr   rt   r   r   rx   r   r!   r!   r"   on_evaluate_end   sF   








z!WandbDSPyCallback.on_evaluate_endr   Clist[tuple[dspy.Example, dspy.Prediction | dspy.Completions, bool]]c                 C  sZ   g }|D ]&\}}}t |tjr| }t |tjr| }| ||d}|| q|S )a6  Normalize evaluation results into serializable row dicts.

        Args:
            results (list[tuple]): Sequence of `(example, prediction, is_correct)`
                tuples from DSPy evaluation.

        Returns:
            list[dict[str, Any]]: Rows with `example`, `prediction`, `is_correct`.
        )example
prediction
is_correct)r   r	   
PredictiontoDictCompletionsr   r   )r9   r   _rowsr   r   r   prediction_dictr%   r!   r!   r"   r   -  s   z WandbDSPyCallback._parse_resultsr   c                 C  sX   t |}t|d  }dd |D }tj||dd}| jjd| j |i| jd dS )	zLog a W&B Table of predictions for the current evaluation step.

        Args:
            rows (list[dict[str, Any]]): Prediction rows to log.
        r   c                 S  s   g | ]}t | qS r!   )r   r   r#   r!   r!   r"   r&   S  s    z<WandbDSPyCallback._log_predictions_table.<locals>.<listcomp>	IMMUTABLE)rx   datary   predictions_ru   N)r(   r   r   r/   r   r4   r~   r8   )r9   r   rx   r   preds_tabler!   r!   r"   r   J  s
   "z(WandbDSPyCallback._log_predictions_tablejson)bestlatestzdspy-program)save_programsave_dirfiletypealiasesartifact_namemodeldspy.Moduler   r   
str | Noner   Literal['json', 'pkl']r   Sequence[str]r   c             
   C  s   |  |}ttdd|jjd|}tj| d| jj d|d}	|du r.t	j
| jjd}t	j
|}z	t	j|d	d
 W n ty\ }
 ztd| d|
 d W Y d}
~
dS d}
~
ww |rl|j|d	d |	| nd| }t	j
||}|j|dd |	| | jj|	t|d dS )a~  Save and log the best DSPy program as a W&B Artifact.

        You can choose to save the full program (architecture + state) or only
        the state to a single file (JSON or pickle).

        Args:
            model (dspy.Module): DSPy module to save.
            save_program (bool): Save full program directory if True; otherwise
                save only the state file. Defaults to `True`.
            save_dir (str): Directory to store program files before logging. Defaults to a
                subdirectory `dspy_program` within the active run's files directory
                (i.e., `wandb.run.dir`).
            filetype (Literal["json", "pkl"]): State file format when
                `save_program` is False. Defaults to `json`.
            aliases (Sequence[str]): Aliases for the logged Artifact version. Defaults to `("best", "latest")`.
            artifact_name (str): Base name for the Artifact. Defaults to `dspy-program`.

        Examples:
            Save the complete program and add aliases:

            ```python
            callback.log_best_model(
                optimized_program, save_program=True, aliases=("best", "production")
            )
            ```

            Save only the state as JSON:

            ```python
            callback.log_best_model(
                optimized_program, save_program=False, filetype="json"
            )
            ```
        __version__unknown)dspy_versionmodule_class-r   )r
   r   metadataNdspy_programT)exist_okz&Could not create or access directory 'z': z. Skipping artifact logging.)r   zprogram.F)r   )r`   rY   r	   	__class____name__r/   Artifactr4   idospathjoindirnormpathmakedirsrZ   r   saveadd_diradd_filelog_artifactr   )r9   r   r   r   r   r   r   r]   r   artifactexcfilename	file_pathr!   r!   r"   log_best_modelX  s>   
-


z WandbDSPyCallback.log_best_model)TN)r*   r+   r,   r-   r   r.   r'   )r<   r   r   r   r   r   r   r   )rH   r   r   rI   )rM   r   r   r   )ra   r   rb   r   rc   r   r   r.   rN   )ra   r   rp   rq   rr   rs   r   r.   )r   r   r   r   )r   r   r   r.   )r   r   r   r+   r   r   r   r   r   r   r   r   r   r.   )r   
__module____qualname____doc__r;   rG   rL   r`   ro   r   r   r   r   r!   r!   r!   r"   r)   :   s$    
#

*%
C
r)   )r   r   r   r   )r   
__future__r   loggingr   collections.abcr   r   typingr   r   r/   
wandb.utilwandb.sdk.libr   wandb.sdk.wandb_runr   util
get_moduler	   r   	getLoggerr   r[   r(   utilsBaseCallbackr)   r!   r!   r!   r"   <module>   s,    

