o
    -wi                     @  s   d dl mZ d dlZd dlmZmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZmZ er4d dl	mZ edZ							
	ddddZdS )     )annotationsN)TYPE_CHECKINGIterableTypeVar)util)
plot_table)test_missing
test_types)CustomChartT   Precision-Recall CurveFy_trueIterable[T] | Noney_probasIterable[numbers.Number] | Nonelabelslist[str] | Noneclasses_to_plotlist[T] | Noneinterp_sizeinttitlestrsplit_tableboolreturnr
   c              	     s$  t jddd t jddd}t dd}t dd}	 fd	d
}
 | }  |}t| |ds2dS t| |ds:dS  | }|du rE|}i } dd|ddd }  ||d }|D ]S}|dur|t	|| t
sut	|d  jr||||  }n|| }|j| |dd|f || d\}}}|
|}|ddd }|ddd } j||dd}|| ||< q`| dd | D  t|  |t|dd}t|tjjkrtdtjj d |	j|dtjjd|d dg d}ttj|ddd d!dd"|d#d$d%|d&S )'a"  Constructs a Precision-Recall (PR) curve.

    The Precision-Recall curve is particularly useful for evaluating classifiers
    on imbalanced datasets. A high area under the PR curve signifies both high
    precision (a low false positive rate) and high recall (a low false negative
    rate). The curve provides insights into the balance between false positives
    and false negatives at various threshold levels, aiding in the assessment of
    a model's performance.

    Args:
        y_true: True binary labels. The shape should be (`num_samples`,).
        y_probas: Predicted scores or probabilities for each class.
            These can be probability estimates, confidence scores, or non-thresholded
            decision values. The shape should be (`num_samples`, `num_classes`).
        labels: Optional list of class names to replace
            numeric values in `y_true` for easier plot interpretation.
            For example, `labels = ['dog', 'cat', 'owl']` will replace 0 with
            'dog', 1 with 'cat', and 2 with 'owl' in the plot. If not provided,
            numeric values from `y_true` will be used.
        classes_to_plot: Optional list of unique class values from
            y_true to be included in the plot. If not specified, all unique
            classes in y_true will be plotted.
        interp_size: Number of points to interpolate recall values. The
            recall values will be fixed to `interp_size` uniformly distributed
            points in the range [0, 1], and the precision will be interpolated
            accordingly.
        title: Title of the plot. Defaults to "Precision-Recall Curve".
        split_table: Whether the table should be split into a separate section
            in the W&B UI. If `True`, the table will be displayed in a section named
            "Custom Chart Tables". Default is `False`.

    Returns:
        CustomChart: A custom chart object that can be logged to W&B. To log the
            chart, pass it to `wandb.log()`.

    Raises:
        wandb.Error: If NumPy, pandas, or scikit-learn is not installed.


    Example:

    ```python
    import wandb

    # Example for spam detection (binary classification)
    y_true = [0, 1, 1, 0, 1]  # 0 = not spam, 1 = spam
    y_probas = [
        [0.9, 0.1],  # Predicted probabilities for the first sample (not spam)
        [0.2, 0.8],  # Second sample (spam), and so on
        [0.1, 0.9],
        [0.8, 0.2],
        [0.3, 0.7],
    ]

    labels = ["not spam", "spam"]  # Optional class names for readability

    with wandb.init(project="spam-detection") as run:
        pr_curve = wandb.plot.pr_curve(
            y_true=y_true,
            y_probas=y_probas,
            labels=labels,
            title="Precision-Recall Curve for Spam Detection",
        )
        run.log({"pr-curve": pr_curve})
    ```
    numpyz@roc requires the numpy library, install with `pip install numpy`)requiredpandaszBroc requires the pandas library, install with `pip install pandas`zsklearn.metricszHroc requires the scikit library, install with `pip install scikit-learn`zsklearn.utilsc                   s<     | }tdt|D ]}t|| ||d  ||< q|S )N   )arrayrangelenmax)xyinp P/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/wandb/plot/pr_curve.py_stepn   s   
zpr_curve.<locals>._step)r   r   Nr   r    )	pos_labelleft)sidec                 S  s   g | ]\}}|gt | qS r*   )r#   ).0kvr*   r*   r+   
<listcomp>   s    zpr_curve.<locals>.<listcomp>)class	precisionrecall   zTable has a limit of z rows. Resampling to fit.F*   r5   )replace	n_samplesrandom_statestratify)r6   r7   r5   )	dataframezwandb/area-under-curve/v0r7   r6   )r%   r&   r5   Recall	Precision)r   zx-axis-titlezy-axis-title)
data_tablevega_spec_namefieldsstring_fieldsr   )r   
get_moduler!   r   r	   uniquelinspacewhereisin
isinstancer   integerprecision_recall_curvesearchsorted	DataFramehstackitemslistvaluestiler#   roundwandbTableMAX_ROWStermwarnresamplesort_valuesr   )r   r   r   r   r   r   r   pdsklearn_metricssklearn_utilsr,   classesr6   interp_recallindices_to_plotr'   class_labelcur_precision
cur_recall_indicesdfr*   r(   r+   pr_curve   s   K




rg   )NNNNr   r   F)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r
   )
__future__r   numberstypingr   r   r   rU   r   wandb.plot.custom_chartr   wandb.plot.utilsr   r	   r
   r   rg   r*   r*   r*   r+   <module>   s$    