Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions orca_python/metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
"""Metrics module."""

from .metrics import (
accuracy_off1,
amae,
ccr,
gm,
gmsec,
greater_is_better,
mae,
mmae,
ms,
mze,
rps,
spearman,
tkendall,
wkappa,
Expand All @@ -19,11 +22,14 @@
"ccr",
"amae",
"gm",
"gmsec",
"mae",
"mmae",
"ms",
"mze",
"tkendall",
"wkappa",
"spearman",
"rps",
"accuracy_off1",
]
151 changes: 150 additions & 1 deletion orca_python/metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,16 @@ def greater_is_better(metric_name):
False

"""
greater_is_better_metrics = ["ccr", "ms", "gm", "tkendall", "wkappa", "spearman"]
greater_is_better_metrics = [
"ccr",
"ms",
"gm",
"gmsec",
"tkendall",
"wkappa",
"spearman",
"accuracy_off1",
]
if metric_name in greater_is_better_metrics:
return True
else:
Expand Down Expand Up @@ -173,6 +182,47 @@ def gm(y_true, y_pred):
return gm


def gmsec(y_true, y_pred):
    """Compute the Geometric Mean of the Sensitivity of the Extreme Classes (GMSEC).

    Proposed in (:footcite:t:`vargas2024improving`) to assess the classification
    performance for the first and the last classes.

    Parameters
    ----------
    y_true : np.ndarray, shape (n_samples,)
        Ground truth labels.

    y_pred : np.ndarray, shape (n_samples,)
        Predicted labels.

    Returns
    -------
    gmsec : float
        Geometric mean of the sensitivities of the extreme classes.

    Examples
    --------
    >>> import numpy as np
    >>> from orca_python.metrics import gmsec
    >>> y_true = np.array([0, 0, 1, 2, 3, 0, 0])
    >>> y_pred = np.array([0, 1, 1, 2, 3, 0, 1])
    >>> gmsec(y_true, y_pred)
    np.float64(0.7071067811865476)

    """
    true_labels = np.array(y_true)
    pred_labels = np.array(y_pred)

    # Collapse one-hot / probability matrices down to class indices.
    if true_labels.ndim > 1:
        true_labels = np.argmax(true_labels, axis=1)
    if pred_labels.ndim > 1:
        pred_labels = np.argmax(pred_labels, axis=1)

    # Per-class sensitivities (recalls); the extreme classes are the
    # first and the last entries of the per-class vector.
    per_class_recall = recall_score(true_labels, pred_labels, average=None)
    first, last = per_class_recall[0], per_class_recall[-1]
    return np.sqrt(first * last)


def mae(y_true, y_pred):
"""Calculate the Mean Absolute Error.

Expand Down Expand Up @@ -490,3 +540,102 @@ def spearman(y_true, y_pred):
return 0
else:
return num / div


def rps(y_true, y_proba):
    """Compute the ranked probability score.

    As presented in :footcite:t:`janitza2016random`.

    Parameters
    ----------
    y_true : np.ndarray, shape (n_samples,)
        Ground truth labels.

    y_proba : np.ndarray, shape (n_samples, n_classes)
        Predicted probability distribution across different classes.

    Returns
    -------
    rps : float
        The ranked probability score.

    Examples
    --------
    >>> import numpy as np
    >>> from orca_python.metrics import rps
    >>> y_true = np.array([0, 0, 3, 2])
    >>> y_pred = np.array(
    ...     [[0.2, 0.4, 0.2, 0.2],
    ...     [0.7, 0.1, 0.1, 0.1],
    ...     [0.5, 0.05, 0.1, 0.35],
    ...     [0.1, 0.05, 0.65, 0.2]])
    >>> rps(y_true, y_pred)
    np.float64(0.5068750000000001)

    """
    y_true = np.array(y_true)
    y_proba = np.array(y_proba)

    n_samples, n_classes = y_proba.shape

    # One-hot encode the ground truth. Negative labels index from the end
    # here, but those rows are replaced by the flat penalty below.
    y_oh = np.zeros(y_proba.shape)
    y_oh[np.arange(n_samples), y_true] = 1

    # RPS compares the cumulative distributions of the truth and the
    # prediction, summing squared differences across class thresholds.
    cum_diff = y_proba.cumsum(axis=1) - y_oh.cumsum(axis=1)
    per_sample = np.power(cum_diff, 2).sum(axis=1)

    # A label outside [0, n_classes) scores a flat penalty of 1 for that
    # sample (same semantics as the original per-sample membership test,
    # but vectorized instead of an O(n_classes) check per sample).
    valid = (y_true >= 0) & (y_true < n_classes)
    return np.where(valid, per_sample, 1.0).sum() / n_samples


def accuracy_off1(y_true, y_pred, labels=None):
    """Compute the accuracy of the predictions.

    Allows errors if they occur in an adjacent class.

    Parameters
    ----------
    y_true : np.ndarray, shape (n_samples,)
        Ground truth labels.

    y_pred : np.ndarray, shape (n_samples,)
        Predicted labels.

    labels : np.ndarray, shape (n_classes,) or None, default=None
        Labels of the classes. If None, the labels are inferred from the data.

    Returns
    -------
    acc : float
        1-off accuracy.

    Examples
    --------
    >>> import numpy as np
    >>> from orca_python.metrics import accuracy_off1
    >>> y_true = np.array([0, 0, 1, 2, 3, 0, 0])
    >>> y_pred = np.array([0, 1, 1, 2, 0, 0, 1])
    >>> accuracy_off1(y_true, y_pred)
    np.float64(0.8571428571428571)

    """
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    # Collapse one-hot / probability matrices down to class indices.
    if len(y_true.shape) > 1:
        y_true = np.argmax(y_true, axis=1)
    if len(y_pred.shape) > 1:
        y_pred = np.argmax(y_pred, axis=1)
    if labels is None:
        labels = np.unique(y_true)

    conf_mat = confusion_matrix(y_true, y_pred, labels=labels)
    n = conf_mat.shape[0]
    # Band matrix selecting the main diagonal plus both adjacent diagonals.
    # BUGFIX: the previous `np.eye(n, n) + np.eye(n, n, k=1), +np.eye(...)`
    # built a 2-tuple (comma typo instead of `+`) and only produced the
    # right total via an accidental (2, n, n) broadcast in `mask * conf_mat`.
    mask = np.eye(n, n) + np.eye(n, n, k=1) + np.eye(n, n, k=-1)
    correct = mask * conf_mat

    return 1.0 * np.sum(correct) / np.sum(conf_mat)
55 changes: 55 additions & 0 deletions orca_python/metrics/tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,20 @@

import numpy as np
import numpy.testing as npt
from sklearn.metrics import recall_score

from orca_python.metrics import (
accuracy_off1,
amae,
ccr,
gm,
gmsec,
greater_is_better,
mae,
mmae,
ms,
mze,
rps,
spearman,
tkendall,
wkappa,
Expand All @@ -20,18 +24,36 @@

def test_greater_is_better():
    """Test the greater_is_better function."""
    maximized = [
        "accuracy_off1",
        "ccr",
        "gm",
        "gmsec",
        "ms",
        "tkendall",
        "wkappa",
        "spearman",
    ]
    minimized = ["mae", "mmae", "amae", "mze", "rps"]

    # Score-like metrics are maximized, error-like metrics are minimized.
    for metric_name in maximized:
        assert greater_is_better(metric_name)
    for metric_name in minimized:
        assert not greater_is_better(metric_name)


def test_accuracy_off1():
    """Test the Accuracy that allows errors in adjacent classes."""
    # The wrap-around error (true 5 -> predicted 0) is not adjacent,
    # so exactly one of the six samples counts as wrong.
    actual = accuracy_off1(
        np.array([0, 1, 2, 3, 4, 5]), np.array([1, 2, 3, 4, 5, 0])
    )
    npt.assert_almost_equal(0.8333333333333334, actual, decimal=6)

    # Every prediction is at most one class away: perfect 1-off accuracy.
    actual = accuracy_off1(np.array([0, 1, 2, 3, 4]), np.array([0, 2, 1, 4, 3]))
    npt.assert_almost_equal(1.0, actual, decimal=6)


def test_ccr():
"""Test the Correctly Classified Ratio (CCR) metric."""
y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
Expand Down Expand Up @@ -104,6 +126,23 @@ def test_gm():
npt.assert_almost_equal(expected, actual, decimal=6)


def test_gmsec():
    """Test the Geometric Mean of the Sensitivities of the Extreme Classes metric."""
    cases = [
        # Two classes, half of each predicted correctly.
        (np.array([0, 0, 1, 1]), np.array([0, 1, 0, 1])),
        # Three classes, all predictions correct.
        (np.array([0, 0, 1, 1, 2, 2]), np.array([0, 0, 1, 1, 2, 2])),
    ]

    for y_true, y_pred in cases:
        recalls = recall_score(y_true, y_pred, average=None)
        expected = np.sqrt(recalls[0] * recalls[-1])
        npt.assert_almost_equal(expected, gmsec(y_true, y_pred), decimal=6)


def test_mae():
"""Test the Mean Absolute Error (MAE) metric."""
y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
Expand Down Expand Up @@ -198,6 +237,22 @@ def test_mze():
npt.assert_almost_equal(expected, actual, decimal=6)


def test_rps():
    """Test the ranked probability score (RPS) metric."""
    y_true = np.array([0, 0, 3, 2])
    y_proba = np.array(
        [
            [0.2, 0.4, 0.2, 0.2],
            [0.7, 0.1, 0.1, 0.1],
            [0.5, 0.05, 0.1, 0.35],
            [0.1, 0.05, 0.65, 0.2],
        ]
    )

    actual = rps(y_true, y_proba)
    npt.assert_almost_equal(0.506875, actual, decimal=6)


def test_tkendall():
"""Test the Kendall's Tau metric."""
y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
Expand Down