diff --git a/orca_python/classifiers/tests/test_nnop.py b/orca_python/classifiers/tests/test_nnop.py index 6dad78f..02b5240 100644 --- a/orca_python/classifiers/tests/test_nnop.py +++ b/orca_python/classifiers/tests/test_nnop.py @@ -1,129 +1,65 @@ """Tests for the NNOP classifier.""" -from pathlib import Path - import numpy as np import pytest from orca_python.classifiers.NNOP import NNOP -from orca_python.testing import TEST_DATASETS_DIR - - -@pytest.fixture -def dataset_path(): - return Path(TEST_DATASETS_DIR) / "balance-scale" @pytest.fixture -def train_file(dataset_path): - return np.loadtxt(dataset_path / "train_balance-scale.csv", delimiter=",") +def X(): + """Create sample feature patterns for testing.""" + return np.array([[0, 1], [1, 0], [1, 1], [0, 0], [1, 2]]) @pytest.fixture -def test_file(dataset_path): - return np.loadtxt(dataset_path / "test_balance-scale.csv", delimiter=",") - - -# ----- NOT APPLIED ----- -# It doesn't apply to the because can't set seed to randomize model weights. -# def test_nnop_fit_correct(self): -# #Check if this algorithm can correctly classify a toy problem. - -# #Test preparation -# X_train = self.train_file[:,0:(-1)] -# y_train = self.train_file[:,(-1)] - -# X_test = self.test_file[:,0:(-1)] - -# expected_predictions = [self.dataset_path / "expectedPredictions.0", -# self.dataset_path / "expectedPredictions.1", -# self.dataset_path / "expectedPredictions.2", -# self.dataset_path / "expectedPredictions.3"] - -# classifiers = [NNOP(epsilon_init = 0.5, hidden_n = 10, iterations = 500, lambda_value = 0.01)] -# NNOP(epsilon_init = 0.5, hidden_n = 20, iterations = 500, lambda_value = 0.01), -# NNOP(epsilon_init = 0.5, hidden_n = 10, iterations = 250, lambda_value = 0.01), -# NNOP(epsilon_init = 0.5, hidden_n = 20, iterations = 500, lambda_value = 0.01)] - +def y(): + """Create sample target variables for testing.""" + return np.array([0, 1, 1, 0, 1]) -# #Test execution and verification -# for expected_prediction, classifier in zip(expected_predictions, classifiers): -# classifier.fit(X_train, y_train) -# predictions = classifier.predict(X_test) -# expected_prediction = np.loadtxt(expected_prediction) -# npt.assert_equal(predictions, expected_prediction, "The prediction doesnt match with the desired values") +@pytest.mark.parametrize( + "param_name, invalid_value", + [ + ("n_hidden", -1), + ("max_iter", -1), + ], +) +def test_nnop_fit_hyperparameters_validation(X, y, param_name, invalid_value): + """Test that hyperparameters are validated.""" + classifier = NNOP(**{param_name: invalid_value}) + model = classifier.fit(X, y) -def test_nnop_fit_not_valid_parameter(train_file): + assert model is None, "The NNOP fit method doesnt return Null on error" - # Test preparation - X_train = train_file[:, 0:(-1)] - y_train = train_file[:, (-1)] - classifiers = [ - NNOP(epsilon_init=0.5, n_hidden=-1, max_iter=1000, lambda_value=0.01), - NNOP(epsilon_init=0.5, n_hidden=10, max_iter=-1, lambda_value=0.01), - ] +def test_nnop_fit_input_validation(X, y): + """Test that input data is validated.""" + X_invalid = X[:-1, :-1] + y_invalid = y[:-1] - # Test execution and verification - for classifier in classifiers: - model = classifier.fit(X_train, y_train) - assert model is None, "The NNOP fit method doesnt return Null on error" - - -def test_nnop_fit_not_valid_data(train_file): - # Test preparation - X_train = train_file[:, 0:(-1)] - y_train = train_file[:, (-1)] - X_train_broken = train_file[0:(-1), 0:(-2)] - y_train_broken = train_file[0:(-1), (-1)] - - # Test execution and verification - classifier = NNOP(epsilon_init=0.5, n_hidden=10, max_iter=1000, lambda_value=0.01) + classifier = NNOP() with pytest.raises(ValueError): - model = classifier.fit(X_train, y_train_broken) + model = classifier.fit(X, y_invalid) assert model is None, "The NNOP fit method doesnt return Null on error" with pytest.raises(ValueError): - model = classifier.fit([], y_train) + model = classifier.fit([], y) assert model is None, "The NNOP fit method doesnt return Null on error" with pytest.raises(ValueError): - model = classifier.fit(X_train, []) + model = classifier.fit(X, []) assert model is None, "The NNOP fit method doesnt return Null on error" with pytest.raises(ValueError): - model = classifier.fit(X_train_broken, y_train) + model = classifier.fit(X_invalid, y) assert model is None, "The NNOP fit method doesnt return Null on error" -# ----- NOT APPLIED ----- -# It doesn't apply to the because it has no internal model -# like in other classifiers like REDSVM or SVOREX. -# def test_nnop_model_is_not_a_dict(self): -# #Test preparation -# X_train = self.train_file[:,0:(-1)] -# y_train = self.train_file[:,(-1)] - -# X_test = self.test_file[:,0:(-1)] - -# classifier = NNOP(epsilon_init = 0.5, hidden_n = 10, iterations = 500, lambda_value = 0.01) -# classifier.fit(X_train, y_train) - -# #Test execution and verification -# with self.assertRaisesRegex(TypeError, "Model should be a dictionary!"): -# classifier.classifier_ = 1 -# classifier.predict(X_test) - - -def test_nnop_predict_not_valid_data(train_file): - # Test preparation - X_train = train_file[:, 0:(-1)] - y_train = train_file[:, (-1)] - - classifier = NNOP(epsilon_init=0.5, n_hidden=10, max_iter=500, lambda_value=0.01) - classifier.fit(X_train, y_train) +def test_nnop_predict_invalid_input_raises_error(X, y): + """Test that invalid input raises an error.""" + classifier = NNOP() + classifier.fit(X, y) - # Test execution and verification with pytest.raises(ValueError): classifier.predict([]) diff --git a/orca_python/classifiers/tests/test_nnpom.py b/orca_python/classifiers/tests/test_nnpom.py index 86b065d..f0a4e38 100644 --- a/orca_python/classifiers/tests/test_nnpom.py +++ b/orca_python/classifiers/tests/test_nnpom.py @@ -1,129 +1,65 @@ """Tests for the NNPOM classifier.""" -from pathlib import Path - import numpy as np import pytest from orca_python.classifiers.NNPOM import NNPOM -from orca_python.testing import TEST_DATASETS_DIR - - -@pytest.fixture -def dataset_path(): - return Path(TEST_DATASETS_DIR) / "balance-scale" @pytest.fixture -def train_file(dataset_path): - return np.loadtxt(dataset_path / "train_balance-scale.csv", delimiter=",") +def X(): + """Create sample feature patterns for testing.""" + return np.array([[0, 1], [1, 0], [1, 1], [0, 0], [1, 2]]) @pytest.fixture -def test_file(dataset_path): - return np.loadtxt(dataset_path / "test_balance-scale.csv", delimiter=",") - - -# ----- NOT APPLIED ----- -# It doesn't apply to the because can't set seed to randomize model weights. -# def test_nnpom_fit_correct(self): -# #Check if this algorithm can correctly classify a toy problem. - -# #Test preparation -# X_train = self.train_file[:,0:(-1)] -# y_train = self.train_file[:,(-1)] - -# X_test = self.test_file[:,0:(-1)] - -# expected_predictions = [self.dataset_path / "expectedPredictions.0"] -# # self.dataset_path / "expectedPredictions.1", -# # self.dataset_path / "expectedPredictions.2", -# # self.dataset_path / "expectedPredictions.3")] - -# classifiers = [NNPOM(epsilon_init = 0.5, n_hidden = 10, max_iter = 500, lambda_value = 0.01)] - -# # NNPOM(epsilon_init = 0.5, n_hidden = 20, max_iter = 500, lambda_value = 0.01), -# # NNPOM(epsilon_init = 0.5, n_hidden = 10, max_iter = 250, lambda_value = 0.01), -# # NNPOM(epsilon_init = 0.5, n_hidden = 20, max_iter = 500, lambda_value = 0.01)] +def y(): + """Create sample target variables for testing.""" + return np.array([0, 1, 1, 0, 1]) -# #Test execution and verification -# for expected_prediction, classifier in zip(expected_predictions, classifiers): -# classifier.fit(X_train, y_train) -# predictions = classifier.predict(X_test) -# expected_prediction = np.loadtxt(expected_prediction) -# npt.assert_equal(predictions, expected_prediction, "The prediction doesnt match with the desired values") +@pytest.mark.parametrize( + "param_name, invalid_value", + [ + ("n_hidden", -1), + ("max_iter", -1), + ], +) +def test_nnpom_fit_hyperparameters_validation(X, y, param_name, invalid_value): + """Test that hyperparameters are validated.""" + classifier = NNPOM(**{param_name: invalid_value}) + model = classifier.fit(X, y) + assert model is None, "The NNPOM fit method doesnt return Null on error" -def test_nnpom_fit_not_valid_parameter(train_file): - # Test preparation - X_train = train_file[:, 0:(-1)] - y_train = train_file[:, (-1)] - classifiers = [ - NNPOM(epsilon_init=0.5, n_hidden=-1, max_iter=1000, lambda_value=0.01), - NNPOM(epsilon_init=0.5, n_hidden=10, max_iter=-1, lambda_value=0.01), - ] +def test_nnpom_fit_input_validation(X, y): + """Test that input data is validated.""" + X_invalid = X[:-1, :-1] + y_invalid = y[:-1] - # Test execution and verification - for classifier in classifiers: - model = classifier.fit(X_train, y_train) - assert model is None, "The NNPOM fit method doesnt return Null on error" - - -def test_nnpom_fit_not_valid_data(train_file): - # Test preparation - X_train = train_file[:, 0:(-1)] - y_train = train_file[:, (-1)] - X_train_broken = train_file[0:(-1), 0:(-2)] - y_train_broken = train_file[0:(-1), (-1)] - - # Test execution and verification - classifier = NNPOM(epsilon_init=0.5, n_hidden=10, max_iter=1000, lambda_value=0.01) + classifier = NNPOM() with pytest.raises(ValueError): - model = classifier.fit(X_train, y_train_broken) + model = classifier.fit(X, y_invalid) assert model is None, "The NNPOM fit method doesnt return Null on error" with pytest.raises(ValueError): - model = classifier.fit([], y_train) + model = classifier.fit([], y) assert model is None, "The NNPOM fit method doesnt return Null on error" with pytest.raises(ValueError): - model = classifier.fit(X_train, []) + model = classifier.fit(X, []) assert model is None, "The NNPOM fit method doesnt return Null on error" with pytest.raises(ValueError): - model = classifier.fit(X_train_broken, y_train) + model = classifier.fit(X_invalid, y) assert model is None, "The NNPOM fit method doesnt return Null on error" -# ----- NOT APPLIED ----- -# It doesn't apply to the because it has no internal model -# like in other classifiers like REDSVM or SVOREX. -# def test_nnpom_model_is_not_a_dict(self): -# #Test preparation -# X_train = self.train_file[:,0:(-1)] -# y_train = self.train_file[:,(-1)] - -# X_test = self.test_file[:,0:(-1)] - -# classifier = NNPOM(epsilon_init = 0.5, n_hidden = 10, max_iter = 500, lambda_value = 0.01) -# classifier.fit(X_train, y_train) - -# #Test execution and verification -# with self.assertRaisesRegex(TypeError, "Model should be a dictionary!"): -# classifier.classifier_ = 1 -# classifier.predict(X_test) - - -def test_nnpom_predict_not_valid_data(train_file): - # Test preparation - X_train = train_file[:, 0:(-1)] - y_train = train_file[:, (-1)] - - classifier = NNPOM(epsilon_init=0.5, n_hidden=10, max_iter=500, lambda_value=0.01) - classifier.fit(X_train, y_train) +def test_nnpom_predict_invalid_input_raises_error(X, y): + """Test that invalid input raises an error.""" + classifier = NNPOM() + classifier.fit(X, y) - # Test execution and verification with pytest.raises(ValueError): classifier.predict([]) diff --git a/orca_python/classifiers/tests/test_ordinal_decomposition.py b/orca_python/classifiers/tests/test_ordinal_decomposition.py index 83c0129..8a335e8 100644 --- a/orca_python/classifiers/tests/test_ordinal_decomposition.py +++ b/orca_python/classifiers/tests/test_ordinal_decomposition.py @@ -1,103 +1,136 @@ """Tests for the OrdinalDecomposition ensemble.""" +import numpy as np import numpy.testing as npt import pytest -from numpy import array -# from OrdinalDecomposition import OrdinalDecomposition from orca_python.classifiers.OrdinalDecomposition import OrdinalDecomposition -# syspath.append(ospath.join('..', 'classifiers')) - -# Data is just 6 separable points in the plane @pytest.fixture def X(): - return array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]) + """Create sample feature patterns for testing.""" + return np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]) @pytest.fixture def y(): - return array([1, 1, 1, 2, 2, 2]) + """Create sample target variables for testing.""" + return np.array([1, 1, 1, 2, 2, 2]) def test_ordinal_decomposition(X, y): """Check if this algorithm can correctly classify a toy problem.""" - od = OrdinalDecomposition( + classifier = OrdinalDecomposition( dtype="ordered_partitions", decision_method="frank_hall", base_classifier="sklearn.svm.SVC", parameters={"C": 1.0, "gamma": "scale", "probability": True}, ) - y_pred = od.fit(X, y).predict(X) + y_pred = classifier.fit(X, y).predict(X) npt.assert_array_equal(y_pred, y) -def test_coding_matrix(): - """Checking if the coding matrix is built properly for each type of ordinal - decomposition. - - """ - od = OrdinalDecomposition() - - # Checking ordered_partitions (with a 5 class, 4 classifiers example) - od.dtype = "ordered_partitions" - expected_cm = array( - [[-1, -1, -1, -1], [1, -1, -1, -1], [1, 1, -1, -1], [1, 1, 1, -1], [1, 1, 1, 1]] - ) - - actual_cm = od._coding_matrix(od.dtype, 5) - - npt.assert_array_equal(actual_cm, expected_cm) - - # Checking one_vs_next - od.dtype = "one_vs_next" - expected_cm = array( - [[-1, 0, 0, 0], [1, -1, 0, 0], [0, 1, -1, 0], [0, 0, 1, -1], [0, 0, 0, 1]] - ) - - actual_cm = od._coding_matrix(od.dtype, 5) - - npt.assert_array_equal(actual_cm, expected_cm) - - # Checking one_vs_followers - od.dtype = "one_vs_followers" - expected_cm = array( - [[-1, 0, 0, 0], [1, -1, 0, 0], [1, 1, -1, 0], [1, 1, 1, -1], [1, 1, 1, 1]] - ) - - actual_cm = od._coding_matrix(od.dtype, 5) - - npt.assert_array_equal(actual_cm, expected_cm) - - # Checking one_vs_previous - od.dtype = "one_vs_previous" - expected_cm = array( - [[1, 1, 1, 1], [1, 1, 1, -1], [1, 1, -1, 0], [1, -1, 0, 0], [-1, 0, 0, 0]] - ) - - actual_cm = od._coding_matrix(od.dtype, 5) - - npt.assert_array_equal(actual_cm, expected_cm) +def test_ordinal_decomposition_fit_input_validation(X, y): + """Test that input data is validated.""" + X_invalid = X[:-1, :-1] + y_invalid = y[:-1] + + classifier = OrdinalDecomposition() + with pytest.raises(ValueError): + model = classifier.fit(X, y_invalid) + assert model is None, "The fit method doesnt return Null on error" + + with pytest.raises(ValueError): + model = classifier.fit([], y) + assert model is None, "The fit method doesnt return Null on error" + + with pytest.raises(ValueError): + model = classifier.fit(X, []) + assert model is None, "The fit method doesnt return Null on error" + + with pytest.raises(ValueError): + model = classifier.fit(X_invalid, y) + assert model is None, "The fit method doesnt return Null on error" + + +@pytest.mark.parametrize( + "dtype, expected_cm", + [ + ( + "ordered_partitions", + np.array( + [ + [-1, -1, -1, -1], + [1, -1, -1, -1], + [1, 1, -1, -1], + [1, 1, 1, -1], + [1, 1, 1, 1], + ] + ), + ), + ( + "one_vs_next", + np.array( + [ + [-1, 0, 0, 0], + [1, -1, 0, 0], + [0, 1, -1, 0], + [0, 0, 1, -1], + [0, 0, 0, 1], + ] + ), + ), + ( + "one_vs_followers", + np.array( + [ + [-1, 0, 0, 0], + [1, -1, 0, 0], + [1, 1, -1, 0], + [1, 1, 1, -1], + [1, 1, 1, 1], + ] + ), + ), + ( + "one_vs_previous", + np.array( + [ + [1, 1, 1, 1], + [1, 1, 1, -1], + [1, 1, -1, 0], + [1, -1, 0, 0], + [-1, 0, 0, 0], + ] + ), + ), + ], +) +def test_coding_matrix(dtype, expected_cm): + """Test that the coding matrix is built properly for each type of ordinal + decomposition.""" + classifier = OrdinalDecomposition() + classifier.dtype = dtype + cm = classifier._coding_matrix(classifier.dtype, 5) + + npt.assert_array_equal(cm, expected_cm) def test_frank_hall_method(X): - """Check that frank and hall method returns expected values for one toy - problem (starting off predicted probabilities given by each binary - classifier). - - """ + """Test that frank and hall method returns expected values for one toy problem + (starting off predicted probabilities given by each binary classifier).""" # Checking frank_hall cannot be used whitout ordered_partitions - od = OrdinalDecomposition(dtype="one_vs_next", decision_method="frank_hall") + classifier = OrdinalDecomposition(dtype="one_vs_next", decision_method="frank_hall") with pytest.raises(AttributeError): - od._frank_hall_method(X) + classifier._frank_hall_method(X) - od = OrdinalDecomposition(dtype="ordered_partitions") - od.coding_matrix_ = od._coding_matrix(od.dtype, 5) + classifier = OrdinalDecomposition(dtype="ordered_partitions") + classifier.coding_matrix_ = classifier._coding_matrix(classifier.dtype, 5) # Predicted probabilities from a 5 class ordinal dataset (positive class) - predictions = array( + predictions = np.array( [ [0.07495, 0.00003, 0.06861, 0.00005], [0.00017, 0.0, 0.03174, 0.00011], @@ -112,8 +145,8 @@ def test_frank_hall_method(X): ] ) - actual_predicted_probabilities = od._frank_hall_method(predictions) - expected_predicted_probabilities = array( + y_proba = classifier._frank_hall_method(predictions) + expected_y_proba = np.array( [ [0.92505, 0.07492, -0.06858, 0.06856, 0.00005], [0.99983, 0.00017, -0.03174, 0.03163, 0.00011], @@ -130,24 +163,21 @@ def test_frank_hall_method(X): # Asserting similarity npt.assert_allclose( - actual_predicted_probabilities, - expected_predicted_probabilities, + y_proba, + expected_y_proba, rtol=1e-04, atol=0, ) def test_exponential_loss_method(): - """Check that exponential loss method returns expected values for one toy - problem (starting off predicted probabilities given by each binary - classifier). - - """ - od = OrdinalDecomposition(dtype="ordered_partitions") - od.coding_matrix_ = od._coding_matrix(od.dtype, 5) + """Test that exponential loss method returns expected values for one toy problem + (starting off predicted probabilities given by each binary classifier).""" + classifier = OrdinalDecomposition(dtype="ordered_partitions") + classifier.coding_matrix_ = classifier._coding_matrix(classifier.dtype, 5) # Predicted probabilities from a 5 class ordinal dataset (positive class) - predictions = array( + predictions = np.array( [ [0.07495, 0.00003, 0.06861, 0.00005], [0.00017, 0.0, 0.03174, 0.00011], @@ -165,8 +195,8 @@ def test_exponential_loss_method(): # Interpoling values from [0, 1] range to [-1, 1] predictions = (2 * predictions) - 1 - actual_elosses = od._exponential_loss(predictions) - expected_elosses = array( + e_losses = classifier._exponential_loss(predictions) + expected_e_losses = np.array( [ [1.5852, 3.49769, 5.8479, 7.79566, 10.14575], [1.49583, 3.84519, 6.19559, 8.35469, 10.70441], @@ -182,20 +212,17 @@ def test_exponential_loss_method(): ) # Asserting similarity - npt.assert_allclose(actual_elosses, expected_elosses, rtol=1e-04, atol=0) + npt.assert_allclose(e_losses, expected_e_losses, rtol=1e-04, atol=0) def test_logarithmic_loss_method(): - """Check that exponential loss method returns expected values for one toy - problem (starting off predicted probabilities given by each binary - classifier). - - """ - od = OrdinalDecomposition(dtype="ordered_partitions") - od.coding_matrix_ = od._coding_matrix(od.dtype, 5) + """Test that logarithmic loss method returns expected values for one toy problem + (starting off predicted probabilities given by each binary classifier).""" + classifier = OrdinalDecomposition(dtype="ordered_partitions") + classifier.coding_matrix_ = classifier._coding_matrix(classifier.dtype, 5) # Predicted probabilities from a 5 class ordinal dataset (positive class) - predictions = array( + predictions = np.array( [ [0.07495, 0.00003, 0.06861, 0.00005], [0.00017, 0.0, 0.03174, 0.00011], @@ -213,8 +240,8 @@ def test_logarithmic_loss_method(): # Interpoling values from [0, 1] range to [-1, 1] predictions = (2 * predictions) - 1 - actual_llosses = od._logarithmic_loss(predictions) - expected_llosses = array( + l_losses = classifier._logarithmic_loss(predictions) + expected_l_losses = np.array( [ [0.58553, 2.28573, 4.28561, 6.01117, 8.01097], [0.52385, 2.52317, 4.52317, 6.39621, 8.39577], @@ -230,20 +257,17 @@ def test_logarithmic_loss_method(): ) # Asserting similarity - npt.assert_allclose(actual_llosses, expected_llosses, rtol=1e-04, atol=0) + npt.assert_allclose(l_losses, expected_l_losses, rtol=1e-04, atol=0) def test_hinge_loss_method(): - """Check that exponential loss method returns expected values for one toy - problem (starting off predicted probabilities given by each binary - classifier). - - """ - od = OrdinalDecomposition(dtype="ordered_partitions") - od.coding_matrix_ = od._coding_matrix(od.dtype, 5) + """Test that hinge loss method returns expected values for one toy problem + (starting off predicted probabilities given by each binary classifier).""" + classifier = OrdinalDecomposition(dtype="ordered_partitions") + classifier.coding_matrix_ = classifier._coding_matrix(classifier.dtype, 5) # Predicted probabilities from a 5 class ordinal dataset (positive class) - predictions = array( + predictions = np.array( [ [0.07495, 0.00003, 0.06861, 0.00005], [0.00017, 0.0, 0.03174, 0.00011], @@ -261,8 +285,8 @@ def test_hinge_loss_method(): # Interpoling values from [0, 1] range to [-1, 1] predictions = (2 * predictions) - 1 - actual_hlosses = od._hinge_loss(predictions) - expected_hlosses = array( + h_losses = classifier._hinge_loss(predictions) + expected_h_losses = np.array( [ [0.28728, 1.98748, 3.98736, 5.71292, 7.71272], [0.06404, 2.06336, 4.06336, 5.9364, 7.93596], @@ -278,4 +302,13 @@ def test_hinge_loss_method(): ) # Asserting similarity - npt.assert_allclose(actual_hlosses, expected_hlosses, rtol=1e-04, atol=0) + npt.assert_allclose(h_losses, expected_h_losses, rtol=1e-04, atol=0) + + +def test_ordinal_decomposition_predict_invalid_input_raises_error(X, y): + """Test that invalid input raises an error.""" + classifier = OrdinalDecomposition() + classifier.fit(X, y) + + with pytest.raises(ValueError): + classifier.predict([]) diff --git a/orca_python/classifiers/tests/test_redsvm.py b/orca_python/classifiers/tests/test_redsvm.py index b83366d..9de9034 100644 --- a/orca_python/classifiers/tests/test_redsvm.py +++ b/orca_python/classifiers/tests/test_redsvm.py @@ -1,234 +1,125 @@ """Tests for the REDSVM classifier.""" -from pathlib import Path - import numpy as np import numpy.testing as npt import pytest from orca_python.classifiers.REDSVM import REDSVM +from orca_python.datasets import load_dataset from orca_python.testing import TEST_DATASETS_DIR, TEST_PREDICTIONS_DIR @pytest.fixture -def dataset_path(): - return Path(TEST_DATASETS_DIR) / "balance-scale" +def X(): + """Create sample feature patterns for testing.""" + return np.array([[0, 1], [1, 0], [1, 1], [0, 0], [1, 2]]) @pytest.fixture -def predictions_path(): - return Path(TEST_PREDICTIONS_DIR) / "REDSVM" - +def y(): + """Create sample target variables for testing.""" + return np.array([0, 1, 1, 0, 1]) + + +@pytest.mark.parametrize( + "kernel, degree, gamma, coef0, C, cache_size, tol, shrinking, expected_file", + [ + (0, 2, 0.1, 0.5, 0.1, 150, 0.005, 0, "expectedPredictions.0"), + (1, 2, 0.1, 0.5, 0.1, 150, 0.005, 0, "expectedPredictions.1"), + (2, 2, 0.1, 0.5, 0.1, 150, 0.005, 0, "expectedPredictions.2"), + (3, 2, 0.1, 0.5, 0.1, 150, 0.005, 0, "expectedPredictions.3"), + (4, 2, 0.1, 0.5, 0.1, 150, 0.005, 0, "expectedPredictions.4"), + (5, 2, 0.1, 0.5, 0.1, 150, 0.005, 0, "expectedPredictions.5"), + (6, 2, 0.1, 0.5, 0.1, 150, 0.005, 0, "expectedPredictions.6"), + (7, 2, 0.1, 0.5, 0.1, 150, 0.005, 0, "expectedPredictions.7"), + ], +) +def test_redsvm_predict_matches_expected( + kernel, degree, gamma, coef0, C, cache_size, tol, shrinking, expected_file +): + """Test that predictions match expected values.""" + X_train, y_train, X_test, _ = load_dataset( + dataset_name="balance-scale", data_path=TEST_DATASETS_DIR + ) + + classifier = REDSVM( + kernel=kernel, + degree=degree, + gamma=gamma, + coef0=coef0, + C=C, + cache_size=cache_size, + tol=tol, + shrinking=shrinking, + ) -@pytest.fixture -def train_file(dataset_path): - return np.loadtxt(dataset_path / "train_balance-scale.csv", delimiter=",") + classifier.fit(X_train, y_train) + y_pred = classifier.predict(X_test) + y_expected = np.loadtxt(TEST_PREDICTIONS_DIR / "REDSVM" / expected_file) + + npt.assert_equal( + y_pred, y_expected, "The prediction doesnt match with the desired values" + ) + + +@pytest.mark.parametrize( + "param_name, invalid_value, error_msg", + [ + ("kernel", -1, "unknown kernel type"), + ("cache_size", -1, "cache_size <= 0"), + ("tol", -1, "eps <= 0"), + ("shrinking", 2, "shrinking != 0 and shrinking != 1"), + ("kernel", 8, "Wrong input format: sample_serial_number out of range"), + ], +) +def test_redsvm_fit_hyperparameters_validation( + X, y, param_name, invalid_value, error_msg +): + """Test that hyperparameters are validated.""" + classifier = REDSVM(**{param_name: invalid_value}) + + with pytest.raises(ValueError, match=error_msg): + model = classifier.fit(X, y) + assert model is None, "The REDSVM fit method doesnt return Null on error" -@pytest.fixture -def test_file(dataset_path): - return np.loadtxt(dataset_path / "test_balance-scale.csv", delimiter=",") - - -def test_redsvm_fit_correct(dataset_path, train_file, test_file, predictions_path): - # Check if this algorithm can correctly classify a toy problem. - - # Test preparation - X_train = train_file[:, 0:(-1)] - y_train = train_file[:, (-1)] - - X_test = test_file[:, 0:(-1)] - - expected_predictions = [ - predictions_path / "expectedPredictions.0", - predictions_path / "expectedPredictions.1", - predictions_path / "expectedPredictions.2", - predictions_path / "expectedPredictions.3", - predictions_path / "expectedPredictions.4", - predictions_path / "expectedPredictions.5", - predictions_path / "expectedPredictions.6", - predictions_path / "expectedPredictions.7", - ] - - classifiers = [ - REDSVM( - kernel=0, - degree=2, - gamma=0.1, - coef0=0.5, - C=0.1, - cache_size=150, - tol=0.005, - shrinking=0, - ), - REDSVM( - kernel=1, - degree=2, - gamma=0.1, - coef0=0.5, - C=0.1, - cache_size=150, - tol=0.005, - shrinking=0, - ), - REDSVM( - kernel=2, - degree=2, - gamma=0.1, - coef0=0.5, - C=0.1, - cache_size=150, - tol=0.005, - shrinking=0, - ), - REDSVM( - kernel=3, - degree=2, - gamma=0.1, - coef0=0.5, - C=0.1, - cache_size=150, - tol=0.005, - shrinking=0, - ), - REDSVM( - kernel=4, - degree=2, - gamma=0.1, - coef0=0.5, - C=0.1, - cache_size=150, - tol=0.005, - shrinking=0, - ), - REDSVM( - kernel=5, - degree=2, - gamma=0.1, - coef0=0.5, - C=0.1, - cache_size=150, - tol=0.005, - shrinking=1, - ), - REDSVM( - kernel=6, - degree=2, - gamma=0.1, - coef0=0.5, - C=0.1, - cache_size=150, - tol=0.005, - shrinking=1, - ), - REDSVM( - kernel=7, - degree=2, - gamma=0.1, - coef0=0.5, - C=0.1, - cache_size=150, - tol=0.005, - shrinking=1, - ), - ] - - # Test execution and verification - for expected_prediction, classifier in zip(expected_predictions, classifiers): - classifier.fit(X_train, y_train) - predictions = classifier.predict(X_test) - expected_prediction = np.loadtxt(expected_prediction) - npt.assert_equal( - predictions, - expected_prediction, - "The prediction doesnt match with the desired values", - ) - - -def test_redsvm_fit_not_valid_parameter(train_file): - - # Test preparation - X_train = train_file[:, 0:(-1)] - y_train = train_file[:, (-1)] - - classifiers = [ - REDSVM(gamma=0.1, C=1, kernel=-1), - REDSVM(gamma=0.1, C=1, cache_size=-1), - REDSVM(gamma=0.1, C=1, tol=-1), - REDSVM(gamma=0.1, C=1, shrinking=2), - ] - - error_msgs = [ - "unknown kernel type", - "cache_size <= 0", - "eps <= 0", - "shrinking != 0 and shrinking != 1", - ] - - # Test execution and verification - for classifier, error_msg in zip(classifiers, error_msgs): - with pytest.raises(ValueError, match=error_msg): - model = classifier.fit(X_train, y_train) - assert model is None, "The REDSVM fit method doesnt return Null on error" - - -def test_redsvm_fit_not_valid_data(train_file): - # Test preparation - X_train = train_file[:, 0:(-1)] - y_train = train_file[:, (-1)] - X_train_broken = train_file[:(-1), 0:(-1)] - y_train_broken = train_file[0:(-1), (-1)] - - # Test execution and verification - classifier = REDSVM(gamma=0.1, C=1, kernel=8) - with pytest.raises( - ValueError, match="Wrong input format: sample_serial_number out of range" - ): - model = classifier.fit(X_train, y_train) - assert model is None, "The REDSVM fit method doesnt return Null on error" +def test_redsvm_fit_input_validation(X, y): + """Test that input data is validated.""" + X_invalid = X[:-1, :-1] + y_invalid = y[:-1] - classifier = REDSVM(gamma=0.1, C=1) + classifier = REDSVM() with pytest.raises(ValueError): - model = classifier.fit(X_train, y_train_broken) + model = classifier.fit(X, y_invalid) assert model is None, "The REDSVM fit method doesnt return Null on error" with pytest.raises(ValueError): - model = classifier.fit([], y_train) + model = classifier.fit([], y) assert model is None, "The REDSVM fit method doesnt return Null on error" with pytest.raises(ValueError): - model = classifier.fit(X_train, []) + model = classifier.fit(X, []) assert model is None, "The REDSVM fit method doesnt return Null on error" with pytest.raises(ValueError): - model = classifier.fit(X_train_broken, y_train) + model = classifier.fit(X_invalid, y) assert model is None, "The REDSVM fit method doesnt return Null on error" -def test_redsvm_model_is_not_a_dict(train_file, test_file): - # Test preparation - X_train = train_file[:, 0:(-1)] - y_train = train_file[:, (-1)] - - X_test = test_file[:, 0:(-1)] - - classifier = REDSVM(gamma=0.1, C=1) - classifier.fit(X_train, y_train) +def test_redsvm_validates_internal_model_format(X, y): + """Test that internal model format is validated.""" + classifier = REDSVM() + classifier.fit(X, y) - # Test execution and verification with pytest.raises(TypeError, match="Model should be a dictionary!"): classifier.model_ = 1 - classifier.predict(X_test) - + classifier.predict(X) -def test_redsvm_predict_not_valid_data(train_file): - # Test preparation - X_train = train_file[:, 0:(-1)] - y_train = train_file[:, (-1)] - classifier = REDSVM(gamma=0.1, C=1) - classifier.fit(X_train, y_train) +def test_redsvm_predict_invalid_input_raises_error(X, y): + """Test that invalid input raises an error.""" + classifier = REDSVM() + classifier.fit(X, y) - # Test execution and verification with pytest.raises(ValueError): classifier.predict([]) diff --git a/orca_python/classifiers/tests/test_svorex.py b/orca_python/classifiers/tests/test_svorex.py index 67dcaac..72b76e1 100644 --- a/orca_python/classifiers/tests/test_svorex.py +++ b/orca_python/classifiers/tests/test_svorex.py @@ -1,147 +1,106 @@ """Tests for the SVOREX classifier.""" -from pathlib import Path - import numpy as np import numpy.testing as npt import pytest from orca_python.classifiers.SVOREX import SVOREX +from orca_python.datasets import load_dataset from orca_python.testing import TEST_DATASETS_DIR, TEST_PREDICTIONS_DIR @pytest.fixture -def dataset_path(): - return Path(TEST_DATASETS_DIR) / "balance-scale" +def X(): + """Create sample feature patterns for testing.""" + return np.array([[1, 2], [2, 1], [2, 2], [1, 1], [2, 3]]) @pytest.fixture -def predictions_path(): - return Path(TEST_PREDICTIONS_DIR) / "SVOREX" - +def y(): + """Create sample target variables for testing.""" + return np.array([1, 2, 2, 1, 2]) + + +@pytest.mark.parametrize( + "kernel, tol, C, kappa, degree, expected_file", + [ + (0, 0.002, 0.5, 0.1, 0, "expectedPredictions.0"), + (1, 0.002, 0.5, 0.1, 0, "expectedPredictions.1"), + (2, 0.002, 0.5, 0.1, 4, "expectedPredictions.2"), + ], +) +def test_svorex_predict_matches_expected(kernel, tol, C, kappa, degree, expected_file): + """Test that predictions match expected values.""" + X_train, y_train, X_test, _ = load_dataset( + dataset_name="balance-scale", data_path=TEST_DATASETS_DIR + ) + + classifier = SVOREX(kernel=kernel, tol=tol, C=C, kappa=kappa, degree=degree) + classifier.fit(X_train, y_train) + y_pred = classifier.predict(X_test) + y_expected = np.loadtxt(TEST_PREDICTIONS_DIR / "SVOREX" / expected_file) + + npt.assert_equal( + y_pred, y_expected, "The prediction doesnt match with the desired values" + ) + + +@pytest.mark.parametrize( + "params, error_msg", + [ + ({"tol": 0}, "- T is invalid"), + ({"C": 0}, "- C is invalid"), + ({"kappa": 0}, "- K is invalid"), + ({"kernel": 2, "degree": 0}, "- P is invalid"), + ({"kappa": -1}, "-1 is invalid"), + ], +) +def test_svorex_fit_hyperparameters_validation(X, y, params, error_msg): + """Test that hyperparameters are validated.""" + classifier = SVOREX(**params) + + with pytest.raises(ValueError, match=error_msg): + model = classifier.fit(X, y) + assert model is None, "The SVOREX fit method doesnt return Null on error" -@pytest.fixture -def train_file(dataset_path): - return np.loadtxt(dataset_path / "train_balance-scale.csv", delimiter=",") +def test_svorex_fit_input_validation(X, y): + """Test that input data is validated.""" + X_invalid = X[:-1, :-1] + y_invalid = y[:-1] -@pytest.fixture -def test_file(dataset_path): - return np.loadtxt(dataset_path / "test_balance-scale.csv", delimiter=",") - - -def test_svorex_fit_correct(dataset_path, train_file, test_file, predictions_path): - # Check if this algorithm can correctly classify a toy problem. - - # Test preparation - X_train = train_file[:, 0:(-1)] - y_train = train_file[:, (-1)] - - X_test = test_file[:, 0:(-1)] - - expected_predictions = [ - predictions_path / "expectedPredictions.0", - predictions_path / "expectedPredictions.1", - predictions_path / "expectedPredictions.2", - ] - - classifiers = [ - SVOREX(kernel=0, tol=0.002, C=0.5, kappa=0.1), - SVOREX(kernel=1, tol=0.002, C=0.5, kappa=0.1), - SVOREX(kernel=2, degree=4, tol=0.002, C=0.5, kappa=0.1), - ] - - # Test execution and verification - for expected_prediction, classifier in zip(expected_predictions, classifiers): - classifier.fit(X_train, y_train) - predictions = classifier.predict(X_test) - expected_prediction = np.loadtxt(expected_prediction) - npt.assert_equal( - predictions, - expected_prediction, - "The prediction doesnt match with the desired values", - ) - - -def test_svorex_fit_not_valid_parameter(train_file): - - # Test preparation - X_train = train_file[:, 0:(-1)] - y_train = train_file[:, (-1)] - - classifiers = [ - SVOREX(C=0.1, kappa=1, tol=0), - SVOREX(C=0, kappa=1), - SVOREX(C=0.1, kappa=0), - SVOREX(kernel=2, degree=0, C=0.1, kappa=1), - SVOREX(kernel=0, C=0.1, kappa=-1), - ] - - error_msgs = [ - "- T is invalid", - "- C is invalid", - "- K is invalid", - "- P is invalid", - "-1 is invalid", - ] - - # Test execution and verification - for classifier, error_msg in zip(classifiers, error_msgs): - with pytest.raises(ValueError, match=error_msg): - model = classifier.fit(X_train, y_train) - assert model is None, "The SVOREX fit method doesnt return Null on error" - - -def test_svorex_fit_not_valid_data(train_file): - # Test preparation - X_train = train_file[:, 0:(-1)] - y_train = train_file[:, (-1)] - X_train_broken = train_file[0:(-1), 0:(-2)] - y_train_broken = train_file[0:(-1), (-1)] - - # Test execution and verification - classifier = SVOREX(kappa=0.1, C=1) + classifier = SVOREX() with pytest.raises(ValueError): - model = classifier.fit(X_train, y_train_broken) + model = classifier.fit(X, y_invalid) assert model is None, "The SVOREX fit method doesnt return Null on error" with pytest.raises(ValueError): - model = classifier.fit([], y_train) + model = classifier.fit([], y) assert model is None, "The SVOREX fit method doesnt return Null on error" with pytest.raises(ValueError): - model = classifier.fit(X_train, []) + model = classifier.fit(X, []) assert model is None, "The SVOREX fit method doesnt return Null on error" with pytest.raises(ValueError): - model = classifier.fit(X_train_broken, y_train) + model = classifier.fit(X_invalid, y) assert model is None, "The SVOREX fit method doesnt return Null on error" -def test_svorex_model_is_not_a_dict(train_file, test_file): - # Test preparation - X_train = train_file[:, 0:(-1)] - y_train = train_file[:, (-1)] +def test_svorex_validates_internal_model_format(X, y): + """Test that internal model format is validated.""" + classifier = SVOREX() + classifier.fit(X, y) - X_test = test_file[:, 0:(-1)] - - classifier = SVOREX(kappa=0.1, C=1) - classifier.fit(X_train, y_train) - - # Test execution and verification with pytest.raises(TypeError, match="Model should be a dictionary!"): classifier.model_ = 1 - classifier.predict(X_test) + classifier.predict(X) -def test_svorex_predict_not_valid_data(train_file): - # Test preparation - X_train = train_file[:, 0:(-1)] - y_train = train_file[:, (-1)] - - classifier = SVOREX(kappa=0.1, C=1) - classifier.fit(X_train, y_train) +def test_svorex_predict_invalid_input_raises_error(X, y): + """Test that invalid input raises an error.""" + classifier = SVOREX() + classifier.fit(X, y) - # Test execution and verification with pytest.raises(ValueError): classifier.predict([])