From d2bbfb2655a732ce11f29036a467bbbd5c4f7080 Mon Sep 17 00:00:00 2001 From: Anastasios Zouzias Date: Mon, 21 Oct 2019 18:52:39 +0200 Subject: [PATCH] [tests] more metrics tests (#15) * [tests] metrics * more tests * [travis] exclude build/ folder from coveralls * [travis] minor fix * refactor metrics --- .travis.yml | 4 +- src/GBT.h | 4 +- src/dataset.h | 1 - src/python_api.cpp | 1 - test/CMakeLists.txt | 2 +- test/test_metric_logloss.cpp | 80 ++++++++++++++++++++++ test/test_metric_rmse.cpp | 125 +++++++++++++++++++++++++++++++++++ test/test_metrics.cpp | 27 -------- 8 files changed, 210 insertions(+), 34 deletions(-) create mode 100644 test/test_metric_logloss.cpp create mode 100644 test/test_metric_rmse.cpp delete mode 100644 test/test_metrics.cpp diff --git a/.travis.yml b/.travis.yml index b1e219c..faf6ac1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,7 @@ before_install: - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - sudo apt-get update -q - sudo apt-get install -q gcc g++ cmake libeigen3-dev git wget python-dev valgrind - - pip install --user cpp-coveralls + - travis_wait pip install --user cpp-coveralls - git submodule add -f https://github.com/pybind/pybind11.git # add pybind11 submodule - git submodule update --init --recursive @@ -19,7 +19,7 @@ script: - ./bin/unit_tests after_success: - - coveralls --root .. -E ".*pybind11.*" -E ".*external.*" -E ".*CMakeFiles.*" -E ".*test/.*.cpp.*" + - coveralls --include src --include include --gcov-options '\-lp' --root .. --build-root .; notifications: email: false diff --git a/src/GBT.h b/src/GBT.h index 6119b1f..b7f3363 100644 --- a/src/GBT.h +++ b/src/GBT.h @@ -158,7 +158,6 @@ namespace microgbt { std::cout << "[Duration: " << duration << " millis] | [Train Loss]: " << trainLoss << " | [Valid Loss]: " << bestValidationLoss <scoreToPrediction(score); - }; + } /** * Return sum of scores up to numIterations @@ -209,6 +208,7 @@ namespace microgbt { else break; } + return (double)score; } diff --git a/src/dataset.h b/src/dataset.h index 6202232..3cb5c10 100644 --- a/src/dataset.h +++ b/src/dataset.h @@ -106,7 +106,6 @@ namespace microgbt { _sortedMatrixIdx = SortedMatrixType(rows, cols); - #pragma omp parallel for schedule(static) for ( long j = 0; j < cols; j++) { _sortedMatrixIdx.col(j) = sortIndices(j); } diff --git a/src/python_api.cpp b/src/python_api.cpp index 79ab5e7..2be3ac0 100644 --- a/src/python_api.cpp +++ b/src/python_api.cpp @@ -3,7 +3,6 @@ #include #include #include "GBT.h" -#include "dataset.h" namespace py = pybind11; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b142478..dc452bb 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2,7 +2,7 @@ add_executable( unit_tests main.cpp test_split_info.cpp - test_metrics.cpp + test_metric_rmse.cpp test_dataset.cpp test_treenode.cpp test_tree.cpp diff --git a/test/test_metric_logloss.cpp b/test/test_metric_logloss.cpp new file mode 100644 index 0000000..e4a1716 --- /dev/null +++ b/test/test_metric_logloss.cpp @@ -0,0 +1,80 @@ +#include +#include +#include "gtest/gtest.h" + +using namespace microgbt; + +TEST(LogLoss, LogLossLogit) +{ + LogLoss logloss; + ASSERT_NEAR(logloss.logit(1.0), 1 / ( 1 + exp(-1.0)), 1.0e-11); +} + +TEST(LogLoss, LogLossMiddleValue) +{ + LogLoss logloss; + ASSERT_NEAR(logloss.logit(0.0), 0.5, 1.0e-11); +} + +TEST(LogLoss, LogLossClipUpper) +{ + LogLoss logloss; + ASSERT_NEAR(logloss.clip(1.0), 1, 1.0e-7); +} + +TEST(LogLoss, LogLossClipUpperOverflow) +{ + LogLoss logloss; + ASSERT_NEAR(logloss.clip(100.0), 1, 1.0e-7); +} + +TEST(LogLoss, LogLossClipLower) +{ + LogLoss logloss; + ASSERT_NEAR(logloss.clip(0.0), 0, 1.0e-7); +} + +TEST(LogLoss, LogLossClipLowerUnderFlow) +{ + LogLoss logloss; + ASSERT_NEAR(logloss.clip(-10000.0), 0, 1.0e-7); +} + +TEST(LogLoss, LogLossGradient) +{ + LogLoss logloss; + Vector preds = Vector(10); + Vector targets = Vector(10); + + std::fill(preds.begin(), preds.end(), 100.0); + std::fill(targets.begin(), targets.end(), 99.0); + + Vector grads = logloss.gradients(preds, targets); + ASSERT_EQ(grads.size(), preds.size()); + ASSERT_NEAR(grads[0], 100.0 - 99, 1.0e-7); +} + +TEST(LogLoss, LogLossHessian) +{ + LogLoss logloss; + Vector preds = Vector(10); + + std::fill(preds.begin(), preds.end(), 0.5); + + Vector hessian = logloss.hessian(preds); + ASSERT_EQ(hessian.size(), preds.size()); + ASSERT_NEAR(hessian[0], 0.25, 1.0e-7); +} + +TEST(LogLoss, LogLossLossAtMustBeZero) +{ + LogLoss logloss; + Vector preds = Vector(10); + Vector targets = Vector(10); + + std::fill(preds.begin(), preds.end(), 1.0); + std::fill(targets.begin(), targets.end(), 1.0); + + double loss = logloss.lossAt(preds, targets); + ASSERT_NEAR(loss, 0, 1.0e-7); +} \ No newline at end of file diff --git a/test/test_metric_rmse.cpp b/test/test_metric_rmse.cpp new file mode 100644 index 0000000..9af3f97 --- /dev/null +++ b/test/test_metric_rmse.cpp @@ -0,0 +1,125 @@ +#include +#include +#include "gtest/gtest.h" + +using namespace microgbt; + +TEST(microgbt, LogLoss) +{ + LogLoss logloss; + ASSERT_NEAR(logloss.logit(1.0), 1 / ( 1 + exp(-1.0)), 1.0e-11); +} + +TEST(microgbt, LogLossMiddleValue) +{ + LogLoss logloss; + ASSERT_NEAR(logloss.logit(0.0), 0.5, 1.0e-11); +} + +TEST(microgbt, LogLossClipUpper) +{ + LogLoss logloss; + ASSERT_NEAR(logloss.clip(1.0), 1, 1.0e-7); +} + +TEST(microgbt, LogLossClipUpperOverflow) +{ + LogLoss logloss; + ASSERT_NEAR(logloss.clip(100.0), 1, 1.0e-7); +} + +TEST(microgbt, LogLossClipLower) +{ + LogLoss logloss; + ASSERT_NEAR(logloss.clip(0.0), 0, 1.0e-7); +} + +TEST(microgbt, LogLossClipLowerUnderFlow) +{ + LogLoss logloss; + ASSERT_NEAR(logloss.clip(-10000.0), 0, 1.0e-7); +} + +TEST(microgbt, LogLossGradient) +{ + LogLoss logloss; + Vector preds = Vector(10); + Vector targets = Vector(10); + + std::fill(preds.begin(), preds.end(), 100.0); + std::fill(targets.begin(), targets.end(), 99.0); + + Vector grads = logloss.gradients(preds, targets); + ASSERT_EQ(grads.size(), preds.size()); + ASSERT_NEAR(grads[0], 100.0 - 99, 1.0e-7); +} + +TEST(microgbt, LogLossHessian) +{ + LogLoss logloss; + Vector preds = Vector(10); + + std::fill(preds.begin(), preds.end(), 0.5); + + Vector hessian = logloss.hessian(preds); + ASSERT_EQ(hessian.size(), preds.size()); + ASSERT_NEAR(hessian[0], 0.25, 1.0e-7); +} + +TEST(microgbt, LogLossLossAtMustBeZero) +{ + LogLoss logloss; + Vector preds = Vector(10); + Vector targets = Vector(10); + + std::fill(preds.begin(), preds.end(), 1.0); + std::fill(targets.begin(), targets.end(), 1.0); + + double loss = logloss.lossAt(preds, targets); + ASSERT_NEAR(loss, 0, 1.0e-7); +} + +TEST(microgbt, RMSE) +{ + RMSE rmse; + ASSERT_NEAR(rmse.scoreToPrediction(10.1), 10.1, 1.0e-11); +} + +TEST(microgbt, RMSEHessian) +{ + RMSE rmse; + Vector preds = Vector(10); + Vector hessian = rmse.hessian(preds); + ASSERT_EQ(hessian.size(), preds.size()); + + // Hessian is the constant 2 + ASSERT_NEAR(hessian[0], 2.0, 1.0e-11); + ASSERT_NEAR(hessian[9], 2.0, 1.0e-11); +} + +TEST(microgbt, RMSEGradient) +{ + RMSE rmse; + Vector preds = Vector(10); + Vector targets = Vector(10); + + std::fill(preds.begin(), preds.end(), 100.0); + std::fill(targets.begin(), targets.end(), 99.0); + + Vector grads = rmse.gradients(preds, targets); + ASSERT_EQ(grads.size(), preds.size()); + ASSERT_NEAR(grads[0], 2 * (100.0 - 99), 1.0e-7); +} + +TEST(microgbt, RMSELossAtMustBeZero) +{ + RMSE rmse; + Vector preds = Vector(10); + Vector targets = Vector(10); + + std::fill(preds.begin(), preds.end(), 1.0); + std::fill(targets.begin(), targets.end(), 1.0); + + double loss = rmse.lossAt(preds, targets); + ASSERT_NEAR(loss, 0, 1.0e-7); +} diff --git a/test/test_metrics.cpp b/test/test_metrics.cpp deleted file mode 100644 index 29d1bdb..0000000 --- a/test/test_metrics.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#include "gtest/gtest.h" - -TEST(microgbt, LogLoss) -{ - microgbt::LogLoss logloss; - ASSERT_NEAR(logloss.logit(1.0), 1 / ( 1 + exp(-1.0)), 1.0e-11); -} - -TEST(microgbt, LogLossClipUpper) -{ - microgbt::LogLoss logloss; - ASSERT_NEAR(logloss.clip(1.0), 1, 1.0e-7); -} - -TEST(microgbt, LogLossClipLower) -{ - microgbt::LogLoss logloss; - ASSERT_NEAR(logloss.clip(0.0), 0, 1.0e-7); -} - -TEST(microgbt, RMSE) -{ - microgbt::RMSE rmse; - ASSERT_NEAR(rmse.scoreToPrediction(10.1), 10.1, 1.0e-11); -}