Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@
# For the list of contributors see $ROOTSYS/README/CREDITS. #
################################################################################

from .. import pythonization
import cppyy

import json

import cppyy


def get_basescore(model):
"""Get base score from an XGBoost sklearn estimator.
Expand Down Expand Up @@ -60,7 +59,7 @@ def SaveXGBoost(xgb_model, key_name, output_path, num_inputs):
"reg:squarederror": "identity",
}
model_objective = xgb_model.objective
if not model_objective in objective_map:
if model_objective not in objective_map:
raise Exception(
'XGBoost model has unsupported objective "{}". Supported objectives are {}.'.format(
model_objective, objective_map.keys()
Expand All @@ -74,13 +73,13 @@ def SaveXGBoost(xgb_model, key_name, output_path, num_inputs):
# Dump XGB model as json file
xgb_model.get_booster().dump_model(output_path, dump_format="json")

with open(output_path, "r") as json_file:
forest = json.load(json_file)

# Dump XGB model as txt file
xgb_model.get_booster().dump_model(output_path)

features = cppyy.gbl.std.vector["std::string"]([f"f{i}" for i in range(num_inputs)])
if xgb_model.get_booster().feature_names is None:
features = cppyy.gbl.std.vector["std::string"]([f"f{i}" for i in range(num_inputs)])
else:
features = cppyy.gbl.std.vector["std::string"](xgb_model.get_booster().feature_names)
bs = get_basescore(xgb_model)
logistic = objective == "logistic"
bdt = cppyy.gbl.TMVA.Experimental.RBDT.LoadText(
Expand Down
5 changes: 4 additions & 1 deletion tmva/tmva/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,12 @@ endif()

if(dataframe)
# Test uses the xgboost sklearn plugin, so we need to check for sklearn too.
# It also uses pandas to test the case where the training data is passed via
# a pandas DataFrame.
ROOT_FIND_PYTHON_MODULE(xgboost)
ROOT_FIND_PYTHON_MODULE(pandas)
ROOT_FIND_PYTHON_MODULE(sklearn)
if (ROOT_XGBOOST_FOUND AND ROOT_SKLEARN_FOUND)
if (ROOT_XGBOOST_FOUND AND ROOT_SKLEARN_FOUND AND ROOT_PANDAS_FOUND)
ROOT_ADD_PYUNITTEST(rbdt_xgboost rbdt_xgboost.py)
endif()
endif()
Expand Down
24 changes: 15 additions & 9 deletions tmva/tmva/test/rbdt_xgboost.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
# XGBoost has to be imported before ROOT to avoid crashes caused by clashing
# std::regex symbols that are exported by cppyy.
# See also: https://github.com/wlav/cppyy/issues/227
import xgboost

import unittest
import ROOT

import numpy as np
import json
import pandas
import ROOT
import xgboost

np.random.seed(1234)

Expand Down Expand Up @@ -41,9 +38,18 @@ def _test_XGBRegression(label):
"""
Compare response of XGB regressor and TMVA tree inference system.
"""
x, y = create_dataset(1000, 10, 1)
n_samples = 1000
n_features = 10
x, y = create_dataset(n_samples, n_features, 1)
# Unlike in the XGBBinary test, we pass the training features via a pandas
# DataFrame this time. In that case, XGBoost defines custom feature names
# from the column names of the dataframe, so we can test the case where
# the feature names in the .txt dump are not the default "f0", "f1", "f2",
# etc.
df_x = pandas.DataFrame({f"myfeature_{i}": x[:, i] for i in range(n_features)})
assert len(x) == len(df_x)
xgb = xgboost.XGBRegressor(n_estimators=1, max_depth=3)
xgb.fit(x, y)
xgb.fit(df_x, y)
ROOT.TMVA.Experimental.SaveXGBoost(xgb, "myModel", "testXGBRegression{}.root".format(label), num_inputs=10)
bdt = ROOT.TMVA.Experimental.RBDT("myModel", "testXGBRegression{}.root".format(label))

Expand Down
Loading