From 6d9c27be8bf0ff7883ff09e788071cf330f813fd Mon Sep 17 00:00:00 2001 From: Hamid Arian Date: Fri, 19 Jun 2026 19:47:00 -0400 Subject: [PATCH] feat(ensemble): bagging accuracy + cross_val_score (DecisionTree.jl) --- PARITY.md | 19 ++++ src/Ensemble/BaggingAccuracy.jl | 157 ++++++++++++++++++++++++++++++++ src/Ensemble/Ensemble.jl | 19 ++++ src/RiskLabAI.jl | 13 ++- src/Validation/CrossValScore.jl | 74 +++++++++++++++ src/Validation/Validation.jl | 6 +- test/runtests.jl | 46 ++++++++++ 7 files changed, 330 insertions(+), 4 deletions(-) create mode 100644 src/Ensemble/BaggingAccuracy.jl create mode 100644 src/Ensemble/Ensemble.jl create mode 100644 src/Validation/CrossValScore.jl diff --git a/PARITY.md b/PARITY.md index ffd463c..fa116db 100644 --- a/PARITY.md +++ b/PARITY.md @@ -409,4 +409,23 @@ noise). Sample weights are not supported by the backend and are dropped. The `controller`/`factory`/`strategy` scaffolding is replaced by plain functions; clustered MDI/MDA follow in a small follow-up. +## Ensemble & cross-validation scoring — PR (wired) + +Port of the `ensemble` sub-package plus the estimator-driven scoring half of +`backtest.validation`. 
+ +| Concept | Python | Julia | Notes | +|---|---|---|---| +| Theoretical bagging accuracy | `bagging_classifier_accuracy` | `Ensemble.bagging_classifier_accuracy` | **exact** (binomial survival function) | +| Weighted-bagging evaluation | `BaggingClassifierAccuracy` | `Ensemble.fit_bagging` + `bagging_evaluate_schemes` | **behavioural**; uniform / cᵢ / 1−cᵢ² weighting on the DecisionTree.jl backend | +| Bootstrap accuracy | `calculate_bootstrap_accuracy` | `Ensemble.calculate_bootstrap_accuracy` | **behavioural** | +| Cross-validation score | `…validation.backtest_predictions` (scoring) | `Validation.cross_val_score` | **behavioural**; per-fold RF score (`:accuracy`/`:neg_log_loss`) over any cross-validator | + +**Deliberate divergence:** the sklearn `BaggingClassifierAccuracy` class becomes a +functional API; `class_weight="balanced"` and the Matplotlib plotting helper are +dropped. `cross_val_score` is the practical Julia equivalent of sklearn's +`cross_val_score` over the purged/combinatorial validators; the full path-level +`backtest_predictions` (per-CPCV-path OOS prediction assembly) remains a possible +elaboration. + _(further submodules appended as they are wired)_ diff --git a/src/Ensemble/BaggingAccuracy.jl b/src/Ensemble/BaggingAccuracy.jl new file mode 100644 index 0000000..c423a9a --- /dev/null +++ b/src/Ensemble/BaggingAccuracy.jl @@ -0,0 +1,157 @@ +""" +Bagging accuracy — native Julia port mirroring the Python `RiskLabAI.ensemble` +sub-package (López de Prado, AFML Ch. 6): the theoretical accuracy of a +majority-vote bagging classifier, plus an empirical weighted-bagging evaluator. + +Parity notes: + * `bagging_classifier_accuracy` is **deterministic** and matches Python exactly + (binomial survival function; verified in `test/runtests.jl`). 
+ * `bagging_evaluate_schemes` and `calculate_bootstrap_accuracy` are + **behavioural** — they build a bagging ensemble on the `DecisionTree.jl` + backend (not bit-identical to scikit-learn) and are validated structurally. + +Deliberate divergence: the sklearn `BaggingClassifierAccuracy` class becomes a +small functional API; `class_weight="balanced"` and the Matplotlib plotting +helper are dropped (no class weights / plotting in the backend). + +Reference: De Prado, M. (2018), Advances in Financial Machine Learning, Ch. 6. +""" + +using Statistics: std +using Random: AbstractRNG, MersenneTwister, default_rng +using Distributions: Binomial, cdf +using DecisionTree: build_tree, apply_tree + +""" + bagging_classifier_accuracy(N, p) -> Float64 + +Theoretical accuracy of a majority vote of `N` independent classifiers each with +accuracy `p`: `P(X > ⌊N/2⌋)` for `X ~ Binomial(N, p)`. `N` must be odd. +Deterministic. Mirrors Python's `bagging_classifier_accuracy`. +""" +function bagging_classifier_accuracy(N::Integer, p::Real) + isodd(N) || throw(ArgumentError("Number of estimators N must be odd. Got $N.")) + k = (N - 1) ÷ 2 + return 1.0 - cdf(Binomial(N, p), k) +end + +_ensemble_rng(random_state) = + random_state === nothing ? default_rng() : + random_state isa AbstractRNG ? random_state : MersenneTwister(random_state) + +""" + fit_bagging(x, y; n_estimators=1000, max_samples=100, max_features=1, random_state=nothing) + -> (trees, classes) + +Fit a bagging ensemble: `n_estimators` decision trees, each trained on a +bootstrap of `max_samples` rows using `max_features` random features per split. +Returns the trees and the sorted class labels. Behavioural (binary +classification). Mirrors the fit step of Python's `BaggingClassifierAccuracy`. 
+""" +function fit_bagging( + x::AbstractMatrix{<:Real}, + y::AbstractVector; + n_estimators::Integer = 1000, + max_samples::Integer = 100, + max_features::Integer = 1, + random_state = nothing, +) + n = size(x, 1) + classes = sort(unique(y)) + length(classes) == 2 || throw(ArgumentError("only binary classification is supported")) + rng = _ensemble_rng(random_state) + draw = min(max_samples, n) + trees = Any[] + for _ = 1:n_estimators + sample = rand(rng, 1:n, draw) + push!(trees, build_tree(y[sample], x[sample, :], max_features, -1, 1, 2, 0.0; rng = rng)) + end + return trees, classes +end + +_tree_accuracy(tree, x, y) = sum(apply_tree(tree, x) .== y) / length(y) + +# Weighted signed-vote prediction: class_1 → +1, class_0 → −1, sum, threshold 0. +function _bagging_predict(trees, x, weights, classes) + n = size(x, 1) + votes = zeros(Float64, n) + for (j, tree) in enumerate(trees) + predictions = apply_tree(tree, x) + for i = 1:n + votes[i] += weights[j] * (predictions[i] == classes[2] ? 1.0 : -1.0) + end + end + return [v > 0 ? classes[2] : classes[1] for v in votes] +end + +""" + bagging_evaluate_schemes(x_train, y_train, x_test, y_test; kwargs...) -> Dict{String,Float64} + +Fit a bagging ensemble and return its test accuracy under three estimator +weighting schemes: `"uniform"`, `"c_i"` (proportional to each tree's training +accuracy) and `"variance"` (proportional to `1 − cᵢ²`). Behavioural. Mirrors +Python's `BaggingClassifierAccuracy.evaluate_all_schemes`. 
+""" +function bagging_evaluate_schemes( + x_train::AbstractMatrix{<:Real}, + y_train::AbstractVector, + x_test::AbstractMatrix{<:Real}, + y_test::AbstractVector; + n_estimators::Integer = 1000, + max_samples::Integer = 100, + max_features::Integer = 1, + random_state = nothing, +) + trees, classes = fit_bagging( + x_train, y_train; + n_estimators = n_estimators, + max_samples = max_samples, + max_features = max_features, + random_state = random_state, + ) + + c_i = [_tree_accuracy(tree, x_train, y_train) for tree in trees] + n = length(c_i) + uniform = fill(1.0 / n, n) + sum_c = sum(c_i) + weights_c = sum_c == 0 ? uniform : c_i ./ sum_c + variance = 1.0 .- c_i .^ 2 + sum_v = sum(variance) + weights_v = sum_v == 0 ? uniform : variance ./ sum_v + + schemes = Dict("uniform" => uniform, "c_i" => weights_c, "variance" => weights_v) + accuracies = Dict{String,Float64}() + for (name, weights) in schemes + predictions = _bagging_predict(trees, x_test, weights, classes) + accuracies[name] = sum(predictions .== y_test) / length(y_test) + end + return accuracies +end + +""" + calculate_bootstrap_accuracy(trees, classes, x, y; weights=uniform, n_bootstraps=1000, random_state=nothing) + -> (values, mean, std) + +Bootstrap the test set `n_bootstraps` times and return the ensemble accuracy on +each resample, with its mean and (sample) standard deviation. Behavioural. +Mirrors Python's `calculate_bootstrap_accuracy`. 
+""" +function calculate_bootstrap_accuracy( + trees, + classes, + x::AbstractMatrix{<:Real}, + y::AbstractVector; + weights::AbstractVector{<:Real} = fill(1.0 / length(trees), length(trees)), + n_bootstraps::Integer = 1000, + random_state = nothing, +) + rng = _ensemble_rng(random_state) + n = length(y) + values = Float64[] + for _ = 1:n_bootstraps + sample = rand(rng, 1:n, n) + predictions = _bagging_predict(trees, x[sample, :], weights, classes) + push!(values, sum(predictions .== y[sample]) / n) + end + return values, sum(values) / length(values), std(values; corrected = true) +end diff --git a/src/Ensemble/Ensemble.jl b/src/Ensemble/Ensemble.jl new file mode 100644 index 0000000..058e3c4 --- /dev/null +++ b/src/Ensemble/Ensemble.jl @@ -0,0 +1,19 @@ +""" + RiskLabAI.Ensemble + +Ensemble-methods submodule, mirroring the Python `RiskLabAI.ensemble` +sub-package (López de Prado, AFML Ch. 6): the theoretical accuracy of a +majority-vote bagging classifier and an empirical weighted-bagging evaluator +(on the `DecisionTree.jl` backend). +""" +module Ensemble + +include("BaggingAccuracy.jl") + +export + bagging_classifier_accuracy, + fit_bagging, + bagging_evaluate_schemes, + calculate_bootstrap_accuracy + +end # module Ensemble diff --git a/src/RiskLabAI.jl b/src/RiskLabAI.jl index 3c399f3..4489b96 100644 --- a/src/RiskLabAI.jl +++ b/src/RiskLabAI.jl @@ -53,7 +53,11 @@ using .Backtest: sharpe_ratio, bet_timing, calculate_holding_period, include("Validation/Validation.jl") using .Validation: KFoldCV, PurgedKFoldCV, CombinatorialPurgedCV, WalkForwardCV, - cv_split, backtest_paths, get_n_splits + cv_split, backtest_paths, get_n_splits, cross_val_score + +include("Ensemble/Ensemble.jl") +using .Ensemble: bagging_classifier_accuracy, fit_bagging, bagging_evaluate_schemes, + calculate_bootstrap_accuracy # --------------------------------------------------------------------------- # # Top-level exports. 
@@ -104,9 +108,12 @@ export probability_bet_size, average_bet_sizes, strategy_bet_sizing, mp_avg_active_signals, avg_active_signals, discrete_signal, generate_signal, bet_size_sigmoid, target_position, inverse_price, limit_price, compute_sigmoid_width, - # Validation — cross-validators + # Validation — cross-validators & scoring KFoldCV, PurgedKFoldCV, CombinatorialPurgedCV, WalkForwardCV, - cv_split, backtest_paths, get_n_splits, + cv_split, backtest_paths, get_n_splits, cross_val_score, + # Ensemble — bagging accuracy + bagging_classifier_accuracy, fit_bagging, bagging_evaluate_schemes, + calculate_bootstrap_accuracy, # Backtest (legacy) probabilityOfBacktestOverfitting, # BetSize diff --git a/src/Validation/CrossValScore.jl b/src/Validation/CrossValScore.jl new file mode 100644 index 0000000..ec859c8 --- /dev/null +++ b/src/Validation/CrossValScore.jl @@ -0,0 +1,74 @@ +""" +Cross-validation scoring — the estimator-driven companion to the cross-validators +(López de Prado, AFML Ch. 7). `cross_val_score` trains a random forest on each +train/test split produced by any of the cross-validators and returns the per-fold +score. This realises the scoring half of the Python `backtest_predictions` +machinery on the `DecisionTree.jl` backend. + +Behavioural: the random forest is not bit-identical to scikit-learn, so results +are validated structurally (a separable dataset scores well; an unpredictable one +does not). +""" + +using DecisionTree: build_forest, apply_forest, apply_forest_proba +using Statistics: mean + +# Train/test splits for every cross-validator (KFold/WalkForward need the sample +# count; the purged validators carry their own `event_starts`). 
+_cv_splits(cv::KFoldCV, n_samples) = cv_split(cv, n_samples) +_cv_splits(cv::WalkForwardCV, n_samples) = cv_split(cv, n_samples) +_cv_splits(cv::PurgedKFoldCV, _) = cv_split(cv) +_cv_splits(cv::CombinatorialPurgedCV, _) = cv_split(cv) + +function _accuracy(forest, x_test, y_test) + return sum(apply_forest(forest, x_test) .== y_test) / length(y_test) +end + +function _neg_log_loss(forest, x_test, y_test, classes) + proba = apply_forest_proba(forest, x_test, classes) + column_of = Dict(c => i for (i, c) in enumerate(classes)) + total = 0.0 + for n in eachindex(y_test) + p = clamp(proba[n, column_of[y_test[n]]], 1e-15, 1 - 1e-15) + total += log(p) + end + return total / length(y_test) +end + +""" + cross_val_score(cv, x, y; n_trees=100, n_subfeatures=-1, max_depth=-1, + scoring=:accuracy, random_state=0) -> Vector{Float64} + +Per-fold score of a random forest under the cross-validator `cv` (any of +`KFoldCV`, `PurgedKFoldCV`, `CombinatorialPurgedCV`, `WalkForwardCV`). `scoring` +is `:accuracy` or `:neg_log_loss`. Folds with an empty train or test set are skipped. +Behavioural. 
+""" +function cross_val_score( + cv, + x::AbstractMatrix{<:Real}, + y::AbstractVector; + n_trees::Integer = 100, + n_subfeatures::Integer = -1, + max_depth::Integer = -1, + scoring::Symbol = :accuracy, + random_state::Integer = 0, +) + classes = sort(unique(y)) + scores = Float64[] + for (f, (train, test)) in enumerate(_cv_splits(cv, size(x, 1))) + (isempty(train) || isempty(test)) && continue + forest = build_forest( + y[train], x[train, :], n_subfeatures, n_trees, 0.7, max_depth, 1, 2, 0.0; + rng = random_state + f, + ) + if scoring == :accuracy + push!(scores, _accuracy(forest, x[test, :], y[test])) + elseif scoring == :neg_log_loss + push!(scores, _neg_log_loss(forest, x[test, :], y[test], classes)) + else + throw(ArgumentError("scoring must be :accuracy or :neg_log_loss")) + end + end + return scores +end diff --git a/src/Validation/Validation.jl b/src/Validation/Validation.jl index e842b4a..a47c6c5 100644 --- a/src/Validation/Validation.jl +++ b/src/Validation/Validation.jl @@ -18,6 +18,9 @@ module Validation include("CrossValidators.jl") +# Estimator-driven scoring over the cross-validators (DecisionTree.jl backend). +include("CrossValScore.jl") + export KFoldCV, PurgedKFoldCV, @@ -25,6 +28,7 @@ export WalkForwardCV, cv_split, get_n_splits, - backtest_paths + backtest_paths, + cross_val_score end # module Validation diff --git a/test/runtests.jl b/test/runtests.jl index 1d108f4..0894b8e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1126,3 +1126,49 @@ end @test count(startswith("I_"), names) == 4 @test count(startswith("R_"), names) == 3 end + +@testset "Ensemble & CV scoring (DecisionTree.jl)" begin + E = RiskLabAI.Ensemble + V = RiskLabAI.Validation + + # Theoretical bagging accuracy: exact (binomial survival function). 
+ @test E.bagging_classifier_accuracy(11, 0.6) ≈ 0.75349813248 + @test E.bagging_classifier_accuracy(101, 0.55) ≈ 0.843755399638 + @test E.bagging_classifier_accuracy(3, 0.7) ≈ 0.784 + @test E.bagging_classifier_accuracy(7, 0.51) ≈ 0.5218662521 + @test_throws ArgumentError E.bagging_classifier_accuracy(10, 0.6) + + # Separable dataset for the behavioural pieces. + rng = MersenneTwister(7) + n = 200 + y = rand(rng, 0:1, n) + x = hcat(3.0 .* y .+ randn(rng, n), randn(rng, n)) + train = 1:150 + test = 151:200 + + schemes = E.bagging_evaluate_schemes( + x[train, :], y[train], x[test, :], y[test]; + n_estimators = 40, max_samples = 60, random_state = 1, + ) + @test Set(keys(schemes)) == Set(["uniform", "c_i", "variance"]) + @test all(0.0 <= v <= 1.0 for v in values(schemes)) + @test schemes["uniform"] > 0.6 # informative signal is learnable + + trees, classes = E.fit_bagging( + x[train, :], y[train]; n_estimators = 40, max_samples = 60, random_state = 1, + ) + values_boot, mean_boot, std_boot = + E.calculate_bootstrap_accuracy(trees, classes, x[test, :], y[test]; n_bootstraps = 50) + @test length(values_boot) == 50 + @test 0.0 <= mean_boot <= 1.0 + + # cross_val_score over a purged K-Fold and a plain K-Fold. + scores = V.cross_val_score(V.KFoldCV(5), x, y; n_trees = 30, random_state = 1) + @test length(scores) == 5 + @test sum(scores) / length(scores) > 0.7 + + starts = collect(1:n) + purged = V.PurgedKFoldCV(5, starts, starts; embargo = 0.0) + pscores = V.cross_val_score(purged, x, y; n_trees = 30, random_state = 1) + @test length(pscores) == 5 +end