diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 93beb37..7c2e983 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,12 +64,8 @@ jobs: - run: black --check RiskLabAI test lint: - name: ruff (advisory until cleanup lands) + name: ruff runs-on: ubuntu-latest - # Advisory until the dedicated ruff-cleanup pass lands (Dict->dict - # modernization, star-import removal, unused-import pruning); then remove - # continue-on-error to enforce. - continue-on-error: true steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 diff --git a/RiskLabAI/backtest/__init__.py b/RiskLabAI/backtest/__init__.py index fc8958d..8ad4c30 100644 --- a/RiskLabAI/backtest/__init__.py +++ b/RiskLabAI/backtest/__init__.py @@ -9,72 +9,73 @@ """ from . import validation - +from .backtest_overfitting_simulation import ( + backtest_overfitting_simulation_financial_metrics_rank_correlation, + backtest_overfitting_simulation_model_complexity, + format_cpu_info, + get_cpu_info, + measure_all_cv_computational_requirements, + measure_cpcv_parallelization, + measure_cpcv_scalability, + noised_backtest_overfitting_simulation, + # This file contains many functions, exporting the main ones + overall_backtest_overfitting_simulation, + overall_novel_methods_backtest_overfitting_simulation, + temporal_backtest_overfitting_simulation, + time_temporal_backtest_overfitting_simulation, + varying_embargo_backtest_overfitting_simulation, +) from .backtest_statistics import ( bet_timing, - calculate_holding_period, - calculate_hhi_concentration, calculate_hhi, + calculate_hhi_concentration, + calculate_holding_period, compute_drawdowns_time_under_water, +) +from .backtest_statistics import ( sharpe_ratio as pbo_sharpe_ratio, ) from .backtest_synthetic_data import synthetic_back_testing from .bet_sizing import ( - probability_bet_size, + Signal, + TPos, average_bet_sizes, - strategy_bet_sizing, avgActiveSignals, - mpAvgActiveSignals, - discreteSignal, - Signal, betSize, - TPos, + discreteSignal, + getW, inversePrice, limitPrice, - getW, + mpAvgActiveSignals, + probability_bet_size, + strategy_bet_sizing, +) +from .probabilistic_sharpe_ratio import ( + benchmark_sharpe_ratio, + probabilistic_sharpe_ratio, +) +from .probability_of_backtest_overfitting import ( + performance_evaluation, + probability_of_backtest_overfitting, ) from .strategy_risk import ( - sharpe_ratio_trials, - target_sharpe_ratio_symbolic, - implied_precision, bin_frequency, binomial_sharpe_ratio, - mix_gaussians, - failure_probability, calculate_strategy_risk, + failure_probability, + implied_precision, + mix_gaussians, + sharpe_ratio_trials, + target_sharpe_ratio_symbolic, ) from .test_set_overfitting import ( + estimated_sharpe_ratio_z_statistics, expected_max_sharpe_ratio, generate_max_sharpe_ratios, mean_std_error, - estimated_sharpe_ratio_z_statistics, strategy_type1_error_probability, - theta_for_type2_error, strategy_type2_error_probability, -) -from .probability_of_backtest_overfitting import ( - performance_evaluation, - probability_of_backtest_overfitting, -) -from .probabilistic_sharpe_ratio import ( - probabilistic_sharpe_ratio, - benchmark_sharpe_ratio, -) -from .backtest_overfitting_simulation import ( - # This file contains many functions, exporting the main ones - overall_backtest_overfitting_simulation, - temporal_backtest_overfitting_simulation, - time_temporal_backtest_overfitting_simulation, - varying_embargo_backtest_overfitting_simulation, - backtest_overfitting_simulation_financial_metrics_rank_correlation, - backtest_overfitting_simulation_model_complexity, - noised_backtest_overfitting_simulation, - overall_novel_methods_backtest_overfitting_simulation, - measure_all_cv_computational_requirements, - measure_cpcv_parallelization, - measure_cpcv_scalability, - get_cpu_info, - format_cpu_info, + theta_for_type2_error, ) # Define what `from RiskLabAI.backtest import *` will import diff --git a/RiskLabAI/backtest/backtest_overfitting_simulation.py b/RiskLabAI/backtest/backtest_overfitting_simulation.py index 2865c2d..732eaf9 100644 --- a/RiskLabAI/backtest/backtest_overfitting_simulation.py +++ b/RiskLabAI/backtest/backtest_overfitting_simulation.py @@ -14,38 +14,39 @@ to reduce this module's responsibilities. """ -import platform +import itertools +import subprocess import time +import warnings +from math import ceil +from typing import Any, Optional, Union + import numpy as np import pandas as pd -from math import ceil +import ta from scipy import stats as ss +from scipy.stats import kendalltau from sklearn.linear_model import LogisticRegression from tqdm import tqdm -import ta -import itertools -import warnings -from typing import Dict, Union, Tuple, List, Any, Optional -from scipy.stats import kendalltau -import subprocess +from RiskLabAI.backtest.validation import CrossValidatorController from RiskLabAI.data.differentiation import fractionally_differentiated_log_price from RiskLabAI.data.labeling import ( - daily_volatility_with_log_returns, cusum_filter_events_dynamic_threshold, - vertical_barrier, + daily_volatility_with_log_returns, meta_events, meta_labeling, + vertical_barrier, ) from RiskLabAI.data.weights import sample_weight_absolute_return_meta_labeling from RiskLabAI.utils import determine_strategy_side -from RiskLabAI.backtest.validation import CrossValidatorController -from .probability_of_backtest_overfitting import probability_of_backtest_overfitting + +from .bet_sizing import strategy_bet_sizing from .probabilistic_sharpe_ratio import ( - probabilistic_sharpe_ratio, benchmark_sharpe_ratio, + probabilistic_sharpe_ratio, ) -from .bet_sizing import strategy_bet_sizing +from .probability_of_backtest_overfitting import probability_of_backtest_overfitting def financial_features_backtest_overfitting_simulation( @@ -180,13 +181,13 @@ def financial_features_backtest_overfitting_simulation( def backtest_overfitting_simulation_results( prices: pd.Series, - strategy_parameters: Dict[str, Union[List[int], List[float], List[bool]]], - models: Dict[str, Dict[str, Any]], - cross_validators: Dict[str, Any], + strategy_parameters: dict[str, Union[list[int], list[float], list[bool]]], + models: dict[str, dict[str, Any]], + cross_validators: dict[str, Any], noise_scale: float = 0.0, random_state: int = None, n_jobs: int = 1, -) -> Dict[str, List[Dict[str, Any]]]: +) -> dict[str, list[dict[str, Any]]]: """ Conducts a simulation to evaluate the performance of trading strategies and models. @@ -332,13 +333,13 @@ def backtest_overfitting_simulation_results( def overall_backtest_overfitting_simulation( prices: pd.Series, - strategy_parameters: Dict[str, Union[List[int], List[float], List[bool]]], - models: Dict[str, Dict[str, Any]], + strategy_parameters: dict[str, Union[list[int], list[float], list[bool]]], + models: dict[str, dict[str, Any]], step_risk_free_rate: float, noise_scale: float = 0.0, random_state: int = None, n_jobs: int = 1, -) -> Tuple[Dict[str, float], Dict[str, float]]: +) -> tuple[dict[str, float], dict[str, float]]: """ Conducts an overall backtest overfitting simulation to calculate the metrics. @@ -410,12 +411,12 @@ def overall_backtest_overfitting_simulation( def temporal_backtest_overfitting_simulation( prices: pd.Series, - strategy_parameters: Dict[str, Union[List[int], List[float], List[bool]]], - models: Dict[str, Dict[str, Any]], + strategy_parameters: dict[str, Union[list[int], list[float], list[bool]]], + models: dict[str, dict[str, Any]], step_risk_free_rate: float, overfitting_partitions_length: int, n_jobs: int = 1, -) -> Tuple[Dict[str, List[float]], Dict[str, List[float]]]: +) -> tuple[dict[str, list[float]], dict[str, list[float]]]: """ Conducts a temporal backtest overfitting simulation to calculate the metrics in chunks. @@ -484,12 +485,12 @@ def temporal_backtest_overfitting_simulation( def time_temporal_backtest_overfitting_simulation( prices: pd.Series, - strategy_parameters: Dict[str, Union[List[int], List[float], List[bool]]], - models: Dict[str, Dict[str, Any]], + strategy_parameters: dict[str, Union[list[int], list[float], list[bool]]], + models: dict[str, dict[str, Any]], step_risk_free_rate: float, overfitting_partitions_duration: str = "A", # Annual grouping by default n_jobs: int = 1, -) -> Tuple[Dict[str, pd.Series], Dict[str, pd.Series]]: +) -> tuple[dict[str, pd.Series], dict[str, pd.Series]]: """ Conducts a time-temporal backtest overfitting simulation to calculate the metrics in time-indexed chunks. @@ -558,12 +559,12 @@ def time_temporal_backtest_overfitting_simulation( def varying_embargo_backtest_overfitting_simulation( prices: pd.Series, - strategy_parameters: Dict[str, Union[List[int], List[float], List[bool]]], - models: Dict[str, Dict[str, Any]], + strategy_parameters: dict[str, Union[list[int], list[float], list[bool]]], + models: dict[str, dict[str, Any]], step_risk_free_rate: float, - embargo_values: List[float], + embargo_values: list[float], n_jobs: int = 1, -) -> Tuple[pd.DataFrame, pd.DataFrame]: +) -> tuple[pd.DataFrame, pd.DataFrame]: """ Conducts a backtest overfitting simulation with varying embargo values to calculate the metrics. @@ -792,8 +793,8 @@ def expected_shortfall(returns, step_risk_free_rate, confidence_level=0.05): def backtest_overfitting_simulation_financial_metrics_rank_correlation( prices: pd.Series, - strategy_parameters: Dict[str, Union[List[int], List[float], List[bool]]], - models: Dict[str, Dict[str, Any]], + strategy_parameters: dict[str, Union[list[int], list[float], list[bool]]], + models: dict[str, dict[str, Any]], step_risk_free_rate: float, n_jobs: int = 1, ) -> pd.DataFrame: @@ -851,13 +852,13 @@ def backtest_overfitting_simulation_financial_metrics_rank_correlation( # Calculate the metrics for each half first_half_metrics = { metric_name: first_half.apply( - lambda x: metric_func(x, step_risk_free_rate), axis=0 + lambda x, fn=metric_func: fn(x, step_risk_free_rate), axis=0 ) for metric_name, metric_func in metrics.items() } second_half_metrics = { metric_name: second_half.apply( - lambda x: metric_func(x, step_risk_free_rate), axis=0 + lambda x, fn=metric_func: fn(x, step_risk_free_rate), axis=0 ) for metric_name, metric_func in metrics.items() } @@ -887,11 +888,11 @@ def backtest_overfitting_simulation_financial_metrics_rank_correlation( def backtest_overfitting_simulation_model_complexity( prices: pd.Series, - strategy_parameters: Dict[str, Union[List[int], List[float], List[bool]]], - models: Dict[str, Any], + strategy_parameters: dict[str, Union[list[int], list[float], list[bool]]], + models: dict[str, Any], step_risk_free_rate: float, n_jobs: int = 1, -) -> Tuple[pd.DataFrame, pd.DataFrame]: +) -> tuple[pd.DataFrame, pd.DataFrame]: """ Conducts a backtest overfitting simulation to compare the PBO and DSR values of each CV method for simple and complex models. @@ -1040,13 +1041,13 @@ def backtest_overfitting_simulation_model_complexity( def noised_backtest_overfitting_simulation( prices: pd.Series, - strategy_parameters: Dict[str, Union[List[int], List[float], List[bool]]], - models: Dict[str, Dict[str, Any]], + strategy_parameters: dict[str, Union[list[int], list[float], list[bool]]], + models: dict[str, dict[str, Any]], step_risk_free_rate: float, - noise_scales: List[float], + noise_scales: list[float], random_state: int = None, n_jobs: int = 1, -) -> Tuple[pd.DataFrame, pd.DataFrame]: +) -> tuple[pd.DataFrame, pd.DataFrame]: """ Conducts a noised backtest overfitting simulation to compare the new PBO/DSR values for different noise scales. @@ -1094,13 +1095,13 @@ def noised_backtest_overfitting_simulation( def overall_novel_methods_backtest_overfitting_simulation( prices: pd.Series, - strategy_parameters: Dict[str, Union[List[int], List[float], List[bool]]], - models: Dict[str, Dict[str, Any]], + strategy_parameters: dict[str, Union[list[int], list[float], list[bool]]], + models: dict[str, dict[str, Any]], step_risk_free_rate: float, noise_scale: float = 0.0, random_state: int = None, n_jobs: int = 1, -) -> Tuple[Dict[str, float], Dict[str, float]]: +) -> tuple[dict[str, float], dict[str, float]]: """ Conducts an overall backtest overfitting simulation to calculate the metrics for the novel CPCV methods. @@ -1231,7 +1232,7 @@ def format_cpu_info(cpu_info): # Function to generate random data, target, weights, and times def generate_random_data( n_samples: int, n_features: int -) -> Tuple[pd.DataFrame, pd.Series, np.ndarray, pd.Series]: +) -> tuple[pd.DataFrame, pd.Series, np.ndarray, pd.Series]: date_range = pd.date_range(start="1980-01-01", periods=n_samples, freq="1h") data = pd.DataFrame( np.random.randn(n_samples, n_features), @@ -1248,7 +1249,7 @@ def generate_random_data( # Function to measure computational requirements def measure_computational_requirements( cross_validator, model, data, target, weights, n_jobs: int = 1 -) -> Dict[str, Any]: +) -> dict[str, Any]: from memory_profiler import memory_usage # optional dependency: RiskLabAI[dev] start_time = time.time() @@ -1266,7 +1267,7 @@ def measure_computational_requirements( # Main function to measure computational requirements for all CV methods def measure_all_cv_computational_requirements( - cross_validators: Dict[str, Any], + cross_validators: dict[str, Any], n_samples: int = 40 * 252, n_features: int = 22, n_jobs: int = 1, @@ -1312,11 +1313,14 @@ def measure_all_cv_computational_requirements( return results_df +_DEFAULT_N_JOBS_LIST = list(range(1, 9)) + + def measure_cpcv_parallelization( n_samples: int = 40 * 252, n_features: int = 22, n_repeats: int = 30, - n_jobs_list: List[int] = range(1, 9), + n_jobs_list: list[int] = _DEFAULT_N_JOBS_LIST, ) -> pd.DataFrame: # Generate random data, target, weights, and times data, target, weights, times = generate_random_data(n_samples, n_features) @@ -1359,8 +1363,8 @@ def measure_cpcv_parallelization( def measure_cpcv_scalability( - sample_sizes: List[int], - feature_sizes: List[int], + sample_sizes: list[int], + feature_sizes: list[int], n_repeats: int = 1, n_jobs: int = 1, ) -> pd.DataFrame: diff --git a/RiskLabAI/backtest/backtest_statistics.py b/RiskLabAI/backtest/backtest_statistics.py index 6276d92..f75407d 100644 --- a/RiskLabAI/backtest/backtest_statistics.py +++ b/RiskLabAI/backtest/backtest_statistics.py @@ -9,7 +9,6 @@ wraps the Numba `sharpe_ratio` and scales it by sqrt(freq). """ -from typing import Tuple, Optional import numpy as np import pandas as pd from numba import jit @@ -87,7 +86,7 @@ def bet_timing(target_positions: pd.Series) -> pd.Index: def calculate_holding_period( target_positions: pd.Series, -) -> Tuple[pd.DataFrame, float]: +) -> tuple[pd.DataFrame, float]: """ Derive the average holding period in days. @@ -192,7 +191,7 @@ def calculate_hhi(bet_returns: pd.Series) -> float: return hhi_normalized -def calculate_hhi_concentration(returns: pd.Series) -> Tuple[float, float, float]: +def calculate_hhi_concentration(returns: pd.Series) -> tuple[float, float, float]: """ Calculate HHI concentration for positive, negative, and monthly returns. @@ -220,7 +219,7 @@ def calculate_hhi_concentration(returns: pd.Series) -> Tuple[float, float, float def compute_drawdowns_time_under_water( pnl_series: pd.Series, dollars: bool = False -) -> Tuple[pd.Series, pd.Series]: +) -> tuple[pd.Series, pd.Series]: """ Compute series of drawdowns and the time under water. diff --git a/RiskLabAI/backtest/backtest_synthetic_data.py b/RiskLabAI/backtest/backtest_synthetic_data.py index 455e51c..9b258fc 100644 --- a/RiskLabAI/backtest/backtest_synthetic_data.py +++ b/RiskLabAI/backtest/backtest_synthetic_data.py @@ -5,10 +5,12 @@ from itertools import product from random import gauss -from typing import List, Tuple import numpy as np +_DEFAULT_PROFIT_TAKING_RANGE = np.linspace(0.5, 10, 20) +_DEFAULT_STOP_LOSS_RANGE = np.linspace(0.5, 10, 20) + def synthetic_back_testing( forecast: float, @@ -16,10 +18,10 @@ def synthetic_back_testing( sigma: float, n_iteration: int = 100000, maximum_holding_period: int = 100, - profit_taking_range: np.ndarray = np.linspace(0.5, 10, 20), - stop_loss_range: np.ndarray = np.linspace(0.5, 10, 20), + profit_taking_range: np.ndarray = _DEFAULT_PROFIT_TAKING_RANGE, + stop_loss_range: np.ndarray = _DEFAULT_STOP_LOSS_RANGE, seed: int = 0, -) -> List[Tuple[float, float, float, float, float]]: +) -> list[tuple[float, float, float, float, float]]: r""" Perform backtesting on synthetic price data from an OU process. diff --git a/RiskLabAI/backtest/bet_sizing.py b/RiskLabAI/backtest/bet_sizing.py index 9672882..329c380 100644 --- a/RiskLabAI/backtest/bet_sizing.py +++ b/RiskLabAI/backtest/bet_sizing.py @@ -5,7 +5,6 @@ Includes implementations from de Prado (2018). """ -from typing import Optional import numpy as np import pandas as pd from numba import jit diff --git a/RiskLabAI/backtest/probabilistic_sharpe_ratio.py b/RiskLabAI/backtest/probabilistic_sharpe_ratio.py index b7a0bed..6f644a1 100644 --- a/RiskLabAI/backtest/probabilistic_sharpe_ratio.py +++ b/RiskLabAI/backtest/probabilistic_sharpe_ratio.py @@ -9,7 +9,6 @@ easily plot the PSR curve. """ -from typing import List import numpy as np from scipy import stats as ss @@ -97,7 +96,7 @@ def probabilistic_sharpe_ratio( return ss.norm.cdf(test_statistic) -def benchmark_sharpe_ratio(sharpe_ratio_estimates: List[float]) -> float: +def benchmark_sharpe_ratio(sharpe_ratio_estimates: list[float]) -> float: r""" Calculate the expected maximum Sharpe Ratio (Benchmark SR). diff --git a/RiskLabAI/backtest/probability_of_backtest_overfitting.py b/RiskLabAI/backtest/probability_of_backtest_overfitting.py index 520e8e3..9ccb6d4 100644 --- a/RiskLabAI/backtest/probability_of_backtest_overfitting.py +++ b/RiskLabAI/backtest/probability_of_backtest_overfitting.py @@ -11,10 +11,10 @@ original notebook). """ -from typing import Tuple, Callable, List, Optional from itertools import combinations +from typing import Callable, Optional + import numpy as np -from numba import jit from joblib import Parallel, delayed from .backtest_statistics import sharpe_ratio @@ -26,7 +26,7 @@ def performance_evaluation( n_strategies: int, metric: Callable[[np.ndarray, float], float], risk_free_return: float, -) -> Tuple[bool, float]: +) -> tuple[bool, float]: """ Evaluate strategy performance on train/test splits. @@ -83,7 +83,7 @@ def probability_of_backtest_overfitting( risk_free_return: float = 0.0, metric: Optional[Callable[[np.ndarray, float], float]] = None, n_jobs: int = 1, -) -> Tuple[float, np.ndarray]: +) -> tuple[float, np.ndarray]: r""" Compute the Probability of Backtest Overfitting (PBO). diff --git a/RiskLabAI/backtest/strategy_risk.py b/RiskLabAI/backtest/strategy_risk.py index 5527c83..54c1434 100644 --- a/RiskLabAI/backtest/strategy_risk.py +++ b/RiskLabAI/backtest/strategy_risk.py @@ -4,16 +4,16 @@ """ import logging -from typing import Tuple + import numpy as np import scipy.stats as ss import sympy -from sympy import symbols, factor +from sympy import factor, symbols logger = logging.getLogger(__name__) -def sharpe_ratio_trials(p: float, n_run: int) -> Tuple[float, float, float]: +def sharpe_ratio_trials(p: float, n_run: int) -> tuple[float, float, float]: r""" Simulate binomial trials to estimate mean, std dev, and Sharpe ratio. diff --git a/RiskLabAI/backtest/test_set_overfitting.py b/RiskLabAI/backtest/test_set_overfitting.py index 9bfcb3e..3c8afaf 100644 --- a/RiskLabAI/backtest/test_set_overfitting.py +++ b/RiskLabAI/backtest/test_set_overfitting.py @@ -3,7 +3,6 @@ including the expected maximum Sharpe Ratio and Type I/II errors. """ -from typing import List import numpy as np import pandas as pd import scipy.stats as ss @@ -59,7 +58,7 @@ def expected_max_sharpe_ratio( def generate_max_sharpe_ratios( n_sims: int, - n_trials_list: List[int], + n_trials_list: list[int], std_sharpe_ratio: float, mean_sharpe_ratio: float, ) -> pd.DataFrame: @@ -109,7 +108,7 @@ def generate_max_sharpe_ratios( def mean_std_error( n_sims0: int, n_sims1: int, - n_trials: List[int], + n_trials: list[int], std_sharpe_ratio: float = 1.0, mean_sharpe_ratio: float = 0.0, ) -> pd.DataFrame: diff --git a/RiskLabAI/backtest/validation/__init__.py b/RiskLabAI/backtest/validation/__init__.py index c4d28c1..1b6663e 100644 --- a/RiskLabAI/backtest/validation/__init__.py +++ b/RiskLabAI/backtest/validation/__init__.py @@ -11,15 +11,15 @@ a CrossValidatorFactory for easy instantiation. """ +from .adaptive_combinatorial_purged import AdaptiveCombinatorialPurged +from .bagged_combinatorial_purged import BaggedCombinatorialPurged +from .combinatorial_purged import CombinatorialPurged +from .cross_validator_controller import CrossValidatorController +from .cross_validator_factory import CrossValidatorFactory from .cross_validator_interface import CrossValidator from .kfold import KFold from .purged_kfold import PurgedKFold from .walk_forward import WalkForward -from .combinatorial_purged import CombinatorialPurged -from .bagged_combinatorial_purged import BaggedCombinatorialPurged -from .adaptive_combinatorial_purged import AdaptiveCombinatorialPurged -from .cross_validator_factory import CrossValidatorFactory -from .cross_validator_controller import CrossValidatorController __all__ = [ # Interface diff --git a/RiskLabAI/backtest/validation/adaptive_combinatorial_purged.py b/RiskLabAI/backtest/validation/adaptive_combinatorial_purged.py index fa6c287..7e81bc0 100644 --- a/RiskLabAI/backtest/validation/adaptive_combinatorial_purged.py +++ b/RiskLabAI/backtest/validation/adaptive_combinatorial_purged.py @@ -5,9 +5,9 @@ import warnings from collections import ChainMap -from typing import Any, Dict, Generator, List, Optional, Tuple, Union - +from collections.abc import Generator from itertools import combinations +from typing import Any, Optional, Union import numpy as np import pandas as pd @@ -62,10 +62,10 @@ def __init__( self, n_splits: int, n_test_groups: int, - times: Union[pd.Series, Dict[str, pd.Series]], + times: Union[pd.Series, dict[str, pd.Series]], embargo: float = 0, n_subsplits: int = 3, - external_feature: Union[pd.Series, Dict[str, pd.Series]] = None, + external_feature: Union[pd.Series, dict[str, pd.Series]] = None, lower_quantile: float = 0.25, upper_quantile: float = 0.75, subtract_border_adjustments: bool = True, @@ -113,7 +113,7 @@ def _validate_input( def _single_adaptive_split_segments( self, indices: np.ndarray, single_external_feature: pd.Series - ) -> List[np.ndarray]: + ) -> list[np.ndarray]: """ Adaptively split data indices based on the external feature. @@ -185,7 +185,7 @@ def _get_split_segments( self, single_data: pd.DataFrame, single_external_feature: Optional[pd.Series] = None, - ) -> List[np.ndarray]: + ) -> list[np.ndarray]: """Override to use adaptive splitting.""" if single_external_feature is None: raise ValueError("_get_split_segments requires external_feature") @@ -198,7 +198,7 @@ def _single_split( single_times: pd.Series, single_data: pd.DataFrame, single_external_feature: Optional[pd.Series] = None, - ) -> Generator[Tuple[np.ndarray, np.ndarray], None, None]: + ) -> Generator[tuple[np.ndarray, np.ndarray], None, None]: """ Split a single dataset into C(n, k) adaptively purged indices. """ @@ -222,12 +222,12 @@ def _single_split( def split( self, - data: Union[pd.DataFrame, Dict[str, pd.DataFrame]], - labels: Optional[Union[pd.Series, Dict[str, pd.Series]]] = None, + data: Union[pd.DataFrame, dict[str, pd.DataFrame]], + labels: Optional[Union[pd.Series, dict[str, pd.Series]]] = None, groups: Optional[np.ndarray] = None, ) -> Union[ - Generator[Tuple[np.ndarray, np.ndarray], None, None], - Generator[Tuple[str, Tuple[np.ndarray, np.ndarray]], None, None], + Generator[tuple[np.ndarray, np.ndarray], None, None], + Generator[tuple[str, tuple[np.ndarray, np.ndarray]], None, None], ]: """ Split data (or dictionary of data) into adaptively purged indices. @@ -250,8 +250,8 @@ def split( def _combinations_and_path_locations_and_split_segments( self, data: pd.DataFrame, single_external_feature: Optional[pd.Series] = None - ) -> Tuple[ - List[Tuple[int, ...]], Dict[int, List[Tuple[int, int]]], List[np.ndarray] + ) -> tuple[ + list[tuple[int, ...]], dict[int, list[tuple[int, int]]], list[np.ndarray] ]: """Helper to compute all components, now including adaptive splits.""" if single_external_feature is None: @@ -268,7 +268,7 @@ def _single_backtest_paths( single_times: pd.Series, single_data: pd.DataFrame, single_external_feature: Optional[pd.Series] = None, - ) -> Dict[str, List[Dict[str, np.ndarray]]]: + ) -> dict[str, list[dict[str, np.ndarray]]]: """ Generate all adaptive combinatorial backtest paths. """ @@ -309,10 +309,10 @@ def _single_backtest_paths( def backtest_paths( self, - data: Union[pd.DataFrame, Dict[str, pd.DataFrame]], + data: Union[pd.DataFrame, dict[str, pd.DataFrame]], ) -> Union[ - Dict[str, List[Dict[str, np.ndarray]]], - Dict[str, Dict[str, List[Dict[str, np.ndarray]]]], + dict[str, list[dict[str, np.ndarray]]], + dict[str, dict[str, list[dict[str, np.ndarray]]]], ]: """ Generate adaptive backtest paths for data or a dictionary of data. @@ -341,7 +341,7 @@ def _single_backtest_predictions( single_external_feature: Optional[pd.Series] = None, # New arg predict_probability: bool = False, n_jobs: int = 1, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """ Obtain backtest predictions for all A-CPCV paths. """ @@ -388,8 +388,8 @@ def train_single_estimator( ) def get_path_data( - path_num: int, locs: List[Tuple[int, int]] - ) -> Dict[str, np.ndarray]: + path_num: int, locs: list[tuple[int, int]] + ) -> dict[str, np.ndarray]: """Assemble predictions for one path.""" path_predictions = [] for group_idx, split_idx in locs: @@ -417,13 +417,13 @@ def get_path_data( def backtest_predictions( self, - estimator: Union[Estimator, Dict[str, Estimator]], - data: Union[pd.DataFrame, Dict[str, pd.DataFrame]], - labels: Union[pd.Series, Dict[str, pd.Series]], - sample_weights: Optional[Union[np.ndarray, Dict[str, np.ndarray]]] = None, + estimator: Union[Estimator, dict[str, Estimator]], + data: Union[pd.DataFrame, dict[str, pd.DataFrame]], + labels: Union[pd.Series, dict[str, pd.Series]], + sample_weights: Optional[Union[np.ndarray, dict[str, np.ndarray]]] = None, predict_probability: bool = False, n_jobs: int = 1, - ) -> Union[Dict[str, np.ndarray], Dict[str, Dict[str, np.ndarray]]]: + ) -> Union[dict[str, np.ndarray], dict[str, dict[str, np.ndarray]]]: """ Generate adaptive backtest predictions. """ diff --git a/RiskLabAI/backtest/validation/bagged_combinatorial_purged.py b/RiskLabAI/backtest/validation/bagged_combinatorial_purged.py index c6b60ce..27d8d45 100644 --- a/RiskLabAI/backtest/validation/bagged_combinatorial_purged.py +++ b/RiskLabAI/backtest/validation/bagged_combinatorial_purged.py @@ -4,7 +4,7 @@ import warnings from collections import ChainMap -from typing import Any, Dict, Optional, Union +from typing import Any, Optional, Union import numpy as np import pandas as pd @@ -13,8 +13,6 @@ from sklearn.ensemble import BaggingClassifier, BaggingRegressor from sklearn.exceptions import ConvergenceWarning -from typing import List, Tuple - from .combinatorial_purged import CombinatorialPurged # For type hinting sklearn-like estimators @@ -60,7 +58,7 @@ def __init__( self, n_splits: int, n_test_groups: int, - times: Union[pd.Series, Dict[str, pd.Series]], + times: Union[pd.Series, dict[str, pd.Series]], embargo: float = 0, classifier: bool = True, n_estimators: int = 10, @@ -91,7 +89,7 @@ def _single_backtest_predictions( single_weights: Optional[np.ndarray] = None, predict_probability: bool = False, n_jobs: int = 1, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """ Obtain backtest predictions for all B-CPCV paths. @@ -222,8 +220,8 @@ def train_single_bagging_estimator( # 2. Assemble predictions (this is fast, can be serial or parallel) def get_path_data( - path_num: int, locs: List[Tuple[int, int]] - ) -> Dict[str, np.ndarray]: + path_num: int, locs: list[tuple[int, int]] + ) -> dict[str, np.ndarray]: """Assemble predictions for one path.""" path_predictions = [] for group_idx, split_idx in locs: diff --git a/RiskLabAI/backtest/validation/combinatorial_purged.py b/RiskLabAI/backtest/validation/combinatorial_purged.py index 3efceff..e6a0a25 100644 --- a/RiskLabAI/backtest/validation/combinatorial_purged.py +++ b/RiskLabAI/backtest/validation/combinatorial_purged.py @@ -5,10 +5,11 @@ import warnings from collections import ChainMap, defaultdict +from collections.abc import Generator from copy import deepcopy from itertools import combinations from math import comb -from typing import Any, Dict, Generator, List, Optional, Tuple, Union +from typing import Any, Optional, Union import numpy as np import pandas as pd @@ -49,8 +50,8 @@ class CombinatorialPurged(PurgedKFold): @staticmethod def _path_locations( - n_splits: int, combinations_list: List[Tuple[int, ...]] - ) -> Dict[int, List[Tuple[int, int]]]: + n_splits: int, combinations_list: list[tuple[int, ...]] + ) -> dict[int, list[tuple[int, int]]]: """ Generate a labeled path matrix to map splits to backtest paths. @@ -101,7 +102,7 @@ def label_path_row(row: np.ndarray) -> np.ndarray: @staticmethod def _combinatorial_splits( - combinations_list: List[Tuple[int, ...]], split_segments: List[np.ndarray] + combinations_list: list[tuple[int, ...]], split_segments: list[np.ndarray] ) -> Generator[np.ndarray, None, None]: """ Generate combinatorial test sets. @@ -129,7 +130,7 @@ def __init__( self, n_splits: int, n_test_groups: int, - times: Union[pd.Series, Dict[str, pd.Series]], + times: Union[pd.Series, dict[str, pd.Series]], embargo: float = 0, ) -> None: """ @@ -153,8 +154,8 @@ def __init__( def get_n_splits( self, - data: Optional[Union[pd.DataFrame, Dict[str, pd.DataFrame]]] = None, - labels: Optional[Union[pd.Series, Dict[str, pd.Series]]] = None, + data: Optional[Union[pd.DataFrame, dict[str, pd.DataFrame]]] = None, + labels: Optional[Union[pd.Series, dict[str, pd.Series]]] = None, groups: Optional[np.ndarray] = None, ) -> int: """ @@ -178,7 +179,7 @@ def get_n_splits( """ return comb(self.n_splits, self.n_test_groups) - def _get_split_segments(self, single_data: pd.DataFrame) -> List[np.ndarray]: + def _get_split_segments(self, single_data: pd.DataFrame) -> list[np.ndarray]: """Helper to get the base K-Fold segments.""" indices = np.arange(single_data.shape[0]) return np.array_split(indices, self.n_splits) @@ -187,7 +188,7 @@ def _single_split( self, single_times: pd.Series, single_data: pd.DataFrame, - ) -> Generator[Tuple[np.ndarray, np.ndarray], None, None]: + ) -> Generator[tuple[np.ndarray, np.ndarray], None, None]: """ Split a single dataset into C(n, k) purged train-test indices. @@ -222,8 +223,8 @@ def _single_split( def _combinations_and_path_locations_and_split_segments( self, data: pd.DataFrame - ) -> Tuple[ - List[Tuple[int, ...]], Dict[int, List[Tuple[int, int]]], List[np.ndarray] + ) -> tuple[ + list[tuple[int, ...]], dict[int, list[tuple[int, int]]], list[np.ndarray] ]: """ Helper to compute all necessary components for CPCV. @@ -250,7 +251,7 @@ def _single_backtest_paths( self, single_times: pd.Series, single_data: pd.DataFrame, - ) -> Dict[str, List[Dict[str, np.ndarray]]]: + ) -> dict[str, list[dict[str, np.ndarray]]]: """ Generate all combinatorial backtest paths for a single dataset. @@ -313,7 +314,7 @@ def _single_backtest_predictions( single_weights: Optional[np.ndarray] = None, predict_probability: bool = False, n_jobs: int = 1, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """ Obtain backtest predictions for all CPCV paths. @@ -379,8 +380,8 @@ def train_single_estimator( ) def get_path_data( - path_num: int, locs: List[Tuple[int, int]] - ) -> Dict[str, np.ndarray]: + path_num: int, locs: list[tuple[int, int]] + ) -> dict[str, np.ndarray]: """Assemble predictions for one path.""" path_predictions = [] diff --git a/RiskLabAI/backtest/validation/cross_validator_factory.py b/RiskLabAI/backtest/validation/cross_validator_factory.py index c2ab4f8..79bdfef 100644 --- a/RiskLabAI/backtest/validation/cross_validator_factory.py +++ b/RiskLabAI/backtest/validation/cross_validator_factory.py @@ -4,6 +4,7 @@ import inspect from typing import Any + from .adaptive_combinatorial_purged import AdaptiveCombinatorialPurged from .bagged_combinatorial_purged import BaggedCombinatorialPurged from .combinatorial_purged import CombinatorialPurged diff --git a/RiskLabAI/backtest/validation/cross_validator_interface.py b/RiskLabAI/backtest/validation/cross_validator_interface.py index 65c10b3..12b46a2 100644 --- a/RiskLabAI/backtest/validation/cross_validator_interface.py +++ b/RiskLabAI/backtest/validation/cross_validator_interface.py @@ -4,7 +4,8 @@ """ from abc import ABC, abstractmethod -from typing import Any, Dict, Generator, List, Optional, Tuple, Union +from collections.abc import Generator +from typing import Any, Optional, Union import numpy as np import pandas as pd @@ -26,8 +27,8 @@ class CrossValidator(ABC): @abstractmethod def get_n_splits( self, - data: Optional[Union[pd.DataFrame, Dict[str, pd.DataFrame]]] = None, - labels: Optional[Union[pd.Series, Dict[str, pd.Series]]] = None, + data: Optional[Union[pd.DataFrame, dict[str, pd.DataFrame]]] = None, + labels: Optional[Union[pd.Series, dict[str, pd.Series]]] = None, groups: Optional[np.ndarray] = None, ) -> int: """ @@ -53,7 +54,7 @@ def get_n_splits( def _single_split( self, single_data: pd.DataFrame, - ) -> Generator[Tuple[np.ndarray, np.ndarray], None, None]: + ) -> Generator[tuple[np.ndarray, np.ndarray], None, None]: """ Split a single dataset into train-test indices. @@ -75,12 +76,12 @@ def _single_split( @abstractmethod def split( self, - data: Union[pd.DataFrame, Dict[str, pd.DataFrame]], - labels: Optional[Union[pd.Series, Dict[str, pd.Series]]] = None, + data: Union[pd.DataFrame, dict[str, pd.DataFrame]], + labels: Optional[Union[pd.Series, dict[str, pd.Series]]] = None, groups: Optional[np.ndarray] = None, ) -> Union[ - Generator[Tuple[np.ndarray, np.ndarray], None, None], - Generator[Tuple[str, Tuple[np.ndarray, np.ndarray]], None, None], + Generator[tuple[np.ndarray, np.ndarray], None, None], + Generator[tuple[str, tuple[np.ndarray, np.ndarray]], None, None], ]: """ Split data (or dictionary of data) into train-test indices. @@ -109,7 +110,7 @@ def split( @abstractmethod def _single_backtest_paths( self, single_data: pd.DataFrame - ) -> Dict[str, List[Dict[str, np.ndarray]]]: + ) -> dict[str, list[dict[str, np.ndarray]]]: """ Generate backtest paths for a single dataset. @@ -134,10 +135,10 @@ def _single_backtest_paths( @abstractmethod def backtest_paths( self, - data: Union[pd.DataFrame, Dict[str, pd.DataFrame]], + data: Union[pd.DataFrame, dict[str, pd.DataFrame]], ) -> Union[ - Dict[str, List[Dict[str, np.ndarray]]], - Dict[str, Dict[str, List[Dict[str, np.ndarray]]]], + dict[str, list[dict[str, np.ndarray]]], + dict[str, dict[str, list[dict[str, np.ndarray]]]], ]: """ Generate backtest paths for data or a dictionary of data. @@ -166,7 +167,7 @@ def _single_backtest_predictions( single_weights: Optional[np.ndarray] = None, predict_probability: bool = False, n_jobs: int = 1, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """ Obtain backtest predictions for a single dataset. @@ -196,13 +197,13 @@ def _single_backtest_predictions( @abstractmethod def backtest_predictions( self, - estimator: Union[Estimator, Dict[str, Estimator]], - data: Union[pd.DataFrame, Dict[str, pd.DataFrame]], - labels: Union[pd.Series, Dict[str, pd.Series]], - sample_weights: Optional[Union[np.ndarray, Dict[str, np.ndarray]]] = None, + estimator: Union[Estimator, dict[str, Estimator]], + data: Union[pd.DataFrame, dict[str, pd.DataFrame]], + labels: Union[pd.Series, dict[str, pd.Series]], + sample_weights: Optional[Union[np.ndarray, dict[str, np.ndarray]]] = None, predict_probability: bool = False, n_jobs: int = 1, - ) -> Union[Dict[str, np.ndarray], Dict[str, Dict[str, np.ndarray]]]: + ) -> Union[dict[str, np.ndarray], dict[str, dict[str, np.ndarray]]]: """ Generate backtest predictions for single or multiple datasets. diff --git a/RiskLabAI/backtest/validation/kfold.py b/RiskLabAI/backtest/validation/kfold.py index 22f20f8..9dbb7b4 100644 --- a/RiskLabAI/backtest/validation/kfold.py +++ b/RiskLabAI/backtest/validation/kfold.py @@ -3,8 +3,9 @@ """ import warnings +from collections.abc import Generator from copy import deepcopy -from typing import Any, Dict, Generator, List, Optional, Tuple, Union +from typing import Any, Optional, Union import numpy as np import pandas as pd @@ -52,8 +53,8 @@ def __init__( def get_n_splits( self, - data: Optional[Union[pd.DataFrame, Dict[str, pd.DataFrame]]] = None, - labels: Optional[Union[pd.Series, Dict[str, pd.Series]]] = None, + data: Optional[Union[pd.DataFrame, dict[str, pd.DataFrame]]] = None, + labels: Optional[Union[pd.Series, dict[str, pd.Series]]] = None, groups: Optional[np.ndarray] = None, ) -> int: """ @@ -85,7 +86,7 @@ def _get_shuffled_indices(self, n_samples: int) -> np.ndarray: def _single_split( self, single_data: pd.DataFrame, - ) -> Generator[Tuple[np.ndarray, np.ndarray], None, None]: + ) -> Generator[tuple[np.ndarray, np.ndarray], None, None]: """ Split a single dataset into train-test indices. @@ -107,12 +108,12 @@ def _single_split( def split( self, - data: Union[pd.DataFrame, Dict[str, pd.DataFrame]], - labels: Optional[Union[pd.Series, Dict[str, pd.Series]]] = None, + data: Union[pd.DataFrame, dict[str, pd.DataFrame]], + labels: Optional[Union[pd.Series, dict[str, pd.Series]]] = None, groups: Optional[np.ndarray] = None, ) -> Union[ - Generator[Tuple[np.ndarray, np.ndarray], None, None], - Generator[Tuple[str, Tuple[np.ndarray, np.ndarray]], None, None], + Generator[tuple[np.ndarray, np.ndarray], None, None], + Generator[tuple[str, tuple[np.ndarray, np.ndarray]], None, None], ]: """ Split data (or dictionary of data) into train-test indices. @@ -142,7 +143,7 @@ def split( def _single_backtest_paths( self, single_data: pd.DataFrame - ) -> Dict[str, List[Dict[str, np.ndarray]]]: + ) -> dict[str, list[dict[str, np.ndarray]]]: """ Generate backtest paths for a single dataset. For K-Fold, there is only one "path". @@ -174,10 +175,10 @@ def _single_backtest_paths( def backtest_paths( self, - data: Union[pd.DataFrame, Dict[str, pd.DataFrame]], + data: Union[pd.DataFrame, dict[str, pd.DataFrame]], ) -> Union[ - Dict[str, List[Dict[str, np.ndarray]]], - Dict[str, Dict[str, List[Dict[str, np.ndarray]]]], + dict[str, list[dict[str, np.ndarray]]], + dict[str, dict[str, list[dict[str, np.ndarray]]]], ]: """ Generate backtest paths for data or a dictionary of data. @@ -211,7 +212,7 @@ def _single_backtest_predictions( single_weights: Optional[np.ndarray] = None, predict_probability: bool = False, n_jobs: int = 1, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """ Obtain backtest predictions for a single dataset. @@ -242,7 +243,7 @@ def _single_backtest_predictions( def train_test_single_estimator( estimator_: Estimator, train_indices: np.ndarray, test_indices: np.ndarray - ) -> Tuple[np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray]: """Train model and return (predictions, test_indices).""" X_train = single_data.iloc[train_indices] y_train = single_labels.iloc[train_indices] @@ -292,13 +293,13 @@ def train_test_single_estimator( def backtest_predictions( self, - estimator: Union[Estimator, Dict[str, Estimator]], - data: Union[pd.DataFrame, Dict[str, pd.DataFrame]], - labels: Union[pd.Series, Dict[str, pd.Series]], - sample_weights: Optional[Union[np.ndarray, Dict[str, np.ndarray]]] = None, + estimator: Union[Estimator, dict[str, Estimator]], + data: Union[pd.DataFrame, dict[str, pd.DataFrame]], + labels: Union[pd.Series, dict[str, pd.Series]], + sample_weights: Optional[Union[np.ndarray, dict[str, np.ndarray]]] = None, predict_probability: bool = False, n_jobs: int = 1, - ) -> Union[Dict[str, np.ndarray], Dict[str, Dict[str, np.ndarray]]]: + ) -> Union[dict[str, np.ndarray], dict[str, dict[str, np.ndarray]]]: """ Generate backtest predictions for single or multiple datasets. diff --git a/RiskLabAI/backtest/validation/purged_kfold.py b/RiskLabAI/backtest/validation/purged_kfold.py index c43da57..ce18e8b 100644 --- a/RiskLabAI/backtest/validation/purged_kfold.py +++ b/RiskLabAI/backtest/validation/purged_kfold.py @@ -4,8 +4,9 @@ """ import warnings +from collections.abc import Generator from copy import deepcopy -from typing import Any, Dict, Generator, List, Optional, Set, Tuple, Union +from typing import Any, Optional, Union import numpy as np import pandas as pd @@ -84,7 +85,7 @@ def filtered_training_indices_with_embargo( training data (i.e., observations that do not overlap with test + embargo). """ - indices_to_drop: Set[int] = set() + indices_to_drop: set[int] = set() embargo_length = int(len(data_info_range) * embargo_fraction) if test_time_range.empty: @@ -157,7 +158,7 @@ def filtered_training_indices_with_embargo( def __init__( self, n_splits: int, - times: Union[pd.Series, Dict[str, pd.Series]], + times: Union[pd.Series, dict[str, pd.Series]], embargo: float = 0, ) -> None: """ @@ -180,8 +181,8 @@ def __init__( def get_n_splits( self, - data: Optional[Union[pd.DataFrame, Dict[str, pd.DataFrame]]] = None, - labels: Optional[Union[pd.Series, Dict[str, pd.Series]]] = None, + data: Optional[Union[pd.DataFrame, dict[str, pd.DataFrame]]] = None, + labels: Optional[Union[pd.Series, dict[str, pd.Series]]] = None, groups: Optional[np.ndarray] = None, ) -> int: """ @@ -250,7 +251,7 @@ def _single_split( self, single_times: pd.Series, single_data: pd.DataFrame, - ) -> Generator[Tuple[np.ndarray, np.ndarray], None, None]: + ) -> Generator[tuple[np.ndarray, np.ndarray], None, None]: """ Split a single dataset into purged train-test indices. @@ -276,12 +277,12 @@ def _single_split( def split( self, - data: Union[pd.DataFrame, Dict[str, pd.DataFrame]], - labels: Optional[Union[pd.Series, Dict[str, pd.Series]]] = None, + data: Union[pd.DataFrame, dict[str, pd.DataFrame]], + labels: Optional[Union[pd.Series, dict[str, pd.Series]]] = None, groups: Optional[np.ndarray] = None, ) -> Union[ - Generator[Tuple[np.ndarray, np.ndarray], None, None], - Generator[Tuple[str, Tuple[np.ndarray, np.ndarray]], None, None], + Generator[tuple[np.ndarray, np.ndarray], None, None], + Generator[tuple[str, tuple[np.ndarray, np.ndarray]], None, None], ]: """ Split data (or dictionary of data) into purged train-test indices. @@ -319,7 +320,7 @@ def _single_backtest_paths( self, single_times: pd.Series, single_data: pd.DataFrame, - ) -> Dict[str, List[Dict[str, np.ndarray]]]: + ) -> dict[str, list[dict[str, np.ndarray]]]: """ Generate backtest paths for a single dataset. For PurgedKFold, there is only one "path". @@ -357,10 +358,10 @@ def _single_backtest_paths( def backtest_paths( self, - data: Union[pd.DataFrame, Dict[str, pd.DataFrame]], + data: Union[pd.DataFrame, dict[str, pd.DataFrame]], ) -> Union[ - Dict[str, List[Dict[str, np.ndarray]]], - Dict[str, Dict[str, List[Dict[str, np.ndarray]]]], + dict[str, list[dict[str, np.ndarray]]], + dict[str, dict[str, list[dict[str, np.ndarray]]]], ]: """ Generate backtest paths for data or a dictionary of data. @@ -399,7 +400,7 @@ def _single_backtest_predictions( single_weights: Optional[np.ndarray] = None, predict_probability: bool = False, n_jobs: int = 1, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """ Obtain backtest predictions for a single dataset. @@ -468,13 +469,13 @@ def train_test_single_estimator( def backtest_predictions( self, - estimator: Union[Estimator, Dict[str, Estimator]], - data: Union[pd.DataFrame, Dict[str, pd.DataFrame]], - labels: Union[pd.Series, Dict[str, pd.Series]], - sample_weights: Optional[Union[np.ndarray, Dict[str, np.ndarray]]] = None, + estimator: Union[Estimator, dict[str, Estimator]], + data: Union[pd.DataFrame, dict[str, pd.DataFrame]], + labels: Union[pd.Series, dict[str, pd.Series]], + sample_weights: Optional[Union[np.ndarray, dict[str, np.ndarray]]] = None, predict_probability: bool = False, n_jobs: int = 1, - ) -> Union[Dict[str, np.ndarray], Dict[str, Dict[str, np.ndarray]]]: + ) -> Union[dict[str, np.ndarray], dict[str, dict[str, np.ndarray]]]: """ Generate backtest predictions for single or multiple datasets. diff --git a/RiskLabAI/backtest/validation/walk_forward.py b/RiskLabAI/backtest/validation/walk_forward.py index 48de8f0..8b64ab0 100644 --- a/RiskLabAI/backtest/validation/walk_forward.py +++ b/RiskLabAI/backtest/validation/walk_forward.py @@ -3,8 +3,9 @@ """ import warnings +from collections.abc import Generator from copy import deepcopy -from typing import Any, Dict, Generator, Optional, Tuple +from typing import Any, Optional import numpy as np import pandas as pd @@ -65,7 +66,7 @@ def __init__( def _single_split( self, single_data: pd.DataFrame, - ) -> Generator[Tuple[np.ndarray, np.ndarray], None, None]: + ) -> Generator[tuple[np.ndarray, np.ndarray], None, None]: """ Split a single dataset into walk-forward train-test indices. @@ -109,7 +110,7 @@ def _single_backtest_predictions( single_weights: Optional[np.ndarray] = None, predict_probability: bool = False, n_jobs: int = 1, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """ Obtain backtest predictions for a single dataset. diff --git a/RiskLabAI/cluster/__init__.py b/RiskLabAI/cluster/__init__.py index 61dd826..9cc7fab 100644 --- a/RiskLabAI/cluster/__init__.py +++ b/RiskLabAI/cluster/__init__.py @@ -7,13 +7,13 @@ """ from .clustering import ( - covariance_to_correlation, cluster_k_means_base, - make_new_outputs, cluster_k_means_top, - random_covariance_sub, - random_block_covariance, + covariance_to_correlation, + make_new_outputs, random_block_correlation, + random_block_covariance, + random_covariance_sub, ) __all__ = [ diff --git a/RiskLabAI/cluster/clustering.py b/RiskLabAI/cluster/clustering.py index ef693cc..6fac684 100644 --- a/RiskLabAI/cluster/clustering.py +++ b/RiskLabAI/cluster/clustering.py @@ -6,13 +6,14 @@ De Prado, M. (2020) Advances in financial machine learning. John Wiley & Sons. """ +from typing import Optional + import numpy as np import pandas as pd from scipy.linalg import block_diag from sklearn.cluster import KMeans from sklearn.metrics import silhouette_samples from sklearn.utils import check_random_state -from typing import Tuple, Dict, List, Optional def covariance_to_correlation(covariance: np.ndarray) -> np.ndarray: @@ -50,7 +51,7 @@ def cluster_k_means_base( max_clusters: int = 10, iterations: int = 10, random_state: Optional[int] = None, -) -> Tuple[pd.DataFrame, Dict[int, List[str]], pd.Series]: +) -> tuple[pd.DataFrame, dict[int, list[str]], pd.Series]: """ Perform the base K-Means clustering step. @@ -138,9 +139,9 @@ def cluster_k_means_base( def make_new_outputs( correlation: pd.DataFrame, - clusters_1: Dict[int, List[str]], - clusters_2: Dict[int, List[str]], -) -> Tuple[pd.DataFrame, Dict[int, List[str]], pd.Series]: + clusters_1: dict[int, list[str]], + clusters_2: dict[int, list[str]], +) -> tuple[pd.DataFrame, dict[int, list[str]], pd.Series]: """ Merge two disjoint sets of clusters and re-calculate metrics. @@ -195,7 +196,7 @@ def cluster_k_means_top( max_clusters: Optional[int] = None, iterations: int = 10, random_state: Optional[int] = None, -) -> Tuple[pd.DataFrame, Dict[int, List[str]], pd.Series]: +) -> tuple[pd.DataFrame, dict[int, list[str]], pd.Series]: """ Perform Optimized Nested Clustering (ONC). diff --git a/RiskLabAI/controller/bars_initializer.py b/RiskLabAI/controller/bars_initializer.py index 66c0cb2..1b5548e 100644 --- a/RiskLabAI/controller/bars_initializer.py +++ b/RiskLabAI/controller/bars_initializer.py @@ -3,9 +3,12 @@ various bar types (Standard, Time, Imbalance, Run). """ -from typing import Tuple, Union, Dict, Callable, Optional, Any +from typing import Any, Callable, Optional, Union + import pandas as pd +from RiskLabAI.data.structures.abstract_bars import AbstractBars + # Import bar types from RiskLabAI.data.structures.imbalance_bars import ( ExpectedImbalanceBars, @@ -14,13 +17,12 @@ from RiskLabAI.data.structures.run_bars import ExpectedRunBars, FixedRunBars from RiskLabAI.data.structures.standard_bars import StandardBars from RiskLabAI.data.structures.time_bars import TimeBars -from RiskLabAI.data.structures.abstract_bars import AbstractBars # Import constants from RiskLabAI.utils.constants import ( CUMULATIVE_DOLLAR, - CUMULATIVE_VOLUME, CUMULATIVE_TICKS, + CUMULATIVE_VOLUME, ) @@ -36,7 +38,7 @@ def __init__(self): """ Initializes the controller and maps method names to methods. """ - self.method_name_to_method: Dict[str, Callable[..., AbstractBars]] = { + self.method_name_to_method: dict[str, Callable[..., AbstractBars]] = { "expected_dollar_imbalance_bars": self.initialize_expected_dollar_imbalance_bars, "expected_volume_imbalance_bars": self.initialize_expected_volume_imbalance_bars, "expected_tick_imbalance_bars": self.initialize_expected_tick_imbalance_bars, @@ -60,7 +62,7 @@ def initialize_expected_dollar_imbalance_bars( window_size_for_expected_n_ticks_estimation: int = 10000, window_size_for_expected_imbalance_estimation: int = 10000, initial_estimate_of_expected_n_ticks_in_bar: int = 20000, - expected_ticks_number_bounds: Optional[Tuple[float, float]] = None, + expected_ticks_number_bounds: Optional[tuple[float, float]] = None, analyse_thresholds: bool = False, **kwargs: Any, # Accept extra kwargs but don't use them ) -> ExpectedImbalanceBars: @@ -81,7 +83,7 @@ def initialize_expected_volume_imbalance_bars( window_size_for_expected_n_ticks_estimation: int = 10000, initial_estimate_of_expected_n_ticks_in_bar: int = 20000, window_size_for_expected_imbalance_estimation: int = 10000, - expected_ticks_number_bounds: Optional[Tuple[float, float]] = None, + expected_ticks_number_bounds: Optional[tuple[float, float]] = None, analyse_thresholds: bool = False, **kwargs: Any, ) -> ExpectedImbalanceBars: @@ -100,7 +102,7 @@ def initialize_expected_tick_imbalance_bars( window_size_for_expected_n_ticks_estimation: int = 10000, initial_estimate_of_expected_n_ticks_in_bar: int = 20000, window_size_for_expected_imbalance_estimation: int = 10000, - expected_ticks_number_bounds: Optional[Tuple[float, float]] = None, + expected_ticks_number_bounds: Optional[tuple[float, float]] = None, analyse_thresholds: bool = False, **kwargs: Any, ) -> ExpectedImbalanceBars: @@ -164,7 +166,7 @@ def initialize_expected_dollar_run_bars( window_size_for_expected_n_ticks_estimation: int = 10000, initial_estimate_of_expected_n_ticks_in_bar: int = 20000, window_size_for_expected_imbalance_estimation: int = 10000, - expected_ticks_number_bounds: Optional[Tuple[float, float]] = None, + expected_ticks_number_bounds: Optional[tuple[float, float]] = None, analyse_thresholds: bool = False, **kwargs: Any, ) -> ExpectedRunBars: @@ -183,7 +185,7 @@ def initialize_expected_volume_run_bars( window_size_for_expected_n_ticks_estimation: int = 10000, initial_estimate_of_expected_n_ticks_in_bar: int = 20000, window_size_for_expected_imbalance_estimation: int = 10000, - expected_ticks_number_bounds: Optional[Tuple[float, float]] = None, + expected_ticks_number_bounds: Optional[tuple[float, float]] = None, analyse_thresholds: bool = False, **kwargs: Any, ) -> ExpectedRunBars: @@ -202,7 +204,7 @@ def initialize_expected_tick_run_bars( window_size_for_expected_n_ticks_estimation: int = 10000, initial_estimate_of_expected_n_ticks_in_bar: int = 20000, window_size_for_expected_imbalance_estimation: int = 10000, - expected_ticks_number_bounds: Optional[Tuple[float, float]] = None, + expected_ticks_number_bounds: Optional[tuple[float, float]] = None, analyse_thresholds: bool = False, **kwargs: Any, ) -> ExpectedRunBars: diff --git a/RiskLabAI/controller/data_structure_controller.py b/RiskLabAI/controller/data_structure_controller.py index d828843..3886967 100644 --- a/RiskLabAI/controller/data_structure_controller.py +++ b/RiskLabAI/controller/data_structure_controller.py @@ -7,26 +7,26 @@ """ import logging +from collections.abc import Generator +from typing import Any, Optional, Union import pandas as pd -import numpy as np -from typing import Iterable, Optional, Generator, Union, Dict, Any, List from RiskLabAI.controller.bars_initializer import BarsInitializerController from RiskLabAI.data.structures.abstract_bars import AbstractBars from RiskLabAI.utils.constants import ( - DATE_TIME, - TICK_NUMBER, - OPEN_PRICE, - HIGH_PRICE, - LOW_PRICE, CLOSE_PRICE, - CUMULATIVE_VOLUME, CUMULATIVE_BUY_VOLUME, + CUMULATIVE_DOLLAR, CUMULATIVE_SELL_VOLUME, CUMULATIVE_TICKS, - CUMULATIVE_DOLLAR, + CUMULATIVE_VOLUME, + DATE_TIME, + HIGH_PRICE, + LOW_PRICE, + OPEN_PRICE, THRESHOLD, + TICK_NUMBER, ) logger = logging.getLogger(__name__) @@ -62,7 +62,7 @@ def __init__(self) -> None: def handle_input_command( self, method_name: str, - method_arguments: Dict[str, Any], + method_arguments: dict[str, Any], input_data: Union[str, pd.DataFrame], output_path: Optional[str] = None, batch_size: int = 1_000_000, @@ -104,7 +104,7 @@ def handle_input_command( else: raise TypeError("input_data must be a string (path) or pd.DataFrame") - all_bars: List[List[Any]] = [] + all_bars: list[list[Any]] = [] # 3. Process data in batches logger.info("Processing data in batches...") @@ -159,8 +159,7 @@ def read_batches_from_string( try: # Use a generator to read the file in chunks # This is more memory-efficient and avoids reading the file twice. - for batch in pd.read_csv(input_path, chunksize=batch_size, parse_dates=[0]): - yield batch + yield from pd.read_csv(input_path, chunksize=batch_size, parse_dates=[0]) except FileNotFoundError: logger.error("File not found at %s", input_path) return diff --git a/RiskLabAI/core/__init__.py b/RiskLabAI/core/__init__.py index e550d28..06fd69c 100644 --- a/RiskLabAI/core/__init__.py +++ b/RiskLabAI/core/__init__.py @@ -27,12 +27,6 @@ from __future__ import annotations -from .base import ( - BaseBetSizer, - BaseLabeler, - BasePortfolioOptimizer, - Estimator, -) from ._builtins import ( BARS, BET_SIZERS, @@ -44,6 +38,12 @@ get_registry, list_components, ) +from .base import ( + BaseBetSizer, + BaseLabeler, + BasePortfolioOptimizer, + Estimator, +) from .registry import Registry __all__ = [ diff --git a/RiskLabAI/core/_builtins.py b/RiskLabAI/core/_builtins.py index 6dad01d..7cb533a 100644 --- a/RiskLabAI/core/_builtins.py +++ b/RiskLabAI/core/_builtins.py @@ -20,8 +20,6 @@ from __future__ import annotations -from typing import Dict, List - from .registry import Registry __all__ = [ @@ -149,7 +147,7 @@ # --------------------------------------------------------------------------- # # Family name -> registry, for discovery and a unified catalogue. # --------------------------------------------------------------------------- # -REGISTRIES: Dict[str, Registry] = { +REGISTRIES: dict[str, Registry] = { "bars": BARS, "cross_validators": CROSS_VALIDATORS, "feature_importance": FEATURE_IMPORTANCE, @@ -183,7 +181,7 @@ def get_registry(family: str) -> Registry: ) from None -def list_components() -> Dict[str, List[str]]: +def list_components() -> dict[str, list[str]]: """ Return a catalogue mapping each family name to its available component keys. diff --git a/RiskLabAI/core/base.py b/RiskLabAI/core/base.py index 7fbcc7c..6e0c709 100644 --- a/RiskLabAI/core/base.py +++ b/RiskLabAI/core/base.py @@ -26,7 +26,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Any, Optional, Protocol, runtime_checkable +from typing import Any, Protocol, runtime_checkable import pandas as pd @@ -75,7 +75,7 @@ class BaseLabeler(ABC): def label( self, prices: pd.Series, - events: Optional[pd.DataFrame] = None, + events: pd.DataFrame | None = None, **kwargs: Any, ) -> pd.DataFrame: """ diff --git a/RiskLabAI/core/registry.py b/RiskLabAI/core/registry.py index bd67b69..ab47928 100644 --- a/RiskLabAI/core/registry.py +++ b/RiskLabAI/core/registry.py @@ -30,14 +30,10 @@ import importlib import inspect import logging +from collections.abc import Iterator from typing import ( Any, Callable, - Dict, - Iterator, - List, - Optional, - Tuple, ) logger = logging.getLogger(__name__) @@ -57,14 +53,14 @@ class _Entry: def __init__( self, key: str, - obj: Optional[Factory] = None, - lazy_target: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, + obj: Factory | None = None, + lazy_target: str | None = None, + metadata: dict[str, Any] | None = None, ) -> None: self.key = key self._obj = obj self._lazy_target = lazy_target - self.metadata: Dict[str, Any] = dict(metadata or {}) + self.metadata: dict[str, Any] = dict(metadata or {}) @property def is_lazy(self) -> bool: @@ -121,13 +117,13 @@ class Registry: 'woof' """ - def __init__(self, name: str, *, base: Optional[type] = None) -> None: + def __init__(self, name: str, *, base: type | None = None) -> None: self.name = name self.base = base # canonical key -> entry - self._entries: Dict[str, _Entry] = {} + self._entries: dict[str, _Entry] = {} # lower-cased key OR alias -> canonical key - self._index: Dict[str, str] = {} + self._index: dict[str, str] = {} # ------------------------------------------------------------------ # # Registration @@ -135,10 +131,10 @@ def __init__(self, name: str, *, base: Optional[type] = None) -> None: def register( self, key: Any = None, - obj: Optional[Factory] = None, + obj: Factory | None = None, *, - aliases: Tuple[str, ...] = (), - metadata: Optional[Dict[str, Any]] = None, + aliases: tuple[str, ...] = (), + metadata: dict[str, Any] | None = None, override: bool = False, ) -> Any: """ @@ -220,8 +216,8 @@ def register_lazy( key: str, target: str, *, - aliases: Tuple[str, ...] = (), - metadata: Optional[Dict[str, Any]] = None, + aliases: tuple[str, ...] = (), + metadata: dict[str, Any] | None = None, override: bool = False, ) -> None: """ @@ -248,11 +244,11 @@ def register_lazy( def _register( self, key: str, - obj: Optional[Factory], + obj: Factory | None, *, - lazy_target: Optional[str] = None, - aliases: Tuple[str, ...] = (), - metadata: Optional[Dict[str, Any]] = None, + lazy_target: str | None = None, + aliases: tuple[str, ...] = (), + metadata: dict[str, Any] | None = None, override: bool = False, ) -> None: if not isinstance(key, str) or not key: @@ -326,7 +322,7 @@ def create( kwargs = _filter_kwargs(factory, kwargs) return factory(*args, **kwargs) - def metadata(self, key: str) -> Dict[str, Any]: + def metadata(self, key: str) -> dict[str, Any]: """Return the metadata dict registered alongside ``key``.""" return dict(self._lookup(key).metadata) @@ -356,15 +352,15 @@ def _not_found_message(self, key: str) -> str: # ------------------------------------------------------------------ # # Introspection / mapping protocol # ------------------------------------------------------------------ # - def available(self) -> List[str]: + def available(self) -> list[str]: """Sorted list of canonical keys.""" return sorted(self._entries.keys()) - def keys(self) -> List[str]: + def keys(self) -> list[str]: """Alias for :meth:`available` (mapping-like access).""" return self.available() - def aliases(self) -> Dict[str, str]: + def aliases(self) -> dict[str, str]: """Mapping of alias -> canonical key (excludes canonical keys themselves).""" return { alias: canonical @@ -388,7 +384,7 @@ def __repr__(self) -> str: # pragma: no cover - cosmetic return f"" -def _filter_kwargs(factory: Factory, kwargs: Dict[str, Any]) -> Dict[str, Any]: +def _filter_kwargs(factory: Factory, kwargs: dict[str, Any]) -> dict[str, Any]: """ Drop keyword arguments the factory's signature does not accept. diff --git a/RiskLabAI/data/__init__.py b/RiskLabAI/data/__init__.py index 0e53b63..501be46 100644 --- a/RiskLabAI/data/__init__.py +++ b/RiskLabAI/data/__init__.py @@ -13,13 +13,15 @@ """ # Import sub-packages -from . import denoise -from . import differentiation -from . import distance -from . import labeling -from . import structures -from . import synthetic_data -from . import weights +from . import ( + denoise, + differentiation, + distance, + labeling, + structures, + synthetic_data, + weights, +) __all__ = [ "denoise", diff --git a/RiskLabAI/data/denoise/__init__.py b/RiskLabAI/data/denoise/__init__.py index 2600912..df60c0d 100644 --- a/RiskLabAI/data/denoise/__init__.py +++ b/RiskLabAI/data/denoise/__init__.py @@ -11,15 +11,15 @@ """ from .denoising import ( - marcenko_pastur_pdf, - fit_kde, - find_max_eval, - pca, - denoised_corr, - cov_to_corr, corr_to_cov, + cov_to_corr, denoise_cov, + denoised_corr, + find_max_eval, + fit_kde, + marcenko_pastur_pdf, optimal_portfolio, + pca, ) __all__ = [ diff --git a/RiskLabAI/data/denoise/denoising.py b/RiskLabAI/data/denoise/denoising.py index 9613871..b14aabf 100644 --- a/RiskLabAI/data/denoise/denoising.py +++ b/RiskLabAI/data/denoise/denoising.py @@ -12,11 +12,12 @@ John Wiley & Sons, Chapter 2. """ -import pandas as pd +from typing import Optional + import numpy as np +import pandas as pd from scipy.optimize import minimize from sklearn.neighbors import KernelDensity -from typing import Tuple, Union, Optional, Dict, Any # --- FIX 5: Removed unused imports for LedoitWolf and block_diag --- @@ -135,7 +136,7 @@ def _mp_pdf_fit_error( def find_max_eval( eigenvalues: np.ndarray, q: float, bandwidth: float -) -> Tuple[float, float]: +) -> tuple[float, float]: r""" Find the maximum theoretical eigenvalue (\(\lambda_{max}\)) by fitting the Marcenko-Pastur distribution. @@ -163,9 +164,8 @@ def find_max_eval( # Minimize the SSE to find the best-fit variance # --- FIX 2: Pass bandwidth to the objective function --- - objective_func = lambda *args: _mp_pdf_fit_error( - args[0], q, eigenvalues_1d, bandwidth - ) + def objective_func(*args): + return _mp_pdf_fit_error(args[0], q, eigenvalues_1d, bandwidth) optimizer_result = minimize( objective_func, @@ -242,7 +242,7 @@ def denoised_corr( # --- Utility Functions --- -def pca(matrix: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: +def pca(matrix: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """ Computes the principal component analysis of a Hermitian matrix. Ensures eigenvalues are sorted descending. diff --git a/RiskLabAI/data/differentiation/differentiation.py b/RiskLabAI/data/differentiation/differentiation.py index d7cc5d2..f4ebdf4 100644 --- a/RiskLabAI/data/differentiation/differentiation.py +++ b/RiskLabAI/data/differentiation/differentiation.py @@ -10,10 +10,14 @@ John Wiley & Sons, Chapter 5. """ +from typing import TYPE_CHECKING, Optional + import numpy as np import pandas as pd from statsmodels.tsa.stattools import adfuller -from typing import Tuple, Optional + +if TYPE_CHECKING: + import matplotlib.pyplot as plt # type hints only; optional at runtime def calculate_weights_std(degree: float, size: int) -> np.ndarray: @@ -223,7 +227,7 @@ def fractional_difference_fixed_single( def plot_weights( - degree_range: Tuple[float, float], + degree_range: tuple[float, float], number_degrees: int, size: int, ax: Optional["plt.Axes"] = None, diff --git a/RiskLabAI/data/distance/distance_metric.py b/RiskLabAI/data/distance/distance_metric.py index 6274790..b1f46f5 100644 --- a/RiskLabAI/data/distance/distance_metric.py +++ b/RiskLabAI/data/distance/distance_metric.py @@ -13,10 +13,11 @@ John Wiley & Sons, Chapter 3. """ +from typing import Optional + import numpy as np import scipy.stats as ss from sklearn.metrics import mutual_info_score -from typing import Optional def calculate_variation_of_information( diff --git a/RiskLabAI/data/labeling/__init__.py b/RiskLabAI/data/labeling/__init__.py index d3eae51..c6caf9c 100644 --- a/RiskLabAI/data/labeling/__init__.py +++ b/RiskLabAI/data/labeling/__init__.py @@ -12,22 +12,22 @@ John Wiley & Sons, Chapters 3 & 4. """ +from .financial_labels import ( + calculate_t_value_linear_regression, + find_trend_using_trend_scanning, +) from .labeling import ( cusum_filter_events_dynamic_threshold, - symmetric_cusum_filter, daily_volatility_with_log_returns, - vertical_barrier, - triple_barrier, + expand_call, + lin_parts, meta_events, meta_labeling, - lin_parts, process_jobs, - expand_call, report_progress, -) -from .financial_labels import ( - calculate_t_value_linear_regression, - find_trend_using_trend_scanning, + symmetric_cusum_filter, + triple_barrier, + vertical_barrier, ) __all__ = [ diff --git a/RiskLabAI/data/labeling/financial_labels.py b/RiskLabAI/data/labeling/financial_labels.py index 8e3c58b..295d349 100644 --- a/RiskLabAI/data/labeling/financial_labels.py +++ b/RiskLabAI/data/labeling/financial_labels.py @@ -13,7 +13,6 @@ import numpy as np import pandas as pd from scipy import stats -from typing import List, Tuple, Optional def calculate_t_value_linear_regression(prices: pd.Series) -> float: @@ -50,7 +49,7 @@ def calculate_t_value_linear_regression(prices: pd.Series) -> float: def find_trend_using_trend_scanning( - molecule: pd.Index, close: pd.Series, span: Tuple[int, int] + molecule: pd.Index, close: pd.Series, span: tuple[int, int] ) -> pd.DataFrame: """ Implement the trend-scanning method. diff --git a/RiskLabAI/data/labeling/labeling.py b/RiskLabAI/data/labeling/labeling.py index 9c22e7c..7a12491 100644 --- a/RiskLabAI/data/labeling/labeling.py +++ b/RiskLabAI/data/labeling/labeling.py @@ -12,7 +12,7 @@ """ from concurrent.futures import ProcessPoolExecutor -from typing import List, Optional +from typing import Optional import numpy as np import pandas as pd @@ -22,11 +22,11 @@ # under their historical names for backward compatibility. `lin_parts` maps to # hpc's `linear_partitions` (identical for valid inputs, with an added guard). from RiskLabAI.hpc import ( # noqa: F401 (re-exported for backward compat) - process_jobs, expand_call, + process_jobs, report_progress, - linear_partitions as lin_parts, ) +from RiskLabAI.hpc import linear_partitions as lin_parts # noqa: F401 def cusum_filter_events_dynamic_threshold( @@ -193,8 +193,8 @@ def vertical_barrier( def triple_barrier( close: pd.Series, events: pd.DataFrame, - ptsl: List[float], - molecule: List[pd.Timestamp], + ptsl: list[float], + molecule: list[pd.Timestamp], ) -> pd.DataFrame: """ Apply the triple-barrier method for a subset of events. @@ -303,7 +303,7 @@ def triple_barrier( def meta_events( close: pd.Series, time_events: pd.DatetimeIndex, - ptsl: List[float], + ptsl: list[float], target: pd.Series, return_min: float, num_threads: int, diff --git a/RiskLabAI/data/structures/__init__.py b/RiskLabAI/data/structures/__init__.py index 28ccdbf..5449285 100644 --- a/RiskLabAI/data/structures/__init__.py +++ b/RiskLabAI/data/structures/__init__.py @@ -3,16 +3,15 @@ """ # Exports from standard_bars.py -from .standard_bars import StandardBars - -# Exports from time_bars.py -from .time_bars import TimeBars - # Exports from imbalance_bars.py from .imbalance_bars import ExpectedImbalanceBars, FixedImbalanceBars # Exports from run_bars.py from .run_bars import ExpectedRunBars, FixedRunBars +from .standard_bars import StandardBars + +# Exports from time_bars.py +from .time_bars import TimeBars # Note: Removed 'pca_weights' from here as it belongs in 'optimization', # not 'data.structures'. diff --git a/RiskLabAI/data/structures/abstract_bars.py b/RiskLabAI/data/structures/abstract_bars.py index b4db081..2bdfcd8 100644 --- a/RiskLabAI/data/structures/abstract_bars.py +++ b/RiskLabAI/data/structures/abstract_bars.py @@ -4,12 +4,15 @@ """ from abc import ABC, abstractmethod -from typing import Tuple, Union, List, Any, Dict, Iterable, Optional +from collections.abc import Iterable +from typing import Any, Optional, Union + import numpy as np + from RiskLabAI.utils.constants import * # Type hint for a single tick: (datetime, price, volume) -TickData = Union[List[Any], Tuple[Any, ...], np.ndarray] +TickData = Union[list[Any], tuple[Any, ...], np.ndarray] class AbstractBars(ABC): @@ -41,7 +44,7 @@ def __init__(self, bar_type: str): self.high_price: float = -np.inf self.low_price: float = np.inf - self.base_statistics: Dict[str, Union[int, float]] = { + self.base_statistics: dict[str, Union[int, float]] = { PREVIOUS_TICK_RULE: 0, CUMULATIVE_TICKS: 0, CUMULATIVE_DOLLAR: 0, @@ -51,7 +54,7 @@ def __init__(self, bar_type: str): } @abstractmethod - def construct_bars_from_data(self, data: Iterable[TickData]) -> List[List[Any]]: + def construct_bars_from_data(self, data: Iterable[TickData]) -> list[list[Any]]: """ Constructs bars from input tick data. @@ -162,7 +165,7 @@ def _tick_rule(self, price: float) -> int: self.previous_tick_price = price return signed_tick - def _high_and_low_price_update(self, price: float) -> Tuple[float, float]: + def _high_and_low_price_update(self, price: float) -> tuple[float, float]: """ Update the high and low prices for the current bar. @@ -188,7 +191,7 @@ def _construct_next_bar( high_price: float, low_price: float, threshold: float, - ) -> List[Any]: + ) -> list[Any]: """ Format and return the newly constructed bar. diff --git a/RiskLabAI/data/structures/abstract_imbalance_bars.py b/RiskLabAI/data/structures/abstract_imbalance_bars.py index 457bf59..307846a 100644 --- a/RiskLabAI/data/structures/abstract_imbalance_bars.py +++ b/RiskLabAI/data/structures/abstract_imbalance_bars.py @@ -3,13 +3,15 @@ """ from abc import abstractmethod -from typing import Union, List, Any, Iterable, Optional +from collections.abc import Iterable +from typing import Any, Optional + import numpy as np +from RiskLabAI.data.structures.abstract_bars import TickData from RiskLabAI.data.structures.abstract_information_driven_bars import ( AbstractInformationDrivenBars, ) -from RiskLabAI.data.structures.abstract_bars import TickData from RiskLabAI.utils.constants import * @@ -52,7 +54,7 @@ def __init__( self.analyse_thresholds = [] if analyse_thresholds else None - def construct_bars_from_data(self, data: Iterable[TickData]) -> List[List[Any]]: + def construct_bars_from_data(self, data: Iterable[TickData]) -> list[list[Any]]: """ Constructs imbalance bars from input tick data. (Parameters same as original) diff --git a/RiskLabAI/data/structures/abstract_information_driven_bars.py b/RiskLabAI/data/structures/abstract_information_driven_bars.py index 6dcb1f0..c95228d 100644 --- a/RiskLabAI/data/structures/abstract_information_driven_bars.py +++ b/RiskLabAI/data/structures/abstract_information_driven_bars.py @@ -3,12 +3,13 @@ """ from abc import abstractmethod -from typing import Union, List, Optional +from typing import Optional + import numpy as np -from RiskLabAI.utils.ewma import ewma from RiskLabAI.data.structures.abstract_bars import AbstractBars from RiskLabAI.utils.constants import * +from RiskLabAI.utils.ewma import ewma class AbstractInformationDrivenBars(AbstractBars): diff --git a/RiskLabAI/data/structures/abstract_run_bars.py b/RiskLabAI/data/structures/abstract_run_bars.py index 3ecf1cf..df46a99 100644 --- a/RiskLabAI/data/structures/abstract_run_bars.py +++ b/RiskLabAI/data/structures/abstract_run_bars.py @@ -3,15 +3,17 @@ """ from abc import abstractmethod -from typing import Union, List, Any, Iterable, Optional +from collections.abc import Iterable +from typing import Any, Optional + import numpy as np +from RiskLabAI.data.structures.abstract_bars import TickData from RiskLabAI.data.structures.abstract_information_driven_bars import ( AbstractInformationDrivenBars, ) -from RiskLabAI.data.structures.abstract_bars import TickData -from RiskLabAI.utils.ewma import ewma from RiskLabAI.utils.constants import * +from RiskLabAI.utils.ewma import ewma class AbstractRunBars(AbstractInformationDrivenBars): @@ -54,7 +56,7 @@ def __init__( self.analyse_thresholds = [] if analyse_thresholds else None - def construct_bars_from_data(self, data: Iterable[TickData]) -> List[List[Any]]: + def construct_bars_from_data(self, data: Iterable[TickData]) -> list[list[Any]]: """ Constructs run bars from input tick data. (Parameters same as original) diff --git a/RiskLabAI/data/structures/imbalance_bars.py b/RiskLabAI/data/structures/imbalance_bars.py index 6fa0189..b6a6a44 100644 --- a/RiskLabAI/data/structures/imbalance_bars.py +++ b/RiskLabAI/data/structures/imbalance_bars.py @@ -4,12 +4,12 @@ - FixedImbalanceBars """ -from typing import Tuple, Union, Optional +from typing import Optional + import numpy as np from RiskLabAI.data.structures.abstract_imbalance_bars import AbstractImbalanceBars from RiskLabAI.utils.constants import * - from RiskLabAI.utils.ewma import ewma @@ -25,7 +25,7 @@ def __init__( window_size_for_expected_n_ticks_estimation: int = 10000, initial_estimate_of_expected_n_ticks_in_bar: int = 20000, window_size_for_expected_imbalance_estimation: int = 10000, - expected_ticks_number_bounds: Optional[Tuple[float, float]] = None, + expected_ticks_number_bounds: Optional[tuple[float, float]] = None, analyse_thresholds: bool = False, ): """ diff --git a/RiskLabAI/data/structures/run_bars.py b/RiskLabAI/data/structures/run_bars.py index 8537529..325afcb 100644 --- a/RiskLabAI/data/structures/run_bars.py +++ b/RiskLabAI/data/structures/run_bars.py @@ -4,12 +4,12 @@ - FixedRunBars """ -from typing import Union, Iterable, List, Optional, Tuple +from typing import Optional + import numpy as np from RiskLabAI.data.structures.abstract_run_bars import AbstractRunBars from RiskLabAI.utils.constants import * - from RiskLabAI.utils.ewma import ewma @@ -25,7 +25,7 @@ def __init__( window_size_for_expected_n_ticks_estimation: int, initial_estimate_of_expected_n_ticks_in_bar: int, window_size_for_expected_imbalance_estimation: int, - expected_ticks_number_bounds: Optional[Tuple[float, float]] = None, + expected_ticks_number_bounds: Optional[tuple[float, float]] = None, analyse_thresholds: bool = False, ): """ diff --git a/RiskLabAI/data/structures/standard_bars.py b/RiskLabAI/data/structures/standard_bars.py index 8fa1828..110a3b9 100644 --- a/RiskLabAI/data/structures/standard_bars.py +++ b/RiskLabAI/data/structures/standard_bars.py @@ -2,8 +2,9 @@ Implements Standard Bars (Tick, Volume, Dollar). """ -from typing import Union, List, Any, Iterable -import numpy as np +from collections.abc import Iterable +from typing import Any + from RiskLabAI.data.structures.abstract_bars import AbstractBars, TickData @@ -30,7 +31,7 @@ def __init__(self, bar_type: str, threshold: float = 50000): super().__init__(bar_type) self.threshold = threshold - def construct_bars_from_data(self, data: Iterable[TickData]) -> List[List[Any]]: + def construct_bars_from_data(self, data: Iterable[TickData]) -> list[list[Any]]: """ Constructs standard bars from input tick data. diff --git a/RiskLabAI/data/structures/time_bars.py b/RiskLabAI/data/structures/time_bars.py index 855291d..835e7a3 100644 --- a/RiskLabAI/data/structures/time_bars.py +++ b/RiskLabAI/data/structures/time_bars.py @@ -2,9 +2,12 @@ Implements Time Bars. """ -from typing import Union, List, Any, Iterable +from collections.abc import Iterable +from typing import Any + import numpy as np import pandas as pd + from RiskLabAI.data.structures.abstract_bars import AbstractBars, TickData @@ -50,7 +53,7 @@ def __init__(self, resolution_type: str, resolution_units: int): self.current_bar_timestamp = np.nan self.current_bar_end_timestamp = np.nan - def construct_bars_from_data(self, data: Iterable[TickData]) -> List[List[Any]]: + def construct_bars_from_data(self, data: Iterable[TickData]) -> list[list[Any]]: """ Constructs time bars from input tick data. @@ -76,11 +79,11 @@ def construct_bars_from_data(self, data: Iterable[TickData]) -> List[List[Any]]: # Get tick timestamp in seconds try: tick_timestamp_sec = date_time.timestamp() - except AttributeError: + except AttributeError as err: raise TypeError( "TimeBars require `date_time` to be a pandas Timestamp " "or datetime object with a .timestamp() method." - ) + ) from err # Determine the "floor" timestamp for this bar bar_start_timestamp_sec = ( diff --git a/RiskLabAI/data/synthetic_data/__init__.py b/RiskLabAI/data/synthetic_data/__init__.py index 8960510..b06e36e 100644 --- a/RiskLabAI/data/synthetic_data/__init__.py +++ b/RiskLabAI/data/synthetic_data/__init__.py @@ -7,19 +7,19 @@ """ from .drift_burst_hypothesis import drift_volatility_burst -from .synthetic_controlled_environment import ( - compute_log_returns, - heston_merton_log_returns, - align_params_length, - generate_prices_from_regimes, - parallel_generate_prices, -) from .simulation import ( - random_cov, form_block_matrix, form_true_matrix, + random_cov, simulates_cov_mu, ) +from .synthetic_controlled_environment import ( + align_params_length, + compute_log_returns, + generate_prices_from_regimes, + heston_merton_log_returns, + parallel_generate_prices, +) __all__ = [ "drift_volatility_burst", diff --git a/RiskLabAI/data/synthetic_data/drift_burst_hypothesis.py b/RiskLabAI/data/synthetic_data/drift_burst_hypothesis.py index f5522f4..7dd5156 100644 --- a/RiskLabAI/data/synthetic_data/drift_burst_hypothesis.py +++ b/RiskLabAI/data/synthetic_data/drift_burst_hypothesis.py @@ -6,7 +6,6 @@ """ import numpy as np -from typing import Tuple def drift_volatility_burst( @@ -18,7 +17,7 @@ def drift_volatility_burst( alpha: float, beta: float, explosion_filter_width: float = 0.1, -) -> Tuple[np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray]: r""" Compute the drift and volatility for a DBH burst scenario. diff --git a/RiskLabAI/data/synthetic_data/simulation.py b/RiskLabAI/data/synthetic_data/simulation.py index bd7ced9..973c1c2 100644 --- a/RiskLabAI/data/synthetic_data/simulation.py +++ b/RiskLabAI/data/synthetic_data/simulation.py @@ -8,7 +8,6 @@ import pandas as pd from scipy.linalg import block_diag from sklearn.covariance import LedoitWolf -from typing import Tuple # Import the utility from the denoising module from RiskLabAI.data.denoise.denoising import corr_to_cov @@ -54,7 +53,7 @@ def form_block_matrix( def form_true_matrix( n_blocks: int, block_size: int, block_correlation: float -) -> Tuple[np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray]: """ Forms a shuffled block diagonal correlation matrix and the corresponding covariance matrix. @@ -82,7 +81,7 @@ def form_true_matrix( def simulates_cov_mu( mu0: np.ndarray, cov0: np.ndarray, n_obs: int, shrink: bool = False -) -> Tuple[np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray]: """ Simulates multivariate normal observations and computes the sample mean and covariance. diff --git a/RiskLabAI/data/synthetic_data/synthetic_controlled_environment.py b/RiskLabAI/data/synthetic_data/synthetic_controlled_environment.py index 5488361..53aa447 100644 --- a/RiskLabAI/data/synthetic_data/synthetic_controlled_environment.py +++ b/RiskLabAI/data/synthetic_data/synthetic_controlled_environment.py @@ -3,15 +3,15 @@ with Markov-switching regimes. """ -from typing import Dict, List, Tuple, Union, Optional +from typing import Optional, Union import numpy as np import pandas as pd -from numba import jit from joblib import Parallel, delayed +from numba import jit # Type hint for regime parameters -RegimeParams = Dict[str, Union[float, List[float]]] +RegimeParams = dict[str, Union[float, list[float]]] @jit(nopython=True) @@ -213,7 +213,7 @@ def heston_merton_log_returns( def align_params_length( regime_params: RegimeParams, -) -> Tuple[Dict[str, List[float]], int]: +) -> tuple[dict[str, list[float]], int]: """ Align the parameter lists within a regime to be the same length. @@ -234,7 +234,7 @@ def align_params_length( """ max_len = max(len(v) if isinstance(v, list) else 1 for v in regime_params.values()) - aligned_params: Dict[str, List[float]] = {} + aligned_params: dict[str, list[float]] = {} for key, value in regime_params.items(): if isinstance(value, list): if len(value) < max_len: @@ -250,12 +250,12 @@ def align_params_length( def generate_prices_from_regimes( - regimes: Dict[str, RegimeParams], + regimes: dict[str, RegimeParams], transition_matrix: np.ndarray, total_time: float, n_steps: int, random_state: Optional[int] = None, -) -> Tuple[pd.Series, np.ndarray]: +) -> tuple[pd.Series, np.ndarray]: """ Generate a price series from a Markov-switching regime model. @@ -288,7 +288,7 @@ def generate_prices_from_regimes( simulated_regimes = markov_chain.simulate(ts_length=n_steps, random_state=rng) # 2. Unpack parameters based on simulated regimes - param_lists: Dict[str, List[float]] = { + param_lists: dict[str, list[float]] = { "mu": [], "kappa": [], "theta": [], @@ -316,7 +316,7 @@ def generate_prices_from_regimes( # 3. Finalize parameter arrays and regime path simulated_regimes_final = np.array(regime_path_expanded) - param_arrays: Dict[str, np.ndarray] = { + param_arrays: dict[str, np.ndarray] = { key: np.array(val) for key, val in param_lists.items() } @@ -355,13 +355,13 @@ def generate_prices_from_regimes( def parallel_generate_prices( number_of_paths: int, - regimes: Dict[str, RegimeParams], + regimes: dict[str, RegimeParams], transition_matrix: np.ndarray, total_time: float, n_steps: int, random_state: Optional[int] = None, n_jobs: int = 1, -) -> Tuple[pd.DataFrame, pd.DataFrame]: +) -> tuple[pd.DataFrame, pd.DataFrame]: """ Parallel generation of price paths. diff --git a/RiskLabAI/data/weights/__init__.py b/RiskLabAI/data/weights/__init__.py index 87c8883..5788242 100644 --- a/RiskLabAI/data/weights/__init__.py +++ b/RiskLabAI/data/weights/__init__.py @@ -10,10 +10,10 @@ """ from .sample_weights import ( - expand_label_for_meta_labeling, calculate_average_uniqueness, - sample_weight_absolute_return_meta_labeling, calculate_time_decay, + expand_label_for_meta_labeling, + sample_weight_absolute_return_meta_labeling, ) __all__ = [ diff --git a/RiskLabAI/data/weights/sample_weights.py b/RiskLabAI/data/weights/sample_weights.py index 23971b8..011c4bb 100644 --- a/RiskLabAI/data/weights/sample_weights.py +++ b/RiskLabAI/data/weights/sample_weights.py @@ -9,7 +9,6 @@ import numpy as np import pandas as pd -from typing import Optional def expand_label_for_meta_labeling( diff --git a/RiskLabAI/ensemble/empirical_bagging_accuracy.py b/RiskLabAI/ensemble/empirical_bagging_accuracy.py index fdc8daf..e59cb5d 100644 --- a/RiskLabAI/ensemble/empirical_bagging_accuracy.py +++ b/RiskLabAI/ensemble/empirical_bagging_accuracy.py @@ -5,14 +5,18 @@ Chapter 6, Section 6.5, pp. 91-92. """ +from typing import TYPE_CHECKING, Optional + import numpy as np import pandas as pd +from scipy.stats import norm + +if TYPE_CHECKING: + import matplotlib.pyplot as plt # type hints only; optional at runtime from sklearn.ensemble import BaggingClassifier -from sklearn.tree import DecisionTreeClassifier -from sklearn.metrics import accuracy_score from sklearn.exceptions import NotFittedError -from scipy.stats import norm -from typing import List, Optional, Tuple, Dict, Any +from sklearn.metrics import accuracy_score +from sklearn.tree import DecisionTreeClassifier class BaggingClassifierAccuracy: @@ -136,7 +140,7 @@ def calculate_c_i(self, X: pd.DataFrame, y: pd.Series) -> np.ndarray: self.c_i_scores_ = np.array(c_i_scores) return self.c_i_scores_ - def calculate_weights(self, X: pd.DataFrame, y: pd.Series) -> Dict[str, np.ndarray]: + def calculate_weights(self, X: pd.DataFrame, y: pd.Series) -> dict[str, np.ndarray]: """ Calculates weights for each estimator based on three schemes: 1. Uniform (w_i = 1/N) @@ -236,7 +240,7 @@ def evaluate_all_schemes( y_test: pd.Series, X_train: pd.DataFrame, y_train: pd.Series, - ) -> Dict[str, float]: + ) -> dict[str, float]: """ Fits, calculates weights, and evaluates accuracy for all three weighting schemes. @@ -277,7 +281,7 @@ def evaluate_all_schemes( def calculate_bootstrap_accuracy( clf: BaggingClassifier, X: pd.DataFrame, y: pd.Series, n_bootstraps: int = 1000 -) -> Tuple[np.ndarray, float, float]: +) -> tuple[np.ndarray, float, float]: """ Calculates the accuracy of a bagging classifier over multiple bootstrapped samples of the test set. diff --git a/RiskLabAI/features/__init__.py b/RiskLabAI/features/__init__.py index 92c1df0..b3ae6a1 100644 --- a/RiskLabAI/features/__init__.py +++ b/RiskLabAI/features/__init__.py @@ -1,4 +1,6 @@ -from . import entropy_features -from . import feature_importance -from . import microstructural_features -from . import structural_breaks +from . import ( + entropy_features, + feature_importance, + microstructural_features, + structural_breaks, +) diff --git a/RiskLabAI/features/entropy_features/__init__.py b/RiskLabAI/features/entropy_features/__init__.py index b725cf1..34cca69 100644 --- a/RiskLabAI/features/entropy_features/__init__.py +++ b/RiskLabAI/features/entropy_features/__init__.py @@ -5,11 +5,11 @@ as Shannon, Lempel-Ziv, and Kontoyiannis. """ -from .shannon import shannon_entropy +from .kontoyiannis import kontoyiannis_entropy, longest_match_length from .lempel_ziv import lempel_ziv_entropy -from .pmf import probability_mass_function from .plug_in import plug_in_entropy_estimator -from .kontoyiannis import kontoyiannis_entropy, longest_match_length +from .pmf import probability_mass_function +from .shannon import shannon_entropy __all__ = [ "shannon_entropy", diff --git a/RiskLabAI/features/entropy_features/kontoyiannis.py b/RiskLabAI/features/entropy_features/kontoyiannis.py index 84fe66e..e1fc903 100644 --- a/RiskLabAI/features/entropy_features/kontoyiannis.py +++ b/RiskLabAI/features/entropy_features/kontoyiannis.py @@ -3,10 +3,10 @@ """ from math import log2 -from typing import Tuple, Optional +from typing import Optional -def longest_match_length(message: str, i: int, n: int) -> Tuple[int, str]: +def longest_match_length(message: str, i: int, n: int) -> tuple[int, str]: """ Find the length of the longest match for the substring starting at `i`. @@ -29,25 +29,25 @@ def longest_match_length(message: str, i: int, n: int) -> Tuple[int, str]: - The matched substring. """ longest_match = "" - # Iterate through possible lengths `l` - for l in range(1, n + 1): - pattern = message[i : i + l] + # Iterate through possible match lengths + for length in range(1, n + 1): + pattern = message[i : i + length] # Stop if pattern goes beyond message length - if i + l > len(message): + if i + length > len(message): break found = False # Look back in the window [max(0, i-n), i-1] for j in range(max(0, i - n), i): - candidate = message[j : j + l] + candidate = message[j : j + length] if pattern == candidate: longest_match = pattern found = True break - # If pattern of length `l` was not found, the - # longest match was of length `l-1`. + # If a pattern of this length was not found, the + # longest match was one shorter. if not found: break diff --git a/RiskLabAI/features/entropy_features/plug_in.py b/RiskLabAI/features/entropy_features/plug_in.py index f30b020..cce70e7 100644 --- a/RiskLabAI/features/entropy_features/plug_in.py +++ b/RiskLabAI/features/entropy_features/plug_in.py @@ -3,7 +3,7 @@ """ from math import log2 -from typing import Dict + from .pmf import probability_mass_function diff --git a/RiskLabAI/features/entropy_features/pmf.py b/RiskLabAI/features/entropy_features/pmf.py index 434e184..ee369c2 100644 --- a/RiskLabAI/features/entropy_features/pmf.py +++ b/RiskLabAI/features/entropy_features/pmf.py @@ -3,12 +3,11 @@ """ from collections import Counter -from typing import Dict def probability_mass_function( message: str, approximate_word_length: int -) -> Dict[str, float]: +) -> dict[str, float]: """ Calculate the Probability Mass Function (PMF) of n-grams. diff --git a/RiskLabAI/features/feature_importance/__init__.py b/RiskLabAI/features/feature_importance/__init__.py index 816d2c0..90d73c0 100644 --- a/RiskLabAI/features/feature_importance/__init__.py +++ b/RiskLabAI/features/feature_importance/__init__.py @@ -15,21 +15,22 @@ simulates_cov_mu, ) -# Core Strategy Pattern -from .feature_importance_strategy import FeatureImportanceStrategy -from .feature_importance_factory import FeatureImportanceFactory +from .clustered_feature_importance_mda import ClusteredFeatureImportanceMDA +from .clustered_feature_importance_mdi import ClusteredFeatureImportanceMDI from .feature_importance_controller import FeatureImportanceController +from .feature_importance_factory import FeatureImportanceFactory +from .feature_importance_mda import FeatureImportanceMDA # Strategy Implementations from .feature_importance_mdi import FeatureImportanceMDI -from .clustered_feature_importance_mdi import ClusteredFeatureImportanceMDI -from .feature_importance_mda import FeatureImportanceMDA -from .clustered_feature_importance_mda import ClusteredFeatureImportanceMDA from .feature_importance_sfi import FeatureImportanceSFI +# Core Strategy Pattern +from .feature_importance_strategy import FeatureImportanceStrategy + # Utility Functions for this module from .generate_synthetic_data import get_test_dataset -from .orthogonal_features import orthogonal_features, _compute_eigenvectors +from .orthogonal_features import _compute_eigenvectors, orthogonal_features from .weighted_tau import calculate_weighted_tau # Placeholder for future modules (from original file) diff --git a/RiskLabAI/features/feature_importance/clustered_feature_importance_mda.py b/RiskLabAI/features/feature_importance/clustered_feature_importance_mda.py index 785cf0b..992d894 100644 --- a/RiskLabAI/features/feature_importance/clustered_feature_importance_mda.py +++ b/RiskLabAI/features/feature_importance/clustered_feature_importance_mda.py @@ -3,12 +3,13 @@ """ import logging -from typing import Dict, Tuple, List, Any +from typing import Any + import numpy as np import pandas as pd -from sklearn.ensemble import RandomForestClassifier -from sklearn.model_selection import KFold from sklearn.metrics import log_loss +from sklearn.model_selection import KFold + from .feature_importance_strategy import FeatureImportanceStrategy logger = logging.getLogger(__name__) @@ -25,7 +26,7 @@ class ClusteredFeatureImportanceMDA(FeatureImportanceStrategy): def __init__( self, classifier: object, - clusters: Dict[str, List[str]], + clusters: dict[str, list[str]], n_splits: int = 10, random_state: int = 42, ): diff --git a/RiskLabAI/features/feature_importance/clustered_feature_importance_mdi.py b/RiskLabAI/features/feature_importance/clustered_feature_importance_mdi.py index 73fea1e..d353a8a 100644 --- a/RiskLabAI/features/feature_importance/clustered_feature_importance_mdi.py +++ b/RiskLabAI/features/feature_importance/clustered_feature_importance_mdi.py @@ -2,10 +2,12 @@ Computes Clustered Mean Decrease Impurity (MDI) feature importance. """ -import pandas as pd +from typing import Any + import numpy as np +import pandas as pd from sklearn.ensemble import BaseEnsemble -from typing import Dict, List, Any + from .feature_importance_strategy import FeatureImportanceStrategy @@ -19,7 +21,7 @@ class ClusteredFeatureImportanceMDI(FeatureImportanceStrategy): def __init__( self, classifier: BaseEnsemble, - clusters: Dict[str, List[str]], + clusters: dict[str, list[str]], ): """ Initialize the strategy. @@ -38,7 +40,7 @@ def __init__( self.clusters = clusters def _group_mean_std( - self, dataframe: pd.DataFrame, clusters: Dict[str, List[str]] + self, dataframe: pd.DataFrame, clusters: dict[str, list[str]] ) -> pd.DataFrame: """ Calculate the mean and standard deviation for cluster importances. diff --git a/RiskLabAI/features/feature_importance/feature_importance_controller.py b/RiskLabAI/features/feature_importance/feature_importance_controller.py index 8eb656f..2483b04 100644 --- a/RiskLabAI/features/feature_importance/feature_importance_controller.py +++ b/RiskLabAI/features/feature_importance/feature_importance_controller.py @@ -2,8 +2,10 @@ Controller class to manage various feature importance strategies. """ -from typing import Any, Dict +from typing import Any + import pandas as pd + from .feature_importance_factory import FeatureImportanceFactory from .feature_importance_strategy import FeatureImportanceStrategy diff --git a/RiskLabAI/features/feature_importance/feature_importance_factory.py b/RiskLabAI/features/feature_importance/feature_importance_factory.py index 510660b..df1d622 100644 --- a/RiskLabAI/features/feature_importance/feature_importance_factory.py +++ b/RiskLabAI/features/feature_importance/feature_importance_factory.py @@ -2,13 +2,14 @@ Factory class for creating feature importance strategy objects. """ -from typing import Any, Dict, List, Type -from .feature_importance_strategy import FeatureImportanceStrategy -from .feature_importance_mdi import FeatureImportanceMDI +from typing import Any + +from .clustered_feature_importance_mda import ClusteredFeatureImportanceMDA from .clustered_feature_importance_mdi import ClusteredFeatureImportanceMDI from .feature_importance_mda import FeatureImportanceMDA -from .clustered_feature_importance_mda import ClusteredFeatureImportanceMDA +from .feature_importance_mdi import FeatureImportanceMDI from .feature_importance_sfi import FeatureImportanceSFI +from .feature_importance_strategy import FeatureImportanceStrategy class FeatureImportanceFactory: @@ -44,7 +45,7 @@ def create_feature_importance( If an invalid `strategy_type` is provided. """ - strategies: Dict[str, Type[FeatureImportanceStrategy]] = { + strategies: dict[str, type[FeatureImportanceStrategy]] = { "MDI": FeatureImportanceMDI, "ClusteredMDI": ClusteredFeatureImportanceMDI, "MDA": FeatureImportanceMDA, diff --git a/RiskLabAI/features/feature_importance/feature_importance_mda.py b/RiskLabAI/features/feature_importance/feature_importance_mda.py index 4c5bd59..7047dec 100644 --- a/RiskLabAI/features/feature_importance/feature_importance_mda.py +++ b/RiskLabAI/features/feature_importance/feature_importance_mda.py @@ -3,11 +3,13 @@ """ import logging +from typing import Any + import numpy as np import pandas as pd from sklearn.metrics import log_loss from sklearn.model_selection import KFold -from typing import List, Optional, Any, Callable + from .feature_importance_strategy import FeatureImportanceStrategy logger = logging.getLogger(__name__) diff --git a/RiskLabAI/features/feature_importance/feature_importance_mdi.py b/RiskLabAI/features/feature_importance/feature_importance_mdi.py index 0a0f92b..f45af92 100644 --- a/RiskLabAI/features/feature_importance/feature_importance_mdi.py +++ b/RiskLabAI/features/feature_importance/feature_importance_mdi.py @@ -2,11 +2,12 @@ Computes Mean Decrease Impurity (MDI) feature importance. """ -import pandas as pd +from typing import Any + import numpy as np -from typing import List, Optional, Union, Any -from sklearn.ensemble import BaseEnsemble +import pandas as pd from sklearn.ensemble import BaseEnsemble + from .feature_importance_strategy import FeatureImportanceStrategy diff --git a/RiskLabAI/features/feature_importance/feature_importance_sfi.py b/RiskLabAI/features/feature_importance/feature_importance_sfi.py index b0ec307..2b0e93e 100644 --- a/RiskLabAI/features/feature_importance/feature_importance_sfi.py +++ b/RiskLabAI/features/feature_importance/feature_importance_sfi.py @@ -2,11 +2,13 @@ Computes Single Feature Importance (SFI). """ -import pandas as pd +from typing import Any + import numpy as np -from sklearn.metrics import log_loss, accuracy_score +import pandas as pd +from sklearn.metrics import accuracy_score, log_loss from sklearn.model_selection import KFold -from typing import List, Optional, Union, Any + from .feature_importance_strategy import FeatureImportanceStrategy diff --git a/RiskLabAI/features/feature_importance/feature_importance_strategy.py b/RiskLabAI/features/feature_importance/feature_importance_strategy.py index c06c548..982dec9 100644 --- a/RiskLabAI/features/feature_importance/feature_importance_strategy.py +++ b/RiskLabAI/features/feature_importance/feature_importance_strategy.py @@ -3,9 +3,10 @@ """ from abc import ABC, abstractmethod -import pandas as pd from typing import Any +import pandas as pd + class FeatureImportanceStrategy(ABC): """ diff --git a/RiskLabAI/features/feature_importance/generate_synthetic_data.py b/RiskLabAI/features/feature_importance/generate_synthetic_data.py index 3637656..67e625e 100644 --- a/RiskLabAI/features/feature_importance/generate_synthetic_data.py +++ b/RiskLabAI/features/feature_importance/generate_synthetic_data.py @@ -7,7 +7,6 @@ import numpy as np import pandas as pd from sklearn.datasets import make_classification -from typing import Tuple def get_test_dataset( @@ -17,7 +16,7 @@ def get_test_dataset( n_samples: int = 10000, random_state: int = 0, sigma_std: float = 0.0, -) -> Tuple[pd.DataFrame, pd.Series]: +) -> tuple[pd.DataFrame, pd.Series]: """ Generate a synthetic dataset with informative, redundant, and noise features. diff --git a/RiskLabAI/features/feature_importance/orthogonal_features.py b/RiskLabAI/features/feature_importance/orthogonal_features.py index 37a659c..8136c8b 100644 --- a/RiskLabAI/features/feature_importance/orthogonal_features.py +++ b/RiskLabAI/features/feature_importance/orthogonal_features.py @@ -5,9 +5,8 @@ De Prado, M. (2018) Advances in financial machine learning. """ -import pandas as pd import numpy as np -from typing import Tuple +import pandas as pd def _compute_eigenvectors( @@ -58,7 +57,7 @@ def _compute_eigenvectors( def orthogonal_features( features: pd.DataFrame, variance_threshold: float = 0.95 -) -> Tuple[pd.DataFrame, pd.DataFrame]: +) -> tuple[pd.DataFrame, pd.DataFrame]: """ Compute orthogonal features using PCA. diff --git a/RiskLabAI/features/feature_importance/weighted_tau.py b/RiskLabAI/features/feature_importance/weighted_tau.py index e024624..b5f271e 100644 --- a/RiskLabAI/features/feature_importance/weighted_tau.py +++ b/RiskLabAI/features/feature_importance/weighted_tau.py @@ -2,8 +2,8 @@ Calculates the weighted Kendall's tau. """ -import scipy.stats as stats import numpy as np +import scipy.stats as stats def calculate_weighted_tau( diff --git a/RiskLabAI/features/microstructural_features/__init__.py b/RiskLabAI/features/microstructural_features/__init__.py index f1f2f57..9ac17ee 100644 --- a/RiskLabAI/features/microstructural_features/__init__.py +++ b/RiskLabAI/features/microstructural_features/__init__.py @@ -5,15 +5,15 @@ such as the Corwin-Schultz spread and Bekker-Parkinson volatility. """ +from .bekker_parkinson_volatility_estimator import ( + bekker_parkinson_volatility_estimates, + sigma_estimates, +) from .corwin_schultz import ( - beta_estimates, - gamma_estimates, alpha_estimates, + beta_estimates, corwin_schultz_estimator, -) -from .bekker_parkinson_volatility_estimator import ( - sigma_estimates, - bekker_parkinson_volatility_estimates, + gamma_estimates, ) __all__ = [ diff --git a/RiskLabAI/features/microstructural_features/bekker_parkinson_volatility_estimator.py b/RiskLabAI/features/microstructural_features/bekker_parkinson_volatility_estimator.py index d0afdb7..ad513f0 100644 --- a/RiskLabAI/features/microstructural_features/bekker_parkinson_volatility_estimator.py +++ b/RiskLabAI/features/microstructural_features/bekker_parkinson_volatility_estimator.py @@ -8,9 +8,11 @@ """ from math import pi -import pandas as pd + import numpy as np -from .corwin_schultz import beta_estimates, gamma_estimates, _DENOMINATOR +import pandas as pd + +from .corwin_schultz import _DENOMINATOR, beta_estimates, gamma_estimates def sigma_estimates(beta: pd.Series, gamma: pd.Series) -> pd.Series: diff --git a/RiskLabAI/features/structural_breaks/__init__.py b/RiskLabAI/features/structural_breaks/__init__.py index 6752482..72bc6c7 100644 --- a/RiskLabAI/features/structural_breaks/__init__.py +++ b/RiskLabAI/features/structural_breaks/__init__.py @@ -7,11 +7,11 @@ """ from .structural_breaks import ( - lag_dataframe, - prepare_data, compute_beta, - get_expanding_window_adf, get_bsadf_statistic, + get_expanding_window_adf, + lag_dataframe, + prepare_data, ) __all__ = [ diff --git a/RiskLabAI/features/structural_breaks/structural_breaks.py b/RiskLabAI/features/structural_breaks/structural_breaks.py index 0f1f405..2c3c486 100644 --- a/RiskLabAI/features/structural_breaks/structural_breaks.py +++ b/RiskLabAI/features/structural_breaks/structural_breaks.py @@ -7,13 +7,14 @@ John Wiley & Sons, Chapter 17. """ +from typing import Any, Union + import numpy as np import pandas as pd -from typing import List, Union, Tuple, Dict, Any def lag_dataframe( - market_data: pd.DataFrame, lags: Union[int, List[int]] + market_data: pd.DataFrame, lags: Union[int, list[int]] ) -> pd.DataFrame: """ Apply lags to a DataFrame. @@ -51,7 +52,7 @@ def lag_dataframe( def prepare_data( log_price_series: pd.Series, constant: str, lags: int -) -> Tuple[pd.DataFrame, pd.DataFrame]: +) -> tuple[pd.DataFrame, pd.DataFrame]: """ Prepare the y and X matrices for ADF regression. @@ -116,7 +117,7 @@ def prepare_data( def compute_beta( y_window: np.ndarray, x_window: np.ndarray -) -> Tuple[np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray]: """ Compute OLS beta coefficients and their variance. @@ -217,7 +218,7 @@ def get_bsadf_statistic( min_sample_length: int, constant: str, lags: int, -) -> Dict[str, Any]: +) -> dict[str, Any]: """ Compute the Backward Supremum ADF (BSADF) statistic. diff --git a/RiskLabAI/hpc/__init__.py b/RiskLabAI/hpc/__init__.py index a1d61b9..b62b19e 100644 --- a/RiskLabAI/hpc/__init__.py +++ b/RiskLabAI/hpc/__init__.py @@ -5,14 +5,14 @@ """ from .hpc import ( - report_progress, expand_call, - process_jobs, - process_jobs_sequential, linear_partitions, - nested_partitions, mp_pandas_obj, + nested_partitions, parallel_run, + process_jobs, + process_jobs_sequential, + report_progress, ) __all__ = [ diff --git a/RiskLabAI/hpc/hpc.py b/RiskLabAI/hpc/hpc.py index 8456e6d..b9d06b9 100644 --- a/RiskLabAI/hpc/hpc.py +++ b/RiskLabAI/hpc/hpc.py @@ -5,14 +5,16 @@ on pandas objects, using Python's multiprocessing and Joblib. """ +import datetime as dt import logging import multiprocessing as mp -import pandas as pd -import numpy as np -import datetime as dt import time +from collections.abc import Iterable +from typing import Any, Callable, Optional, Union + import joblib # <-- Added missing import -from typing import List, Dict, Any, Callable, Tuple, Union, Iterable, Optional +import numpy as np +import pandas as pd logger = logging.getLogger(__name__) @@ -23,7 +25,7 @@ def parallel_run( num_cpus: int = -1, lin_partition: bool = False, **kwargs, -) -> List[Any]: +) -> list[Any]: """ Executes a function in parallel over an iterable using Joblib. @@ -121,7 +123,7 @@ def report_progress( logger.info(message) -def expand_call(kargs: Dict[str, Any]) -> Any: +def expand_call(kargs: dict[str, Any]) -> Any: """ Wrapper function to expand keyword arguments for a callback. @@ -144,8 +146,8 @@ def expand_call(kargs: Dict[str, Any]) -> Any: def process_jobs( - jobs: List[Dict[str, Any]], task: Optional[str] = None, num_threads: int = -1 -) -> List[Any]: + jobs: list[dict[str, Any]], task: Optional[str] = None, num_threads: int = -1 +) -> list[Any]: """ Process a list of jobs in parallel using multiprocessing. @@ -194,7 +196,7 @@ def process_jobs( return outputs -def process_jobs_sequential(jobs: List[Dict[str, Any]]) -> List[Any]: +def process_jobs_sequential(jobs: list[dict[str, Any]]) -> list[Any]: """ Run jobs sequentially (single-thread). Useful for debugging. @@ -301,12 +303,12 @@ def nested_partitions( def mp_pandas_obj( func: Callable[..., pd.Series], - pandas_object: Tuple[str, pd.Index], + pandas_object: tuple[str, pd.Index], num_threads: int = -1, mp_batches: int = 1, linear_partition: bool = True, **kwargs: Any, -) -> Union[pd.DataFrame, pd.Series, List[Any]]: +) -> Union[pd.DataFrame, pd.Series, list[Any]]: """ Parallelize a function call on a pandas object (DataFrame/Series). diff --git a/RiskLabAI/optimization/__init__.py b/RiskLabAI/optimization/__init__.py index f5d7809..f316602 100644 --- a/RiskLabAI/optimization/__init__.py +++ b/RiskLabAI/optimization/__init__.py @@ -8,13 +8,19 @@ - Custom Hyper-Parameter Tuning """ +from .hedging import ( + pca_weights, +) from .hrp import ( cluster_variance, + hrp, quasi_diagonal, recursive_bisection, - hrp, ) - +from .hyper_parameter_tuning import ( + MyPipeline, + clf_hyper_fit, +) from .nco import ( # cluster_kmeans_base is imported into nco.py, not defined there. # It should be imported from RiskLabAI.cluster.clustering directly @@ -23,15 +29,6 @@ get_optimal_portfolio_weights_nco, ) -from .hedging import ( - pca_weights, -) - -from .hyper_parameter_tuning import ( - MyPipeline, - clf_hyper_fit, -) - __all__ = [ # hrp.py "cluster_variance", diff --git a/RiskLabAI/optimization/hedging.py b/RiskLabAI/optimization/hedging.py index 2164c1b..b4a4986 100644 --- a/RiskLabAI/optimization/hedging.py +++ b/RiskLabAI/optimization/hedging.py @@ -2,9 +2,10 @@ Implements PCA-based portfolio hedging techniques. """ -import numpy as np from typing import Optional +import numpy as np + def pca_weights( cov: np.ndarray, diff --git a/RiskLabAI/optimization/hrp.py b/RiskLabAI/optimization/hrp.py index 208bff5..d1e7527 100644 --- a/RiskLabAI/optimization/hrp.py +++ b/RiskLabAI/optimization/hrp.py @@ -10,7 +10,6 @@ import pandas as pd import scipy.cluster.hierarchy as sch import scipy.spatial.distance as scd -from typing import List def inverse_variance_weights(covariance_matrix: pd.DataFrame) -> np.ndarray: @@ -33,7 +32,7 @@ def inverse_variance_weights(covariance_matrix: pd.DataFrame) -> np.ndarray: def cluster_variance( - covariance_matrix: pd.DataFrame, clustered_items: List[str] + covariance_matrix: pd.DataFrame, clustered_items: list[str] ) -> float: """ Compute the variance of a cluster using inverse-variance weighting. @@ -58,7 +57,7 @@ def cluster_variance( return cluster_var -def quasi_diagonal(linkage_matrix: np.ndarray) -> List[int]: +def quasi_diagonal(linkage_matrix: np.ndarray) -> list[int]: """ Return a sorted list of original item indices for a quasi-diagonal matrix. @@ -96,7 +95,7 @@ def quasi_diagonal(linkage_matrix: np.ndarray) -> List[int]: def recursive_bisection( - covariance_matrix: pd.DataFrame, sorted_items: List[str] + covariance_matrix: pd.DataFrame, sorted_items: list[str] ) -> pd.Series: """ Compute the Hierarchical Risk Parity (HRP) weights diff --git a/RiskLabAI/optimization/hyper_parameter_tuning.py b/RiskLabAI/optimization/hyper_parameter_tuning.py index 5752c62..d797350 100644 --- a/RiskLabAI/optimization/hyper_parameter_tuning.py +++ b/RiskLabAI/optimization/hyper_parameter_tuning.py @@ -3,12 +3,13 @@ and the custom PurgedKFold cross-validators. """ +from typing import Any, Optional, Union + import numpy as np import pandas as pd -from sklearn.model_selection import GridSearchCV, RandomizedSearchCV from sklearn.ensemble import BaggingClassifier +from sklearn.model_selection import GridSearchCV, RandomizedSearchCV from sklearn.pipeline import Pipeline -from typing import Dict, Any, List, Optional, Union # Import the controller from the refactored validation module from RiskLabAI.backtest.validation import CrossValidatorController @@ -59,10 +60,10 @@ def clf_hyper_fit( label: pd.Series, times: pd.Series, pipe_clf: Pipeline, - param_grid: Dict[str, Any], + param_grid: dict[str, Any], validator_type: str = "purgedkfold", - validator_params: Optional[Dict[str, Any]] = None, - bagging: Optional[List[Union[int, float]]] = None, + validator_params: Optional[dict[str, Any]] = None, + bagging: Optional[list[Union[int, float]]] = None, rnd_search_iter: int = 0, n_jobs: int = -1, **fit_params, diff --git a/RiskLabAI/optimization/nco.py b/RiskLabAI/optimization/nco.py index 8c92d2d..bf5df86 100644 --- a/RiskLabAI/optimization/nco.py +++ b/RiskLabAI/optimization/nco.py @@ -5,9 +5,10 @@ De Prado, M. (2018) Advances in financial machine learning. """ +from typing import Optional + import numpy as np import pandas as pd -from typing import Optional, Tuple, Dict, List # Import canonical implementations instead of duplicating from RiskLabAI.cluster.clustering import cluster_k_means_base, covariance_to_correlation diff --git a/RiskLabAI/pde/__init__.py b/RiskLabAI/pde/__init__.py index f1a94ec..8e943b1 100644 --- a/RiskLabAI/pde/__init__.py +++ b/RiskLabAI/pde/__init__.py @@ -20,30 +20,30 @@ ) from _exc from .equation import ( - Equation, - PricingDefaultRisk, HJBLQ, BlackScholesBarenblatt, + Equation, + PricingDefaultRisk, PricingDiffRate, ) from .model import ( - TimeNet, - Net1, - MAB, - SAB, ISAB, + MAB, PMA, - TimeNetForSet, + SAB, + DeepBSDE, DeepTimeSetTransformer, FBSNNNetwork, - DeepBSDE, + Net1, TimeDependentNetwork, TimeDependentNetworkMonteCarlo, + TimeNet, + TimeNetForSet, ) from .solver import ( - initialize_weights, FBSDESolver, FBSNNolver, # Note: Typo in original filename? + initialize_weights, ) __all__ = [ diff --git a/RiskLabAI/pde/equation.py b/RiskLabAI/pde/equation.py index 1d01c19..5426077 100644 --- a/RiskLabAI/pde/equation.py +++ b/RiskLabAI/pde/equation.py @@ -5,11 +5,12 @@ Based on: https://github.com/frankhan91/DeepBSDE """ +from typing import Optional, Union + +import numpy as np import torch import torch.nn as nn from torch import Tensor -import numpy as np -from typing import Tuple, Optional, Union class Equation: @@ -36,7 +37,7 @@ def __init__(self, eqn_config: dict): self.sqrt_delta_t: float = np.sqrt(self.delta_t) self.y_init: Optional[float] = None - def sample(self, num_sample: int) -> Tuple[np.ndarray, np.ndarray]: + def sample(self, num_sample: int) -> tuple[np.ndarray, np.ndarray]: """ Sample the forward SDE (e.g., the underlying asset path). @@ -132,7 +133,7 @@ class PricingDefaultRisk(Equation): """ def __init__(self, eqn_config: dict): - super(PricingDefaultRisk, self).__init__(eqn_config) + super().__init__(eqn_config) self.x_init = np.ones(self.dim) * 100.0 self.sigma = 0.2 self.rate = 0.02 # R @@ -145,7 +146,7 @@ def __init__(self, eqn_config: dict): self.vl = 70.0 self.slope = (self.gammah - self.gammal) / (self.vh - self.vl) - def sample(self, num_sample: int) -> Tuple[np.ndarray, np.ndarray]: + def sample(self, num_sample: int) -> tuple[np.ndarray, np.ndarray]: dw_sample = ( np.random.normal(size=[num_sample, self.dim, self.num_time_interval]) * self.sqrt_delta_t @@ -185,12 +186,12 @@ class HJBLQ(Equation): """ def __init__(self, eqn_config: dict): - super(HJBLQ, self).__init__(eqn_config) + super().__init__(eqn_config) self.x_init = np.zeros(self.dim) self.sigma = np.sqrt(2.0) self.lambd = 1.0 - def sample(self, num_sample: int) -> Tuple[np.ndarray, np.ndarray]: + def sample(self, num_sample: int) -> tuple[np.ndarray, np.ndarray]: dw_sample = ( np.random.normal(size=[num_sample, self.dim, self.num_time_interval]) * self.sqrt_delta_t @@ -224,7 +225,7 @@ class BlackScholesBarenblatt(Equation): """ def __init__(self, eqn_config: dict): - super(BlackScholesBarenblatt, self).__init__(eqn_config) + super().__init__(eqn_config) self.x_init = np.ones(self.dim) * np.array( [1.0 / (1.0 + i % 2) for i in range(self.dim)] ) @@ -232,7 +233,7 @@ def __init__(self, eqn_config: dict): self.rate = 0.05 # interest rate R self.mu_bar = 0.0 - def sample(self, num_sample: int) -> Tuple[np.ndarray, np.ndarray]: + def sample(self, num_sample: int) -> tuple[np.ndarray, np.ndarray]: dw_sample = ( np.random.normal(size=(num_sample, self.dim, self.num_time_interval)) * self.sqrt_delta_t @@ -268,7 +269,7 @@ class PricingDiffRate(Equation): """ def __init__(self, eqn_config: dict): - super(PricingDiffRate, self).__init__(eqn_config) + super().__init__(eqn_config) self.x_init = np.ones(self.dim) * 100 self.sigma = 0.2 self.mu_bar = 0.06 @@ -276,7 +277,7 @@ def __init__(self, eqn_config: dict): self.rb = 0.06 self.alpha = 1.0 / self.dim - def sample(self, num_sample: int) -> Tuple[np.ndarray, np.ndarray]: + def sample(self, num_sample: int) -> tuple[np.ndarray, np.ndarray]: dw_sample = ( np.random.normal(size=[num_sample, self.dim, self.num_time_interval]) * self.sqrt_delta_t diff --git a/RiskLabAI/pde/model.py b/RiskLabAI/pde/model.py index d7d118c..beef487 100644 --- a/RiskLabAI/pde/model.py +++ b/RiskLabAI/pde/model.py @@ -9,13 +9,13 @@ Set-Transformer code based on: https://github.com/juho-lee/set_transformer """ +import math + import torch import torch.nn as nn import torch.nn.functional as F from torch import Tensor -from torch.nn import Module, Linear, BatchNorm1d, Tanh, ReLU -import math -from typing import List +from torch.nn import BatchNorm1d, Linear, Module, ReLU, Tanh # --- Set Transformer Components --- @@ -26,7 +26,7 @@ class MAB(Module): def __init__( self, dim_q: int, dim_k: int, dim_v: int, num_heads: int, ln: bool = False ): - super(MAB, self).__init__() + super().__init__() self.dim_v = dim_v self.num_heads = num_heads self.fc_q = Linear(dim_q, dim_v) @@ -58,7 +58,7 @@ class SAB(Module): """Self-Attention Block (SAB).""" def __init__(self, dim_in: int, dim_out: int, num_heads: int, ln: bool = False): - super(SAB, self).__init__() + super().__init__() self.mab = MAB(dim_in, dim_in, dim_out, num_heads, ln=ln) def forward(self, x: Tensor) -> Tensor: @@ -71,7 +71,7 @@ class ISAB(Module): def __init__( self, dim_in: int, dim_out: int, num_heads: int, num_inds: int, ln: bool = False ): - super(ISAB, self).__init__() + super().__init__() self.i = nn.Parameter(torch.Tensor(1, num_inds, dim_out)) nn.init.xavier_uniform_(self.i) self.mab0 = MAB(dim_out, dim_in, dim_out, num_heads, ln=ln) @@ -86,7 +86,7 @@ class PMA(Module): """Pooling Multi-Head Attention (PMA).""" def __init__(self, dim: int, num_heads: int, num_seeds: int, ln: bool = False): - super(PMA, self).__init__() + super().__init__() self.s = nn.Parameter(torch.Tensor(1, num_seeds, dim)) nn.init.xavier_uniform_(self.s) self.mab = MAB(dim, dim, dim, num_heads, ln=ln) @@ -104,7 +104,7 @@ class TimeNetForSet(Module): """ def __init__(self, in_features: int = 1, out_features: int = 64): - super(TimeNetForSet, self).__init__() + super().__init__() self.feature_layer = Linear(in_features, out_features) self.time_layer1 = Linear(1, 10) @@ -131,7 +131,7 @@ class DeepTimeSetTransformer(Module): """ def __init__(self, input_dim: int): - super(DeepTimeSetTransformer, self).__init__() + super().__init__() # Feature extractor layers self.layer1 = Linear(input_dim, 32) @@ -183,7 +183,7 @@ class TimeNet(Module): """Simple feed-forward network for time features.""" def __init__(self, output_dim: int): - super(TimeNet, self).__init__() + super().__init__() self.layers = nn.ModuleList( [ Linear(4, 100), @@ -206,7 +206,7 @@ class Net1(Module): """A simple Linear + BatchNorm layer.""" def __init__(self, input_dim: int, output_dim: int): - super(Net1, self).__init__() + super().__init__() self.layer = Linear(input_dim, output_dim) self.bn = BatchNorm1d(output_dim) # Note: bn is not used in forward @@ -220,8 +220,8 @@ def forward(self, x: Tensor) -> Tensor: class FBSNNNetwork(Module): """Feed-forward network for the FBSNN solver.""" - def __init__(self, layer_sizes: List[int]): - super(FBSNNNetwork, self).__init__() + def __init__(self, layer_sizes: list[int]): + super().__init__() self.n_layer = len(layer_sizes) - 1 self.layers = nn.ModuleList([]) @@ -239,8 +239,8 @@ def forward(self, x: Tensor) -> Tensor: class DeepBSDE(Module): """Network for the Deep BSDE method (one net per time step).""" - def __init__(self, layer_sizes: List[int]): - super(DeepBSDE, self).__init__() + def __init__(self, layer_sizes: list[int]): + super().__init__() self.n_layer = len(layer_sizes) - 1 self.layers = nn.ModuleList([]) self.batch_layer = nn.ModuleList([]) @@ -269,8 +269,8 @@ def forward(self, x: Tensor) -> Tensor: class TimeDependentNetwork(Module): """Time-dependent network for BSDE solver.""" - def __init__(self, indim: int, layersize: List[int], outdim: int): - super(TimeDependentNetwork, self).__init__() + def __init__(self, indim: int, layersize: list[int], outdim: int): + super().__init__() self.n_layer = len(layersize) self.layers = nn.ModuleList([]) self.time_layer = nn.ModuleList([]) @@ -314,8 +314,8 @@ def forward(self, t: Tensor, x: Tensor) -> Tensor: class TimeDependentNetworkMonteCarlo(Module): """Time-dependent network for BSDE solver with Monte Carlo gradient.""" - def __init__(self, indim: int, layersize: List[int], outdim: int, sigma: float): - super(TimeDependentNetworkMonteCarlo, self).__init__() + def __init__(self, indim: int, layersize: list[int], outdim: int, sigma: float): + super().__init__() self.n_layer = len(layersize) self.layers = nn.ModuleList([]) self.time_layer = nn.ModuleList([]) diff --git a/RiskLabAI/pde/solver.py b/RiskLabAI/pde/solver.py index 177fefe..468e779 100644 --- a/RiskLabAI/pde/solver.py +++ b/RiskLabAI/pde/solver.py @@ -6,13 +6,11 @@ import logging import torch -import torch.nn as nn import torch.autograd as autograd -import numpy as np -from typing import List, Tuple, Any +import torch.nn as nn -from RiskLabAI.pde.model import * from RiskLabAI.pde.equation import Equation +from RiskLabAI.pde.model import * logger = logging.getLogger(__name__) @@ -39,7 +37,7 @@ class FBSDESolver: def __init__( self, pde: Equation, - layer_sizes: List[int], + layer_sizes: list[int], learning_rate: float, solving_method: str, device: torch.device, @@ -98,7 +96,7 @@ def __init__( def compute_loss( self, y_path: Tensor, dw_path: Tensor, t: Tensor, init_y: Tensor, init_z: Tensor - ) -> Tuple[Tensor, ...]: + ) -> tuple[Tensor, ...]: """ Compute the loss for a single batch. """ @@ -159,7 +157,7 @@ def compute_loss( def solve( self, num_iterations: int, batch_size: int, init_y: float - ) -> Tuple[List[float], List[float]]: + ) -> tuple[list[float], list[float]]: """ Solves the PDE. @@ -257,7 +255,7 @@ class FBSNNolver: def __init__( self, pde: Equation, - layer_sizes: List[int], + layer_sizes: list[int], learning_rate: float, device: torch.device, ): @@ -292,7 +290,7 @@ def compute_loss( Compute the loss for the FBSNN method. """ batch_size = y_path.size()[0] - y_terminal = init_y.expand(batch_size, 1) # Not used, but kept for signature + init_y.expand(batch_size, 1) # Not used, but kept for signature loss = torch.tensor(0.0, device=self.device) for z in range(self.pde.num_time_interval): @@ -344,7 +342,7 @@ def solve( num_iterations: int, batch_size: int, init_y: float, # Note: init_y is not used by FBSNN, but kept for API - ) -> Tuple[List[float], List[float]]: + ) -> tuple[list[float], list[float]]: """ Solves the PDE using the FBSNN method. diff --git a/RiskLabAI/utils/__init__.py b/RiskLabAI/utils/__init__.py index 0aec361..bf7c34a 100644 --- a/RiskLabAI/utils/__init__.py +++ b/RiskLabAI/utils/__init__.py @@ -13,8 +13,8 @@ from .constants import * from .ewma import ewma -from .progress import progress_bar from .momentum_mean_reverting_strategy_sides import determine_strategy_side +from .progress import progress_bar # Plotting helpers are imported lazily (PEP 562) so that the base install # does not require matplotlib/seaborn/plotly (RiskLabAI[plot] extra). diff --git a/RiskLabAI/utils/ewma.py b/RiskLabAI/utils/ewma.py index ffa36ca..2e64010 100644 --- a/RiskLabAI/utils/ewma.py +++ b/RiskLabAI/utils/ewma.py @@ -4,8 +4,7 @@ """ import numpy as np -from numba import jit, float64, int64 -from typing import Union +from numba import float64, jit @jit(nopython=True) diff --git a/RiskLabAI/utils/progress.py b/RiskLabAI/utils/progress.py index 3727ed8..ff893fd 100644 --- a/RiskLabAI/utils/progress.py +++ b/RiskLabAI/utils/progress.py @@ -4,7 +4,6 @@ import sys import time -from typing import Optional def progress_bar( diff --git a/RiskLabAI/utils/publication_plots.py b/RiskLabAI/utils/publication_plots.py index 2dcf7b8..c6c9a84 100644 --- a/RiskLabAI/utils/publication_plots.py +++ b/RiskLabAI/utils/publication_plots.py @@ -6,17 +6,17 @@ """ import logging +import os +from typing import Any, Optional -import matplotlib.pyplot as plt import matplotlib.figure as fig # For type hinting +import matplotlib.pyplot as plt import seaborn as sns -import os -from typing import Optional, Dict, Any logger = logging.getLogger(__name__) # [THEMES dictionary remains the same] -THEMES: Dict[str, Dict[str, Any]] = { +THEMES: dict[str, dict[str, Any]] = { "light": { "figure.facecolor": "#FFFFFF", "axes.facecolor": "#FFFFFF", diff --git a/RiskLabAI/utils/update_figure_layout.py b/RiskLabAI/utils/update_figure_layout.py index 749f9fa..b4357e3 100644 --- a/RiskLabAI/utils/update_figure_layout.py +++ b/RiskLabAI/utils/update_figure_layout.py @@ -4,7 +4,6 @@ """ import plotly.graph_objects as go -from typing import Optional def update_figure_layout( diff --git a/pyproject.toml b/pyproject.toml index 10445fa..28e80e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,3 +89,25 @@ target-version = "py39" select = ["E", "F", "W", "I", "UP", "B"] # E501 deferred until the mechanical black formatting pass lands ignore = ["E501"] + +[tool.ruff.lint.per-file-ignores] +# Package __init__ files exist to re-export their submodules' public API. +"__init__.py" = ["F401", "F403", "F405"] +# The bars stack consumes the column-name constants via `from ...constants +# import *`. Replacing these with explicit imports is tracked for 2.0.0 +# (see NAMING_CANON_2.0.0.md, "Replace ... import * in the bars stack"). +"RiskLabAI/data/structures/abstract_bars.py" = ["F403", "F405"] +"RiskLabAI/data/structures/abstract_imbalance_bars.py" = ["F403", "F405"] +"RiskLabAI/data/structures/abstract_information_driven_bars.py" = ["F403", "F405"] +"RiskLabAI/data/structures/abstract_run_bars.py" = ["F403", "F405"] +"RiskLabAI/data/structures/imbalance_bars.py" = ["F403", "F405"] +"RiskLabAI/data/structures/run_bars.py" = ["F403", "F405"] +"RiskLabAI/pde/solver.py" = ["F403", "F405"] +# base.py re-exports the canonical interfaces lazily via module __getattr__, +# so the names listed in __all__ are resolved on access, not at definition. +"RiskLabAI/core/base.py" = ["F822"] +# Tests import the constants namespace wholesale for readability. +"test/data/structures/test_standard_bars.py" = ["F403", "F405"] +# importorskip("torch") must execute before the torch-dependent imports below +# it, so module-level imports cannot all sit at the top of the file. +"test/pde/test_pde_solver.py" = ["E402"] diff --git a/test/backtest/test_backtest_overfitting_simulation.py b/test/backtest/test_backtest_overfitting_simulation.py index 9c50a48..072c0f4 100644 --- a/test/backtest/test_backtest_overfitting_simulation.py +++ b/test/backtest/test_backtest_overfitting_simulation.py @@ -3,16 +3,17 @@ """ import platform -from pyexpat import features + import numpy as np import pandas as pd import pytest + from RiskLabAI.backtest.backtest_overfitting_simulation import ( - sharpe_ratio, - sortino_ratio, expected_shortfall, financial_features_backtest_overfitting_simulation, get_cpu_info, + sharpe_ratio, + sortino_ratio, ) diff --git a/test/backtest/test_backtest_statistics.py b/test/backtest/test_backtest_statistics.py index ff111c6..b7b9155 100644 --- a/test/backtest/test_backtest_statistics.py +++ b/test/backtest/test_backtest_statistics.py @@ -5,11 +5,11 @@ import numpy as np import pandas as pd import pytest + from RiskLabAI.backtest.backtest_statistics import ( bet_timing, - calculate_holding_period, calculate_hhi, - calculate_hhi_concentration, + calculate_holding_period, compute_drawdowns_time_under_water, ) diff --git a/test/backtest/test_backtest_synthetic_data.py b/test/backtest/test_backtest_synthetic_data.py index 5be229d..a116587 100644 --- a/test/backtest/test_backtest_synthetic_data.py +++ b/test/backtest/test_backtest_synthetic_data.py @@ -3,7 +3,7 @@ """ import numpy as np -import pytest + from RiskLabAI.backtest.backtest_synthetic_data import synthetic_back_testing diff --git a/test/backtest/test_bet_sizing.py b/test/backtest/test_bet_sizing.py index 3a5a749..7a1fdfc 100644 --- a/test/backtest/test_bet_sizing.py +++ b/test/backtest/test_bet_sizing.py @@ -4,16 +4,16 @@ import numpy as np import pandas as pd -import pytest from scipy.stats import norm + from RiskLabAI.backtest.bet_sizing import ( - probability_bet_size, + TPos, average_bet_sizes, - strategy_bet_sizing, avgActiveSignals, betSize, - TPos, getW, + probability_bet_size, + strategy_bet_sizing, ) diff --git a/test/backtest/test_probabilistic_sharpe_ratio.py b/test/backtest/test_probabilistic_sharpe_ratio.py index 6affbf1..6bad22a 100644 --- a/test/backtest/test_probabilistic_sharpe_ratio.py +++ b/test/backtest/test_probabilistic_sharpe_ratio.py @@ -3,12 +3,12 @@ """ import numpy as np -import pytest from scipy import stats as ss from scipy.stats import norm + from RiskLabAI.backtest.probabilistic_sharpe_ratio import ( - probabilistic_sharpe_ratio, benchmark_sharpe_ratio, + probabilistic_sharpe_ratio, ) diff --git a/test/backtest/test_probability_of_backtest_overfitting.py b/test/backtest/test_probability_of_backtest_overfitting.py index ae87281..a113f74 100644 --- a/test/backtest/test_probability_of_backtest_overfitting.py +++ b/test/backtest/test_probability_of_backtest_overfitting.py @@ -4,10 +4,11 @@ import numpy as np import pytest + from RiskLabAI.backtest.probability_of_backtest_overfitting import ( - sharpe_ratio, performance_evaluation, probability_of_backtest_overfitting, + sharpe_ratio, ) diff --git a/test/backtest/test_strategy_risk.py b/test/backtest/test_strategy_risk.py index 83999a1..ffd4e2b 100644 --- a/test/backtest/test_strategy_risk.py +++ b/test/backtest/test_strategy_risk.py @@ -3,14 +3,13 @@ """ import numpy as np -import pytest + from RiskLabAI.backtest.strategy_risk import ( - sharpe_ratio_trials, - implied_precision, bin_frequency, binomial_sharpe_ratio, + implied_precision, mix_gaussians, - failure_probability, + sharpe_ratio_trials, ) @@ -66,7 +65,7 @@ def test_implied_precision_and_bin_frequency(): assert np.isclose(freq, 252, atol=0.01) # <-- TIGHTENED TOLERANCE # Test implied_precision with sl=0.01 (positive) - prec = implied_precision( + implied_precision( stop_loss=0.01, # Positive profit_taking=0.01, frequency=252, diff --git a/test/backtest/test_test_set_overfitting.py b/test/backtest/test_test_set_overfitting.py index 23f9565..ce36a02 100644 --- a/test/backtest/test_test_set_overfitting.py +++ b/test/backtest/test_test_set_overfitting.py @@ -3,17 +3,16 @@ """ import numpy as np -import pandas as pd -import pytest from scipy.stats import norm + from RiskLabAI.backtest.test_set_overfitting import ( + estimated_sharpe_ratio_z_statistics, expected_max_sharpe_ratio, generate_max_sharpe_ratios, mean_std_error, - estimated_sharpe_ratio_z_statistics, strategy_type1_error_probability, - theta_for_type2_error, strategy_type2_error_probability, + theta_for_type2_error, ) diff --git a/test/backtest/validation/test_adaptive_combinatorial_purged.py b/test/backtest/validation/test_adaptive_combinatorial_purged.py index d5e1088..f64f081 100644 --- a/test/backtest/validation/test_adaptive_combinatorial_purged.py +++ b/test/backtest/validation/test_adaptive_combinatorial_purged.py @@ -6,14 +6,11 @@ import pandas as pd import pytest from sklearn.linear_model import LogisticRegression -from itertools import combinations from RiskLabAI.backtest.validation.adaptive_combinatorial_purged import ( AdaptiveCombinatorialPurged, ) - from RiskLabAI.backtest.validation.combinatorial_purged import CombinatorialPurged -from itertools import combinations # Re-use the purged k-fold fixture and add an external feature diff --git a/test/backtest/validation/test_bagged_combinatorial_purged.py b/test/backtest/validation/test_bagged_combinatorial_purged.py index 3eedb31..5d3dcce 100644 --- a/test/backtest/validation/test_bagged_combinatorial_purged.py +++ b/test/backtest/validation/test_bagged_combinatorial_purged.py @@ -5,8 +5,7 @@ import numpy as np import pandas as pd import pytest -from sklearn.linear_model import LogisticRegression, LinearRegression -from typing import List +from sklearn.linear_model import LinearRegression, LogisticRegression from RiskLabAI.backtest.validation.bagged_combinatorial_purged import ( BaggedCombinatorialPurged, diff --git a/test/backtest/validation/test_combinatorial_purged.py b/test/backtest/validation/test_combinatorial_purged.py index c9a2c8c..e157edc 100644 --- a/test/backtest/validation/test_combinatorial_purged.py +++ b/test/backtest/validation/test_combinatorial_purged.py @@ -2,14 +2,15 @@ Tests for CombinatorialPurged cross-validator. """ +from itertools import combinations +from math import comb + import numpy as np import pandas as pd import pytest from sklearn.linear_model import LogisticRegression -from math import comb from RiskLabAI.backtest.validation.combinatorial_purged import CombinatorialPurged -from itertools import combinations # Re-use the purged k-fold fixture diff --git a/test/backtest/validation/test_cross_validator_controller.py b/test/backtest/validation/test_cross_validator_controller.py index 9d97287..9df9727 100644 --- a/test/backtest/validation/test_cross_validator_controller.py +++ b/test/backtest/validation/test_cross_validator_controller.py @@ -5,7 +5,6 @@ import pandas as pd import pytest - from RiskLabAI.backtest.validation.combinatorial_purged import CombinatorialPurged from RiskLabAI.backtest.validation.cross_validator_controller import ( CrossValidatorController, diff --git a/test/backtest/validation/test_purged_kfold.py b/test/backtest/validation/test_purged_kfold.py index bde4a69..18bf73d 100644 --- a/test/backtest/validation/test_purged_kfold.py +++ b/test/backtest/validation/test_purged_kfold.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd import pytest -from sklearn.linear_model import LogisticRegression from RiskLabAI.backtest.validation.purged_kfold import PurgedKFold diff --git a/test/cluster/test_clustering.py b/test/cluster/test_clustering.py index 02fc4c6..fffd1fc 100644 --- a/test/cluster/test_clustering.py +++ b/test/cluster/test_clustering.py @@ -3,13 +3,13 @@ """ import numpy as np -import pandas as pd import pytest + from RiskLabAI.cluster.clustering import ( - covariance_to_correlation, - random_block_correlation, cluster_k_means_base, cluster_k_means_top, + covariance_to_correlation, + random_block_correlation, ) diff --git a/test/controller/test_bars_initializer.py b/test/controller/test_bars_initializer.py index 10f9868..76804e7 100644 --- a/test/controller/test_bars_initializer.py +++ b/test/controller/test_bars_initializer.py @@ -3,6 +3,7 @@ """ import pytest + from RiskLabAI.controller.bars_initializer import BarsInitializerController # Import all bar types to check instance diff --git a/test/controller/test_data_structure_controller.py b/test/controller/test_data_structure_controller.py index 22e7e87..f56e2bb 100644 --- a/test/controller/test_data_structure_controller.py +++ b/test/controller/test_data_structure_controller.py @@ -2,10 +2,12 @@ Tests for controller/data_structure_controller.py """ -import pytest -import pandas as pd -import numpy as np import io + +import numpy as np +import pandas as pd +import pytest + from RiskLabAI.controller.data_structure_controller import Controller @@ -51,7 +53,7 @@ def test_controller_read_from_string(): csv_data = "datetime,price,volume\n" + "2020-01-01T00:00:00,100,10\n" * 7 # Use io.StringIO to simulate a file - csv_file = io.StringIO(csv_data) + io.StringIO(csv_data) # We need to mock 'open' to return our string file # This is advanced, so for now we'll test the dataframe method diff --git a/test/core/test_base.py b/test/core/test_base.py index fdeb38c..e616eb3 100644 --- a/test/core/test_base.py +++ b/test/core/test_base.py @@ -56,10 +56,10 @@ def label(self, prices, events=None, **kwargs): # Lazy re-exports of the canonical interfaces # --------------------------------------------------------------------------- # def test_lazy_reexports_resolve_to_real_interfaces(): - from RiskLabAI.data.structures.abstract_bars import AbstractBars from RiskLabAI.backtest.validation.cross_validator_interface import ( CrossValidator, ) + from RiskLabAI.data.structures.abstract_bars import AbstractBars from RiskLabAI.features.feature_importance.feature_importance_strategy import ( FeatureImportanceStrategy, ) diff --git a/test/core/test_builtin_parity.py b/test/core/test_builtin_parity.py index 9faec9d..4662c8c 100644 --- a/test/core/test_builtin_parity.py +++ b/test/core/test_builtin_parity.py @@ -13,8 +13,8 @@ FEATURE_IMPORTANCE, LABELERS, PORTFOLIO_OPTIMIZERS, - list_components, get_registry, + list_components, ) from RiskLabAI.core.base import CrossValidator @@ -46,8 +46,8 @@ def test_cross_validator_create_end_to_end(): # Feature importance # --------------------------------------------------------------------------- # def test_feature_importance_registry_matches_implementations(): - from RiskLabAI.features.feature_importance.feature_importance_mdi import ( - FeatureImportanceMDI, + from RiskLabAI.features.feature_importance.clustered_feature_importance_mda import ( + ClusteredFeatureImportanceMDA, ) from RiskLabAI.features.feature_importance.clustered_feature_importance_mdi import ( ClusteredFeatureImportanceMDI, @@ -55,8 +55,8 @@ def test_feature_importance_registry_matches_implementations(): from RiskLabAI.features.feature_importance.feature_importance_mda import ( FeatureImportanceMDA, ) - from RiskLabAI.features.feature_importance.clustered_feature_importance_mda import ( - ClusteredFeatureImportanceMDA, + from RiskLabAI.features.feature_importance.feature_importance_mdi import ( + FeatureImportanceMDI, ) from RiskLabAI.features.feature_importance.feature_importance_sfi import ( FeatureImportanceSFI, @@ -78,8 +78,6 @@ def test_feature_importance_registry_matches_implementations(): # Bars # --------------------------------------------------------------------------- # def test_bars_registry_matches_bar_classes(): - from RiskLabAI.data.structures.standard_bars import StandardBars - from RiskLabAI.data.structures.time_bars import TimeBars from RiskLabAI.data.structures.imbalance_bars import ( ExpectedImbalanceBars, FixedImbalanceBars, @@ -88,6 +86,8 @@ def test_bars_registry_matches_bar_classes(): ExpectedRunBars, FixedRunBars, ) + from RiskLabAI.data.structures.standard_bars import StandardBars + from RiskLabAI.data.structures.time_bars import TimeBars expected = { "standard_bars": StandardBars, diff --git a/test/data/denoise/test_denoising.py b/test/data/denoise/test_denoising.py index c307059..cbcf59e 100644 --- a/test/data/denoise/test_denoising.py +++ b/test/data/denoise/test_denoising.py @@ -2,14 +2,15 @@ Tests for data/denoise/denoising.py """ -import pytest import numpy as np import pandas as pd +import pytest + from RiskLabAI.data.denoise import ( - marcenko_pastur_pdf, - cov_to_corr, corr_to_cov, + cov_to_corr, denoise_cov, + marcenko_pastur_pdf, optimal_portfolio, ) diff --git a/test/data/differentiation/test_differentiation.py b/test/data/differentiation/test_differentiation.py index 23bdf52..0a9a06d 100644 --- a/test/data/differentiation/test_differentiation.py +++ b/test/data/differentiation/test_differentiation.py @@ -6,12 +6,13 @@ import pandas as pd import pytest from statsmodels.tsa.stattools import adfuller + from RiskLabAI.data.differentiation import ( - calculate_weights_std, calculate_weights_ffd, - fractional_difference_std, + calculate_weights_std, fractional_difference_fixed, fractional_difference_fixed_single, + fractional_difference_std, fractionally_differentiated_log_price, ) diff --git a/test/data/distance/test_distance_metric.py b/test/data/distance/test_distance_metric.py index 3b4c29c..c1903b9 100644 --- a/test/data/distance/test_distance_metric.py +++ b/test/data/distance/test_distance_metric.py @@ -4,11 +4,12 @@ import numpy as np import pytest + from RiskLabAI.data.distance import ( + calculate_distance, + calculate_mutual_information, calculate_number_of_bins, calculate_variation_of_information, - calculate_mutual_information, - calculate_distance, ) diff --git a/test/data/labeling/test_financial_labels.py b/test/data/labeling/test_financial_labels.py index 6e37de6..6d9395f 100644 --- a/test/data/labeling/test_financial_labels.py +++ b/test/data/labeling/test_financial_labels.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -import pytest + from RiskLabAI.data.labeling import ( calculate_t_value_linear_regression, find_trend_using_trend_scanning, diff --git a/test/data/labeling/test_labeling.py b/test/data/labeling/test_labeling.py index ba817a5..1e6077b 100644 --- a/test/data/labeling/test_labeling.py +++ b/test/data/labeling/test_labeling.py @@ -5,13 +5,14 @@ import numpy as np import pandas as pd import pytest + from RiskLabAI.data.labeling import ( - symmetric_cusum_filter, cusum_filter_events_dynamic_threshold, daily_volatility_with_log_returns, - vertical_barrier, meta_events, meta_labeling, + symmetric_cusum_filter, + vertical_barrier, ) diff --git a/test/data/structures/test_imbalance_bars.py b/test/data/structures/test_imbalance_bars.py index 9e340cd..833045b 100644 --- a/test/data/structures/test_imbalance_bars.py +++ b/test/data/structures/test_imbalance_bars.py @@ -2,12 +2,12 @@ Tests for data/structures/imbalance_bars.py """ -import numpy as np import pandas as pd import pytest + from RiskLabAI.data.structures.imbalance_bars import ( - FixedImbalanceBars, ExpectedImbalanceBars, + FixedImbalanceBars, ) diff --git a/test/data/structures/test_run_bars.py b/test/data/structures/test_run_bars.py index 0d1133e..cd6a42e 100644 --- a/test/data/structures/test_run_bars.py +++ b/test/data/structures/test_run_bars.py @@ -2,10 +2,10 @@ Tests for data/structures/run_bars.py """ -import numpy as np import pandas as pd import pytest -from RiskLabAI.data.structures.run_bars import FixedRunBars, ExpectedRunBars + +from RiskLabAI.data.structures.run_bars import FixedRunBars @pytest.fixture diff --git a/test/data/structures/test_standard_bars.py b/test/data/structures/test_standard_bars.py index 7a7f3f1..1564929 100644 --- a/test/data/structures/test_standard_bars.py +++ b/test/data/structures/test_standard_bars.py @@ -2,9 +2,9 @@ Tests for data/structures/standard_bars.py """ -import numpy as np import pandas as pd import pytest + from RiskLabAI.data.structures.standard_bars import StandardBars from RiskLabAI.utils.constants import * diff --git a/test/data/structures/test_time_bars.py b/test/data/structures/test_time_bars.py index d8d4a8e..df19e90 100644 --- a/test/data/structures/test_time_bars.py +++ b/test/data/structures/test_time_bars.py @@ -2,9 +2,9 @@ Tests for data/structures/time_bars.py """ -import numpy as np import pandas as pd import pytest + from RiskLabAI.data.structures.time_bars import TimeBars diff --git a/test/data/synthetic_data/test_drift_burst_hypothesis.py b/test/data/synthetic_data/test_drift_burst_hypothesis.py index 69943a6..ab7cd65 100644 --- a/test/data/synthetic_data/test_drift_burst_hypothesis.py +++ b/test/data/synthetic_data/test_drift_burst_hypothesis.py @@ -3,7 +3,7 @@ """ import numpy as np -import pytest + from RiskLabAI.data.synthetic_data.drift_burst_hypothesis import drift_volatility_burst diff --git a/test/data/synthetic_data/test_synthetic_controlled_environment.py b/test/data/synthetic_data/test_synthetic_controlled_environment.py index eb6a249..3329cd6 100644 --- a/test/data/synthetic_data/test_synthetic_controlled_environment.py +++ b/test/data/synthetic_data/test_synthetic_controlled_environment.py @@ -5,6 +5,7 @@ import numpy as np import pandas as pd import pytest + from RiskLabAI.data.synthetic_data.synthetic_controlled_environment import ( align_params_length, generate_prices_from_regimes, @@ -34,9 +35,8 @@ def sample_regimes(): "rho": -0.8, "lam": 0.2, "m": -0.05, - "v": 0.1, - # Test list-based params - "v": [0.1, 0.15], # 2-step regime + # List-based param: 2-step regime + "v": [0.1, 0.15], }, } # P(calm->calm)=0.9, P(crisis->crisis)=0.8 diff --git a/test/data/weights/test_sample_weights.py b/test/data/weights/test_sample_weights.py index 8233129..325b35e 100644 --- a/test/data/weights/test_sample_weights.py +++ b/test/data/weights/test_sample_weights.py @@ -5,11 +5,12 @@ import numpy as np import pandas as pd import pytest + from RiskLabAI.data.weights.sample_weights import ( - expand_label_for_meta_labeling, calculate_average_uniqueness, - sample_weight_absolute_return_meta_labeling, calculate_time_decay, + expand_label_for_meta_labeling, + sample_weight_absolute_return_meta_labeling, ) diff --git a/test/ensemble/test_bagging_classifier_accuracy.py b/test/ensemble/test_bagging_classifier_accuracy.py index b309d51..a6c5d65 100644 --- a/test/ensemble/test_bagging_classifier_accuracy.py +++ b/test/ensemble/test_bagging_classifier_accuracy.py @@ -2,8 +2,8 @@ Tests for ensemble/bagging_classifier_accuracy.py """ -import pytest import numpy as np + from RiskLabAI.ensemble.bagging_classifier_accuracy import bagging_classifier_accuracy diff --git a/test/features/entropy_features/test_entropy.py b/test/features/entropy_features/test_entropy.py index 18b3468..05ffbc2 100644 --- a/test/features/entropy_features/test_entropy.py +++ b/test/features/entropy_features/test_entropy.py @@ -2,13 +2,13 @@ Tests for modules in features/entropy_features/ """ -import pytest import numpy as np -from RiskLabAI.features.entropy_features.shannon import shannon_entropy + +from RiskLabAI.features.entropy_features.kontoyiannis import kontoyiannis_entropy from RiskLabAI.features.entropy_features.lempel_ziv import lempel_ziv_entropy -from RiskLabAI.features.entropy_features.pmf import probability_mass_function from RiskLabAI.features.entropy_features.plug_in import plug_in_entropy_estimator -from RiskLabAI.features.entropy_features.kontoyiannis import kontoyiannis_entropy +from RiskLabAI.features.entropy_features.pmf import probability_mass_function +from RiskLabAI.features.entropy_features.shannon import shannon_entropy # --- Test Data --- MSG_LOW = "AAAAAAAAAA" diff --git a/test/features/feature_importance/test_feature_importance.py b/test/features/feature_importance/test_feature_importance.py index 559bc61..597ad91 100644 --- a/test/features/feature_importance/test_feature_importance.py +++ b/test/features/feature_importance/test_feature_importance.py @@ -2,10 +2,11 @@ Tests for the features/feature_importance module. """ -import pytest -import pandas as pd import numpy as np +import pandas as pd +import pytest from sklearn.ensemble import RandomForestClassifier + from RiskLabAI.features.feature_importance.feature_importance_controller import ( FeatureImportanceController, ) diff --git a/test/features/feature_importance/test_generate_synthetic_data.py b/test/features/feature_importance/test_generate_synthetic_data.py index 281322d..c8e5e53 100644 --- a/test/features/feature_importance/test_generate_synthetic_data.py +++ b/test/features/feature_importance/test_generate_synthetic_data.py @@ -2,8 +2,8 @@ Tests for generate_synthetic_data.py """ -import pytest import pandas as pd + from RiskLabAI.features.feature_importance.generate_synthetic_data import ( get_test_dataset, ) diff --git a/test/features/feature_importance/test_orthogonal_features.py b/test/features/feature_importance/test_orthogonal_features.py index 8a61b8b..fe4c019 100644 --- a/test/features/feature_importance/test_orthogonal_features.py +++ b/test/features/feature_importance/test_orthogonal_features.py @@ -2,9 +2,10 @@ Tests for orthogonal_features.py """ -import pytest -import pandas as pd import numpy as np +import pandas as pd +import pytest + from RiskLabAI.features.feature_importance.orthogonal_features import ( orthogonal_features, ) diff --git a/test/features/feature_importance/test_weighted_tau.py b/test/features/feature_importance/test_weighted_tau.py index f6e9a53..dab816a 100644 --- a/test/features/feature_importance/test_weighted_tau.py +++ b/test/features/feature_importance/test_weighted_tau.py @@ -2,8 +2,8 @@ Tests for weighted_tau.py """ -import pytest import numpy as np + from RiskLabAI.features.feature_importance.weighted_tau import calculate_weighted_tau diff --git a/test/features/microstructural_features/test_microstructure.py b/test/features/microstructural_features/test_microstructure.py index bbdbf7b..7c43c2b 100644 --- a/test/features/microstructural_features/test_microstructure.py +++ b/test/features/microstructural_features/test_microstructure.py @@ -5,15 +5,15 @@ import numpy as np import pandas as pd import pytest + +from RiskLabAI.features.microstructural_features.bekker_parkinson_volatility_estimator import ( + bekker_parkinson_volatility_estimates, +) from RiskLabAI.features.microstructural_features.corwin_schultz import ( - beta_estimates, - gamma_estimates, alpha_estimates, + beta_estimates, corwin_schultz_estimator, -) -from RiskLabAI.features.microstructural_features.bekker_parkinson_volatility_estimator import ( - sigma_estimates, - bekker_parkinson_volatility_estimates, + gamma_estimates, ) diff --git a/test/features/structural_breaks/test_structural_breaks.py b/test/features/structural_breaks/test_structural_breaks.py index ec457a8..22956cb 100644 --- a/test/features/structural_breaks/test_structural_breaks.py +++ b/test/features/structural_breaks/test_structural_breaks.py @@ -2,15 +2,16 @@ Tests for features/structural_breaks/structural_breaks.py """ -import pytest -import pandas as pd import numpy as np +import pandas as pd +import pytest import statsmodels.api as sm + from RiskLabAI.features.structural_breaks.structural_breaks import ( - lag_dataframe, - prepare_data, compute_beta, get_bsadf_statistic, + lag_dataframe, + prepare_data, ) diff --git a/test/hpc/test_hpc.py b/test/hpc/test_hpc.py index 338bcdf..ded7190 100644 --- a/test/hpc/test_hpc.py +++ b/test/hpc/test_hpc.py @@ -2,15 +2,15 @@ Tests for hpc/hpc.py """ -import pytest -import pandas as pd import numpy as np +import pandas as pd + from RiskLabAI.hpc.hpc import ( linear_partitions, + mp_pandas_obj, nested_partitions, - process_jobs_sequential, process_jobs, - mp_pandas_obj, + process_jobs_sequential, ) # --- Test Functions for Parallelism --- diff --git a/test/optimization/test_hedging.py b/test/optimization/test_hedging.py index f5bb6ed..e578bb0 100644 --- a/test/optimization/test_hedging.py +++ b/test/optimization/test_hedging.py @@ -4,6 +4,7 @@ import numpy as np import pytest + from RiskLabAI.optimization.hedging import pca_weights diff --git a/test/optimization/test_hrp.py b/test/optimization/test_hrp.py index b9bf5f5..7d3a2c6 100644 --- a/test/optimization/test_hrp.py +++ b/test/optimization/test_hrp.py @@ -2,16 +2,15 @@ Tests for optimization/hrp.py """ -import pytest import numpy as np import pandas as pd +import pytest + from RiskLabAI.optimization.hrp import ( - inverse_variance_weights, cluster_variance, - quasi_diagonal, hrp, + inverse_variance_weights, ) -import scipy.cluster.hierarchy as sch @pytest.fixture diff --git a/test/optimization/test_hyper_parameter_tuning.py b/test/optimization/test_hyper_parameter_tuning.py index b6a3599..08a3b5c 100644 --- a/test/optimization/test_hyper_parameter_tuning.py +++ b/test/optimization/test_hyper_parameter_tuning.py @@ -2,14 +2,14 @@ Tests for optimization/hyper_parameter_tuning.py """ -from os import pipe -import pytest -import pandas as pd import numpy as np -from sklearn.pipeline import Pipeline +import pandas as pd +import pytest +from sklearn.ensemble import BaggingClassifier from sklearn.linear_model import LogisticRegression +from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler -from sklearn.ensemble import BaggingClassifier + from RiskLabAI.optimization.hyper_parameter_tuning import MyPipeline, clf_hyper_fit diff --git a/test/optimization/test_nco.py b/test/optimization/test_nco.py index 86a3325..aa7c46e 100644 --- a/test/optimization/test_nco.py +++ b/test/optimization/test_nco.py @@ -2,9 +2,9 @@ Tests for optimization/nco.py """ -import pytest import numpy as np -import pandas as pd +import pytest + from RiskLabAI.optimization.nco import ( get_optimal_portfolio_weights, get_optimal_portfolio_weights_nco, diff --git a/test/pde/test_pde_solver.py b/test/pde/test_pde_solver.py index 8f2d443..50a9791 100644 --- a/test/pde/test_pde_solver.py +++ b/test/pde/test_pde_solver.py @@ -5,8 +5,6 @@ import pytest torch = pytest.importorskip("torch") -import torch -import numpy as np # Import the main components from RiskLabAI.pde.equation import HJBLQ diff --git a/test/test_consolidation.py b/test/test_consolidation.py index da53dae..e2ac270 100644 --- a/test/test_consolidation.py +++ b/test/test_consolidation.py @@ -15,8 +15,8 @@ # labeling helpers are now single-sourced from RiskLabAI.hpc # --------------------------------------------------------------------------- # def test_labeling_helpers_are_reexported_from_hpc(): - from RiskLabAI.data import labeling from RiskLabAI import hpc + from RiskLabAI.data import labeling # Public names still import (backward compatibility). assert labeling.process_jobs is hpc.process_jobs diff --git a/test/test_performance.py b/test/test_performance.py index 59b2e7e..ca6a207 100644 --- a/test/test_performance.py +++ b/test/test_performance.py @@ -11,11 +11,11 @@ import numpy as np import pandas as pd +from RiskLabAI.backtest.bet_sizing import mpAvgActiveSignals from RiskLabAI.data.differentiation.differentiation import ( - fractional_difference_std, calculate_weights_std, + fractional_difference_std, ) -from RiskLabAI.backtest.bet_sizing import mpAvgActiveSignals from RiskLabAI.data.labeling.labeling import triple_barrier diff --git a/test/utils/test_ewma.py b/test/utils/test_ewma.py index 1172760..4625344 100644 --- a/test/utils/test_ewma.py +++ b/test/utils/test_ewma.py @@ -2,9 +2,9 @@ Tests for utils/ewma.py """ -import pytest import numpy as np import pandas as pd + from RiskLabAI.utils.ewma import ewma diff --git a/test/utils/test_momentum_mean_reverting_strategy_sides.py b/test/utils/test_momentum_mean_reverting_strategy_sides.py index f54c69d..e4dbb53 100644 --- a/test/utils/test_momentum_mean_reverting_strategy_sides.py +++ b/test/utils/test_momentum_mean_reverting_strategy_sides.py @@ -2,9 +2,9 @@ Tests for utils/momentum_mean_reverting_strategy_sides.py """ -import pytest import pandas as pd -import numpy as np +import pytest + from RiskLabAI.utils.momentum_mean_reverting_strategy_sides import ( determine_strategy_side, ) diff --git a/test/utils/test_progress.py b/test/utils/test_progress.py index c293780..e2178ba 100644 --- a/test/utils/test_progress.py +++ b/test/utils/test_progress.py @@ -2,8 +2,8 @@ Tests for utils/progress.py """ -import pytest import time + from RiskLabAI.utils.progress import progress_bar