From a456f99e35d90bb4a898f14790d4be6b105379b1 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Wed, 12 Mar 2025 01:51:08 +0100 Subject: [PATCH] fix(qav3): compute RMSE on a label period basis MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- .../LightGBMRegressorQuickAdapterV35.py | 20 +++++++------------ .../XGBoostRegressorQuickAdapterV35.py | 20 +++++++------------ 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py index 8a2c9c7..d813d3b 100644 --- a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py @@ -580,10 +580,6 @@ def period_objective( step=candles_step, ) test_window = (test_window // label_period_candles) * label_period_candles - if test_window < min_label_period_candles: - raise optuna.TrialPruned( - f"Adjusted test window {test_window} is too small for minimum label period {min_label_period_candles}." - ) X_test = X_test.iloc[-test_window:] y_test = y_test.iloc[-test_window:] test_weights = test_weights[-test_window:] @@ -602,25 +598,23 @@ def period_objective( y_pred = model.predict(X_test) n_windows = len(y_test) // label_period_candles - y_test_windows = [ + y_test = [ y_test.iloc[i : i + label_period_candles].to_numpy() for i in np.arange(0, label_period_candles * n_windows, label_period_candles) ] - test_weights_windows = [ + test_weights = [ test_weights[i : i + label_period_candles] for i in np.arange(0, label_period_candles * n_windows, label_period_candles) ] - y_pred_windows = [ + y_pred = [ y_pred[i : i + label_period_candles] for i in np.arange(0, label_period_candles * n_windows, label_period_candles) ] - y_test = [window for window in y_test_windows] - test_weights = np.concatenate([window for window in test_weights_windows]) - y_pred = [window for window in y_pred_windows] - error = sklearn.metrics.root_mean_squared_error( - y_test, y_pred, sample_weight=test_weights - ) + error = 0.0 + for y_t, y_p, t_w in zip(y_test, y_pred, test_weights): + error += sklearn.metrics.root_mean_squared_error(y_t, y_p, sample_weight=t_w) + error /= n_windows return error diff --git a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py index 54a7ca3..8feb2c8 100644 --- a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py @@ -583,10 +583,6 @@ def period_objective( step=candles_step, ) test_window = (test_window // label_period_candles) * label_period_candles - if test_window < min_label_period_candles: - raise optuna.TrialPruned( - f"Adjusted test window {test_window} is too small for minimum label period {min_label_period_candles}." - ) X_test = X_test.iloc[-test_window:] y_test = y_test.iloc[-test_window:] test_weights = test_weights[-test_window:] @@ -610,25 +606,23 @@ def period_objective( y_pred = model.predict(X_test) n_windows = len(y_test) // label_period_candles - y_test_windows = [ + y_test = [ y_test.iloc[i : i + label_period_candles].to_numpy() for i in np.arange(0, label_period_candles * n_windows, label_period_candles) ] - test_weights_windows = [ + test_weights = [ test_weights[i : i + label_period_candles] for i in np.arange(0, label_period_candles * n_windows, label_period_candles) ] - y_pred_windows = [ + y_pred = [ y_pred[i : i + label_period_candles] for i in np.arange(0, label_period_candles * n_windows, label_period_candles) ] - y_test = [window for window in y_test_windows] - test_weights = np.concatenate([window for window in test_weights_windows]) - y_pred = [window for window in y_pred_windows] - error = sklearn.metrics.root_mean_squared_error( - y_test, y_pred, sample_weight=test_weights - ) + error = 0.0 + for y_t, y_p, t_w in zip(y_test, y_pred, test_weights): + error += sklearn.metrics.root_mean_squared_error(y_t, y_p, sample_weight=t_w) + error /= n_windows return error -- 2.43.0