From 57f828cbc7f5a266730703f87d4a322a74556aef Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Fri, 14 Mar 2025 13:50:36 +0100 Subject: [PATCH] perf(qav3): optimize label period splitting over fit live prediction window MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- .../LightGBMRegressorQuickAdapterV35.py | 37 ++++++++++++------- .../XGBoostRegressorQuickAdapterV35.py | 37 ++++++++++++------- 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py index af74df5..9af3c64 100644 --- a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py @@ -556,7 +556,9 @@ def period_objective( y = y.iloc[-train_window:] train_weights = train_weights[-train_window:] - min_test_window: int = int(min_train_window * test_size) + min_test_window: int = max( + int(min_train_window * test_size), fit_live_predictions_candles + ) max_test_window: int = max(len(X_test), min_test_window) test_window: int = trial.suggest_int( "test_period_candles", min_test_window, max_test_window, step=candles_step @@ -578,7 +580,7 @@ def period_objective( ) y_pred = model.predict(X_test) - min_label_period_candles: int = 10 + min_label_period_candles: int = fit_live_predictions_candles // 10 max_label_period_candles: int = max( fit_live_predictions_candles // 6, min_label_period_candles ) @@ -588,17 +590,26 @@ def period_objective( max_label_period_candles, step=candles_step, ) - label_period_frequency: int = int( - fit_live_predictions_candles / (label_period_candles * 2) - ) - label_window: int = label_period_candles * label_period_frequency - y_test = y_test.iloc[-label_window:].to_numpy() - test_weights = test_weights[-label_window:] - y_pred = y_pred[-label_window:] - - error = sklearn.metrics.root_mean_squared_error( - y_test, y_pred, sample_weight=test_weights - ) + y_test = y_test.iloc[-fit_live_predictions_candles:].to_numpy() + test_weights = test_weights[-fit_live_predictions_candles:] + y_pred = y_pred[-fit_live_predictions_candles:] + y_test = [ + y_test[i : i + label_period_candles] + for i in range(0, len(y_test), label_period_candles) + ] + test_weights = [ + test_weights[i : i + label_period_candles] + for i in range(0, len(test_weights), label_period_candles) + ] + y_pred = [ + y_pred[i : i + label_period_candles] + for i in range(0, len(y_pred), label_period_candles) + ] + + error = 0.0 + for y_t, y_p, t_w in zip(y_test, y_pred, test_weights): + error += sklearn.metrics.root_mean_squared_error(y_t, y_p, sample_weight=t_w) + error /= len(y_test) return error diff --git a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py index 35bdd35..751067f 100644 --- a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py @@ -559,7 +559,9 @@ def period_objective( y = y.iloc[-train_window:] train_weights = train_weights[-train_window:] - min_test_window: int = int(min_train_window * test_size) + min_test_window: int = max( + int(min_train_window * test_size), fit_live_predictions_candles + ) max_test_window: int = max(len(X_test), min_test_window) test_window: int = trial.suggest_int( "test_period_candles", min_test_window, max_test_window, step=candles_step @@ -586,7 +588,7 @@ def period_objective( ) y_pred = model.predict(X_test) - min_label_period_candles: int = 10 + min_label_period_candles: int = fit_live_predictions_candles // 10 max_label_period_candles: int = max( fit_live_predictions_candles // 6, min_label_period_candles ) @@ -596,17 +598,26 @@ def period_objective( max_label_period_candles, step=candles_step, ) - label_period_frequency: int = int( - fit_live_predictions_candles / (label_period_candles * 2) - ) - label_window: int = label_period_candles * label_period_frequency - y_test = y_test.iloc[-label_window:].to_numpy() - test_weights = test_weights[-label_window:] - y_pred = y_pred[-label_window:] - - error = sklearn.metrics.root_mean_squared_error( - y_test, y_pred, sample_weight=test_weights - ) + y_test = y_test.iloc[-fit_live_predictions_candles:].to_numpy() + test_weights = test_weights[-fit_live_predictions_candles:] + y_pred = y_pred[-fit_live_predictions_candles:] + y_test = [ + y_test[i : i + label_period_candles] + for i in range(0, len(y_test), label_period_candles) + ] + test_weights = [ + test_weights[i : i + label_period_candles] + for i in range(0, len(test_weights), label_period_candles) + ] + y_pred = [ + y_pred[i : i + label_period_candles] + for i in range(0, len(y_pred), label_period_candles) + ] + + error = 0.0 + for y_t, y_p, t_w in zip(y_test, y_pred, test_weights): + error += sklearn.metrics.root_mean_squared_error(y_t, y_p, sample_weight=t_w) + error /= len(y_test) return error -- 2.43.0