From 57f828cbc7f5a266730703f87d4a322a74556aef Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@piment-noir.org>
Date: Fri, 14 Mar 2025 13:50:36 +0100
Subject: [PATCH] perf(qav3): optimize label period splitting over fit live
 prediction window
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: JÃ©rÃ´me Benoit <jerome.benoit@piment-noir.org>
---
 .../LightGBMRegressorQuickAdapterV35.py       | 37 ++++++++++++-------
 .../XGBoostRegressorQuickAdapterV35.py        | 37 ++++++++++++-------
 2 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py
index af74df5..9af3c64 100644
--- a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py
+++ b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py
@@ -556,7 +556,9 @@ def period_objective(
     y = y.iloc[-train_window:]
     train_weights = train_weights[-train_window:]
 
-    min_test_window: int = int(min_train_window * test_size)
+    min_test_window: int = max(
+        int(min_train_window * test_size), fit_live_predictions_candles
+    )
     max_test_window: int = max(len(X_test), min_test_window)
     test_window: int = trial.suggest_int(
         "test_period_candles", min_test_window, max_test_window, step=candles_step
@@ -578,7 +580,7 @@ def period_objective(
     )
     y_pred = model.predict(X_test)
 
-    min_label_period_candles: int = 10
+    min_label_period_candles: int = fit_live_predictions_candles // 10
     max_label_period_candles: int = max(
         fit_live_predictions_candles // 6, min_label_period_candles
     )
@@ -588,17 +590,26 @@ def period_objective(
         max_label_period_candles,
         step=candles_step,
     )
-    label_period_frequency: int = int(
-        fit_live_predictions_candles / (label_period_candles * 2)
-    )
-    label_window: int = label_period_candles * label_period_frequency
-    y_test = y_test.iloc[-label_window:].to_numpy()
-    test_weights = test_weights[-label_window:]
-    y_pred = y_pred[-label_window:]
-
-    error = sklearn.metrics.root_mean_squared_error(
-        y_test, y_pred, sample_weight=test_weights
-    )
+    y_test = y_test.iloc[-fit_live_predictions_candles:].to_numpy()
+    test_weights = test_weights[-fit_live_predictions_candles:]
+    y_pred = y_pred[-fit_live_predictions_candles:]
+    y_test = [
+        y_test[i : i + label_period_candles]
+        for i in range(0, len(y_test), label_period_candles)
+    ]
+    test_weights = [
+        test_weights[i : i + label_period_candles]
+        for i in range(0, len(test_weights), label_period_candles)
+    ]
+    y_pred = [
+        y_pred[i : i + label_period_candles]
+        for i in range(0, len(y_pred), label_period_candles)
+    ]
+
+    error = 0.0
+    for y_t, y_p, t_w in zip(y_test, y_pred, test_weights):
+        error += sklearn.metrics.root_mean_squared_error(y_t, y_p, sample_weight=t_w)
+    error /= len(y_test)
 
     return error
 
diff --git a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py
index 35bdd35..751067f 100644
--- a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py
+++ b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py
@@ -559,7 +559,9 @@ def period_objective(
     y = y.iloc[-train_window:]
     train_weights = train_weights[-train_window:]
 
-    min_test_window: int = int(min_train_window * test_size)
+    min_test_window: int = max(
+        int(min_train_window * test_size), fit_live_predictions_candles
+    )
     max_test_window: int = max(len(X_test), min_test_window)
     test_window: int = trial.suggest_int(
         "test_period_candles", min_test_window, max_test_window, step=candles_step
@@ -586,7 +588,7 @@ def period_objective(
     )
     y_pred = model.predict(X_test)
 
-    min_label_period_candles: int = 10
+    min_label_period_candles: int = fit_live_predictions_candles // 10
     max_label_period_candles: int = max(
         fit_live_predictions_candles // 6, min_label_period_candles
     )
@@ -596,17 +598,26 @@ def period_objective(
         max_label_period_candles,
         step=candles_step,
     )
-    label_period_frequency: int = int(
-        fit_live_predictions_candles / (label_period_candles * 2)
-    )
-    label_window: int = label_period_candles * label_period_frequency
-    y_test = y_test.iloc[-label_window:].to_numpy()
-    test_weights = test_weights[-label_window:]
-    y_pred = y_pred[-label_window:]
-
-    error = sklearn.metrics.root_mean_squared_error(
-        y_test, y_pred, sample_weight=test_weights
-    )
+    y_test = y_test.iloc[-fit_live_predictions_candles:].to_numpy()
+    test_weights = test_weights[-fit_live_predictions_candles:]
+    y_pred = y_pred[-fit_live_predictions_candles:]
+    y_test = [
+        y_test[i : i + label_period_candles]
+        for i in range(0, len(y_test), label_period_candles)
+    ]
+    test_weights = [
+        test_weights[i : i + label_period_candles]
+        for i in range(0, len(test_weights), label_period_candles)
+    ]
+    y_pred = [
+        y_pred[i : i + label_period_candles]
+        for i in range(0, len(y_pred), label_period_candles)
+    ]
+
+    error = 0.0
+    for y_t, y_p, t_w in zip(y_test, y_pred, test_weights):
+        error += sklearn.metrics.root_mean_squared_error(y_t, y_p, sample_weight=t_w)
+    error /= len(y_test)
 
     return error
 
-- 
2.53.0