]> Piment Noir Git Repositories - freqai-strategies.git/commitdiff
perf(qav3): optimize label period splitting over fit live prediction
authorJérôme Benoit <jerome.benoit@piment-noir.org>
Fri, 14 Mar 2025 12:50:36 +0000 (13:50 +0100)
committerJérôme Benoit <jerome.benoit@piment-noir.org>
Fri, 14 Mar 2025 12:50:36 +0000 (13:50 +0100)
window

Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py
quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py

index af74df5dae363468a5943e21579fbd129db55c52..9af3c6400bd1944ac68d7792428e24d78764ace5 100644 (file)
@@ -556,7 +556,9 @@ def period_objective(
     y = y.iloc[-train_window:]
     train_weights = train_weights[-train_window:]
 
-    min_test_window: int = int(min_train_window * test_size)
+    min_test_window: int = max(
+        int(min_train_window * test_size), fit_live_predictions_candles
+    )
     max_test_window: int = max(len(X_test), min_test_window)
     test_window: int = trial.suggest_int(
         "test_period_candles", min_test_window, max_test_window, step=candles_step
@@ -578,7 +580,7 @@ def period_objective(
     )
     y_pred = model.predict(X_test)
 
-    min_label_period_candles: int = 10
+    min_label_period_candles: int = fit_live_predictions_candles // 10
     max_label_period_candles: int = max(
         fit_live_predictions_candles // 6, min_label_period_candles
     )
@@ -588,17 +590,26 @@ def period_objective(
         max_label_period_candles,
         step=candles_step,
     )
-    label_period_frequency: int = int(
-        fit_live_predictions_candles / (label_period_candles * 2)
-    )
-    label_window: int = label_period_candles * label_period_frequency
-    y_test = y_test.iloc[-label_window:].to_numpy()
-    test_weights = test_weights[-label_window:]
-    y_pred = y_pred[-label_window:]
-
-    error = sklearn.metrics.root_mean_squared_error(
-        y_test, y_pred, sample_weight=test_weights
-    )
+    y_test = y_test.iloc[-fit_live_predictions_candles:].to_numpy()
+    test_weights = test_weights[-fit_live_predictions_candles:]
+    y_pred = y_pred[-fit_live_predictions_candles:]
+    y_test = [
+        y_test[i : i + label_period_candles]
+        for i in range(0, len(y_test), label_period_candles)
+    ]
+    test_weights = [
+        test_weights[i : i + label_period_candles]
+        for i in range(0, len(test_weights), label_period_candles)
+    ]
+    y_pred = [
+        y_pred[i : i + label_period_candles]
+        for i in range(0, len(y_pred), label_period_candles)
+    ]
+
+    error = 0.0
+    for y_t, y_p, t_w in zip(y_test, y_pred, test_weights):
+        error += sklearn.metrics.root_mean_squared_error(y_t, y_p, sample_weight=t_w)
+    error /= len(y_test)
 
     return error
 
index 35bdd35b0c2326a58d2e2f6a6bd52586cf71b236..751067f18decef3435dd371549c7fcb731a1ae25 100644 (file)
@@ -559,7 +559,9 @@ def period_objective(
     y = y.iloc[-train_window:]
     train_weights = train_weights[-train_window:]
 
-    min_test_window: int = int(min_train_window * test_size)
+    min_test_window: int = max(
+        int(min_train_window * test_size), fit_live_predictions_candles
+    )
     max_test_window: int = max(len(X_test), min_test_window)
     test_window: int = trial.suggest_int(
         "test_period_candles", min_test_window, max_test_window, step=candles_step
@@ -586,7 +588,7 @@ def period_objective(
     )
     y_pred = model.predict(X_test)
 
-    min_label_period_candles: int = 10
+    min_label_period_candles: int = fit_live_predictions_candles // 10
     max_label_period_candles: int = max(
         fit_live_predictions_candles // 6, min_label_period_candles
     )
@@ -596,17 +598,26 @@ def period_objective(
         max_label_period_candles,
         step=candles_step,
     )
-    label_period_frequency: int = int(
-        fit_live_predictions_candles / (label_period_candles * 2)
-    )
-    label_window: int = label_period_candles * label_period_frequency
-    y_test = y_test.iloc[-label_window:].to_numpy()
-    test_weights = test_weights[-label_window:]
-    y_pred = y_pred[-label_window:]
-
-    error = sklearn.metrics.root_mean_squared_error(
-        y_test, y_pred, sample_weight=test_weights
-    )
+    y_test = y_test.iloc[-fit_live_predictions_candles:].to_numpy()
+    test_weights = test_weights[-fit_live_predictions_candles:]
+    y_pred = y_pred[-fit_live_predictions_candles:]
+    y_test = [
+        y_test[i : i + label_period_candles]
+        for i in range(0, len(y_test), label_period_candles)
+    ]
+    test_weights = [
+        test_weights[i : i + label_period_candles]
+        for i in range(0, len(test_weights), label_period_candles)
+    ]
+    y_pred = [
+        y_pred[i : i + label_period_candles]
+        for i in range(0, len(y_pred), label_period_candles)
+    ]
+
+    error = 0.0
+    for y_t, y_p, t_w in zip(y_test, y_pred, test_weights):
+        error += sklearn.metrics.root_mean_squared_error(y_t, y_p, sample_weight=t_w)
+    error /= len(y_test)
 
     return error