From 7c984d33dce0f26d2f2dd51c645ca6969023d0d6 Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@piment-noir.org>
Date: Tue, 28 Jan 2025 22:37:00 +0100
Subject: [PATCH] perf: use optuna to fine-tune the train and test set candle
 sizes
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
---
 .../LightGBMRegressorQuickAdapterV35.py       | 91 ++++++++++--------
 .../XGBoostRegressorQuickAdapterV3.py         |  9 +-
 .../XGBoostRegressorQuickAdapterV35.py        | 96 +++++++++++--------
 3 files changed, 115 insertions(+), 81 deletions(-)

diff --git a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py
index 2d98624..37c69a8 100644
--- a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py
+++ b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py
@@ -45,18 +45,17 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel):
 
         X = data_dictionary["train_features"]
         y = data_dictionary["train_labels"]
+        train_weights = data_dictionary["train_weights"]
 
         X_test = data_dictionary["test_features"]
         y_test = data_dictionary["test_labels"]
         test_weights = data_dictionary["test_weights"]
 
-        eval_set, eval_weights = self.eval_set_and_weights(X_test, y_test, test_weights)
-
-        sample_weight = data_dictionary["train_weights"]
+        lgbm_model = self.get_init_model(dk.pair)
 
-        model_training_parameters = self.model_training_parameters
+        logger.info(f"Model training parameters : {self.model_training_parameters}")
 
-        lgbm_model = self.get_init_model(dk.pair)
+        model = LGBMRegressor(**self.model_training_parameters)
 
         optuna_hyperopt: bool = (
             self.freqai_info.get("optuna_hyperopt", False)
@@ -78,14 +77,15 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel):
                     trial,
                     X,
                     y,
-                    sample_weight,
+                    train_weights,
                     X_test,
                     y_test,
+                    test_weights,
                     self.model_training_parameters,
                 ),
                 n_trials=self.freqai_info.get("optuna_hyperopt_trials", N_TRIALS),
                 n_jobs=self.freqai_info.get("optuna_hyperopt_jobs", 1),
-                timeout=self.freqai_info.get("optuna_hyperopt_timeout", 7200),
+                timeout=self.freqai_info.get("optuna_hyperopt_timeout", 3600),
             )
 
             hp = study.best_params
@@ -97,33 +97,22 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel):
                 f"Optuna hyperopt {'best objective value':>20s} : {study.best_value}"
             )
 
-            window = hp.get("train_period_candles")
-            X = X.tail(window)
-            y = y.tail(window)
-            sample_weight = sample_weight[-window:]
-
-            model_training_parameters = {
-                **model_training_parameters,
-                **{
-                    "n_estimators": hp.get("n_estimators"),
-                    "num_leaves": hp.get("num_leaves"),
-                    "learning_rate": hp.get("learning_rate"),
-                    "min_child_samples": hp.get("min_child_samples"),
-                    "subsample": hp.get("subsample"),
-                    "colsample_bytree": hp.get("colsample_bytree"),
-                    "reg_alpha": hp.get("reg_alpha"),
-                    "reg_lambda": hp.get("reg_lambda"),
-                },
-            }
-
-        logger.info(f"Model training parameters : {model_training_parameters}")
-
-        model = LGBMRegressor(**model_training_parameters)
+            train_window = hp.get("train_period_candles")
+            X = X.tail(train_window)
+            y = y.tail(train_window)
+            train_weights = train_weights[-train_window:]
+
+            test_window = hp.get("test_period_candles")
+            X_test = X_test.tail(test_window)
+            y_test = y_test.tail(test_window)
+            test_weights = test_weights[-test_window:]
+
+        eval_set, eval_weights = self.eval_set_and_weights(X_test, y_test, test_weights)
 
         model.fit(
             X=X,
             y=y,
-            sample_weight=sample_weight,
+            sample_weight=train_weights,
             eval_set=eval_set,
             eval_sample_weight=eval_weights,
             init_model=lgbm_model,
@@ -221,7 +210,36 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel):
         return eval_set, eval_weights
 
 
-def objective(trial, X, y, weights, X_test, y_test, params):
+def objective(trial, X, y, train_weights, X_test, y_test, test_weights, params):
+    train_window = trial.suggest_int("train_period_candles", 1152, 17280, step=100)
+    X = X.tail(train_window)
+    y = y.tail(train_window)
+    train_weights = train_weights[-train_window:]
+
+    test_window = trial.suggest_int("test_period_candles", 1152, 17280, step=100)
+    X_test = X_test.tail(test_window)
+    y_test = y_test.tail(test_window)
+    test_weights = test_weights[-test_window:]
+
+    # Fit the model
+    model = LGBMRegressor(**params)
+    model.fit(
+        X=X,
+        y=y,
+        sample_weight=train_weights,
+        eval_set=[(X_test, y_test)],
+        eval_sample_weight=[test_weights],
+        eval_metric="rmse",
+        callbacks=[optuna.integration.LightGBMPruningCallback(trial, "rmse")],
+    )
+    y_pred = model.predict(X_test)
+
+    error = sklearn.metrics.root_mean_squared_error(y_test, y_pred)
+
+    return error
+
+
+def hp_objective(trial, X, y, train_weights, X_test, y_test, test_weights, params):
     study_params = {
         "objective": "rmse",
         "n_estimators": trial.suggest_int("n_estimators", 100, 800),
@@ -234,18 +252,15 @@ def objective(trial, X, y, weights, X_test, y_test, params):
         "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
     }
     params = {**params, **study_params}
-    window = trial.suggest_int("train_period_candles", 1152, 17280, step=100)
 
     # Fit the model
     model = LGBMRegressor(**params)
-    X = X.tail(window)
-    y = y.tail(window)
-    weights = weights[-window:]
     model.fit(
-        X,
-        y,
-        sample_weight=weights,
+        X=X,
+        y=y,
+        sample_weight=train_weights,
         eval_set=[(X_test, y_test)],
+        eval_sample_weight=[test_weights],
         eval_metric="rmse",
         callbacks=[optuna.integration.LightGBMPruningCallback(trial, "rmse")],
     )
diff --git a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV3.py b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV3.py
index 17a0821..3ab27f7 100644
--- a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV3.py
+++ b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV3.py
@@ -41,24 +41,23 @@ class XGBoostRegressorQuickAdapterV3(BaseRegressionModel):
 
         X = data_dictionary["train_features"]
         y = data_dictionary["train_labels"]
+        train_weights = data_dictionary["train_weights"]
 
         X_test = data_dictionary["test_features"]
         y_test = data_dictionary["test_labels"]
         test_weights = data_dictionary["test_weights"]
 
-        eval_set, eval_weights = self.eval_set_and_weights(X_test, y_test, test_weights)
-
-        sample_weight = data_dictionary["train_weights"]
-
         xgb_model = self.get_init_model(dk.pair)
 
         model = XGBRegressor(**self.model_training_parameters)
 
+        eval_set, eval_weights = self.eval_set_and_weights(X_test, y_test, test_weights)
+
         start = time.time()
         model.fit(
             X=X,
             y=y,
-            sample_weight=sample_weight,
+            sample_weight=train_weights,
             eval_set=eval_set,
             sample_weight_eval_set=eval_weights,
             xgb_model=xgb_model,
diff --git a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py
index e2fa445..3233c5c 100644
--- a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py
+++ b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py
@@ -45,18 +45,17 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel):
 
         X = data_dictionary["train_features"]
         y = data_dictionary["train_labels"]
+        train_weights = data_dictionary["train_weights"]
 
         X_test = data_dictionary["test_features"]
         y_test = data_dictionary["test_labels"]
         test_weights = data_dictionary["test_weights"]
 
-        eval_set, eval_weights = self.eval_set_and_weights(X_test, y_test, test_weights)
-
-        sample_weight = data_dictionary["train_weights"]
+        xgb_model = self.get_init_model(dk.pair)
 
-        model_training_parameters = self.model_training_parameters
+        logger.info(f"Model training parameters : {self.model_training_parameters}")
 
-        xgb_model = self.get_init_model(dk.pair)
+        model = XGBRegressor(**self.model_training_parameters)
 
         optuna_hyperopt: bool = (
             self.freqai_info.get("optuna_hyperopt", False)
@@ -75,14 +74,15 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel):
                     trial,
                     X,
                     y,
-                    sample_weight,
+                    train_weights,
                     X_test,
                     y_test,
+                    test_weights,
                     self.model_training_parameters,
                 ),
                 n_trials=self.freqai_info.get("optuna_hyperopt_trials", N_TRIALS),
                 n_jobs=self.freqai_info.get("optuna_hyperopt_jobs", 1),
-                timeout=self.freqai_info.get("optuna_hyperopt_timeout", 7200),
+                timeout=self.freqai_info.get("optuna_hyperopt_timeout", 3600),
             )
 
             hp = study.best_params
@@ -94,33 +94,22 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel):
                 f"Optuna hyperopt {'best objective value':>20s} : {study.best_value}"
             )
 
-            window = hp.get("train_period_candles")
-            X = X.tail(window)
-            y = y.tail(window)
-            sample_weight = sample_weight[-window:]
-
-            model_training_parameters = {
-                **model_training_parameters,
-                **{
-                    "n_estimators": hp.get("n_estimators"),
-                    "learning_rate": hp.get("learning_rate"),
-                    "max_depth": hp.get("max_depth"),
-                    "gamma": hp.get("gamma"),
-                    "subsample": hp.get("subsample"),
-                    "colsample_bytree": hp.get("colsample_bytree"),
-                    "reg_alpha": hp.get("reg_alpha"),
-                    "reg_lambda": hp.get("reg_lambda"),
-                },
-            }
-
-        logger.info(f"Model training parameters : {model_training_parameters}")
-
-        model = XGBRegressor(**model_training_parameters)
+            train_window = hp.get("train_period_candles")
+            X = X.tail(train_window)
+            y = y.tail(train_window)
+            train_weights = train_weights[-train_window:]
+
+            test_window = hp.get("test_period_candles")
+            X_test = X_test.tail(test_window)
+            y_test = y_test.tail(test_window)
+            test_weights = test_weights[-test_window:]
+
+        eval_set, eval_weights = self.eval_set_and_weights(X_test, y_test, test_weights)
 
         model.fit(
             X=X,
             y=y,
-            sample_weight=sample_weight,
+            sample_weight=train_weights,
             eval_set=eval_set,
             sample_weight_eval_set=eval_weights,
             xgb_model=xgb_model,
@@ -218,7 +207,41 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel):
         return eval_set, eval_weights
 
 
-def objective(trial, X, y, weights, X_test, y_test, params):
+def objective(trial, X, y, train_weights, X_test, y_test, test_weights, params):
+    train_window = trial.suggest_int("train_period_candles", 1152, 17280, step=100)
+    X = X.tail(train_window)
+    y = y.tail(train_window)
+    train_weights = train_weights[-train_window:]
+
+    test_window = trial.suggest_int("test_period_candles", 1152, 17280, step=100)
+    X_test = X_test.tail(test_window)
+    y_test = y_test.tail(test_window)
+    test_weights = test_weights[-test_window:]
+
+    # Fit the model
+    model = XGBRegressor(
+        objective="reg:squarederror",
+        eval_metric="rmse",
+        callbacks=[
+            optuna.integration.XGBoostPruningCallback(trial, "validation_0-rmse")
+        ],
+        **params,
+    )
+    model.fit(
+        X=X,
+        y=y,
+        sample_weight=train_weights,
+        eval_set=[(X_test, y_test)],
+        sample_weight_eval_set=[test_weights],
+    )
+    y_pred = model.predict(X_test)
+
+    error = sklearn.metrics.root_mean_squared_error(y_test, y_pred)
+
+    return error
+
+
+def hp_objective(trial, X, y, train_weights, X_test, y_test, test_weights, params):
     study_params = {
         "objective": "reg:squarederror",
         "eval_metric": "rmse",
@@ -235,18 +258,15 @@ def objective(trial, X, y, weights, X_test, y_test, params):
         ],
     }
     params = {**params, **study_params}
-    window = trial.suggest_int("train_period_candles", 1152, 17280, step=100)
 
     # Fit the model
     model = XGBRegressor(**params)
-    X = X.tail(window)
-    y = y.tail(window)
-    weights = weights[-window:]
     model.fit(
-        X,
-        y,
-        sample_weight=weights,
+        X=X,
+        y=y,
+        sample_weight=train_weights,
         eval_set=[(X_test, y_test)],
+        sample_weight_eval_set=[test_weights],
     )
     y_pred = model.predict(X_test)
 
-- 
2.43.0