From 7c984d33dce0f26d2f2dd51c645ca6969023d0d6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Tue, 28 Jan 2025 22:37:00 +0100 Subject: [PATCH] perf: use optuna to fine tune the train and test sets candles size MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- .../LightGBMRegressorQuickAdapterV35.py | 91 ++++++++++-------- .../XGBoostRegressorQuickAdapterV3.py | 9 +- .../XGBoostRegressorQuickAdapterV35.py | 96 +++++++++++-------- 3 files changed, 115 insertions(+), 81 deletions(-) diff --git a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py index 2d98624..37c69a8 100644 --- a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py @@ -45,18 +45,17 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): X = data_dictionary["train_features"] y = data_dictionary["train_labels"] + train_weights = data_dictionary["train_weights"] X_test = data_dictionary["test_features"] y_test = data_dictionary["test_labels"] test_weights = data_dictionary["test_weights"] - eval_set, eval_weights = self.eval_set_and_weights(X_test, y_test, test_weights) - - sample_weight = data_dictionary["train_weights"] + lgbm_model = self.get_init_model(dk.pair) - model_training_parameters = self.model_training_parameters + logger.info(f"Model training parameters : {self.model_training_parameters}") - lgbm_model = self.get_init_model(dk.pair) + model = LGBMRegressor(**self.model_training_parameters) optuna_hyperopt: bool = ( self.freqai_info.get("optuna_hyperopt", False) @@ -78,14 +77,15 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): trial, X, y, - sample_weight, + train_weights, X_test, y_test, + test_weights, self.model_training_parameters, ), n_trials=self.freqai_info.get("optuna_hyperopt_trials", N_TRIALS), n_jobs=self.freqai_info.get("optuna_hyperopt_jobs", 1), - timeout=self.freqai_info.get("optuna_hyperopt_timeout", 7200), + timeout=self.freqai_info.get("optuna_hyperopt_timeout", 3600), ) hp = study.best_params @@ -97,33 +97,22 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): f"Optuna hyperopt {'best objective value':>20s} : {study.best_value}" ) - window = hp.get("train_period_candles") - X = X.tail(window) - y = y.tail(window) - sample_weight = sample_weight[-window:] - - model_training_parameters = { - **model_training_parameters, - **{ - "n_estimators": hp.get("n_estimators"), - "num_leaves": hp.get("num_leaves"), - "learning_rate": hp.get("learning_rate"), - "min_child_samples": hp.get("min_child_samples"), - "subsample": hp.get("subsample"), - "colsample_bytree": hp.get("colsample_bytree"), - "reg_alpha": hp.get("reg_alpha"), - "reg_lambda": hp.get("reg_lambda"), - }, - } - - logger.info(f"Model training parameters : {model_training_parameters}") - - model = LGBMRegressor(**model_training_parameters) + train_window = hp.get("train_period_candles") + X = X.tail(train_window) + y = y.tail(train_window) + train_weights = train_weights[-train_window:] + + test_window = hp.get("test_period_candles") + X_test = X_test.tail(test_window) + y_test = y_test.tail(test_window) + test_weights = test_weights[-test_window:] + + eval_set, eval_weights = self.eval_set_and_weights(X_test, y_test, test_weights) model.fit( X=X, y=y, - sample_weight=sample_weight, + sample_weight=train_weights, eval_set=eval_set, eval_sample_weight=eval_weights, init_model=lgbm_model, @@ -221,7 +210,36 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): return eval_set, eval_weights -def objective(trial, X, y, weights, X_test, y_test, params): +def objective(trial, X, y, train_weights, X_test, y_test, test_weights, params): + train_window = trial.suggest_int("train_period_candles", 1152, 17280, step=100) + X = X.tail(train_window) + y = y.tail(train_window) + train_weights = train_weights[-train_window:] + + test_window = trial.suggest_int("test_period_candles", 1152, 17280, step=100) + X_test = X_test.tail(test_window) + y_test = y_test.tail(test_window) + test_weights = test_weights[-test_window:] + + # Fit the model + model = LGBMRegressor(**params) + model.fit( + X=X, + y=y, + sample_weight=train_weights, + eval_set=[(X_test, y_test)], + eval_sample_weight=[test_weights], + eval_metric="rmse", + callbacks=[optuna.integration.LightGBMPruningCallback(trial, "rmse")], + ) + y_pred = model.predict(X_test) + + error = sklearn.metrics.root_mean_squared_error(y_test, y_pred) + + return error + + +def hp_objective(trial, X, y, train_weights, X_test, y_test, test_weights, params): study_params = { "objective": "rmse", "n_estimators": trial.suggest_int("n_estimators", 100, 800), @@ -234,18 +252,15 @@ def objective(trial, X, y, weights, X_test, y_test, params): "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True), } params = {**params, **study_params} - window = trial.suggest_int("train_period_candles", 1152, 17280, step=100) # Fit the model model = LGBMRegressor(**params) - X = X.tail(window) - y = y.tail(window) - weights = weights[-window:] model.fit( - X, - y, - sample_weight=weights, + X=X, + y=y, + sample_weight=train_weights, eval_set=[(X_test, y_test)], + eval_sample_weight=[test_weights], eval_metric="rmse", callbacks=[optuna.integration.LightGBMPruningCallback(trial, "rmse")], ) diff --git a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV3.py b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV3.py index 17a0821..3ab27f7 100644 --- a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV3.py +++ b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV3.py @@ -41,24 +41,23 @@ class XGBoostRegressorQuickAdapterV3(BaseRegressionModel): X = data_dictionary["train_features"] y = data_dictionary["train_labels"] + train_weights = data_dictionary["train_weights"] X_test = data_dictionary["test_features"] y_test = data_dictionary["test_labels"] test_weights = data_dictionary["test_weights"] - eval_set, eval_weights = self.eval_set_and_weights(X_test, y_test, test_weights) - - sample_weight = data_dictionary["train_weights"] - xgb_model = self.get_init_model(dk.pair) model = XGBRegressor(**self.model_training_parameters) + eval_set, eval_weights = self.eval_set_and_weights(X_test, y_test, test_weights) + start = time.time() model.fit( X=X, y=y, - sample_weight=sample_weight, + sample_weight=train_weights, eval_set=eval_set, sample_weight_eval_set=eval_weights, xgb_model=xgb_model, diff --git a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py index e2fa445..3233c5c 100644 --- a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py @@ -45,18 +45,17 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): X = data_dictionary["train_features"] y = data_dictionary["train_labels"] + train_weights = data_dictionary["train_weights"] X_test = data_dictionary["test_features"] y_test = data_dictionary["test_labels"] test_weights = data_dictionary["test_weights"] - eval_set, eval_weights = self.eval_set_and_weights(X_test, y_test, test_weights) - - sample_weight = data_dictionary["train_weights"] + xgb_model = self.get_init_model(dk.pair) - model_training_parameters = self.model_training_parameters + logger.info(f"Model training parameters : {self.model_training_parameters}") - xgb_model = self.get_init_model(dk.pair) + model = XGBRegressor(**self.model_training_parameters) optuna_hyperopt: bool = ( self.freqai_info.get("optuna_hyperopt", False) @@ -75,14 +74,15 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): trial, X, y, - sample_weight, + train_weights, X_test, y_test, + test_weights, self.model_training_parameters, ), n_trials=self.freqai_info.get("optuna_hyperopt_trials", N_TRIALS), n_jobs=self.freqai_info.get("optuna_hyperopt_jobs", 1), - timeout=self.freqai_info.get("optuna_hyperopt_timeout", 7200), + timeout=self.freqai_info.get("optuna_hyperopt_timeout", 3600), ) hp = study.best_params @@ -94,33 +94,22 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): f"Optuna hyperopt {'best objective value':>20s} : {study.best_value}" ) - window = hp.get("train_period_candles") - X = X.tail(window) - y = y.tail(window) - sample_weight = sample_weight[-window:] - - model_training_parameters = { - **model_training_parameters, - **{ - "n_estimators": hp.get("n_estimators"), - "learning_rate": hp.get("learning_rate"), - "max_depth": hp.get("max_depth"), - "gamma": hp.get("gamma"), - "subsample": hp.get("subsample"), - "colsample_bytree": hp.get("colsample_bytree"), - "reg_alpha": hp.get("reg_alpha"), - "reg_lambda": hp.get("reg_lambda"), - }, - } - - logger.info(f"Model training parameters : {model_training_parameters}") - - model = XGBRegressor(**model_training_parameters) + train_window = hp.get("train_period_candles") + X = X.tail(train_window) + y = y.tail(train_window) + train_weights = train_weights[-train_window:] + + test_window = hp.get("test_period_candles") + X_test = X_test.tail(test_window) + y_test = y_test.tail(test_window) + test_weights = test_weights[-test_window:] + + eval_set, eval_weights = self.eval_set_and_weights(X_test, y_test, test_weights) model.fit( X=X, y=y, - sample_weight=sample_weight, + sample_weight=train_weights, eval_set=eval_set, sample_weight_eval_set=eval_weights, xgb_model=xgb_model, @@ -218,7 +207,41 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): return eval_set, eval_weights -def objective(trial, X, y, weights, X_test, y_test, params): +def objective(trial, X, y, train_weights, X_test, y_test, test_weights, params): + train_window = trial.suggest_int("train_period_candles", 1152, 17280, step=100) + X = X.tail(train_window) + y = y.tail(train_window) + train_weights = train_weights[-train_window:] + + test_window = trial.suggest_int("test_period_candles", 1152, 17280, step=100) + X_test = X_test.tail(test_window) + y_test = y_test.tail(test_window) + test_weights = test_weights[-test_window:] + + # Fit the model + model = XGBRegressor( + objective="reg:squarederror", + eval_metric="rmse", + callbacks=[ + optuna.integration.XGBoostPruningCallback(trial, "validation_0-rmse") + ], + **params, + ) + model.fit( + X=X, + y=y, + sample_weight=train_weights, + eval_set=[(X_test, y_test)], + sample_weight_eval_set=[test_weights], + ) + y_pred = model.predict(X_test) + + error = sklearn.metrics.root_mean_squared_error(y_test, y_pred) + + return error + + +def hp_objective(trial, X, y, train_weights, X_test, y_test, test_weights, params): study_params = { "objective": "reg:squarederror", "eval_metric": "rmse", @@ -235,18 +258,15 @@ def objective(trial, X, y, weights, X_test, y_test, params): ], } params = {**params, **study_params} - window = trial.suggest_int("train_period_candles", 1152, 17280, step=100) # Fit the model model = XGBRegressor(**params) - X = X.tail(window) - y = y.tail(window) - weights = weights[-window:] model.fit( - X, - y, - sample_weight=weights, + X=X, + y=y, + sample_weight=train_weights, eval_set=[(X_test, y_test)], + sample_weight_eval_set=[test_weights], ) y_pred = model.predict(X_test) -- 2.43.0