From 8769c5519a7e4834126678acf765035b3879ff3f Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Sat, 25 Jan 2025 01:05:14 +0100 Subject: [PATCH] fix: avoid error with optuna and no test set defined MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- quickadapter/user_data/config-template.json | 6 +-- .../XGBoostRegressorQuickAdapterV3.py | 23 +++++++---- .../XGBoostRegressorQuickAdapterV35.py | 41 ++++++++++++------- 3 files changed, 44 insertions(+), 26 deletions(-) diff --git a/quickadapter/user_data/config-template.json b/quickadapter/user_data/config-template.json index fe1b932..bfcf6c3 100644 --- a/quickadapter/user_data/config-template.json +++ b/quickadapter/user_data/config-template.json @@ -128,7 +128,7 @@ "track_performance": false, "data_kitchen_thread_count": 6, // set to number of CPU threads / 4 "weibull_outlier_threshold": 0.999, - "optuna_hyperopt": false, + "optuna_hyperopt": true, "extra_returns_per_train": { "DI_value_param1": 0, "DI_value_param2": 0, @@ -167,14 +167,14 @@ "buffer_train_data_candles": 100 }, "data_split_parameters": { - "test_size": 0, + "test_size": 0.3, "random_state": 1, "shuffle": false }, "model_training_parameters": { // "device": "gpu", // "use_rmm:": true, - "nthread": 6, // set to number of CPU threads / 4 + "n_jobs": 6, // set to number of CPU threads / 4 "verbosity": 1 } }, diff --git a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV3.py b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV3.py index fc56fd4..8e476eb 100644 --- a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV3.py +++ b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV3.py @@ -45,14 +45,11 @@ class XGBoostRegressorQuickAdapterV3(BaseRegressionModel): X = data_dictionary["train_features"] y = data_dictionary["train_labels"] - if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0: - eval_set = None - eval_weights = None - else: - eval_set = [ - (data_dictionary["test_features"], data_dictionary["test_labels"]) - ] - eval_weights = [data_dictionary["test_weights"]] + X_test = data_dictionary["test_features"] + y_test = data_dictionary["test_labels"] + test_weights = data_dictionary["test_weights"] + + eval_set, eval_weights = self.eval_set_and_weights(X_test, y_test, test_weights) sample_weight = data_dictionary["train_weights"] @@ -145,3 +142,13 @@ class XGBoostRegressorQuickAdapterV3(BaseRegressionModel): dk.data["extra_returns_per_train"]["DI_value_param2"] = f[1] dk.data["extra_returns_per_train"]["DI_value_param3"] = f[2] dk.data["extra_returns_per_train"]["DI_cutoff"] = cutoff + + def eval_set_and_weights(self, X_test, y_test, test_weights): + if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.3) == 0: + eval_set = None + eval_weights = None + else: + eval_set = [(X_test, y_test)] + eval_weights = [test_weights] + + return eval_set, eval_weights diff --git a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py index 9955672..efe82c9 100644 --- a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py @@ -46,21 +46,22 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): X = data_dictionary["train_features"] y = data_dictionary["train_labels"] - if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0: - eval_set = None - eval_weights = None - else: - eval_set = [ - (data_dictionary["test_features"], data_dictionary["test_labels"]) - ] - eval_weights = [data_dictionary["test_weights"]] + X_test = data_dictionary["test_features"] + y_test = data_dictionary["test_labels"] + test_weights = data_dictionary["test_weights"] + + eval_set, eval_weights = self.eval_set_and_weights(X_test, y_test, test_weights) sample_weight = data_dictionary["train_weights"] xgb_model = self.get_init_model(dk.pair) start = time.time() hp = {} - if self.freqai_info.get("optuna_hyperopt", False): + if ( + self.freqai_info.get("optuna_hyperopt", False) + and self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.3) + > 0 + ): study = optuna.create_study(direction="minimize") study.optimize( lambda trial: objective( @@ -68,8 +69,8 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): X, y, sample_weight, - data_dictionary["test_features"], - data_dictionary["test_labels"], + X_test, + y_test, self.model_training_parameters, ), n_trials=N_TRIALS, @@ -80,8 +81,10 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): hp = study.best_params # trial = study.best_trial for key, value in hp.items(): - logger.debug(f"Optuna {key:>20s} : {value}") - logger.info(f"Optuna {'best objective value':>20s} : {study.best_value}") + logger.debug(f"Optuna hyperopt {key:>20s} : {value}") + logger.info( + f"Optuna hyperopt {'best objective value':>20s} : {study.best_value}" + ) window = hp.get("train_period_candles", 4032) X = X.tail(window) @@ -174,10 +177,18 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): dk.data["extra_returns_per_train"]["DI_value_param3"] = f[2] dk.data["extra_returns_per_train"]["DI_cutoff"] = cutoff + def eval_set_and_weights(self, X_test, y_test, test_weights): + if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.3) == 0: + eval_set = None + eval_weights = None + else: + eval_set = [(X_test, y_test)] + eval_weights = [test_weights] + + return eval_set, eval_weights -def objective(trial, X, y, weights, X_test, y_test, params): - """Define the objective function""" +def objective(trial, X, y, weights, X_test, y_test, params): window = trial.suggest_int("train_period_candles", 1152, 17280, step=600) # Fit the model -- 2.43.0