From e31c083b0f090056883302d23211ea558cf05e17 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Thu, 6 Feb 2025 12:15:37 +0100 Subject: [PATCH] fix(qav3): avoid train and test set discrepancy MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- .../LightGBMRegressorQuickAdapterV35.py | 27 ++++++++++++------- .../XGBoostRegressorQuickAdapterV35.py | 27 ++++++++++++------- .../user_data/strategies/QuickAdapterV3.py | 15 ++++++++--- 3 files changed, 48 insertions(+), 21 deletions(-) diff --git a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py index 65f6b16..c4f274f 100644 --- a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py @@ -103,6 +103,12 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): y_test = y_test.tail(test_window) test_weights = test_weights[-test_window:] + if dk.pair not in self.freqai_info["feature_parameters"]: + self.freqai_info["feature_parameters"][dk.pair] = {} + self.freqai_info["feature_parameters"][dk.pair]["label_period_candles"] = ( + self.__optuna_hp.get("label_period_candles") + ) + eval_set, eval_weights = self.eval_set_and_weights(X_test, y_test, test_weights) model.fit( @@ -146,9 +152,6 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): label_period_candles = self.__optuna_hp.get( "label_period_candles", self.ft_params["label_period_candles"] ) - self.freqai_info["feature_parameters"]["label_period_candles"] = ( - label_period_candles - ) else: label_period_candles = self.ft_params["label_period_candles"] min_pred, max_pred = min_max_pred( @@ -231,19 +234,25 @@ def objective( candles_step, params, ): - if (len(X) != len(y)) or (len(X) != len(train_weights)): - raise ValueError("Training sets must have the same length") - if 
(len(X_test) != len(y_test)) or (len(X_test) != len(test_weights)): - raise ValueError("Test sets must have the same length") + min_train_window: int = 10 + max_train_window: int = ( + len(X) if len(X) > min_train_window else (min_train_window + len(X)) + ) train_window = trial.suggest_int( - "train_period_candles", 0, len(X), step=candles_step + "train_period_candles", min_train_window, max_train_window, step=candles_step ) X = X.tail(train_window) y = y.tail(train_window) train_weights = train_weights[-train_window:] + min_test_window: int = 10 + max_test_window: int = ( + len(X_test) + if len(X_test) > min_test_window + else (min_test_window + len(X_test)) + ) test_window = trial.suggest_int( - "test_period_candles", 0, len(X_test), step=candles_step + "test_period_candles", min_test_window, max_test_window, step=candles_step ) X_test = X_test.tail(test_window) y_test = y_test.tail(test_window) diff --git a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py index 6a2e820..d66ec07 100644 --- a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py @@ -103,6 +103,12 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): y_test = y_test.tail(test_window) test_weights = test_weights[-test_window:] + if dk.pair not in self.freqai_info["feature_parameters"]: + self.freqai_info["feature_parameters"][dk.pair] = {} + self.freqai_info["feature_parameters"][dk.pair]["label_period_candles"] = ( + self.__optuna_hp.get("label_period_candles") + ) + eval_set, eval_weights = self.eval_set_and_weights(X_test, y_test, test_weights) model.fit( @@ -146,9 +152,6 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): label_period_candles = self.__optuna_hp.get( "label_period_candles", self.ft_params["label_period_candles"] ) - 
self.freqai_info["feature_parameters"]["label_period_candles"] = ( - label_period_candles - ) else: label_period_candles = self.ft_params["label_period_candles"] min_pred, max_pred = min_max_pred( @@ -231,19 +234,25 @@ def objective( candles_step, params, ): - if (len(X) != len(y)) or (len(X) != len(train_weights)): - raise ValueError("Training sets must have the same length") - if (len(X_test) != len(y_test)) or (len(X_test) != len(test_weights)): - raise ValueError("Test sets must have the same length") + min_train_window: int = 10 + max_train_window: int = ( + len(X) if len(X) > min_train_window else (min_train_window + len(X)) + ) train_window = trial.suggest_int( - "train_period_candles", 0, len(X), step=candles_step + "train_period_candles", min_train_window, max_train_window, step=candles_step ) X = X.tail(train_window) y = y.tail(train_window) train_weights = train_weights[-train_window:] + min_test_window: int = 10 + max_test_window: int = ( + len(X_test) + if len(X_test) > min_test_window + else (min_test_window + len(X_test)) + ) test_window = trial.suggest_int( - "test_period_candles", 0, len(X_test), step=candles_step + "test_period_candles", min_test_window, max_test_window, step=candles_step ) X_test = X_test.tail(test_window) y_test = y_test.tail(test_window) diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py index 30ba56a..a2c1f44 100644 --- a/quickadapter/user_data/strategies/QuickAdapterV3.py +++ b/quickadapter/user_data/strategies/QuickAdapterV3.py @@ -225,17 +225,26 @@ class QuickAdapterV3(IStrategy): dataframe["%-hour_of_day"] = (dataframe["date"].dt.hour + 1) / 25 return dataframe - def set_freqai_targets(self, dataframe, **kwargs): + def set_freqai_targets(self, dataframe, metadata, **kwargs): + pair = str(metadata.get("pair")) + label_period_candles = ( + self.freqai_info["feature_parameters"] + .get(pair, {}) + .get( + "label_period_candles", + 
self.freqai_info["feature_parameters"]["label_period_candles"], + ) + ) dataframe["&s-extrema"] = 0 min_peaks = argrelextrema( dataframe["low"].values, np.less, - order=self.freqai_info["feature_parameters"]["label_period_candles"], + order=label_period_candles, ) max_peaks = argrelextrema( dataframe["high"].values, np.greater, - order=self.freqai_info["feature_parameters"]["label_period_candles"], + order=label_period_candles, ) for mp in min_peaks[0]: dataframe.at[mp, "&s-extrema"] = -1 -- 2.43.0