From 9276980fddc499ecc53ea77efd34475834d745fc Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Wed, 12 Feb 2025 10:08:50 +0100 Subject: [PATCH] refactor!: add predictions smoothing tunable MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- quickadapter/user_data/config-template.json | 7 +-- .../LightGBMRegressorQuickAdapterV35.py | 53 ++++++++++++++++--- .../XGBoostRegressorQuickAdapterV35.py | 53 ++++++++++++++++--- .../user_data/strategies/QuickAdapterV3.py | 16 +++--- 4 files changed, 104 insertions(+), 25 deletions(-) diff --git a/quickadapter/user_data/config-template.json b/quickadapter/user_data/config-template.json index d73e690..ba7ba0e 100644 --- a/quickadapter/user_data/config-template.json +++ b/quickadapter/user_data/config-template.json @@ -112,8 +112,9 @@ "identifier": "quickadapter-xgboost", // "identifier": "quickadapter-lgbm", "fit_live_predictions_candles": 300, - "track_performance": false, "data_kitchen_thread_count": 6, // set to number of CPU threads / 4 + "track_performance": false, + "predictions_smoothing": "log-sum-exp", "outlier_threshold": 0.999, "optuna_hyperopt": { "enabled": true, @@ -128,8 +129,8 @@ "DI_value_param2": 0, "DI_value_param3": 0, "DI_cutoff": 2, - "&s-minima_sort_threshold": -2, - "&s-maxima_sort_threshold": 2, + "&s-minima_threshold": -2, + "&s-maxima_threshold": 2, "label_period_candles": 100, "rmse": 0 }, diff --git a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py index b7cff11..86318b6 100644 --- a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py @@ -166,8 +166,8 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): ) if not warmed_up: - dk.data["extra_returns_per_train"]["&s-maxima_sort_threshold"] = 2 - dk.data["extra_returns_per_train"]["&s-minima_sort_threshold"] = -2 + dk.data["extra_returns_per_train"]["&s-maxima_threshold"] = 2 + dk.data["extra_returns_per_train"]["&s-minima_threshold"] = -2 else: if self.__optuna_hyperopt: label_period_candles = self.__optuna_hp.get(pair, {}).get( @@ -175,13 +175,13 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): ) else: label_period_candles = self.ft_params["label_period_candles"] - min_pred, max_pred = min_max_pred( + min_pred, max_pred = self.min_max_pred( pred_df_full, num_candles, label_period_candles, ) - dk.data["extra_returns_per_train"]["&s-minima_sort_threshold"] = min_pred - dk.data["extra_returns_per_train"]["&s-maxima_sort_threshold"] = max_pred + dk.data["extra_returns_per_train"]["&s-minima_threshold"] = min_pred + dk.data["extra_returns_per_train"]["&s-maxima_threshold"] = max_pred dk.data["labels_mean"], dk.data["labels_std"] = {}, {} for label in dk.label_list + dk.unique_class_list: @@ -245,8 +245,30 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): ) return storage + def min_max_pred( + self, + pred_df: pd.DataFrame, + fit_live_predictions_candles: int, + label_period_candles: int, + ) -> tuple[float, float]: + predictions_smoothing = self.freqai_info.get( + "predictions_smoothing", "log-sum-exp" + ) + if predictions_smoothing == "log-sum-exp": + return log_sum_exp_min_max_pred( + pred_df, fit_live_predictions_candles, label_period_candles + ) + elif predictions_smoothing == "mean": + return mean_min_max_pred( + pred_df, fit_live_predictions_candles, label_period_candles + ) + elif predictions_smoothing == "median": + return median_min_max_pred( + pred_df, fit_live_predictions_candles, label_period_candles + ) + -def min_max_pred( +def log_sum_exp_min_max_pred( pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int ) -> tuple[float, float]: label_period_frequency: int = int( @@ -260,7 +282,24 @@ def min_max_pred( return min_pred, max_pred -def __min_max_pred( +def mean_min_max_pred( + pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int +) -> tuple[float, float]: + pred_df_sorted = ( + pred_df.select_dtypes(exclude=["object"]) + .copy() + .apply(lambda col: col.sort_values(ascending=False, ignore_index=True)) + ) + + label_period_frequency: int = int( + fit_live_predictions_candles / label_period_candles + ) + min_pred = pred_df_sorted.iloc[-label_period_frequency:].mean() + max_pred = pred_df_sorted.iloc[:label_period_frequency].mean() + return min_pred["&s-extrema"], max_pred["&s-extrema"] + + +def median_min_max_pred( pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int ) -> tuple[float, float]: pred_df_sorted = ( diff --git a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py index 5f8922a..36363c8 100644 --- a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py @@ -169,8 +169,8 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): ) if not warmed_up: - dk.data["extra_returns_per_train"]["&s-maxima_sort_threshold"] = 2 - dk.data["extra_returns_per_train"]["&s-minima_sort_threshold"] = -2 + dk.data["extra_returns_per_train"]["&s-maxima_threshold"] = 2 + dk.data["extra_returns_per_train"]["&s-minima_threshold"] = -2 else: if self.__optuna_hyperopt: label_period_candles = self.__optuna_hp.get(pair, {}).get( @@ -178,13 +178,13 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): ) else: label_period_candles = self.ft_params["label_period_candles"] - min_pred, max_pred = min_max_pred( + min_pred, max_pred = self.min_max_pred( pred_df_full, num_candles, label_period_candles, ) - dk.data["extra_returns_per_train"]["&s-minima_sort_threshold"] = min_pred - dk.data["extra_returns_per_train"]["&s-maxima_sort_threshold"] = max_pred + dk.data["extra_returns_per_train"]["&s-minima_threshold"] = min_pred + dk.data["extra_returns_per_train"]["&s-maxima_threshold"] = max_pred dk.data["labels_mean"], dk.data["labels_std"] = {}, {} for label in dk.label_list + dk.unique_class_list: @@ -248,8 +248,30 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): ) return storage + def min_max_pred( + self, + pred_df: pd.DataFrame, + fit_live_predictions_candles: int, + label_period_candles: int, + ) -> tuple[float, float]: + predictions_smoothing = self.freqai_info.get( + "predictions_smoothing", "log-sum-exp" + ) + if predictions_smoothing == "log-sum-exp": + return log_sum_exp_min_max_pred( + pred_df, fit_live_predictions_candles, label_period_candles + ) + elif predictions_smoothing == "mean": + return mean_min_max_pred( + pred_df, fit_live_predictions_candles, label_period_candles + ) + elif predictions_smoothing == "median": + return median_min_max_pred( + pred_df, fit_live_predictions_candles, label_period_candles + ) + -def min_max_pred( +def log_sum_exp_min_max_pred( pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int ) -> tuple[float, float]: label_period_frequency: int = int( @@ -263,7 +285,24 @@ def min_max_pred( return min_pred, max_pred -def __min_max_pred( +def mean_min_max_pred( + pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int +) -> tuple[float, float]: + pred_df_sorted = ( + pred_df.select_dtypes(exclude=["object"]) + .copy() + .apply(lambda col: col.sort_values(ascending=False, ignore_index=True)) + ) + + label_period_frequency: int = int( + fit_live_predictions_candles / label_period_candles + ) + min_pred = pred_df_sorted.iloc[-label_period_frequency:].mean() + max_pred = pred_df_sorted.iloc[:label_period_frequency].mean() + return min_pred["&s-extrema"], max_pred["&s-extrema"] + + +def median_min_max_pred( pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int ) -> tuple[float, float]: pred_df_sorted = ( diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py index 19f13f2..f6ffc7b 100644 --- a/quickadapter/user_data/strategies/QuickAdapterV3.py +++ b/quickadapter/user_data/strategies/QuickAdapterV3.py @@ -71,8 +71,8 @@ class QuickAdapterV3(IStrategy): "rmse": {"rmse": {"color": "#c28ce3", "type": "line"}}, "extrema": { "&s-extrema": {"color": "#f53580", "type": "line"}, - "&s-minima_sort_threshold": {"color": "#4ae747", "type": "line"}, - "&s-maxima_sort_threshold": {"color": "#5b5e4b", "type": "line"}, + "&s-minima_threshold": {"color": "#4ae747", "type": "line"}, + "&s-maxima_threshold": {"color": "#5b5e4b", "type": "line"}, }, "min_max": { "maxima": {"color": "#ac7fc", "type": "bar"}, @@ -266,15 +266,15 @@ class QuickAdapterV3(IStrategy): 1, ) - dataframe["minima_sort_threshold"] = dataframe["&s-minima_sort_threshold"] - dataframe["maxima_sort_threshold"] = dataframe["&s-maxima_sort_threshold"] + dataframe["minima_threshold"] = dataframe["&s-minima_threshold"] + dataframe["maxima_threshold"] = dataframe["&s-maxima_threshold"] return dataframe def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame: enter_long_conditions = [ df["do_predict"] == 1, df["DI_catch"] == 1, - df["&s-extrema"] < df["minima_sort_threshold"], + df["&s-extrema"] < df["minima_threshold"], ] if enter_long_conditions: @@ -286,7 +286,7 @@ class QuickAdapterV3(IStrategy): enter_short_conditions = [ df["do_predict"] == 1, df["DI_catch"] == 1, - df["&s-extrema"] > df["maxima_sort_threshold"], + df["&s-extrema"] > df["maxima_threshold"], ] if enter_short_conditions: @@ -329,13 +329,13 @@ class QuickAdapterV3(IStrategy): return "outlier_detected" if ( - last_candle["&s-extrema"] < last_candle["minima_sort_threshold"] + last_candle["&s-extrema"] < last_candle["minima_threshold"] and entry_tag == "short" ): return "minima_detected_short" if ( - last_candle["&s-extrema"] > last_candle["maxima_sort_threshold"] + last_candle["&s-extrema"] > last_candle["maxima_threshold"] and entry_tag == "long" ): return "maxima_detected_long" -- 2.43.0