From 0e033256f7e27b2c4a18034924ec21e64b749c72 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Wed, 26 Feb 2025 01:07:15 +0100 Subject: [PATCH] feat(qav3): add quantile tunable for predictions smoothing MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- .../LightGBMRegressorQuickAdapterV35.py | 42 +++++++++---------- .../XGBoostRegressorQuickAdapterV35.py | 42 +++++++++---------- 2 files changed, 38 insertions(+), 46 deletions(-) diff --git a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py index ced1b7f..f368eae 100644 --- a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py @@ -271,8 +271,8 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): label_period_candles: int, ) -> tuple[float, float]: predictions_smoothing = self.freqai_info.get("predictions_smoothing", "mean") - if predictions_smoothing == "log-sum-exp": - return log_sum_exp_min_max_pred( + if predictions_smoothing == "quantile": + return quantile_min_max_pred( pred_df, fit_live_predictions_candles, label_period_candles ) elif predictions_smoothing == "mean": @@ -283,6 +283,10 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): return median_min_max_pred( pred_df, fit_live_predictions_candles, label_period_candles ) + else: + raise ValueError( + f"Invalid predictions_smoothing value: '{predictions_smoothing}'" + ) def optuna_hp_enqueue_previous_best_trial( self, @@ -487,22 +491,24 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): return False -def log_sum_exp_min_max_pred( +def mean_min_max_pred( pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int ) -> tuple[float, float]: + pred_df_sorted = ( + pred_df.select_dtypes(exclude=["object"]) + .copy() + 
.apply(lambda col: col.sort_values(ascending=False, ignore_index=True)) + ) + label_period_frequency: int = int( fit_live_predictions_candles / (label_period_candles * 2) ) - extrema = pred_df.tail(label_period_candles * label_period_frequency)[ - EXTREMA_COLUMN - ] - min_pred = real_soft_min(extrema) - max_pred = real_soft_max(extrema) - - return min_pred, max_pred + min_pred = pred_df_sorted.iloc[-label_period_frequency:].mean() + max_pred = pred_df_sorted.iloc[:label_period_frequency].mean() + return min_pred[EXTREMA_COLUMN], max_pred[EXTREMA_COLUMN] -def mean_min_max_pred( +def quantile_min_max_pred( pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int ) -> tuple[float, float]: pred_df_sorted = ( @@ -514,8 +520,8 @@ def mean_min_max_pred( label_period_frequency: int = int( fit_live_predictions_candles / (label_period_candles * 2) ) - min_pred = pred_df_sorted.iloc[-label_period_frequency:].mean() - max_pred = pred_df_sorted.iloc[:label_period_frequency].mean() + min_pred = pred_df_sorted.iloc[-label_period_frequency:].quantile(0.25) + max_pred = pred_df_sorted.iloc[:label_period_frequency].quantile(0.75) return min_pred[EXTREMA_COLUMN], max_pred[EXTREMA_COLUMN] @@ -636,13 +642,3 @@ def hp_objective( error = sklearn.metrics.root_mean_squared_error(y_test, y_pred) return error - - -def real_soft_max(series: pd.Series, beta=1.0) -> float: - maximum = series.max() - return maximum + spy.special.logsumexp(beta * (series - maximum)) / beta - - -def real_soft_min(series: pd.Series, beta=1.0) -> float: - minimum = series.min() - return minimum - spy.special.logsumexp(-beta * (series - minimum)) / beta diff --git a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py index 575ef13..0aacf3d 100644 --- a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py @@ 
-272,8 +272,8 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): label_period_candles: int, ) -> tuple[float, float]: predictions_smoothing = self.freqai_info.get("predictions_smoothing", "mean") - if predictions_smoothing == "log-sum-exp": - return log_sum_exp_min_max_pred( + if predictions_smoothing == "quantile": + return quantile_min_max_pred( pred_df, fit_live_predictions_candles, label_period_candles ) elif predictions_smoothing == "mean": @@ -284,6 +284,10 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): return median_min_max_pred( pred_df, fit_live_predictions_candles, label_period_candles ) + else: + raise ValueError( + f"Invalid predictions_smoothing value: '{predictions_smoothing}'" + ) def optuna_hp_enqueue_previous_best_trial( self, @@ -488,22 +492,24 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): return False -def log_sum_exp_min_max_pred( +def mean_min_max_pred( pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int ) -> tuple[float, float]: + pred_df_sorted = ( + pred_df.select_dtypes(exclude=["object"]) + .copy() + .apply(lambda col: col.sort_values(ascending=False, ignore_index=True)) + ) + label_period_frequency: int = int( fit_live_predictions_candles / (label_period_candles * 2) ) - extrema = pred_df.tail(label_period_candles * label_period_frequency)[ - EXTREMA_COLUMN - ] - min_pred = real_soft_min(extrema) - max_pred = real_soft_max(extrema) - - return min_pred, max_pred + min_pred = pred_df_sorted.iloc[-label_period_frequency:].mean() + max_pred = pred_df_sorted.iloc[:label_period_frequency].mean() + return min_pred[EXTREMA_COLUMN], max_pred[EXTREMA_COLUMN] -def mean_min_max_pred( +def quantile_min_max_pred( pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int ) -> tuple[float, float]: pred_df_sorted = ( @@ -515,8 +521,8 @@ def mean_min_max_pred( label_period_frequency: int = int( fit_live_predictions_candles / (label_period_candles * 2) ) - 
min_pred = pred_df_sorted.iloc[-label_period_frequency:].mean() - max_pred = pred_df_sorted.iloc[:label_period_frequency].mean() + min_pred = pred_df_sorted.iloc[-label_period_frequency:].quantile(0.25) + max_pred = pred_df_sorted.iloc[:label_period_frequency].quantile(0.75) return min_pred[EXTREMA_COLUMN], max_pred[EXTREMA_COLUMN] @@ -642,13 +648,3 @@ def hp_objective( error = sklearn.metrics.root_mean_squared_error(y_test, y_pred) return error - - -def real_soft_max(series: pd.Series, beta=1.0) -> float: - maximum = series.max() - return maximum + spy.special.logsumexp(beta * (series - maximum)) / beta - - -def real_soft_min(series: pd.Series, beta=1.0) -> float: - minimum = series.min() - return minimum - spy.special.logsumexp(-beta * (series - minimum)) / beta -- 2.43.0