From: Jérôme Benoit Date: Wed, 26 Feb 2025 00:07:15 +0000 (+0100) Subject: feat(qav3): add quantile tunable for predictions smoothing X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=0e033256f7e27b2c4a18034924ec21e64b749c72;p=freqai-strategies.git feat(qav3): add quantile tunable for predictions smoothing Signed-off-by: Jérôme Benoit --- diff --git a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py index ced1b7f..f368eae 100644 --- a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py @@ -271,8 +271,8 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): label_period_candles: int, ) -> tuple[float, float]: predictions_smoothing = self.freqai_info.get("predictions_smoothing", "mean") - if predictions_smoothing == "log-sum-exp": - return log_sum_exp_min_max_pred( + if predictions_smoothing == "quantile": + return quantile_min_max_pred( pred_df, fit_live_predictions_candles, label_period_candles ) elif predictions_smoothing == "mean": @@ -283,6 +283,10 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): return median_min_max_pred( pred_df, fit_live_predictions_candles, label_period_candles ) + else: + raise ValueError( + f"Invalid predictions_smoothing value: '{predictions_smoothing}'" + ) def optuna_hp_enqueue_previous_best_trial( self, @@ -487,22 +491,24 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): return False -def log_sum_exp_min_max_pred( +def mean_min_max_pred( pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int ) -> tuple[float, float]: + pred_df_sorted = ( + pred_df.select_dtypes(exclude=["object"]) + .copy() + .apply(lambda col: col.sort_values(ascending=False, ignore_index=True)) + ) + label_period_frequency: int = int( fit_live_predictions_candles / (label_period_candles * 2) ) - extrema = pred_df.tail(label_period_candles * label_period_frequency)[ - EXTREMA_COLUMN - ] - min_pred = real_soft_min(extrema) - max_pred = real_soft_max(extrema) - - return min_pred, max_pred + min_pred = pred_df_sorted.iloc[-label_period_frequency:].mean() + max_pred = pred_df_sorted.iloc[:label_period_frequency].mean() + return min_pred[EXTREMA_COLUMN], max_pred[EXTREMA_COLUMN] -def mean_min_max_pred( +def quantile_min_max_pred( pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int ) -> tuple[float, float]: pred_df_sorted = ( @@ -514,8 +520,8 @@ def mean_min_max_pred( label_period_frequency: int = int( fit_live_predictions_candles / (label_period_candles * 2) ) - min_pred = pred_df_sorted.iloc[-label_period_frequency:].mean() - max_pred = pred_df_sorted.iloc[:label_period_frequency].mean() + min_pred = pred_df_sorted.iloc[-label_period_frequency:].quantile(0.25) + max_pred = pred_df_sorted.iloc[:label_period_frequency].quantile(0.75) return min_pred[EXTREMA_COLUMN], max_pred[EXTREMA_COLUMN] @@ -636,13 +642,3 @@ def hp_objective( error = sklearn.metrics.root_mean_squared_error(y_test, y_pred) return error - - -def real_soft_max(series: pd.Series, beta=1.0) -> float: - maximum = series.max() - return maximum + spy.special.logsumexp(beta * (series - maximum)) / beta - - -def real_soft_min(series: pd.Series, beta=1.0) -> float: - minimum = series.min() - return minimum - spy.special.logsumexp(-beta * (series - minimum)) / beta diff --git a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py index 575ef13..0aacf3d 100644 --- a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py @@ -272,8 +272,8 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): label_period_candles: int, ) -> tuple[float, float]: predictions_smoothing = self.freqai_info.get("predictions_smoothing", "mean") - if predictions_smoothing == "log-sum-exp": - return log_sum_exp_min_max_pred( + if predictions_smoothing == "quantile": + return quantile_min_max_pred( pred_df, fit_live_predictions_candles, label_period_candles ) elif predictions_smoothing == "mean": @@ -284,6 +284,10 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): return median_min_max_pred( pred_df, fit_live_predictions_candles, label_period_candles ) + else: + raise ValueError( + f"Invalid predictions_smoothing value '{predictions_smoothing}'" + ) def optuna_hp_enqueue_previous_best_trial( self, @@ -488,22 +492,24 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): return False -def log_sum_exp_min_max_pred( +def mean_min_max_pred( pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int ) -> tuple[float, float]: + pred_df_sorted = ( + pred_df.select_dtypes(exclude=["object"]) + .copy() + .apply(lambda col: col.sort_values(ascending=False, ignore_index=True)) + ) + label_period_frequency: int = int( fit_live_predictions_candles / (label_period_candles * 2) ) - extrema = pred_df.tail(label_period_candles * label_period_frequency)[ - EXTREMA_COLUMN - ] - min_pred = real_soft_min(extrema) - max_pred = real_soft_max(extrema) - - return min_pred, max_pred + min_pred = pred_df_sorted.iloc[-label_period_frequency:].mean() + max_pred = pred_df_sorted.iloc[:label_period_frequency].mean() + return min_pred[EXTREMA_COLUMN], max_pred[EXTREMA_COLUMN] -def mean_min_max_pred( +def quantile_min_max_pred( pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int ) -> tuple[float, float]: pred_df_sorted = ( @@ -515,8 +521,8 @@ def mean_min_max_pred( label_period_frequency: int = int( fit_live_predictions_candles / (label_period_candles * 2) ) - min_pred = pred_df_sorted.iloc[-label_period_frequency:].mean() - max_pred = pred_df_sorted.iloc[:label_period_frequency].mean() + min_pred = pred_df_sorted.iloc[-label_period_frequency:].quantile(0.25) + max_pred = pred_df_sorted.iloc[:label_period_frequency].quantile(0.75) return min_pred[EXTREMA_COLUMN], max_pred[EXTREMA_COLUMN] @@ -642,13 +648,3 @@ def hp_objective( error = sklearn.metrics.root_mean_squared_error(y_test, y_pred) return error - - -def real_soft_max(series: pd.Series, beta=1.0) -> float: - maximum = series.max() - return maximum + spy.special.logsumexp(beta * (series - maximum)) / beta - - -def real_soft_min(series: pd.Series, beta=1.0) -> float: - minimum = series.min() - return minimum - spy.special.logsumexp(-beta * (series - minimum)) / beta