From ac58f537896f338c08c03aff172bbaf8f86999e8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Mon, 10 Feb 2025 20:26:58 +0100 Subject: [PATCH] perf(qav3): experiment with log-sum-exp prediction smoothing MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- .../LightGBMRegressorQuickAdapterV35.py | 46 +++++++++++++------ .../XGBoostRegressorQuickAdapterV35.py | 46 +++++++++++++------ .../user_data/strategies/QuickAdapterV3.py | 2 +- 3 files changed, 65 insertions(+), 29 deletions(-) diff --git a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py index f21788e..5726f8e 100644 --- a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py @@ -11,6 +11,7 @@ import optuna import sklearn import warnings import re +import numpy as np N_TRIALS = 36 TEST_SIZE = 0.1 @@ -236,19 +237,28 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): def min_max_pred( pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int ): - pred_df_sorted = pd.DataFrame() - for label in pred_df.keys(): - if pred_df[label].dtype == object: - continue - pred_df_sorted[label] = pred_df[label] - - for col in pred_df_sorted: - pred_df_sorted[col] = pred_df_sorted[col].sort_values( - ascending=False, ignore_index=True - ) - frequency = fit_live_predictions_candles / (label_period_candles * 2) - max_pred = pred_df_sorted.iloc[: int(frequency)].mean() - min_pred = pred_df_sorted.iloc[-int(frequency) :].mean() + min_pred = pred_df.tail(label_period_candles).apply( + lambda col: smooth_min(col, beta=10) + ) + max_pred = pred_df.tail(label_period_candles).apply( + lambda col: smooth_max(col, beta=10) + ) + + return min_pred, max_pred + + +def __min_max_pred( + pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int +): + pred_df_sorted = ( + pred_df.select_dtypes(exclude=["object"]) + .copy() + .apply(lambda col: col.sort_values(ascending=False, ignore_index=True)) + ) + + frequency = fit_live_predictions_candles / label_period_candles + min_pred = pred_df_sorted.iloc[-int(frequency) :].median() + max_pred = pred_df_sorted.iloc[: int(frequency)].median() return min_pred, max_pred @@ -303,7 +313,7 @@ def objective( y_pred = model.predict(X_test) min_label_period_candles = int(fit_live_predictions_candles / 10) - max_label_period_candles = int(fit_live_predictions_candles / 2) + max_label_period_candles = fit_live_predictions_candles label_period_candles = trial.suggest_int( "label_period_candles", min_label_period_candles, @@ -351,3 +361,11 @@ def hp_objective(trial, X, y, train_weights, X_test, y_test, test_weights, param def sanitize_path(path: str) -> str: allowed = re.compile(r"[^A-Za-z0-9 _\-\.\(\)]") return allowed.sub("_", path) + + +def smooth_max(series, beta=1.0): + return np.log(np.sum(np.exp(beta * series))) / beta + + +def smooth_min(series, beta=1.0): + return -np.log(np.sum(np.exp(-beta * series))) / beta diff --git a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py index b70099c..b2d1dbf 100644 --- a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py @@ -11,6 +11,7 @@ import optuna import sklearn import warnings import re +import numpy as np N_TRIALS = 36 TEST_SIZE = 0.1 @@ -236,19 +237,28 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): def min_max_pred( pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int ): - pred_df_sorted = pd.DataFrame() - for label in pred_df.keys(): - if pred_df[label].dtype == object: - continue - pred_df_sorted[label] = pred_df[label] - - for col in pred_df_sorted: - pred_df_sorted[col] = pred_df_sorted[col].sort_values( - ascending=False, ignore_index=True - ) - frequency = fit_live_predictions_candles / (label_period_candles * 2) - max_pred = pred_df_sorted.iloc[: int(frequency)].mean() - min_pred = pred_df_sorted.iloc[-int(frequency) :].mean() + min_pred = pred_df.tail(label_period_candles).apply( + lambda col: smooth_min(col, beta=10) + ) + max_pred = pred_df.tail(label_period_candles).apply( + lambda col: smooth_max(col, beta=10) + ) + + return min_pred, max_pred + + +def __min_max_pred( + pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int +): + pred_df_sorted = ( + pred_df.select_dtypes(exclude=["object"]) + .copy() + .apply(lambda col: col.sort_values(ascending=False, ignore_index=True)) + ) + + frequency = fit_live_predictions_candles / label_period_candles + min_pred = pred_df_sorted.iloc[-int(frequency) :].median() + max_pred = pred_df_sorted.iloc[: int(frequency)].median() return min_pred, max_pred @@ -308,7 +318,7 @@ def objective( y_pred = model.predict(X_test) min_label_period_candles = int(fit_live_predictions_candles / 10) - max_label_period_candles = int(fit_live_predictions_candles / 2) + max_label_period_candles = fit_live_predictions_candles label_period_candles = trial.suggest_int( "label_period_candles", min_label_period_candles, max_label_period_candles ) @@ -359,3 +369,11 @@ def hp_objective(trial, X, y, train_weights, X_test, y_test, test_weights, param def sanitize_path(path: str) -> str: allowed = re.compile(r"[^A-Za-z0-9 _\-\.\(\)]") return allowed.sub("_", path) + + +def smooth_max(series, beta=1.0): + return np.log(np.sum(np.exp(beta * series))) / beta + + +def smooth_min(series, beta=1.0): + return -np.log(np.sum(np.exp(-beta * series))) / beta diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py index 280fe9b..fee2d05 100644 --- a/quickadapter/user_data/strategies/QuickAdapterV3.py +++ b/quickadapter/user_data/strategies/QuickAdapterV3.py @@ -255,7 +255,7 @@ class QuickAdapterV3(IStrategy): dataframe["&s-extrema"] = ( dataframe["&s-extrema"] .rolling(window=6, win_type="gaussian", center=True) - .mean(std=0.5) + .median(std=0.5) ) return dataframe -- 2.43.0