Piment Noir Git Repositories - freqai-strategies.git/commitdiff
perf(qav3): experiment with log-sum-exp prediction smoothing
authorJérôme Benoit <jerome.benoit@piment-noir.org>
Mon, 10 Feb 2025 19:26:58 +0000 (20:26 +0100)
committerJérôme Benoit <jerome.benoit@piment-noir.org>
Mon, 10 Feb 2025 19:26:58 +0000 (20:26 +0100)
Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py
quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py
quickadapter/user_data/strategies/QuickAdapterV3.py

index f21788efab8e675e75167cd5d66876aab40808ff..5726f8e9f4c3fbec45e897c7263bd0e778e017ad 100644 (file)
@@ -11,6 +11,7 @@ import optuna
 import sklearn
 import warnings
 import re
+import numpy as np
 
 N_TRIALS = 36
 TEST_SIZE = 0.1
@@ -236,19 +237,28 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel):
def min_max_pred(
    pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int
):
    """Estimate per-label extrema of recent predictions via log-sum-exp smoothing.

    Only the last ``label_period_candles`` rows are considered, and soft
    (smooth) min/max are used instead of hard order statistics so that a
    single outlier prediction does not dominate the thresholds.

    :param pred_df: DataFrame of model predictions (one column per label).
    :param fit_live_predictions_candles: kept for interface compatibility with
        the previous order-statistics implementation; unused by this variant.
    :param label_period_candles: lookback window (number of rows) to smooth over.
    :return: tuple ``(min_pred, max_pred)`` of per-column smoothed extrema.
    """
    # Non-numeric (object dtype) columns cannot go through exp(); the previous
    # implementation skipped them as well, so filter them out before smoothing.
    pred_df_tail = pred_df.select_dtypes(exclude=["object"]).tail(
        label_period_candles
    )
    min_pred = pred_df_tail.apply(lambda col: smooth_min(col, beta=10))
    max_pred = pred_df_tail.apply(lambda col: smooth_max(col, beta=10))

    return min_pred, max_pred
+
+
def __min_max_pred(
    pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int
):
    """Previous order-statistics extrema estimate, kept for reference.

    Each numeric column is sorted descending; the median of the top /
    bottom ``frequency`` rows serves as the max / min estimate.

    :param pred_df: DataFrame of model predictions.
    :param fit_live_predictions_candles: size of the live prediction window.
    :param label_period_candles: labeling period; dividing the window by it
        gives the number of extreme samples to aggregate.
    :return: tuple ``(min_pred, max_pred)`` of per-column medians.
    """
    pred_df_sorted = (
        pred_df.select_dtypes(exclude=["object"])
        .copy()
        .apply(lambda col: col.sort_values(ascending=False, ignore_index=True))
    )

    # Guard against a zero-length slice: int(frequency) == 0 would make
    # iloc[-0:] select the WHOLE frame (since -0 == 0) and iloc[:0] an
    # empty one, silently corrupting both estimates.
    frequency = max(int(fit_live_predictions_candles / label_period_candles), 1)
    min_pred = pred_df_sorted.iloc[-frequency:].median()
    max_pred = pred_df_sorted.iloc[:frequency].median()
    return min_pred, max_pred
 
 
@@ -303,7 +313,7 @@ def objective(
     y_pred = model.predict(X_test)
 
     min_label_period_candles = int(fit_live_predictions_candles / 10)
-    max_label_period_candles = int(fit_live_predictions_candles / 2)
+    max_label_period_candles = fit_live_predictions_candles
     label_period_candles = trial.suggest_int(
         "label_period_candles",
         min_label_period_candles,
@@ -351,3 +361,11 @@ def hp_objective(trial, X, y, train_weights, X_test, y_test, test_weights, param
def sanitize_path(path: str) -> str:
    """Replace every character outside the allowed set with an underscore."""
    return re.sub(r"[^A-Za-z0-9 _\-\.\(\)]", "_", path)
+
+
def smooth_max(series, beta=1.0):
    """Numerically stable soft maximum: logsumexp(beta * series) / beta.

    The peak is factored out before exponentiation so that large
    ``beta * series`` values cannot overflow exp(); mathematically
    identical to the naive ``log(sum(exp(beta * series))) / beta``.
    """
    scaled = beta * np.asarray(series)
    peak = np.max(scaled)
    return (peak + np.log(np.sum(np.exp(scaled - peak)))) / beta
+
+
def smooth_min(series, beta=1.0):
    """Numerically stable soft minimum: -logsumexp(-beta * series) / beta.

    Stabilized by factoring out the peak of ``-beta * series`` so exp()
    cannot overflow; mathematically identical to the naive form.
    """
    scaled = -beta * np.asarray(series)
    peak = np.max(scaled)
    return -(peak + np.log(np.sum(np.exp(scaled - peak)))) / beta
index b70099c7218407bac01ca3d85985782fec964b43..b2d1dbfe7931868bdcb1cf184897542781263f5b 100644 (file)
@@ -11,6 +11,7 @@ import optuna
 import sklearn
 import warnings
 import re
+import numpy as np
 
 N_TRIALS = 36
 TEST_SIZE = 0.1
@@ -236,19 +237,28 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel):
def min_max_pred(
    pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int
):
    """Estimate per-label extrema of recent predictions via log-sum-exp smoothing.

    Only the last ``label_period_candles`` rows are considered, and soft
    (smooth) min/max are used instead of hard order statistics so that a
    single outlier prediction does not dominate the thresholds.

    :param pred_df: DataFrame of model predictions (one column per label).
    :param fit_live_predictions_candles: kept for interface compatibility with
        the previous order-statistics implementation; unused by this variant.
    :param label_period_candles: lookback window (number of rows) to smooth over.
    :return: tuple ``(min_pred, max_pred)`` of per-column smoothed extrema.
    """
    # Non-numeric (object dtype) columns cannot go through exp(); the previous
    # implementation skipped them as well, so filter them out before smoothing.
    pred_df_tail = pred_df.select_dtypes(exclude=["object"]).tail(
        label_period_candles
    )
    min_pred = pred_df_tail.apply(lambda col: smooth_min(col, beta=10))
    max_pred = pred_df_tail.apply(lambda col: smooth_max(col, beta=10))

    return min_pred, max_pred
+
+
def __min_max_pred(
    pred_df: pd.DataFrame, fit_live_predictions_candles: int, label_period_candles: int
):
    """Previous order-statistics extrema estimate, kept for reference.

    Each numeric column is sorted descending; the median of the top /
    bottom ``frequency`` rows serves as the max / min estimate.

    :param pred_df: DataFrame of model predictions.
    :param fit_live_predictions_candles: size of the live prediction window.
    :param label_period_candles: labeling period; dividing the window by it
        gives the number of extreme samples to aggregate.
    :return: tuple ``(min_pred, max_pred)`` of per-column medians.
    """
    pred_df_sorted = (
        pred_df.select_dtypes(exclude=["object"])
        .copy()
        .apply(lambda col: col.sort_values(ascending=False, ignore_index=True))
    )

    # Guard against a zero-length slice: int(frequency) == 0 would make
    # iloc[-0:] select the WHOLE frame (since -0 == 0) and iloc[:0] an
    # empty one, silently corrupting both estimates.
    frequency = max(int(fit_live_predictions_candles / label_period_candles), 1)
    min_pred = pred_df_sorted.iloc[-frequency:].median()
    max_pred = pred_df_sorted.iloc[:frequency].median()
    return min_pred, max_pred
 
 
@@ -308,7 +318,7 @@ def objective(
     y_pred = model.predict(X_test)
 
     min_label_period_candles = int(fit_live_predictions_candles / 10)
-    max_label_period_candles = int(fit_live_predictions_candles / 2)
+    max_label_period_candles = fit_live_predictions_candles
     label_period_candles = trial.suggest_int(
         "label_period_candles", min_label_period_candles, max_label_period_candles
     )
@@ -359,3 +369,11 @@ def hp_objective(trial, X, y, train_weights, X_test, y_test, test_weights, param
def sanitize_path(path: str) -> str:
    """Replace every character outside the allowed set with an underscore."""
    return re.sub(r"[^A-Za-z0-9 _\-\.\(\)]", "_", path)
+
+
def smooth_max(series, beta=1.0):
    """Numerically stable soft maximum: logsumexp(beta * series) / beta.

    The peak is factored out before exponentiation so that large
    ``beta * series`` values cannot overflow exp(); mathematically
    identical to the naive ``log(sum(exp(beta * series))) / beta``.
    """
    scaled = beta * np.asarray(series)
    peak = np.max(scaled)
    return (peak + np.log(np.sum(np.exp(scaled - peak)))) / beta
+
+
def smooth_min(series, beta=1.0):
    """Numerically stable soft minimum: -logsumexp(-beta * series) / beta.

    Stabilized by factoring out the peak of ``-beta * series`` so exp()
    cannot overflow; mathematically identical to the naive form.
    """
    scaled = -beta * np.asarray(series)
    peak = np.max(scaled)
    return -(peak + np.log(np.sum(np.exp(scaled - peak)))) / beta
index 280fe9b0a0fb2d6e339ac5daf18dfaf0149010bd..fee2d05216b3f3af0de5f3cf7ec0dcc9412f662d 100644 (file)
@@ -255,7 +255,7 @@ class QuickAdapterV3(IStrategy):
         dataframe["&s-extrema"] = (
             dataframe["&s-extrema"]
             .rolling(window=6, win_type="gaussian", center=True)
-            .mean(std=0.5)
+            .mean(std=0.5)  # FIXME: pandas win_type rolling returns Window, which implements mean/sum/var/std only — .median() raises
         )
         return dataframe