fix(qav3): make hyperopt compatible with ZigZag extreme labeling

author Jérôme Benoit <jerome.benoit@piment-noir.org>
Mon, 7 Apr 2025 09:07:29 +0000 (11:07 +0200)
committer Jérôme Benoit <jerome.benoit@piment-noir.org>
Mon, 7 Apr 2025 09:07:29 +0000 (11:07 +0200)
diff --git a/quickadapter/docs/labeling_window.txt b/quickadapter/docs/labeling_window.txt

deleted file mode 100644 (file)

index cf4ef98..0000000
--- a/quickadapter/docs/labeling_window.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-- = candle = { open, close, high, low, volume, natr_label_period_candles, ... } dataframe
-
-Labeling window:
-----------------
-
-[   live predictions window   ] size fixed in configuration
-[   label window   ] size can change because of HPO
-
-                                                                                                                                                  now
-                                                                                                                                                   |
------------------------------------------------------------------------------------------
-             ][   label window   ][   label window   ][   label window   ][   label window   ]
-                                  [               live predictions window               ]
-
-Price movement expectation over the labeling window:
-----------------------------------------------------
-
-current natr = (atr / current close) * 100
-trade duration candles = (current candle date - trade candle date) // timeframe (in the same time unit)
-
-                           trade candle
-                                |
-                                     current candle
-                                |
----------------------------------
-[   current natr label window   ]
-
-trade duration candles = 0
-expected trade price movement in the future label window candles from trade opening = trade natr * 0.01 * trade rate
-expected price movement in the future label window candles = current natr * 0.01 * current rate
-
-   trade candle                 current candle
-             |                             |
----------------------------------
-[   current natr label window   ]
-        ]
-
-trade duration candles > 0
-expected trade price movement in the future label window candles from trade opening = trade natr * 0.01 * trade rate
-expected trade price movement in the future trade duration candles window from trade opening = trade natr * 0.01 * trade rate * trade duration candles / label window
-expected price movement in the future label window candles = current natr * 0.01 * current rate
-expected price movement in the future trade duration candles window = current natr * 0.01 * current rate * trade duration candles / label window
-
-  trade 1 candle                                 trade 2 candle            current candle
-             |                                         |                          |
-------------------------------------------------------------------
-[   current natr label window   ][   current natr label window   ]
-      [   trade 2 natr label window   ]
-        ]
-
-expected trade price movement in the future trade duration candles window from trade opening approximation = trade natr * 0.01 * trade rate * trade duration candles / label window
-expected price movement in the future trade duration candles window approximation = current natr * 0.01 * current rate * trade duration candles / label window
diff --git a/quickadapter/user_data/config-template.json b/quickadapter/user_data/config-template.json

index fecf86efd47f3a62ef185317a1bfb6a264162b8e..3967e3343ab062a79afa8a0cd91f4cd03a8d1253 100644 (file)
--- a/quickadapter/user_data/config-template.json
+++ b/quickadapter/user_data/config-template.json
@@ -118,7 +118,6 @@
      "fit_live_predictions_candles": 600,
      "data_kitchen_thread_count": 6, // set to number of CPU threads / 4
      "track_performance": false,
-    "prediction_thresholds_smoothing": "mean",
      "outlier_threshold": 0.999,
      "optuna_hyperopt": {
        "enabled": true,
diff --git a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py

index d39eabca8db2ac5f8c94f5c7e857ed41915a69b4..6d181341333b5b43c90114132594c09a7710d204 100644 (file)
--- a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
+++ b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
@@ -8,7 +8,6 @@ import optuna
  import sklearn
  import warnings
  
-from statistics import geometric_mean
  from functools import cached_property
  from typing import Any, Callable, Optional
  from pathlib import Path
@@ -44,7 +43,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
      https://github.com/sponsors/robcaulk
      """
  
-    version = "3.6.7"
+    version = "3.6.8"
  
      @cached_property
      def __optuna_config(self) -> dict:
@@ -160,6 +159,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
                      y_test,
                      test_weights,
                      self.freqai_info.get("fit_live_predictions_candles", 100),
+                    self.ft_params.get("label_period_candles", 50),
                      self.__optuna_config.get("candles_step"),
                      model_training_parameters,
                  ),
@@ -214,12 +214,12 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
          if self.live:
              if not hasattr(self, "exchange_candles"):
                  self.exchange_candles = len(self.dd.model_return_values[pair].index)
-            candle_diff = len(self.dd.historic_predictions[pair].index) - (
+            candles_diff = len(self.dd.historic_predictions[pair].index) - (
                  num_candles + self.exchange_candles
              )
-            if candle_diff < 0:
+            if candles_diff < 0:
                  logger.warning(
-                    f"{pair}: fit live predictions not warmed up yet. Still {abs(candle_diff)} candles to go."
+                    f"{pair}: fit live predictions not warmed up yet. Still {abs(candles_diff)} candles to go."
                  )
                  warmed_up = False
  
@@ -233,12 +233,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
              dk.data["extra_returns_per_train"][MINIMA_THRESHOLD_COLUMN] = -2
              dk.data["extra_returns_per_train"][MAXIMA_THRESHOLD_COLUMN] = 2
          else:
-            label_period_candles = self.get_label_period_candles(pair)
-            min_pred, max_pred = self.min_max_pred(
-                pred_df_full,
-                num_candles,
-                label_period_candles,
-            )
+            min_pred, max_pred = self.min_max_pred(pred_df_full)
              dk.data["extra_returns_per_train"][MINIMA_THRESHOLD_COLUMN] = min_pred
              dk.data["extra_returns_per_train"][MAXIMA_THRESHOLD_COLUMN] = max_pred
  
@@ -293,25 +288,17 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
  
          return eval_set, eval_weights
  
-    def min_max_pred(
-        self,
-        pred_df: pd.DataFrame,
-        fit_live_predictions_candles: int,
-        label_period_candles: int,
-    ) -> tuple[pd.Series, pd.Series]:
-        prediction_thresholds_smoothing = self.freqai_info.get(
-            "prediction_thresholds_smoothing", "quantile"
+    def min_max_pred(self, pred_df: pd.DataFrame) -> tuple[float, float]:
+        temperature = self.freqai_info.get("predictions_temperature", 140.0)
+        min_pred = smoothed_min(
+            pred_df[EXTREMA_COLUMN],
+            temperature=temperature,
          )
-        smoothing_methods: dict[
-            str, Callable[[pd.DataFrame, int, int], tuple[pd.Series, pd.Series]]
-        ] = {
-            "quantile": self.quantile_min_max_pred,
-            "mean": QuickAdapterRegressorV3.mean_min_max_pred,
-            "median": QuickAdapterRegressorV3.median_min_max_pred,
-        }
-        return smoothing_methods.get(
-            prediction_thresholds_smoothing, smoothing_methods["quantile"]
-        )(pred_df, fit_live_predictions_candles, label_period_candles)
+        max_pred = smoothed_max(
+            pred_df[EXTREMA_COLUMN],
+            temperature=temperature,
+        )
+        return min_pred, max_pred
  
      def optuna_optimize(
          self,
@@ -485,70 +472,6 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
          except ValueError:
              return False
  
-    @staticmethod
-    def get_pred_df_sorted_and_label_period_frequency(
-        pred_df: pd.DataFrame,
-        fit_live_predictions_candles: int,
-        label_period_candles: int,
-    ) -> tuple[pd.DataFrame, int]:
-        pred_df_sorted = (
-            pred_df[[EXTREMA_COLUMN]]
-            .copy()
-            .sort_values(by=EXTREMA_COLUMN, ascending=False)
-            .reset_index(drop=True)
-        )
-        label_period_frequency: int = max(
-            1, fit_live_predictions_candles // label_period_candles
-        )
-
-        return pred_df_sorted, label_period_frequency
-
-    @staticmethod
-    def mean_min_max_pred(
-        pred_df: pd.DataFrame,
-        fit_live_predictions_candles: int,
-        label_period_candles: int,
-    ) -> tuple[pd.Series, pd.Series]:
-        pred_df_sorted, label_period_frequency = (
-            QuickAdapterRegressorV3.get_pred_df_sorted_and_label_period_frequency(
-                pred_df, fit_live_predictions_candles, label_period_candles
-            )
-        )
-        min_pred = pred_df_sorted.iloc[-label_period_frequency:].mean()
-        max_pred = pred_df_sorted.iloc[:label_period_frequency].mean()
-        return min_pred[EXTREMA_COLUMN], max_pred[EXTREMA_COLUMN]
-
-    @staticmethod
-    def median_min_max_pred(
-        pred_df: pd.DataFrame,
-        fit_live_predictions_candles: int,
-        label_period_candles: int,
-    ) -> tuple[pd.Series, pd.Series]:
-        pred_df_sorted, label_period_frequency = (
-            QuickAdapterRegressorV3.get_pred_df_sorted_and_label_period_frequency(
-                pred_df, fit_live_predictions_candles, label_period_candles
-            )
-        )
-        min_pred = pred_df_sorted.iloc[-label_period_frequency:].median()
-        max_pred = pred_df_sorted.iloc[:label_period_frequency].median()
-        return min_pred[EXTREMA_COLUMN], max_pred[EXTREMA_COLUMN]
-
-    def quantile_min_max_pred(
-        self,
-        pred_df: pd.DataFrame,
-        fit_live_predictions_candles: int,
-        label_period_candles: int,
-    ) -> tuple[pd.Series, pd.Series]:
-        pred_df_sorted, label_period_frequency = (
-            QuickAdapterRegressorV3.get_pred_df_sorted_and_label_period_frequency(
-                pred_df, fit_live_predictions_candles, label_period_candles
-            )
-        )
-        q = self.freqai_info.get("quantile", 0.75)
-        min_pred = pred_df_sorted.iloc[-label_period_frequency:].quantile(1 - q)
-        max_pred = pred_df_sorted.iloc[:label_period_frequency].quantile(q)
-        return min_pred[EXTREMA_COLUMN], max_pred[EXTREMA_COLUMN]
-
  
  def get_callbacks(trial: optuna.Trial, regressor: str) -> list[Callable]:
      if regressor == "xgboost":
@@ -632,11 +555,14 @@ def period_objective(
      y_test: pd.DataFrame,
      test_weights: np.ndarray,
      fit_live_predictions_candles: int,
+    label_period_candles: int,
      candles_step: int,
      model_training_parameters: dict,
  ) -> float:
      min_train_window: int = fit_live_predictions_candles * 2
-    max_train_window: int = max(len(X), min_train_window)
+    max_train_window: int = len(X)
+    if max_train_window < min_train_window:
+        min_train_window = max_train_window
      train_window: int = trial.suggest_int(
          "train_period_candles", min_train_window, max_train_window, step=candles_step
      )
@@ -645,7 +571,9 @@ def period_objective(
      train_weights = train_weights[-train_window:]
  
      min_test_window: int = fit_live_predictions_candles
-    max_test_window: int = max(len(X_test), min_test_window)
+    max_test_window: int = len(X_test)
+    if max_test_window < min_test_window:
+        min_test_window = max_test_window
      test_window: int = trial.suggest_int(
          "test_period_candles", min_test_window, max_test_window, step=candles_step
      )
@@ -665,46 +593,43 @@ def period_objective(
      )
      y_pred = model.predict(X_test)
  
-    min_label_period_candles: int = round_to_nearest(
-        max(fit_live_predictions_candles // 12, 20), candles_step
-    )
-    max_label_period_candles: int = round_to_nearest(
-        min(
-            max(fit_live_predictions_candles // 3, min_label_period_candles),
-            max(test_window // 2, min_label_period_candles),
-        ),
-        candles_step,
-    )
+    # TODO: implement a label_period_candles optimization compatible with ZigZag
      label_period_candles: int = trial.suggest_int(
          "label_period_candles",
-        min_label_period_candles,
-        max_label_period_candles,
+        label_period_candles,
+        label_period_candles,
          step=candles_step,
      )
-    label_periods_candles: int = (
-        test_window // label_period_candles
-    ) * label_period_candles
-    if label_periods_candles == 0 or label_period_candles > test_window:
-        return float("inf")
-    y_test_periods = [
-        y_test.iloc[-label_periods_candles:].to_numpy()[i : i + label_period_candles]
-        for i in range(0, label_periods_candles, label_period_candles)
-    ]
-    test_weights_periods = [
-        test_weights[-label_periods_candles:][i : i + label_period_candles]
-        for i in range(0, label_periods_candles, label_period_candles)
-    ]
-    y_pred_periods = [
-        y_pred[-label_periods_candles:][i : i + label_period_candles]
-        for i in range(0, label_periods_candles, label_period_candles)
-    ]
-
-    errors = [
-        sklearn.metrics.root_mean_squared_error(y_t, y_p, sample_weight=t_w)
-        for y_t, y_p, t_w in zip(y_test_periods, y_pred_periods, test_weights_periods)
-    ]
-
-    return geometric_mean(errors)
+
+    # min_label_period_candles: int = round_to_nearest(
+    #     max(fit_live_predictions_candles // 12, 20), candles_step
+    # )
+    # max_label_period_candles: int = round_to_nearest(
+    #     max(fit_live_predictions_candles // 4, min_label_period_candles),
+    #     candles_step,
+    # )
+    # label_period_candles: int = trial.suggest_int(
+    #     "label_period_candles",
+    #     min_label_period_candles,
+    #     max_label_period_candles,
+    #     step=candles_step,
+    # )
+    # if label_period_candles > test_window:
+    #     return float("inf")
+    # label_periods_candles: int = (
+    #     test_window // label_period_candles
+    # ) * label_period_candles
+    # if label_periods_candles == 0:
+    #     return float("inf")
+    # y_test = y_test.iloc[-label_periods_candles:]
+    # test_weights = test_weights[-label_periods_candles:]
+    # y_pred = y_pred[-label_periods_candles:]
+
+    error = sklearn.metrics.root_mean_squared_error(
+        y_test, y_pred, sample_weight=test_weights
+    )
+
+    return error
  
  
  def get_optuna_study_model_parameters(trial: optuna.Trial, regressor: str) -> dict:
@@ -767,3 +692,11 @@ def hp_objective(
      )
  
      return error
+
+
+def smoothed_max(series: pd.Series, temperature=1.0) -> float:
+    return sp.special.logsumexp(temperature * series.to_numpy()) / temperature
+
+
+def smoothed_min(series: pd.Series, temperature=1.0) -> float:
+    return -sp.special.logsumexp(-temperature * series.to_numpy()) / temperature
diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py

index d98155dcb55d1519834177f38722ceab61d19a8b..7647c707d21aeca7376dc5e8633f4efa170049fc 100644 (file)
--- a/quickadapter/user_data/strategies/QuickAdapterV3.py
+++ b/quickadapter/user_data/strategies/QuickAdapterV3.py
@@ -354,7 +354,7 @@ class QuickAdapterV3(IStrategy):
      def set_freqai_targets(self, dataframe, metadata, **kwargs):
          label_period_candles = self.get_label_period_candles(str(metadata.get("pair")))
          peak_indices, _, peak_directions = dynamic_zigzag(
-            dataframe, timeperiod=label_period_candles, ratio=self.label_natr_ratio
+            dataframe, period=label_period_candles, ratio=self.label_natr_ratio
          )
          dataframe[EXTREMA_COLUMN] = 0
          for peak_idx, peak_dir in zip(peak_indices, peak_directions):
diff --git a/quickadapter/user_data/strategies/Utils.py b/quickadapter/user_data/strategies/Utils.py

index a4959eed843a04e1a4b65eeecc5f987c8462d2f6..860c93735a70c9d1eb1960b2a69e42e42de128a4 100644 (file)
--- a/quickadapter/user_data/strategies/Utils.py
+++ b/quickadapter/user_data/strategies/Utils.py
@@ -383,7 +383,7 @@ def zigzag(
  
  def dynamic_zigzag(
      df: pd.DataFrame,
-    timeperiod: int = 14,
+    period: int = 14,
      natr: bool = True,
      ratio: float = 1.0,
  ) -> tuple[list[int], list[float], list[int]]:
@@ -392,7 +392,7 @@ def dynamic_zigzag(
  
      Parameters:
      df (pd.DataFrame): OHLCV DataFrame.
-    timeperiod (int): Period for ATR/NATR calculation (default: 14).
+    period (int): Period for ATR/NATR calculation (default: 14).
      natr (bool): Use NATR (True) or ATR (False) (default: True).
      ratio (float): ratio for dynamic threshold (default: 1.0).
  
@@ -403,9 +403,9 @@ def dynamic_zigzag(
          return [], [], []
  
      if natr:
-        thresholds = ta.NATR(df, timeperiod=timeperiod)
+        thresholds = ta.NATR(df, timeperiod=period)
      else:
-        thresholds = ta.ATR(df, timeperiod=timeperiod)
+        thresholds = ta.ATR(df, timeperiod=period)
      thresholds = thresholds.ffill().bfill()
  
      indices = []
author	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Mon, 7 Apr 2025 09:07:29 +0000 (11:07 +0200)
committer	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Mon, 7 Apr 2025 09:07:29 +0000 (11:07 +0200)
quickadapter/docs/labeling_window.txt	[deleted file]	patch | blob | blame | history
quickadapter/user_data/config-template.json		patch | blob | blame | history
quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py		patch | blob | blame | history
quickadapter/user_data/strategies/QuickAdapterV3.py		patch | blob | blame | history
quickadapter/user_data/strategies/Utils.py		patch | blob | blame | history