From 900a646d1220189e1df8ebec31a07a0c65e792e3 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Sun, 24 May 2026 00:23:35 +0200 Subject: [PATCH] fix(zigzag): default normalize to False to prevent label magnitude leak (#71) MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When set_freqai_targets is invoked by FreqAI's backtesting loop, the dataframe passed to _generate_extrema_label spans the full historical window (right-truncated to the current train-window stop), not just train_period_days. With normalize=True, zigzag applies a global min-max scaling to amplitudes, amplitude_threshold_ratios, volume_rates and speeds across all detected pivots in that wider window. The resulting label magnitudes therefore depend on the global pivot distribution, including pivots outside the current training slice — a magnitude leak from out-of-train data into training labels. Switching the zigzag default to normalize=False emits raw log-amplitude values (|log(P2/P1)|) and defers any scaling to LabelTransformer, which is fitted strictly on the train slice and is therefore leak-free. The two existing call sites — _generate_extrema_label (label generation) and label_objective (Optuna hyperopt) — both want the unnormalized output, so the now-redundant normalize=False kwarg is dropped from label_objective in favor of the default. Strategy and regressor patch versions are bumped to 3.11.8. Caveat: with apply_label_weighting strategy="combined", the metrics now sit on heterogeneous scales (raw log-amplitudes ~[0.005, 0.5] mix with bounded ratios in [0, 1] like efficiency_ratio). Users relying on "combined" aggregation (power means, weighted_median, softmax) may need to introduce metric-specific rescaling on the train slice before aggregation. Direction-only (strategy="none") and single-metric strategies (e.g. strategy="amplitude") are unaffected.
--- quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py | 3 +-- quickadapter/user_data/strategies/QuickAdapterV3.py | 2 +- quickadapter/user_data/strategies/Utils.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py index 7f7b19b..a9aebce 100644 --- a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py +++ b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py @@ -98,7 +98,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): https://github.com/sponsors/robcaulk """ - version = "3.11.7" + version = "3.11.8" _TEST_SIZE: Final[float] = 0.1 @@ -3592,7 +3592,6 @@ def label_objective( df, natr_period=label_period_candles, natr_multiplier=label_natr_multiplier, - normalize=False, ) median_amplitude = np.nanmedian(np.asarray(pivots_amplitudes, dtype=float)) diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py index d7a2358..f02eba2 100644 --- a/quickadapter/user_data/strategies/QuickAdapterV3.py +++ b/quickadapter/user_data/strategies/QuickAdapterV3.py @@ -109,7 +109,7 @@ class QuickAdapterV3(IStrategy): _PLOT_EXTREMA_MIN_EPS: Final[float] = 0.01 def version(self) -> str: - return "3.11.7" + return "3.11.8" timeframe = "5m" timeframe_minutes = timeframe_to_minutes(timeframe) diff --git a/quickadapter/user_data/strategies/Utils.py b/quickadapter/user_data/strategies/Utils.py index c5f8e74..a1def00 100644 --- a/quickadapter/user_data/strategies/Utils.py +++ b/quickadapter/user_data/strategies/Utils.py @@ -1669,7 +1669,7 @@ def zigzag( df: pd.DataFrame, natr_period: int = 14, natr_multiplier: float = 9.0, - normalize: bool = True, + normalize: bool = False, ) -> tuple[ list[int], list[float], -- 2.53.0