Piment Noir Git Repositories - freqai-strategies.git/commitdiff
fix(quickadapter): use slice-invariant lookahead for causal guard (#95)
author Jérôme Benoit <jerome.benoit@piment-noir.org>
Mon, 22 Jun 2026 01:02:56 +0000 (03:02 +0200)
committer GitHub <noreply@github.com>
Mon, 22 Jun 2026 01:02:56 +0000 (03:02 +0200)
* fix(quickadapter): use slice-invariant lookahead for causal guard

PR #78 stored '<label>_known_at_index' as 'arange(len) + horizon +
kernel_half_width' -- absolute positions in the dataframe passed to
'set_freqai_targets'. freqtrade's 'dk.slice_dataframe' (a '.loc' filter)
runs AFTER 'set_freqai_targets' and drops warmup rows but preserves
column values, so those pre-slice positions survived into the post-slice
'unfiltered_df'. The causal guard then compared them against
'first_test_position' derived from 'np.arange(len(unfiltered_df))' --
local post-slice positions in a different coordinate system. The unit
mismatch wiped out most or all training rows on every pair.

Production crash on 2026-06-22 (XRP/USD): "removed 2621
causal-unsafe train rows" followed by "causal guard removed all
train rows, skipping".

Fix: the column now stores a per-row label lookahead (in candles),
invariant under 'dk.slice_dataframe'. Consumers combine the row's
local position with the lookahead to recover the local known-at
position before comparing to 'first_test_position'. Column name
'<label>_known_at_index' is retained for this hotfix; a rename to
'<label>_known_at_lookahead' (with backward-compatible alias) is left
to a follow-up PR per AGENTS.md 'small, verifiable changes'.

Touches:
- Utils.py: producer rewritten to store a constant per-row lookahead;
  'LabelData' and 'label_known_at_column_name' docstrings document the
  new contract; '_LABEL_KNOWN_AT_SUFFIX' carries an inline disambiguation.
- QuickAdapterV3.py: smoothing-lookahead advance comment harmonized to
  the canonical 'per-row label lookahead (in candles)' phrasing.
- QuickAdapterRegressorV3.py: '_known_at_index' docstring rewritten;
  'train_test_split' and 'timeseries_split' causal-mode branches add
  'train_positions + delta' before the '< first_test_position' check;
  'timeseries_split' hoists 'train_positions' for symmetry with
  'train_test_split'.
- README.md: 'causal_mode' tunable description reflects the new
  comparison semantics.

Reviewed by three parallel Oracle passes (math/algo/scope,
Python state-of-the-art / harmonization, documentation /
terminology / concision) with cross-validation; one false alarm
on a missing position-only fallback in 'timeseries_split' was
resolved by confirming 'TimeSeriesSplit.gap' enforces the
chronological purge at the sklearn layer.

* docs(quickadapter): shrink _known_at_index docstring to LabelData pointer

Per multi-oracle PR #95 review (Oracle 3 §8.1): paragraph 1 of
_known_at_index duplicated the slice-invariance rationale already
canonical on LabelData.known_at_index. Replace with a thin pointer per
AGENTS.md *No duplication: maintain single authoritative documentation
source; reference other sources rather than copying.*

README.md
quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
quickadapter/user_data/strategies/QuickAdapterV3.py
quickadapter/user_data/strategies/Utils.py

index 70f168590dac2a2013caf1a8a99318651cc5a29d..2019e55d7d7e2cf9f42208ed9eb3a7e37389fb43 100644 (file)
--- a/README.md
+++ b/README.md
@@ -102,7 +102,7 @@ docker compose up -d --build
 | _Feature parameters_                                           |                               |                                                                                                                                                        |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
 | freqai.feature_parameters.label_period_candles                 | min/max midpoint              | int >= 1                                                                                                                                               | Zigzag labeling NATR horizon.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
 | freqai.feature_parameters.label_horizon_candles                | `label_period_candles`        | int >= 1                                                                                                                                               | Number of candles after a label row before the label is considered known by causal split guards. Recommended: cover the zigzag pivot confirmation lag (the smoothing kernel half-width is added automatically by `set_freqai_targets`). Used by causal split guards and `<label>_known_at_index` metadata. When unset, falls back to `label_period_candles`.                                                                                                                                                                                                                                              |
-| freqai.feature_parameters.causal_mode                          | true                          | bool                                                                                                                                                   | Causal split guard toggle. When `true` (default): rejects `data_split_parameters.shuffle=true`, `shuffle_after_split=true`, `reverse_train_test_order=true`; for `timeseries_split` auto-sets `gap=label_horizon_candles` when unset/`0` (rejects explicit `gap<label_horizon_candles`); for `train_test_split` drops train rows where position `>=first_test_position-label_horizon_candles`; with `<label>_known_at_index` columns, additionally drops rows where row-wise max `>=first_test_position`. `false` is deprecated; acausal baselines only.                                                                                                                                                                                                                                       |
+| freqai.feature_parameters.causal_mode                          | true                          | bool                                                                                                                                                   | Causal split guard toggle. When `true` (default): rejects `data_split_parameters.shuffle=true`, `shuffle_after_split=true`, `reverse_train_test_order=true`; for `timeseries_split` auto-sets `gap=label_horizon_candles` when unset/`0` (rejects explicit `gap<label_horizon_candles`); for `train_test_split` drops train rows where position `>=first_test_position-label_horizon_candles`; with `<label>_known_at_index` columns (per-row label lookahead in candles), additionally drops rows where `local_position + row-wise max(lookahead) >= first_test_position`. `false` is deprecated; acausal baselines only.                                                                                                                                                                                                                                       |
 | freqai.feature_parameters.min_label_period_candles             | 12                            | int >= 1                                                                                                                                               | Minimum labeling NATR horizon used for reversals labeling HPO.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
 | freqai.feature_parameters.max_label_period_candles             | 24                            | int >= 1                                                                                                                                               | Maximum labeling NATR horizon used for reversals labeling HPO.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
 | freqai.feature_parameters.label_natr_multiplier                | min/max midpoint              | float > 0                                                                                                                                              | Zigzag labeling NATR multiplier.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |
index 5cd23e494777361fa92500a03b317e721d215763..e0606fb84794b7bd15a6f7eed583cacf54bd1cb7 100644 (file)
@@ -515,14 +515,17 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
         filtered_dataframe: pd.DataFrame,
         unfiltered_df: pd.DataFrame,
     ) -> pd.Series | None:
-        """Per-row leak boundary across all registered labels.
-
-        Returns the row-wise ``max`` of every present
-        ``<label>_known_at_index`` column. A label whose column is missing
-        or contains any NaN is skipped (silently — labels can opt in by
-        emitting the column). Returns ``None`` only when no label exposes
-        a usable column, in which case the caller falls back to the
-        position-based purge.
+        """Per-row label lookahead (in candles) across all registered labels.
+
+        See ``LabelData.known_at_index`` for the lookahead-vs-position
+        contract and the slice-invariance rationale; callers must add the
+        row's LOCAL position in ``unfiltered_df`` to recover the local
+        index at which the label becomes causally available.
+
+        Row-wise ``max`` of every present ``<label>_known_at_index``
+        column; labels with a missing column or any NaN are skipped
+        silently (opt-in by emission). Returns ``None`` when no label is
+        usable; callers then fall back to the position-based purge.
         """
         QuickAdapterRegressorV3._validate_index_alignment(
             filtered_dataframe, unfiltered_df
@@ -1971,10 +1974,12 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
                     features, unfiltered_df
                 )
                 if known_at_index is not None:
-                    known_at_train = known_at_index.loc[train_features.index]
-                    keep_mask &= (
-                        known_at_train.to_numpy(dtype=np.int64) < first_test_position
+                    known_at_train_delta = known_at_index.loc[train_features.index]
+                    known_at_train_position = (
+                        train_positions.to_numpy(dtype=np.int64)
+                        + known_at_train_delta.to_numpy(dtype=np.int64)
                     )
+                    keep_mask &= known_at_train_position < first_test_position
                 else:
                     _log_known_at_none_once(dk.pair, "train_test_split causal guard")
                 (
@@ -2391,14 +2396,17 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
                 filtered_dataframe, unfiltered_df
             )
             first_test_position = int(row_positions.iloc[test_idx].min())
+            train_positions = row_positions.iloc[train_idx]
             known_at_index = QuickAdapterRegressorV3._known_at_index(
                 filtered_dataframe, unfiltered_df
             )
             if known_at_index is not None:
-                known_at_train = known_at_index.iloc[train_idx]
-                keep_mask = (
-                    known_at_train.to_numpy(dtype=np.int64) < first_test_position
+                known_at_train_delta = known_at_index.iloc[train_idx]
+                known_at_train_position = (
+                    train_positions.to_numpy(dtype=np.int64)
+                    + known_at_train_delta.to_numpy(dtype=np.int64)
                 )
+                keep_mask = known_at_train_position < first_test_position
                 (
                     train_features,
                     train_labels,
index b010e71ab169b7351cf796064057a330d33dea5e..d2ed0aac2f697490f792745077c1b22bcba1c850 100644 (file)
@@ -996,8 +996,8 @@ class QuickAdapterV3(IStrategy):
                 )
 
             # Zero-phase smoothing reads future candles within the kernel
-            # half-width; advance the known-at index so causal split guards
-            # account for the smoothing lookahead.
+            # half-width; extend the per-row label lookahead so causal
+            # split guards account for the smoothing lookahead.
             known_at_column = label_known_at_column_name(label_col)
             if known_at_column in dataframe.columns:
                 kernel_half_width = get_smoothing_kernel_half_width(
index c8705157d5cfc7a07f279ab6b381d7c96f6bc06a..6ff7413536d62087266832f87623516cf7b13bd9 100644 (file)
@@ -531,6 +531,8 @@ EXTREMA_DIRECTION_COLUMN: Final[str] = "extrema_direction"
 EXTREMA_DIRECTION_SMOOTHED_COLUMN: Final[str] = "extrema_direction_smoothed"
 EXTREMA_WEIGHT_COLUMN: Final[str] = "extrema_weight"
 EXTREMA_WEIGHT_SMOOTHED_COLUMN: Final[str] = "extrema_weight_smoothed"
+# Suffix is historical; stored values are per-row label lookaheads
+# (in candles), not absolute indexes. See ``LabelData.known_at_index``.
 _LABEL_KNOWN_AT_SUFFIX: Final[str] = "_known_at_index"
 
 LABEL_WEIGHT_SUFFIX: Final[str] = "_weight"
@@ -579,12 +581,30 @@ def label_weight_column_name(label_col: str) -> str:
 
 
 def label_known_at_column_name(label_col: str) -> str:
-    """Return the known-at-index column name for a label column."""
+    """Return the per-row label-lookahead column name for a label column.
+
+    Column values are lookaheads in candles, not absolute positions; see
+    ``LabelData.known_at_index``.
+    """
     return _label_aux_column_name(label_col, _LABEL_KNOWN_AT_SUFFIX)
 
 
 @dataclass
 class LabelData:
+    """Output of a label generator.
+
+    Attributes:
+        series: per-row label values aligned to ``dataframe.index``.
+        indices: positions of detected pivots in ``series``.
+        metrics: per-pivot metric lists (parallel to ``indices``).
+        known_at_index: optional per-row label lookahead in candles
+            (NOT an absolute position). Invariant under
+            ``dk.slice_dataframe``. Causal split guards recover the
+            local availability position as ``row_local_position +
+            known_at_index[row]``. ``None`` opts the label out of
+            label-aware causal filtering.
+    """
+
     series: pd.Series
     indices: list[int]
     metrics: dict[str, list[float]]
@@ -722,9 +742,14 @@ def _generate_extrema_label(
         "volume_weighted_efficiency_ratio": pivots_volume_weighted_efficiency_ratios,
     }
 
+    # Per-row label lookahead (in candles), NOT an absolute position:
+    # freqtrade's ``dk.slice_dataframe`` runs AFTER ``set_freqai_targets``,
+    # so any pre-slice absolute position would no longer match the causal
+    # guard's local ``np.arange(len(unfiltered_df))`` coordinate system.
     known_at_index = pd.Series(
-        np.arange(len(dataframe), dtype=np.int64) + label_horizon_candles,
+        int(label_horizon_candles),
         index=dataframe.index,
+        dtype=np.int64,
     )
 
     return LabelData(