| _Feature parameters_ | | | |
| freqai.feature_parameters.label_period_candles | min/max midpoint | int >= 1 | Zigzag labeling NATR horizon. |
| freqai.feature_parameters.label_horizon_candles | `label_period_candles` | int >= 1 | Number of candles after a label row before the label is considered known by causal split guards. Recommended: cover the zigzag pivot confirmation lag (the smoothing kernel half-width is added automatically by `set_freqai_targets`). Used by causal split guards and `<label>_known_at_index` metadata. When unset, falls back to `label_period_candles`. |
-| freqai.feature_parameters.causal_mode | true | bool | Causal split guard toggle. When `true` (default): rejects `data_split_parameters.shuffle=true`, `shuffle_after_split=true`, `reverse_train_test_order=true`; for `timeseries_split` auto-sets `gap=label_horizon_candles` when unset/`0` (rejects explicit `gap<label_horizon_candles`); for `train_test_split` drops train rows where position `>=first_test_position-label_horizon_candles`; with `<label>_known_at_index` columns, additionally drops rows where row-wise max `>=first_test_position`. `false` is deprecated; acausal baselines only. |
+| freqai.feature_parameters.causal_mode | true | bool | Causal split guard toggle. When `true` (default): rejects `data_split_parameters.shuffle=true`, `shuffle_after_split=true`, `reverse_train_test_order=true`; for `timeseries_split` auto-sets `gap=label_horizon_candles` when unset/`0` (rejects explicit `gap<label_horizon_candles`); for `train_test_split` drops train rows where position `>=first_test_position-label_horizon_candles`; with `<label>_known_at_index` columns (per-row label lookahead in candles), additionally drops train rows where `local_position + row-wise max(lookahead) >= first_test_position`. `false` is deprecated; acausal baselines only. |
| freqai.feature_parameters.min_label_period_candles | 12 | int >= 1 | Minimum labeling NATR horizon used for reversals labeling HPO. |
| freqai.feature_parameters.max_label_period_candles | 24 | int >= 1 | Maximum labeling NATR horizon used for reversals labeling HPO. |
| freqai.feature_parameters.label_natr_multiplier | min/max midpoint | float > 0 | Zigzag labeling NATR multiplier. |
filtered_dataframe: pd.DataFrame,
unfiltered_df: pd.DataFrame,
) -> pd.Series | None:
- """Per-row leak boundary across all registered labels.
-
- Returns the row-wise ``max`` of every present
- ``<label>_known_at_index`` column. A label whose column is missing
- or contains any NaN is skipped (silently — labels can opt in by
- emitting the column). Returns ``None`` only when no label exposes
- a usable column, in which case the caller falls back to the
- position-based purge.
+ """Per-row label lookahead (in candles) across all registered labels.
+
+ See ``LabelData.known_at_index`` for the lookahead-vs-position
+ contract and the slice-invariance rationale; callers must add the
+ row's LOCAL position in ``unfiltered_df`` to recover the local
+ index at which the label becomes causally available.
+
+ Returns the row-wise ``max`` of every present ``<label>_known_at_index``
+ column; labels with a missing column or any NaN are skipped
+ silently (opt-in by emission). Returns ``None`` when no label is
+ usable; callers then fall back to the position-based purge.
"""
QuickAdapterRegressorV3._validate_index_alignment(
filtered_dataframe, unfiltered_df
features, unfiltered_df
)
if known_at_index is not None:
- known_at_train = known_at_index.loc[train_features.index]
- keep_mask &= (
- known_at_train.to_numpy(dtype=np.int64) < first_test_position
+ known_at_train_delta = known_at_index.loc[train_features.index]
+ known_at_train_position = (
+ train_positions.to_numpy(dtype=np.int64)
+ + known_at_train_delta.to_numpy(dtype=np.int64)
)
+ keep_mask &= known_at_train_position < first_test_position
else:
_log_known_at_none_once(dk.pair, "train_test_split causal guard")
(
filtered_dataframe, unfiltered_df
)
first_test_position = int(row_positions.iloc[test_idx].min())
+ train_positions = row_positions.iloc[train_idx]
known_at_index = QuickAdapterRegressorV3._known_at_index(
filtered_dataframe, unfiltered_df
)
if known_at_index is not None:
- known_at_train = known_at_index.iloc[train_idx]
- keep_mask = (
- known_at_train.to_numpy(dtype=np.int64) < first_test_position
+ known_at_train_delta = known_at_index.iloc[train_idx]
+ known_at_train_position = (
+ train_positions.to_numpy(dtype=np.int64)
+ + known_at_train_delta.to_numpy(dtype=np.int64)
)
+ keep_mask = known_at_train_position < first_test_position
(
train_features,
train_labels,
)
# Zero-phase smoothing reads future candles within the kernel
- # half-width; advance the known-at index so causal split guards
- # account for the smoothing lookahead.
+ # half-width; extend the per-row label lookahead so causal
+ # split guards account for the smoothing lookahead.
known_at_column = label_known_at_column_name(label_col)
if known_at_column in dataframe.columns:
kernel_half_width = get_smoothing_kernel_half_width(
EXTREMA_DIRECTION_SMOOTHED_COLUMN: Final[str] = "extrema_direction_smoothed"
EXTREMA_WEIGHT_COLUMN: Final[str] = "extrema_weight"
EXTREMA_WEIGHT_SMOOTHED_COLUMN: Final[str] = "extrema_weight_smoothed"
+# Suffix is historical; stored values are per-row label lookaheads
+# (in candles), not absolute indexes. See ``LabelData.known_at_index``.
_LABEL_KNOWN_AT_SUFFIX: Final[str] = "_known_at_index"
LABEL_WEIGHT_SUFFIX: Final[str] = "_weight"
def label_known_at_column_name(label_col: str) -> str:
- """Return the known-at-index column name for a label column."""
+ """Return the per-row label-lookahead column name for a label column.
+
+ Column values are lookaheads in candles, not absolute positions; see
+ ``LabelData.known_at_index``.
+ """
return _label_aux_column_name(label_col, _LABEL_KNOWN_AT_SUFFIX)
@dataclass
class LabelData:
+ """Output of a label generator.
+
+ Attributes:
+ series: per-row label values aligned to ``dataframe.index``.
+ indices: positions of detected pivots in ``series``.
+ metrics: per-pivot metric lists (parallel to ``indices``).
+ known_at_index: optional per-row label lookahead in candles
+ (NOT an absolute position). Invariant under
+ ``dk.slice_dataframe``. Causal split guards recover the
+ local availability position as ``row_local_position +
+ known_at_index[row]``. ``None`` opts the label out of
+ label-aware causal filtering.
+ """
+
series: pd.Series
indices: list[int]
metrics: dict[str, list[float]]
"volume_weighted_efficiency_ratio": pivots_volume_weighted_efficiency_ratios,
}
+ # Per-row label lookahead (in candles), NOT an absolute position:
+ # freqtrade's ``dk.slice_dataframe`` runs AFTER ``set_freqai_targets``,
+ # so any pre-slice absolute position would no longer match the causal
+ # guard's local ``np.arange(len(unfiltered_df))`` coordinate system.
known_at_index = pd.Series(
- np.arange(len(dataframe), dtype=np.int64) + label_horizon_candles,
+ int(label_horizon_candles),
index=dataframe.index,
+ dtype=np.int64,
)
return LabelData(