From 64bbd71f52754d7a75f5ce28d8d25512406576a6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Mon, 25 May 2026 13:42:38 +0200 Subject: [PATCH] refactor(weights): tighten label-weight pipeline contract - compute_label_weights raises on strategy='none'; callers must skip - _scatter_weights default_weight is required (no implicit baseline) - gate weight-column writes on is_weighting_active (strategy AND pivots) - demote 'label weight column absent' log to debug (legitimate path) - remove DEFAULT_LABEL_WEIGHT (conflated three unrelated 1.0 semantics) - harden sanitize_and_renormalize fallback when drop_mask covers all rows - add Final[str] / Final[re.Pattern[str]] for type coherence --- .../freqaimodels/QuickAdapterRegressorV3.py | 14 ++--- .../user_data/strategies/QuickAdapterV3.py | 44 +++++++++------- quickadapter/user_data/strategies/Utils.py | 51 +++++++++++++------ 3 files changed, 69 insertions(+), 40 deletions(-) diff --git a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py index 8cc527d..473a99b 100644 --- a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py +++ b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py @@ -1528,9 +1528,10 @@ class QuickAdapterRegressorV3(BaseRegressionModel): ``label_weight_column_name(LABEL_COLUMNS[0])``. Alignment runs before any shuffle/split on ``features_filtered.index`` (a subset of ``unfiltered_df.index``) to avoid post-hoc reindex against shuffled - data. When the weight column is absent, ``label_weights=None`` is - forwarded to ``compose_sample_weights`` and only the base weights - contribute. + data. The weight column is absent when ``label_weighting.strategy`` + is ``'none'`` (no per-label importance applied); in that case + ``label_weights=None`` is forwarded to ``compose_sample_weights`` + and only the base weights contribute. """ if not unfiltered_df.index.is_unique: raise ValueError( @@ -1566,12 +1567,11 @@ class QuickAdapterRegressorV3(BaseRegressionModel): label_weights = unfiltered_df.loc[ features_filtered.index, weight_col ].to_numpy(dtype=float) - logger.debug(f"label weight column active: {weight_col}") + logger.debug(f"label weight column active: {weight_col!r}") else: label_weights = None - logger.warning( - f"label weight column not found ({weight_col!r}); " - f"falling back to base weights only" + logger.debug( + f"label weight column absent ({weight_col!r}); using base weights only" ) return compose_sample_weights( base_weights, diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py index c0f122f..3d01231 100644 --- a/quickadapter/user_data/strategies/QuickAdapterV3.py +++ b/quickadapter/user_data/strategies/QuickAdapterV3.py @@ -860,38 +860,48 @@ class QuickAdapterV3(IStrategy): label_col, label_weighting["default"], label_weighting["columns"] ) - label_weights = compute_label_weights( - n_values=len(label_data.series), - indices=label_data.indices, - metrics=label_data.metrics, - weighting_config=col_weighting_config, + # Absent column routes downstream to base-weights-only fallback. + is_weighting_active = ( + col_weighting_config["strategy"] != WEIGHT_STRATEGIES[0] # "none" + and len(label_data.indices) > 0 ) - label_weight_col = label_weight_column_name(label_col) - dataframe[label_col] = label_data.series - dataframe[label_weight_col] = label_weights + + label_weight_col = label_weight_column_name(label_col) + if is_weighting_active: + dataframe[label_weight_col] = compute_label_weights( + n_values=len(label_data.series), + indices=label_data.indices, + metrics=label_data.metrics, + weighting_config=col_weighting_config, + ) if label_col == EXTREMA_COLUMN: dataframe[EXTREMA_DIRECTION_COLUMN] = dataframe[label_col] - dataframe[EXTREMA_WEIGHT_COLUMN] = dataframe[label_weight_col] + if is_weighting_active: + dataframe[EXTREMA_WEIGHT_COLUMN] = dataframe[label_weight_col] col_smoothing_config = get_label_column_config( label_col, label_smoothing["default"], label_smoothing["columns"] ) dataframe[label_col] = smooth(dataframe[label_col], **col_smoothing_config) - smoothed_label_weights = smooth( - dataframe[label_weight_col], **col_smoothing_config - ) - dataframe[label_weight_col] = smoothed_label_weights.where( - np.isfinite(smoothed_label_weights) & smoothed_label_weights.gt(0), - 0.0, - ) + if is_weighting_active: + smoothed_label_weights = smooth( + dataframe[label_weight_col], **col_smoothing_config + ) + dataframe[label_weight_col] = smoothed_label_weights.where( + np.isfinite(smoothed_label_weights) & smoothed_label_weights.gt(0), + 0.0, + ) if label_col == EXTREMA_COLUMN: dataframe[EXTREMA_DIRECTION_SMOOTHED_COLUMN] = dataframe[label_col] - dataframe[EXTREMA_WEIGHT_SMOOTHED_COLUMN] = dataframe[label_weight_col] + if is_weighting_active: + dataframe[EXTREMA_WEIGHT_SMOOTHED_COLUMN] = dataframe[ + label_weight_col + ] return dataframe diff --git a/quickadapter/user_data/strategies/Utils.py b/quickadapter/user_data/strategies/Utils.py index fbc9b87..a4bc025 100644 --- a/quickadapter/user_data/strategies/Utils.py +++ b/quickadapter/user_data/strategies/Utils.py @@ -250,17 +250,17 @@ _PREDICTION_SPECS: Final[dict[str, _ParamSpec]] = { } -EXTREMA_COLUMN: Final = "&s-extrema" -EXTREMA_DIRECTION_COLUMN: Final = "extrema_direction" -EXTREMA_DIRECTION_SMOOTHED_COLUMN: Final = "extrema_direction_smoothed" -EXTREMA_WEIGHT_COLUMN: Final = "extrema_weight" -EXTREMA_WEIGHT_SMOOTHED_COLUMN: Final = "extrema_weight_smoothed" +EXTREMA_COLUMN: Final[str] = "&s-extrema" +EXTREMA_DIRECTION_COLUMN: Final[str] = "extrema_direction" +EXTREMA_DIRECTION_SMOOTHED_COLUMN: Final[str] = "extrema_direction_smoothed" +EXTREMA_WEIGHT_COLUMN: Final[str] = "extrema_weight" +EXTREMA_WEIGHT_SMOOTHED_COLUMN: Final[str] = "extrema_weight_smoothed" LABEL_WEIGHT_SUFFIX: Final[str] = "_weight" LABEL_COLUMNS: Final[tuple[str, ...]] = (EXTREMA_COLUMN,) -_FREQAI_LABEL_SIGIL_PATTERN: Final = re.compile(r"^&-?") +_FREQAI_LABEL_SIGIL_PATTERN: Final[re.Pattern[str]] = re.compile(r"^&-?") @lru_cache(maxsize=16) @@ -379,8 +379,6 @@ TRADE_PRICE_TARGETS: Final[tuple[TradePriceTarget, ...]] = ( ) -DEFAULT_LABEL_WEIGHT: Final[float] = 1.0 - SPARSE_TRAINING_MASS_THRESHOLD: Final[float] = 0.05 DEFAULT_FIT_LIVE_PREDICTIONS_CANDLES: Final[int] = 100 @@ -750,10 +748,16 @@ def sanitize_and_renormalize( ) fallback = np.ones(n, dtype=float) if drop_mask is not None: - fallback = np.where(drop_mask, 0.0, fallback) - fb_total = fallback.sum() + masked = np.where(drop_mask, 0.0, fallback) + fb_total = masked.sum() if fb_total > 0.0: - fallback = fallback * (n / fb_total) + return masked * (n / fb_total) + if logger is not None: + logger.warning( + "sanitize_and_renormalize: drop_mask covers all rows in " + "fallback; ignoring mask to preserve mean=1 (context=%s)", + context or "unspecified", + ) return fallback @@ -1005,7 +1009,7 @@ def smooth( def _impute_weights( weights: NDArray[np.floating], - default_weight: float = DEFAULT_LABEL_WEIGHT, + default_weight: float = 1.0, ) -> NDArray[np.floating]: weights = weights.astype(float, copy=True) @@ -1041,8 +1045,14 @@ def _scatter_weights( n_values: int, indices: list[int], weights: NDArray[np.floating], - default_weight: float = DEFAULT_LABEL_WEIGHT, + default_weight: float, ) -> NDArray[np.floating]: + """Scatter per-pivot weights into a full-length array. + + Non-pivot rows are filled with ``default_weight``. Callers pass ``0.0`` + to exclude non-pivot rows from training (pivot-only weighting), or a + positive value to give them a baseline weight. + """ if len(indices) == 0 or weights.size == 0: return np.full(n_values, default_weight, dtype=float) @@ -1140,7 +1150,7 @@ def _compute_combined_label_weights( coefficients = _parse_metric_coefficients(metric_coefficients) if len(coefficients) == 0: - coefficients = {k: DEFAULT_LABEL_WEIGHT for k in metrics.keys()} + coefficients = {k: 1.0 for k in metrics.keys()} imputed_metrics: list[NDArray[np.floating]] = [] coefficients_list: list[float] = [] @@ -1172,11 +1182,20 @@ def compute_label_weights( metrics: dict[str, list[float]], weighting_config: dict[str, Any], ) -> NDArray[np.floating]: + """Compute per-row label importance weights. + + Returns an array with positive values at pivot ``indices`` (scaled by + strategy) and ``0.0`` elsewhere. Callers must skip invocation when + strategy is ``'none'``; this raises ValueError otherwise. + """ label_weighting = {**DEFAULTS_LABEL_WEIGHTING, **weighting_config} strategy = label_weighting["strategy"] - if len(indices) == 0 or strategy == WEIGHT_STRATEGIES[0]: # "none" - return np.full(n_values, DEFAULT_LABEL_WEIGHT, dtype=float) + if strategy == WEIGHT_STRATEGIES[0]: # "none" + raise ValueError( + "compute_label_weights must not be called with strategy='none'; " + "callers must skip invocation when weighting is disabled" + ) weights: Optional[NDArray[np.floating]] = None -- 2.53.0