From 64bbd71f52754d7a75f5ce28d8d25512406576a6 Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@piment-noir.org>
Date: Mon, 25 May 2026 13:42:38 +0200
Subject: [PATCH] refactor(weights): tighten label-weight pipeline contract

- compute_label_weights raises on strategy='none'; callers must skip
- _scatter_weights default_weight is required (no implicit baseline)
- gate weight-column writes on is_weighting_active (strategy AND pivots)
- demote 'label weight column absent' log to debug (legitimate path)
- remove DEFAULT_LABEL_WEIGHT (conflated three unrelated 1.0 semantics)
- harden sanitize_and_renormalize fallback when drop_mask covers all rows
- add Final[str] / Final[re.Pattern[str]] for type coherence
---
 .../freqaimodels/QuickAdapterRegressorV3.py   | 14 ++---
 .../user_data/strategies/QuickAdapterV3.py    | 44 +++++++++-------
 quickadapter/user_data/strategies/Utils.py    | 51 +++++++++++++------
 3 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
index 8cc527d..473a99b 100644
--- a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
+++ b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
@@ -1528,9 +1528,10 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
         ``label_weight_column_name(LABEL_COLUMNS[0])``. Alignment runs before
         any shuffle/split on ``features_filtered.index`` (a subset of
         ``unfiltered_df.index``) to avoid post-hoc reindex against shuffled
-        data. When the weight column is absent, ``label_weights=None`` is
-        forwarded to ``compose_sample_weights`` and only the base weights
-        contribute.
+        data. The weight column is absent when ``label_weighting.strategy``
+        is ``'none'`` or no pivots were labeled; in that case
+        ``label_weights=None`` is forwarded to ``compose_sample_weights``
+        and only the base weights contribute.
         """
         if not unfiltered_df.index.is_unique:
             raise ValueError(
@@ -1566,12 +1567,11 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
             label_weights = unfiltered_df.loc[
                 features_filtered.index, weight_col
             ].to_numpy(dtype=float)
-            logger.debug(f"label weight column active: {weight_col}")
+            logger.debug(f"label weight column active: {weight_col!r}")
         else:
             label_weights = None
-            logger.warning(
-                f"label weight column not found ({weight_col!r}); "
-                f"falling back to base weights only"
+            logger.debug(
+                f"label weight column absent ({weight_col!r}); using base weights only"
             )
         return compose_sample_weights(
             base_weights,
diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py
index c0f122f..3d01231 100644
--- a/quickadapter/user_data/strategies/QuickAdapterV3.py
+++ b/quickadapter/user_data/strategies/QuickAdapterV3.py
@@ -860,38 +860,48 @@ class QuickAdapterV3(IStrategy):
                 label_col, label_weighting["default"], label_weighting["columns"]
             )
 
-            label_weights = compute_label_weights(
-                n_values=len(label_data.series),
-                indices=label_data.indices,
-                metrics=label_data.metrics,
-                weighting_config=col_weighting_config,
+            # No weight column when inactive; downstream uses base weights only.
+            is_weighting_active = (
+                col_weighting_config["strategy"] != WEIGHT_STRATEGIES[0]  # "none"
+                and len(label_data.indices) > 0
             )
 
-            label_weight_col = label_weight_column_name(label_col)
-
             dataframe[label_col] = label_data.series
-            dataframe[label_weight_col] = label_weights
+
+            label_weight_col = label_weight_column_name(label_col)
+            if is_weighting_active:
+                dataframe[label_weight_col] = compute_label_weights(
+                    n_values=len(label_data.series),
+                    indices=label_data.indices,
+                    metrics=label_data.metrics,
+                    weighting_config=col_weighting_config,
+                )
 
             if label_col == EXTREMA_COLUMN:
                 dataframe[EXTREMA_DIRECTION_COLUMN] = dataframe[label_col]
-                dataframe[EXTREMA_WEIGHT_COLUMN] = dataframe[label_weight_col]
+                if is_weighting_active:
+                    dataframe[EXTREMA_WEIGHT_COLUMN] = dataframe[label_weight_col]
 
             col_smoothing_config = get_label_column_config(
                 label_col, label_smoothing["default"], label_smoothing["columns"]
             )
 
             dataframe[label_col] = smooth(dataframe[label_col], **col_smoothing_config)
-            smoothed_label_weights = smooth(
-                dataframe[label_weight_col], **col_smoothing_config
-            )
-            dataframe[label_weight_col] = smoothed_label_weights.where(
-                np.isfinite(smoothed_label_weights) & smoothed_label_weights.gt(0),
-                0.0,
-            )
+            if is_weighting_active:
+                smoothed_label_weights = smooth(
+                    dataframe[label_weight_col], **col_smoothing_config
+                )
+                dataframe[label_weight_col] = smoothed_label_weights.where(
+                    np.isfinite(smoothed_label_weights) & smoothed_label_weights.gt(0),
+                    0.0,
+                )
 
             if label_col == EXTREMA_COLUMN:
                 dataframe[EXTREMA_DIRECTION_SMOOTHED_COLUMN] = dataframe[label_col]
-                dataframe[EXTREMA_WEIGHT_SMOOTHED_COLUMN] = dataframe[label_weight_col]
+                if is_weighting_active:
+                    dataframe[EXTREMA_WEIGHT_SMOOTHED_COLUMN] = dataframe[
+                        label_weight_col
+                    ]
 
         return dataframe
 
diff --git a/quickadapter/user_data/strategies/Utils.py b/quickadapter/user_data/strategies/Utils.py
index fbc9b87..a4bc025 100644
--- a/quickadapter/user_data/strategies/Utils.py
+++ b/quickadapter/user_data/strategies/Utils.py
@@ -250,17 +250,17 @@ _PREDICTION_SPECS: Final[dict[str, _ParamSpec]] = {
 }
 
 
-EXTREMA_COLUMN: Final = "&s-extrema"
-EXTREMA_DIRECTION_COLUMN: Final = "extrema_direction"
-EXTREMA_DIRECTION_SMOOTHED_COLUMN: Final = "extrema_direction_smoothed"
-EXTREMA_WEIGHT_COLUMN: Final = "extrema_weight"
-EXTREMA_WEIGHT_SMOOTHED_COLUMN: Final = "extrema_weight_smoothed"
+EXTREMA_COLUMN: Final[str] = "&s-extrema"
+EXTREMA_DIRECTION_COLUMN: Final[str] = "extrema_direction"
+EXTREMA_DIRECTION_SMOOTHED_COLUMN: Final[str] = "extrema_direction_smoothed"
+EXTREMA_WEIGHT_COLUMN: Final[str] = "extrema_weight"
+EXTREMA_WEIGHT_SMOOTHED_COLUMN: Final[str] = "extrema_weight_smoothed"
 
 LABEL_WEIGHT_SUFFIX: Final[str] = "_weight"
 
 LABEL_COLUMNS: Final[tuple[str, ...]] = (EXTREMA_COLUMN,)
 
-_FREQAI_LABEL_SIGIL_PATTERN: Final = re.compile(r"^&-?")
+_FREQAI_LABEL_SIGIL_PATTERN: Final[re.Pattern[str]] = re.compile(r"^&-?")
 
 
 @lru_cache(maxsize=16)
@@ -379,8 +379,6 @@ TRADE_PRICE_TARGETS: Final[tuple[TradePriceTarget, ...]] = (
 )
 
 
-DEFAULT_LABEL_WEIGHT: Final[float] = 1.0
-
 SPARSE_TRAINING_MASS_THRESHOLD: Final[float] = 0.05
 
 DEFAULT_FIT_LIVE_PREDICTIONS_CANDLES: Final[int] = 100
@@ -750,10 +748,16 @@ def sanitize_and_renormalize(
         )
     fallback = np.ones(n, dtype=float)
     if drop_mask is not None:
-        fallback = np.where(drop_mask, 0.0, fallback)
-        fb_total = fallback.sum()
+        masked = np.where(drop_mask, 0.0, fallback)
+        fb_total = masked.sum()
         if fb_total > 0.0:
-            fallback = fallback * (n / fb_total)
+            return masked * (n / fb_total)
+        if logger is not None:
+            logger.warning(
+                "sanitize_and_renormalize: drop_mask covers all rows in "
+                "fallback; ignoring mask to preserve mean=1 (context=%s)",
+                context or "unspecified",
+            )
     return fallback
 
 
@@ -1005,7 +1009,7 @@ def smooth(
 
 def _impute_weights(
     weights: NDArray[np.floating],
-    default_weight: float = DEFAULT_LABEL_WEIGHT,
+    default_weight: float = 1.0,
 ) -> NDArray[np.floating]:
     weights = weights.astype(float, copy=True)
 
@@ -1041,8 +1045,14 @@ def _scatter_weights(
     n_values: int,
     indices: list[int],
     weights: NDArray[np.floating],
-    default_weight: float = DEFAULT_LABEL_WEIGHT,
+    default_weight: float,
 ) -> NDArray[np.floating]:
+    """Scatter per-pivot weights into a full-length array.
+
+    Non-pivot rows are filled with ``default_weight``. Callers pass ``0.0``
+    to exclude non-pivot rows from training (pivot-only weighting), or a
+    positive value to give them a baseline weight.
+    """
     if len(indices) == 0 or weights.size == 0:
         return np.full(n_values, default_weight, dtype=float)
 
@@ -1140,7 +1150,7 @@ def _compute_combined_label_weights(
 
     coefficients = _parse_metric_coefficients(metric_coefficients)
     if len(coefficients) == 0:
-        coefficients = {k: DEFAULT_LABEL_WEIGHT for k in metrics.keys()}
+        coefficients = {k: 1.0 for k in metrics.keys()}
 
     imputed_metrics: list[NDArray[np.floating]] = []
     coefficients_list: list[float] = []
@@ -1172,11 +1182,20 @@ def compute_label_weights(
     metrics: dict[str, list[float]],
     weighting_config: dict[str, Any],
 ) -> NDArray[np.floating]:
+    """Compute per-row label importance weights.
+
+    Returns an array with positive values at pivot ``indices`` (scaled by
+    strategy) and ``0.0`` elsewhere. Callers must skip invocation when
+    strategy is ``'none'``; calling it anyway raises ValueError.
+    """
     label_weighting = {**DEFAULTS_LABEL_WEIGHTING, **weighting_config}
     strategy = label_weighting["strategy"]
 
-    if len(indices) == 0 or strategy == WEIGHT_STRATEGIES[0]:  # "none"
-        return np.full(n_values, DEFAULT_LABEL_WEIGHT, dtype=float)
+    if strategy == WEIGHT_STRATEGIES[0]:  # "none"
+        raise ValueError(
+            "compute_label_weights must not be called with strategy='none'; "
+            "callers must skip invocation when weighting is disabled"
+        )
 
     weights: Optional[NDArray[np.floating]] = None
 
-- 
2.53.0