From: Jérôme Benoit <jerome.benoit@piment-noir.org>
Date: Mon, 25 May 2026 02:38:25 +0000 (+0200)
Subject: fix(weights): canonical sanitize_and_renormalize and compose_sample_weights
X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=f6965d3134650007bbeaa77a478a04b604b579f1;p=freqai-strategies.git

fix(weights): canonical sanitize_and_renormalize and compose_sample_weights

Derived from an independent dual-oracle mathematical specification with
proofs (mean=1 invariant, drop preservation, idempotency, collapse
degradation chain).

sanitize_and_renormalize:
- Fix latent bug: the fallback path with a non-empty drop_mask returned
  ones zeroed at drop_mask but did not renormalize, breaking the mean=1
  contract. The fallback now rescales the surviving rows by
  n / n_surviving so mean(out) == 1 holds over the full vector.
- Replace .copy()+mutation with np.where for drop_mask application.

compose_sample_weights:
- Replace the post-compose combined.sum() guard (which duplicated the
  predicate sanitize_and_renormalize re-evaluates internally) with a
  single survivor-aware predicate covering drop_mask | ~isfinite | <=0
  in one pass. The check is the explicit branch point for the base-
  weights fallback when the label-weighted product collapses on
  surviving rows; this preserves the recency signal and the label-
  derived drop_mask instead of degrading to uniform.
- Warn when nonzero/n falls below SPARSE_TRAINING_MASS_THRESHOLD (5%,
  module-level constant) so operators can spot the sparse-training
  regime that pivot-only weights produce on long series with few pivots.

QuickAdapterV3._log_strategy_configuration:
- Warn at startup when label_smoothing.method is 'smm' or 'savgol'
  (with polyorder>=2) combined with a non-'none' label_weighting
  strategy, since these kernels can collapse a sparse weight signal
  and trip the all-rows-dropped guard.
---

diff --git a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
index 51488c9..8cc527d 100644
--- a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
+++ b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
@@ -1557,9 +1557,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
             and isinstance(weight_factor, (int, float))
             and weight_factor > 0
         ):
-            base_weights = np.asarray(
-                dk.set_weights_higher_recent(n_rows), dtype=float
-            )
+            base_weights = np.asarray(dk.set_weights_higher_recent(n_rows), dtype=float)
         else:
             base_weights = np.ones(n_rows, dtype=float)
 
diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py
index 223221e..4e19c74 100644
--- a/quickadapter/user_data/strategies/QuickAdapterV3.py
+++ b/quickadapter/user_data/strategies/QuickAdapterV3.py
@@ -21,7 +21,12 @@ from freqtrade.exchange import timeframe_to_minutes, timeframe_to_prev_date
 from freqtrade.persistence import Trade
 from freqtrade.strategy import AnnotationType, stoploss_from_absolute
 from freqtrade.strategy.interface import IStrategy
-from LabelTransformer import COMBINED_AGGREGATIONS, get_label_column_config
+from LabelTransformer import (
+    COMBINED_AGGREGATIONS,
+    SMOOTHING_METHODS,
+    WEIGHT_STRATEGIES,
+    get_label_column_config,
+)
 from pandas import DataFrame, Series, isna
 from scipy.stats import pearsonr, t
 from technical.pivots_points import pivots_points
@@ -506,6 +511,26 @@ class QuickAdapterV3(IStrategy):
             logger.info(f"    mode: {col_smoothing['mode']}")
             logger.info(f"    sigma: {format_number(col_smoothing['sigma'])}")
 
+            method = col_smoothing["method"]
+            if col_weighting["strategy"] != WEIGHT_STRATEGIES[0] and (  # "none"
+                method == SMOOTHING_METHODS[4]  # "smm"
+                or (
+                    method == SMOOTHING_METHODS[6]  # "savgol"
+                    and col_smoothing["polyorder"] >= 2
+                )
+            ):
+                logger.warning(
+                    f"  Label [{label_col}]: smoothing method {method!r} can "
+                    f"collapse sparse weight signals (smm zeroes them when "
+                    f"fewer than half the window rows are nonzero; savgol "
+                    f"with polyorder>=2 adds negative lobes that are clipped "
+                    f"to zero), which may trip the all-rows-dropped guard in "
+                    f"compose_sample_weights once a non-'none' "
+                    f"label_weighting strategy is configured. Prefer a "
+                    f"non-negative linear kernel (gaussian, kaiser, triang, "
+                    f"sma, gaussian_filter1d)."
+                )
+
         logger.info("Reversal Confirmation:")
         logger.info(
             f"  lookback_period_candles: {self.reversal_confirmation['lookback_period_candles']}"
diff --git a/quickadapter/user_data/strategies/Utils.py b/quickadapter/user_data/strategies/Utils.py
index 873b389..fbc9b87 100644
--- a/quickadapter/user_data/strategies/Utils.py
+++ b/quickadapter/user_data/strategies/Utils.py
@@ -381,6 +381,8 @@ TRADE_PRICE_TARGETS: Final[tuple[TradePriceTarget, ...]] = (
 
 DEFAULT_LABEL_WEIGHT: Final[float] = 1.0
 
+SPARSE_TRAINING_MASS_THRESHOLD: Final[float] = 0.05
+
 DEFAULT_FIT_LIVE_PREDICTIONS_CANDLES: Final[int] = 100
 
 
@@ -721,27 +723,37 @@ def sanitize_and_renormalize(
     logger: Logger | None = None,
     context: str | None = None,
 ) -> NDArray[np.floating]:
+    """Sanitize a weight vector and renormalize so ``mean(out) == 1``.
+
+    Non-finite or non-positive entries are treated as ``0``; rows in
+    ``drop_mask`` are forced to ``0``. On collapse (no positive finite
+    entry survives), returns ones on surviving rows and zeros on dropped
+    rows, rescaled so ``mean(out) == 1`` still holds.
+    """
     arr = np.asarray(arr, dtype=float)
-    if arr.size == 0:
+    n = arr.size
+    if n == 0:
         return arr
-    safe = np.where(np.isfinite(arr) & (arr > 0), arr, 0.0)
+    safe = np.where(np.isfinite(arr) & (arr > 0.0), arr, 0.0)
     if drop_mask is not None:
-        safe = safe.copy()
-        safe[drop_mask] = 0.0
+        safe = np.where(drop_mask, 0.0, safe)
     total = safe.sum()
-    if total > 0 and np.isfinite(total):
-        return safe * (len(safe) / total)
+    if total > 0.0 and np.isfinite(total):
+        return safe * (n / total)
     if logger is not None:
         logger.warning(
-            "sanitize_and_renormalize: weights collapsed (context=%s, total=%r, "
-            "n=%d); falling back to uniform weights",
+            "sanitize_and_renormalize: weights collapsed (context=%s, "
+            "total=%r, n=%d); falling back to uniform weights",
             context or "unspecified",
             total,
-            len(arr),
+            n,
         )
-    fallback = np.ones_like(arr)
+    fallback = np.ones(n, dtype=float)
     if drop_mask is not None:
-        fallback[drop_mask] = 0.0
+        fallback = np.where(drop_mask, 0.0, fallback)
+        fb_total = fallback.sum()
+        if fb_total > 0.0:
+            fallback = fallback * (n / fb_total)
     return fallback
 
 
@@ -753,22 +765,21 @@ def compose_sample_weights(
 ) -> NDArray[np.floating]:
     """Combine base sample weights with the label importance weights.
 
-    Returns w in R+^N with mean(w) == 1. The label weight vector is sanitized
-    (non-finite or <= 0 -> row dropped) and mean-normalized, multiplied with
-    base_weights, zeroed on dropped rows, and renormalized to mean=1.
-
-    The label weight vector is the output of ``compute_label_weights`` (which
-    already aggregates the configured metric sources via ``label_weighting``),
-    co-smoothed with the label column in ``set_freqai_targets`` and clipped
-    to a finite non-negative range. ``LABEL_COLUMNS`` is single-target by
-    design (one prediction target per model).
+    Returns ``w in R+^N`` with ``mean(w) == 1``. Rows where
+    ``label_weights[i]`` is non-finite or ``<= 0`` are dropped
+    (``out[i] == 0``); surviving rows carry ``base_weights * label_weights``
+    rescaled to global ``mean == 1``. On collapse of the label-weighted
+    product, falls back to ``base_weights`` (with the label-derived
+    drop_mask) so the recency signal is preserved.
 
     Raises ValueError on shape mismatch or when every row is dropped.
     """
     base_weights = np.asarray(base_weights, dtype=float)
     if label_weights is None:
-        return sanitize_and_renormalize(base_weights)
-    n = len(base_weights)
+        return sanitize_and_renormalize(
+            base_weights, logger=logger, context="compose:base_only"
+        )
+    n = base_weights.shape[0]
     arr = np.asarray(label_weights, dtype=float)
     if arr.shape != (n,):
         raise ValueError(
@@ -781,23 +792,39 @@ def compose_sample_weights(
             "compose_sample_weights: all rows dropped by zero or non-finite "
             "label weights; no surviving training samples"
         )
-    sanitized = np.where(drop_mask, 1.0, np.maximum(arr, np.finfo(float).tiny))
-    normalized = sanitize_and_renormalize(sanitized)
-    combined = base_weights * normalized
-    combined[drop_mask] = 0.0
-    combined_sum = combined.sum()
-    if combined_sum > 0 and np.isfinite(combined_sum):
-        ratio = n / combined_sum
-        if np.isfinite(ratio):
-            scaled = combined * ratio
-            if np.all(np.isfinite(scaled)):
-                return scaled
+    nonzero = int((~drop_mask).sum())
+    if nonzero / n < SPARSE_TRAINING_MASS_THRESHOLD:
+        logger.warning(
+            "compose_sample_weights: sparse training mass "
+            "(%d/%d rows = %.2f%% nonzero, threshold=%.2f%%)",
+            nonzero,
+            n,
+            100.0 * nonzero / n,
+            100.0 * SPARSE_TRAINING_MASS_THRESHOLD,
+        )
+    combined = base_weights * arr
+    # Detect collapse on surviving rows up front so the fallback can route
+    # to base weights rather than the uniform fallback inside sanitize.
+    survivor_mask = ~(drop_mask | ~np.isfinite(combined) | (combined <= 0.0))
+    survivor_total = float(np.where(survivor_mask, combined, 0.0).sum())
+    if survivor_total > 0.0 and np.isfinite(survivor_total):
+        return sanitize_and_renormalize(
+            combined,
+            drop_mask=drop_mask,
+            logger=logger,
+            context="compose:label_weighted",
+        )
     logger.warning(
-        "compose_sample_weights: composed weights collapsed "
-        "(combined_sum=%r); falling back to base weights",
-        combined_sum,
+        "compose_sample_weights: composed weights collapsed on surviving "
+        "rows (survivor_total=%g); falling back to base weights",
+        survivor_total,
+    )
+    return sanitize_and_renormalize(
+        base_weights,
+        drop_mask=drop_mask,
+        logger=logger,
+        context="compose:base_fallback",
     )
-    return sanitize_and_renormalize(base_weights, drop_mask=drop_mask)
 
 
 def nan_average(