From 69a43e8b4a699e7231aabebce75fc1a4c250bd76 Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@piment-noir.org>
Date: Mon, 25 May 2026 02:31:49 +0200
Subject: [PATCH] fix(weights): tighten observability and edge-case handling in
 label pipeline

- sanitize_and_renormalize accepts logger/context kwargs and warns on
  uniform-fallback collapse; six call sites in QuickAdapterRegressorV3
  thread their stage label (train_test_split / post_feature_pipeline /
  timeseries_split, train|test).
- Warn at startup when label_prediction.method='none' for any label, since
  the threshold-based entry signals in populate_entry_trend would
  otherwise silently never trigger.
- Replace .notna() with np.isfinite() in the smoothed-weight clip so +Inf
  produced by smoothing kernels is also zeroed instead of relying on the
  downstream drop_mask in compose_sample_weights.
- _impute_weights tracks non-finite boundary values in a separate mask and
  zeroes them only after the median is computed, so the injected zeros do
  not bias the interior median; finite endpoints are left untouched.
---
 .../freqaimodels/QuickAdapterRegressorV3.py   | 34 +++++++++++++++----
 .../user_data/strategies/QuickAdapterV3.py    |  3 +-
 quickadapter/user_data/strategies/Utils.py    | 29 ++++++++++++----
 3 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
index 6e50be1..c1a74a9 100644
--- a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
+++ b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
@@ -1118,6 +1118,12 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
             logger.info(
                 f"    keep_fraction: {format_number(col_prediction['keep_fraction'])}"
             )
+            if col_prediction["method"] == PREDICTION_METHODS[0]:  # "none"
+                logger.warning(
+                    f"  Prediction method is 'none' for label [{label_col}]: "
+                    f"minima_threshold/maxima_threshold will not be computed and "
+                    f"entry signals based on them will never trigger."
+                )
 
         default_label_period_candles, default_label_natr_multiplier = (
             self._label_defaults
@@ -1483,9 +1489,13 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
                     )
                 )
 
-        train_weights = sanitize_and_renormalize(train_weights)
+        train_weights = sanitize_and_renormalize(
+            train_weights, logger=logger, context="train_test_split:train"
+        )
         if test_size != 0:
-            test_weights = sanitize_and_renormalize(test_weights)
+            test_weights = sanitize_and_renormalize(
+                test_weights, logger=logger, context="train_test_split:test"
+            )
 
         if feat_dict.get("reverse_train_test_order", False):
             return dk.build_data_dictionary(
@@ -1644,7 +1654,11 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
                 dd["train_features"], dd["train_labels"], dd["train_weights"]
             )
         )
-        dd["train_weights"] = sanitize_and_renormalize(dd["train_weights"])
+        dd["train_weights"] = sanitize_and_renormalize(
+            dd["train_weights"],
+            logger=logger,
+            context="post_feature_pipeline:train",
+        )
         dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"])
 
         if (
@@ -1693,7 +1707,11 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
                         dd["test_features"], dd["test_labels"], dd["test_weights"]
                     )
                 )
-                dd["test_weights"] = sanitize_and_renormalize(dd["test_weights"])
+                dd["test_weights"] = sanitize_and_renormalize(
+                    dd["test_weights"],
+                    logger=logger,
+                    context="post_feature_pipeline:test",
+                )
                 dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"])
 
         return dd
@@ -1803,8 +1821,12 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
         test_features = filtered_dataframe.iloc[test_idx]
         train_labels = labels.iloc[train_idx]
         test_labels = labels.iloc[test_idx]
-        train_weights = sanitize_and_renormalize(weights[train_idx])
-        test_weights = sanitize_and_renormalize(weights[test_idx])
+        train_weights = sanitize_and_renormalize(
+            weights[train_idx], logger=logger, context="timeseries_split:train"
+        )
+        test_weights = sanitize_and_renormalize(
+            weights[test_idx], logger=logger, context="timeseries_split:test"
+        )
 
         if feat_dict.get("reverse_train_test_order", False):
             return dk.build_data_dictionary(
diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py
index e839555..223221e 100644
--- a/quickadapter/user_data/strategies/QuickAdapterV3.py
+++ b/quickadapter/user_data/strategies/QuickAdapterV3.py
@@ -860,7 +860,8 @@ class QuickAdapterV3(IStrategy):
                 dataframe[label_weight_col], **col_smoothing_config
             )
             dataframe[label_weight_col] = smoothed_label_weights.where(
-                smoothed_label_weights.gt(0) & smoothed_label_weights.notna(), 0.0
+                np.isfinite(smoothed_label_weights) & smoothed_label_weights.gt(0),
+                0.0,
             )
 
             if label_col == EXTREMA_COLUMN:
diff --git a/quickadapter/user_data/strategies/Utils.py b/quickadapter/user_data/strategies/Utils.py
index b168eb3..2c93905 100644
--- a/quickadapter/user_data/strategies/Utils.py
+++ b/quickadapter/user_data/strategies/Utils.py
@@ -717,6 +717,9 @@ def midpoint(value1: T, value2: T) -> T:
 def sanitize_and_renormalize(
     arr: NDArray[np.floating],
     drop_mask: NDArray[np.bool_] | None = None,
+    *,
+    logger: Logger | None = None,
+    context: str | None = None,
 ) -> NDArray[np.floating]:
     arr = np.asarray(arr, dtype=float)
     if arr.size == 0:
@@ -728,6 +731,14 @@ def sanitize_and_renormalize(
     total = safe.sum()
     if total > 0 and np.isfinite(total):
         return safe * (len(safe) / total)
+    if logger is not None:
+        logger.warning(
+            "sanitize_and_renormalize: weights collapsed (context=%s, total=%r, "
+            "n=%d); falling back to uniform weights",
+            context or "unspecified",
+            total,
+            len(arr),
+        )
     fallback = np.ones_like(arr)
     if drop_mask is not None:
         fallback[drop_mask] = 0.0
@@ -974,21 +985,27 @@ def _impute_weights(
     if weights.size == 0:
         return np.full_like(weights, default_weight, dtype=float)
 
-    # Weights computed by `zigzag` can be NaN on boundary pivots
+    # Zigzag emits NaN at unconfirmed boundary pivots; zero them out and
+    # exclude from the median so they don't drag interior imputation.
+    boundary_mask = np.zeros(weights.size, dtype=bool)
     if not np.isfinite(weights[0]):
-        weights[0] = 0.0
+        boundary_mask[0] = True
     if not np.isfinite(weights[-1]):
-        weights[-1] = 0.0
+        boundary_mask[-1] = True
 
     finite_mask = np.isfinite(weights)
-    if not finite_mask.any():
-        return np.full_like(weights, default_weight, dtype=float)
+    interior_finite_mask = finite_mask & ~boundary_mask
+    if not interior_finite_mask.any():
+        weights[~finite_mask] = default_weight
+        weights[boundary_mask] = 0.0
+        return weights
 
-    median_weight = np.nanmedian(weights[finite_mask])
+    median_weight = np.nanmedian(weights[interior_finite_mask])
     if not np.isfinite(median_weight):
         median_weight = default_weight
 
     weights[~finite_mask] = median_weight
+    weights[boundary_mask] = 0.0
 
     return weights
 
-- 
2.53.0