From 69a43e8b4a699e7231aabebce75fc1a4c250bd76 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Mon, 25 May 2026 02:31:49 +0200 Subject: [PATCH] fix(weights): tighten observability and edge-case handling in label pipeline - sanitize_and_renormalize accepts logger/context kwargs and warns on uniform-fallback collapse; six call sites in QuickAdapterRegressorV3 thread their stage label (train_test_split / post_feature_pipeline / timeseries_split, train|test). - Warn at startup when label_prediction.method='none' for any label, since populate_entry_trend would silently never trigger. - Replace .notna() with np.isfinite() in the smoothed-weight clip so +Inf produced by smoothing kernels is also zeroed instead of relying on the downstream drop_mask in compose_sample_weights. - _impute_weights tracks boundary NaN separately so injected zeros do not bias the interior median; finite endpoints are now preserved. --- .../freqaimodels/QuickAdapterRegressorV3.py | 34 +++++++++++++++---- .../user_data/strategies/QuickAdapterV3.py | 3 +- quickadapter/user_data/strategies/Utils.py | 29 ++++++++++++---- 3 files changed, 53 insertions(+), 13 deletions(-) diff --git a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py index 6e50be1..c1a74a9 100644 --- a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py +++ b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py @@ -1118,6 +1118,12 @@ class QuickAdapterRegressorV3(BaseRegressionModel): logger.info( f" keep_fraction: {format_number(col_prediction['keep_fraction'])}" ) + if col_prediction["method"] == PREDICTION_METHODS[0]: # "none" + logger.warning( + f" Prediction method is 'none' for label [{label_col}]: " + f"minima_threshold/maxima_threshold will not be computed and " + f"entry signals based on them will never trigger." + ) default_label_period_candles, default_label_natr_multiplier = ( self._label_defaults @@ -1483,9 +1489,13 @@ class QuickAdapterRegressorV3(BaseRegressionModel): ) ) - train_weights = sanitize_and_renormalize(train_weights) + train_weights = sanitize_and_renormalize( + train_weights, logger=logger, context="train_test_split:train" + ) if test_size != 0: - test_weights = sanitize_and_renormalize(test_weights) + test_weights = sanitize_and_renormalize( + test_weights, logger=logger, context="train_test_split:test" + ) if feat_dict.get("reverse_train_test_order", False): return dk.build_data_dictionary( @@ -1644,7 +1654,11 @@ class QuickAdapterRegressorV3(BaseRegressionModel): dd["train_features"], dd["train_labels"], dd["train_weights"] ) ) - dd["train_weights"] = sanitize_and_renormalize(dd["train_weights"]) + dd["train_weights"] = sanitize_and_renormalize( + dd["train_weights"], + logger=logger, + context="post_feature_pipeline:train", + ) dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"]) if ( @@ -1693,7 +1707,11 @@ class QuickAdapterRegressorV3(BaseRegressionModel): dd["test_features"], dd["test_labels"], dd["test_weights"] ) ) - dd["test_weights"] = sanitize_and_renormalize(dd["test_weights"]) + dd["test_weights"] = sanitize_and_renormalize( + dd["test_weights"], + logger=logger, + context="post_feature_pipeline:test", + ) dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"]) return dd @@ -1803,8 +1821,12 @@ class QuickAdapterRegressorV3(BaseRegressionModel): test_features = filtered_dataframe.iloc[test_idx] train_labels = labels.iloc[train_idx] test_labels = labels.iloc[test_idx] - train_weights = sanitize_and_renormalize(weights[train_idx]) - test_weights = sanitize_and_renormalize(weights[test_idx]) + train_weights = sanitize_and_renormalize( + weights[train_idx], logger=logger, context="timeseries_split:train" + ) + test_weights = sanitize_and_renormalize( + weights[test_idx], logger=logger, context="timeseries_split:test" + ) if feat_dict.get("reverse_train_test_order", False): return dk.build_data_dictionary( diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py index e839555..223221e 100644 --- a/quickadapter/user_data/strategies/QuickAdapterV3.py +++ b/quickadapter/user_data/strategies/QuickAdapterV3.py @@ -860,7 +860,8 @@ class QuickAdapterV3(IStrategy): dataframe[label_weight_col], **col_smoothing_config ) dataframe[label_weight_col] = smoothed_label_weights.where( - smoothed_label_weights.gt(0) & smoothed_label_weights.notna(), 0.0 + np.isfinite(smoothed_label_weights) & smoothed_label_weights.gt(0), + 0.0, ) if label_col == EXTREMA_COLUMN: diff --git a/quickadapter/user_data/strategies/Utils.py b/quickadapter/user_data/strategies/Utils.py index b168eb3..2c93905 100644 --- a/quickadapter/user_data/strategies/Utils.py +++ b/quickadapter/user_data/strategies/Utils.py @@ -717,6 +717,9 @@ def midpoint(value1: T, value2: T) -> T: def sanitize_and_renormalize( arr: NDArray[np.floating], drop_mask: NDArray[np.bool_] | None = None, + *, + logger: Logger | None = None, + context: str | None = None, ) -> NDArray[np.floating]: arr = np.asarray(arr, dtype=float) if arr.size == 0: @@ -728,6 +731,14 @@ def sanitize_and_renormalize( total = safe.sum() if total > 0 and np.isfinite(total): return safe * (len(safe) / total) + if logger is not None: + logger.warning( + "sanitize_and_renormalize: weights collapsed (context=%s, total=%r, " + "n=%d); falling back to uniform weights", + context or "unspecified", + total, + len(arr), + ) fallback = np.ones_like(arr) if drop_mask is not None: fallback[drop_mask] = 0.0 @@ -974,21 +985,27 @@ def _impute_weights( if weights.size == 0: return np.full_like(weights, default_weight, dtype=float) - # Weights computed by `zigzag` can be NaN on boundary pivots + # Zigzag emits NaN at unconfirmed boundary pivots; zero them out and + # exclude from the median so they don't drag interior imputation. + boundary_mask = np.zeros(weights.size, dtype=bool) if not np.isfinite(weights[0]): - weights[0] = 0.0 + boundary_mask[0] = True if not np.isfinite(weights[-1]): - weights[-1] = 0.0 + boundary_mask[-1] = True finite_mask = np.isfinite(weights) - if not finite_mask.any(): - return np.full_like(weights, default_weight, dtype=float) + interior_finite_mask = finite_mask & ~boundary_mask + if not interior_finite_mask.any(): + weights[~finite_mask] = default_weight + weights[boundary_mask] = 0.0 + return weights - median_weight = np.nanmedian(weights[finite_mask]) + median_weight = np.nanmedian(weights[interior_finite_mask]) if not np.isfinite(median_weight): median_weight = default_weight weights[~finite_mask] = median_weight + weights[boundary_mask] = 0.0 return weights -- 2.53.0