From: Jérôme Benoit Date: Mon, 25 May 2026 02:38:25 +0000 (+0200) Subject: fix(weights): canonical sanitize_and_renormalize and compose_sample_weights X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=f6965d3134650007bbeaa77a478a04b604b579f1;p=freqai-strategies.git fix(weights): canonical sanitize_and_renormalize and compose_sample_weights Derived from independent dual-oracle mathematical specification with proofs (mean=1 invariant, drop preservation, idempotency, collapse degradation chain). sanitize_and_renormalize: - Fix latent bug: fallback path with non-empty drop_mask returned ones zeroed at drop_mask but did not renormalize, breaking the mean=1 contract. The fallback now renormalizes so mean(out) == 1 holds on surviving rows. - Replace .copy()+mutation with np.where for drop_mask application. compose_sample_weights: - Replace the post-compose combined.sum() guard (which duplicated the predicate sanitize_and_renormalize re-evaluates internally) with a single survivor-aware predicate covering drop_mask | ~isfinite | <=0 in one pass. The check is the explicit branch point for the base-weights fallback when the label-weighted product collapses on surviving rows; this preserves the recency signal and the label-derived drop_mask instead of degrading to uniform. - Warn when nonzero/n falls below SPARSE_TRAINING_MASS_THRESHOLD (5%, module-level constant) so operators can spot the sparse-training regime that pivot-only weights produce on long series with few pivots. QuickAdapterV3._log_strategy_configuration: - Warn at startup when label_smoothing.method is 'smm' or 'savgol' (with polyorder>=2) combined with a non-'none' label_weighting strategy, since these kernels can collapse a sparse weight signal and trip the all-rows-dropped guard. 
--- diff --git a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py index 51488c9..8cc527d 100644 --- a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py +++ b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py @@ -1557,9 +1557,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): and isinstance(weight_factor, (int, float)) and weight_factor > 0 ): - base_weights = np.asarray( - dk.set_weights_higher_recent(n_rows), dtype=float - ) + base_weights = np.asarray(dk.set_weights_higher_recent(n_rows), dtype=float) else: base_weights = np.ones(n_rows, dtype=float) diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py index 223221e..4e19c74 100644 --- a/quickadapter/user_data/strategies/QuickAdapterV3.py +++ b/quickadapter/user_data/strategies/QuickAdapterV3.py @@ -21,7 +21,12 @@ from freqtrade.exchange import timeframe_to_minutes, timeframe_to_prev_date from freqtrade.persistence import Trade from freqtrade.strategy import AnnotationType, stoploss_from_absolute from freqtrade.strategy.interface import IStrategy -from LabelTransformer import COMBINED_AGGREGATIONS, get_label_column_config +from LabelTransformer import ( + COMBINED_AGGREGATIONS, + SMOOTHING_METHODS, + WEIGHT_STRATEGIES, + get_label_column_config, +) from pandas import DataFrame, Series, isna from scipy.stats import pearsonr, t from technical.pivots_points import pivots_points @@ -506,6 +511,26 @@ class QuickAdapterV3(IStrategy): logger.info(f" mode: {col_smoothing['mode']}") logger.info(f" sigma: {format_number(col_smoothing['sigma'])}") + method = col_smoothing["method"] + if col_weighting["strategy"] != WEIGHT_STRATEGIES[0] and ( # "none" + method == SMOOTHING_METHODS[4] # "smm" + or ( + method == SMOOTHING_METHODS[6] # "savgol" + and col_smoothing["polyorder"] >= 2 + ) + ): + logger.warning( + f" Label [{label_col}]: smoothing method 
{method!r} can " + f"collapse sparse weight signals (smm zeroes them when " + f"fewer than half the window rows are nonzero; savgol " + f"with polyorder>=2 adds negative lobes that are clipped " + f"to zero), which may trip the all-rows-dropped guard in " + f"compose_sample_weights once a non-'none' " + f"label_weighting strategy is configured. Prefer a " + f"non-negative linear kernel (gaussian, kaiser, triang, " + f"sma, gaussian_filter1d)." + ) + logger.info("Reversal Confirmation:") logger.info( f" lookback_period_candles: {self.reversal_confirmation['lookback_period_candles']}" diff --git a/quickadapter/user_data/strategies/Utils.py b/quickadapter/user_data/strategies/Utils.py index 873b389..fbc9b87 100644 --- a/quickadapter/user_data/strategies/Utils.py +++ b/quickadapter/user_data/strategies/Utils.py @@ -381,6 +381,8 @@ TRADE_PRICE_TARGETS: Final[tuple[TradePriceTarget, ...]] = ( DEFAULT_LABEL_WEIGHT: Final[float] = 1.0 +SPARSE_TRAINING_MASS_THRESHOLD: Final[float] = 0.05 + DEFAULT_FIT_LIVE_PREDICTIONS_CANDLES: Final[int] = 100 @@ -721,27 +723,37 @@ def sanitize_and_renormalize( logger: Logger | None = None, context: str | None = None, ) -> NDArray[np.floating]: + """Sanitize a weight vector and renormalize so ``mean(out) == 1``. + + Non-finite or non-positive entries are treated as ``0``; rows in + ``drop_mask`` are forced to ``0``. On collapse (no positive finite + entry survives), returns ones on surviving rows and zeros on dropped + rows, rescaled so ``mean(out) == 1`` still holds. 
+ """ arr = np.asarray(arr, dtype=float) - if arr.size == 0: + n = arr.size + if n == 0: return arr - safe = np.where(np.isfinite(arr) & (arr > 0), arr, 0.0) + safe = np.where(np.isfinite(arr) & (arr > 0.0), arr, 0.0) if drop_mask is not None: - safe = safe.copy() - safe[drop_mask] = 0.0 + safe = np.where(drop_mask, 0.0, safe) total = safe.sum() - if total > 0 and np.isfinite(total): - return safe * (len(safe) / total) + if total > 0.0 and np.isfinite(total): + return safe * (n / total) if logger is not None: logger.warning( - "sanitize_and_renormalize: weights collapsed (context=%s, total=%r, " - "n=%d); falling back to uniform weights", + "sanitize_and_renormalize: weights collapsed (context=%s, " + "total=%r, n=%d); falling back to uniform weights", context or "unspecified", total, - len(arr), + n, ) - fallback = np.ones_like(arr) + fallback = np.ones(n, dtype=float) if drop_mask is not None: - fallback[drop_mask] = 0.0 + fallback = np.where(drop_mask, 0.0, fallback) + fb_total = fallback.sum() + if fb_total > 0.0: + fallback = fallback * (n / fb_total) return fallback @@ -753,22 +765,21 @@ def compose_sample_weights( ) -> NDArray[np.floating]: """Combine base sample weights with the label importance weights. - Returns w in R+^N with mean(w) == 1. The label weight vector is sanitized - (non-finite or <= 0 -> row dropped) and mean-normalized, multiplied with - base_weights, zeroed on dropped rows, and renormalized to mean=1. - - The label weight vector is the output of ``compute_label_weights`` (which - already aggregates the configured metric sources via ``label_weighting``), - co-smoothed with the label column in ``set_freqai_targets`` and clipped - to a finite non-negative range. ``LABEL_COLUMNS`` is single-target by - design (one prediction target per model). + Returns ``w in R+^N`` with ``mean(w) == 1``. 
Rows where + ``label_weights[i]`` is non-finite or ``<= 0`` are dropped + (``out[i] == 0``); surviving rows carry ``base_weights * label_weights`` + rescaled to global ``mean == 1``. On collapse of the label-weighted + product, falls back to ``base_weights`` (with the label-derived + drop_mask) so the recency signal is preserved. Raises ValueError on shape mismatch or when every row is dropped. """ base_weights = np.asarray(base_weights, dtype=float) if label_weights is None: - return sanitize_and_renormalize(base_weights) - n = len(base_weights) + return sanitize_and_renormalize( + base_weights, logger=logger, context="compose:base_only" + ) + n = base_weights.shape[0] arr = np.asarray(label_weights, dtype=float) if arr.shape != (n,): raise ValueError( @@ -781,23 +792,39 @@ def compose_sample_weights( "compose_sample_weights: all rows dropped by zero or non-finite " "label weights; no surviving training samples" ) - sanitized = np.where(drop_mask, 1.0, np.maximum(arr, np.finfo(float).tiny)) - normalized = sanitize_and_renormalize(sanitized) - combined = base_weights * normalized - combined[drop_mask] = 0.0 - combined_sum = combined.sum() - if combined_sum > 0 and np.isfinite(combined_sum): - ratio = n / combined_sum - if np.isfinite(ratio): - scaled = combined * ratio - if np.all(np.isfinite(scaled)): - return scaled + nonzero = int((~drop_mask).sum()) + if nonzero / n < SPARSE_TRAINING_MASS_THRESHOLD: + logger.warning( + "compose_sample_weights: sparse training mass " + "(%d/%d rows = %.2f%% nonzero, threshold=%.2f%%)", + nonzero, + n, + 100.0 * nonzero / n, + 100.0 * SPARSE_TRAINING_MASS_THRESHOLD, + ) + combined = base_weights * arr + # Detect collapse on surviving rows up front so the fallback can route + # to base weights rather than the uniform fallback inside sanitize. 
+ survivor_mask = ~(drop_mask | ~np.isfinite(combined) | (combined <= 0.0)) + survivor_total = float(np.where(survivor_mask, combined, 0.0).sum()) + if survivor_total > 0.0 and np.isfinite(survivor_total): + return sanitize_and_renormalize( + combined, + drop_mask=drop_mask, + logger=logger, + context="compose:label_weighted", + ) logger.warning( - "compose_sample_weights: composed weights collapsed " - "(combined_sum=%r); falling back to base weights", - combined_sum, + "compose_sample_weights: composed weights collapsed on surviving " + "rows (survivor_total=%g); falling back to base weights", + survivor_total, + ) + return sanitize_and_renormalize( + base_weights, + drop_mask=drop_mask, + logger=logger, + context="compose:base_fallback", ) - return sanitize_and_renormalize(base_weights, drop_mask=drop_mask) def nan_average(