From 3db520560a889089e9c0fe06c20d23dc239fc251 Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@piment-noir.org>
Date: Mon, 22 Jun 2026 10:36:14 +0200
Subject: [PATCH] refactor(quickadapter): state-form prose and harmonization
 follow-up (#100)

Consolidates content + harmonization + Python-idiom follow-up findings
from the 4-axis review of `add1fb7..7c8197b` (PRs #90, #94, #95, #96,
#97, #98, #99 + 4 style commits + 1 chore version-constant update).

Prose state-form (`Utils.py`, `QuickAdapterRegressorV3.py`):
- `_label_aux_column_name` docstring: ``Raises ValueError when
  the result contains `&` or `%` after sigil strip``.
- Deprecated-config-key warning aligns with the sibling pattern at
  the adjacent branch: ``f"{old_path!r} is deprecated, use
  {new_path!r} instead"``.
- `sanitize_and_renormalize` docstring states ``mean(out) == 1`` as the
  rescale invariant.
- Optuna-label throttle log reads ``callback throttled,
  {N} candles until next emission``.
- Fit-live-predictions warmup log reads ``Fit live predictions not
  warmed up: {N} candles until warmup completion``.

Docstrings on validator/composer helpers (3 functions lacking a
docstring at HEAD):
- `_apply_support_policy`: documents the ``policy='raise'`` /
  ``policy='fallback'`` dispatch contract.
- `_compose_train_weights_with_support`: documents the support-gating
  flow (None-label-weights branch routes through
  ``_apply_support_policy`` when ``strategy != 'none'``; main branch
  composes and validates the summary against three thresholds).
- `_validate_optuna_label_best_params`: enumerates the rejection
  paths and the optional ``expected_selection_metadata`` drift gate.

Harmonization (post-merge carry-over):
- `LABEL_WEIGHT_SUFFIX` renamed to `_LABEL_WEIGHT_SUFFIX`
  (no external consumer; symmetric with
  `_LABEL_KNOWN_AT_LOOKAHEAD_SUFFIX`).
- `safe_distribution_fit` call-site contexts harmonized with the
  PR #97 / PR #99 ``[<pair>] <event>`` convention:
  `f"[{pair}] di_values_weibull_fit"` and
  `f"[{pair}] label_norm_fit:{label_col}"`.

Python idioms:
- `_adapt_label_generator` rejects any 3rd positional parameter
  whose name is not ``logger``, regardless of whether the parameter
  is required or has a default. A defaulted non-``logger`` 3rd
  positional raises ``ValueError`` at registration. The 3-arg
  pass-through is reached only when ``positional[2].name == "logger"``.
  This is a behavior change: such generators were previously wrapped
  to drop the logger argument, with the extra positional left at its
  default.
- `_build_sample_weight_inputs` switches the two `logger.debug`
  calls to lazy ``%r`` formatting so the message is not
  materialized when the debug level is disabled.
---
 .../freqaimodels/QuickAdapterRegressorV3.py   | 34 +++++++++--
 quickadapter/user_data/strategies/Utils.py    | 57 ++++++++++++-------
 2 files changed, 64 insertions(+), 27 deletions(-)
diff --git a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
index 3ef17ef..ba715a4 100644
--- a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
+++ b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
@@ -579,6 +579,13 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
         policy: LabelWeightSupportPolicy,
         reasons: list[str],
     ) -> NDArray[np.floating]:
+        """Apply the configured ``support_policy`` to ``base_weights``.
+
+        ``policy='raise'`` raises ``ValueError`` with ``context`` and
+        ``reasons``. ``policy='fallback'`` logs a ``WARNING`` and returns
+        the sanitized base weights via
+        ``compose_sample_weights(base_weights, None, ...)``.
+        """
         reason_text = "; ".join(reasons)
         match policy:
             case "raise":
@@ -607,6 +614,20 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
         *,
         context: str,
     ) -> NDArray[np.floating]:
+        """Compose training-set weights with label-weight support gating.
+
+        When ``label_weights`` is ``None`` and
+        ``label_weighting_config['strategy'] != 'none'``, routes through
+        ``_apply_support_policy`` so the configured ``support_policy``
+        governs the outcome. Otherwise composes the label-weighted
+        product via ``compose_sample_weights`` and validates the
+        ``summarize_label_weight_support`` summary against
+        ``min_pivot_equivalent_count``,
+        ``min_positive_label_weight_fraction``, and
+        ``min_effective_sample_size``; any failure routes through
+        ``_apply_support_policy``. Returns the composed weights on
+        success or the fallback weights from the policy on failure.
+        """
         policy = cast(
             LabelWeightSupportPolicy, label_weighting_config["support_policy"]
         )
@@ -2073,11 +2094,12 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
             label_weights = unfiltered_df.loc[
                 features_filtered.index, weight_col
             ].to_numpy(dtype=float)
-            logger.debug(f"label weight column active: {weight_col!r}")
+            logger.debug("label weight column active: %r", weight_col)
         else:
             label_weights = None
             logger.debug(
-                f"label weight column absent ({weight_col!r}); using base weights only"
+                "label weight column absent (%r); using base weights only",
+                weight_col,
             )
         return SampleWeightInputs(
             base=base_weights,
@@ -2512,7 +2534,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
                 self._optuna_label_candles[pair] = 0
         else:
             logger.debug(
-                f"[{pair}] Optuna {namespace} callback throttled, still {optuna_label_remaining_candles} candles to go"
+                f"[{pair}] Optuna {namespace} callback throttled, {optuna_label_remaining_candles} candles until next emission"
             )
         if len(self._optuna_label_incremented_pairs) >= len(self.pairs):
             self._optuna_label_incremented_pairs = []
@@ -2559,7 +2581,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
             )
             if candles_diff < 0:
                 logger.warning(
-                    f"[{pair}] Fit live predictions not warmed up yet, still {abs(candles_diff)} candles to go"
+                    f"[{pair}] Fit live predictions not warmed up: {abs(candles_diff)} candles until warmup completion"
                 )
                 warmed_up = False
 
@@ -2614,7 +2636,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
                         # ``np.isfinite(cutoff)`` guard substitutes
                         # ``_DI_CUTOFF_DEFAULT``.
                         fallback=(0.0, 0.0, 0.0),
-                        context=f"di_values_weibull_fit:{pair}",
+                        context=f"[{pair}] di_values_weibull_fit",
                         logger=logger,
                         min_count=2,
                         require_variance=True,
@@ -2668,7 +2690,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
                     sample,
                     sp.stats.norm.fit,
                     fallback=fallback,
-                    context=f"label_norm_fit:{pair}:{label_col}",
+                    context=f"[{pair}] label_norm_fit:{label_col}",
                     logger=logger,
                     min_count=2,
                     require_variance=True,
diff --git a/quickadapter/user_data/strategies/Utils.py b/quickadapter/user_data/strategies/Utils.py
index 6cd5a88..0189303 100644
--- a/quickadapter/user_data/strategies/Utils.py
+++ b/quickadapter/user_data/strategies/Utils.py
@@ -532,7 +532,7 @@ EXTREMA_DIRECTION_SMOOTHED_COLUMN: Final[str] = "extrema_direction_smoothed"
 EXTREMA_WEIGHT_COLUMN: Final[str] = "extrema_weight"
 EXTREMA_WEIGHT_SMOOTHED_COLUMN: Final[str] = "extrema_weight_smoothed"
 
-LABEL_WEIGHT_SUFFIX: Final[str] = "_weight"
+_LABEL_WEIGHT_SUFFIX: Final[str] = "_weight"
 _LABEL_KNOWN_AT_LOOKAHEAD_SUFFIX: Final[str] = "_known_at_lookahead"
 
 LABEL_COLUMNS: Final[tuple[str, ...]] = (EXTREMA_COLUMN,)
@@ -550,7 +550,8 @@ def _label_aux_column_name(label_col: str, suffix: str) -> str:
     nor with ``find_features`` (which selects columns containing ``%``).
     Preserves the project convention where a leading ``s`` denotes a smoothed
     target series (e.g. ``&s-extrema``); no ``s`` denotes a raw target.
-    Raises ``ValueError`` if the result still contains ``&`` or ``%``.
+    Raises ``ValueError`` when the result contains ``&`` or ``%`` after
+    sigil strip.
 
     Examples:
         ``("&s-extrema", "_weight")``  -> ``"s-extrema_weight"``
@@ -575,7 +576,7 @@ def _label_aux_column_name(label_col: str, suffix: str) -> str:
 
 def label_weight_column_name(label_col: str) -> str:
     """Return the weight column name for a label column."""
-    return _label_aux_column_name(label_col, LABEL_WEIGHT_SUFFIX)
+    return _label_aux_column_name(label_col, _LABEL_WEIGHT_SUFFIX)
 
 
 def label_known_at_lookahead_column_name(label_col: str) -> str:
@@ -616,14 +617,15 @@ def _adapt_label_generator(
 
     Detects the canonical ``(dataframe, params, logger) -> LabelData``
     shape by a positional parameter named ``logger`` at index 2 (with or
-    without a default). Generators without such a parameter are wrapped
-    to drop the logger argument at dispatch; defaulted positionals after
-    index 1 stay at their defaults. ``*args``, ``**kwargs``, keyword-only
-    ``logger``, fewer than 2 required positionals, more than 3 required
-    positionals, and 3 required positionals whose third name is not
-    ``logger`` raise ``ValueError`` at registration. Inspection runs
-    once at registration; dispatch in ``generate_label_data`` is a
-    direct call.
+    without a default). Generators with exactly 2 positional parameters
+    are wrapped to drop the logger argument at dispatch. Generators with
+    a 3rd positional parameter whose name is not ``logger`` raise
+    ``ValueError`` at registration, regardless of whether that parameter
+    is required or has a default. ``*args``, ``**kwargs``, keyword-only
+    ``logger``, fewer than 2 required positionals, and more than 3
+    required positionals also raise ``ValueError``. Inspection runs once
+    at registration; dispatch in ``generate_label_data`` is a direct
+    call.
     """
     sig = inspect.signature(generator)
     params = list(sig.parameters.values())
@@ -670,15 +672,14 @@ def _adapt_label_generator(
             f"required positional parameter(s); expected 2 "
             f"(dataframe, params) or 3 (dataframe, params, logger)"
         )
-    has_logger_at_third = n_total >= 3 and positional[2].name == "logger"
-    if has_logger_at_third:
+    if n_total >= 3:
+        if positional[2].name != "logger":
+            raise ValueError(
+                f"Invalid label generator {generator!r}: third positional "
+                f"parameter is named {positional[2].name!r}, expected "
+                f"``logger``"
+            )
         return cast(LabelGenerator, generator)
-    if n_required == 3:
-        raise ValueError(
-            f"Invalid label generator {generator!r}: third positional "
-            f"parameter is named {positional[2].name!r}, expected "
-            f"``logger``"
-        )
 
     @functools.wraps(generator)
     def adapted(
@@ -993,7 +994,7 @@ def migrate_config(config: dict[str, Any], logger: Logger) -> None:
             if old_section == new_section:
                 logger.warning(f"{old_path!r} is deprecated, use {new_key!r} instead")
             else:
-                logger.warning(f"{old_path!r} has moved to {new_path!r}")
+                logger.warning(f"{old_path!r} is deprecated, use {new_path!r} instead")
         else:
             _delete_path(config, old_path)
             if old_section == new_section:
@@ -1214,7 +1215,7 @@ def sanitize_and_renormalize(
     Non-finite or non-positive entries are treated as ``0``; rows in
     ``drop_mask`` are forced to ``0``. On collapse (no positive finite
     entry survives), returns ones on surviving rows and zeros on dropped
-    rows, rescaled so ``mean(out) == 1`` still holds.
+    rows, rescaled so ``mean(out) == 1``.
 
     ``context`` is the caller-supplied prefix attached to every warning
     and error emitted from this helper.
@@ -3757,6 +3758,20 @@ def _validate_optuna_label_best_params(
     *,
     expected_selection_metadata: dict[str, Any] | None = None,
 ) -> dict[str, Any] | None:
+    """Validate an Optuna ``label`` best-params payload against the v2 schema.
+
+    Returns the inner ``params`` dict on success; returns ``None`` on
+    rejection. Rejects non-dict input, missing or invalid ``schema_version``,
+    schema-version mismatch with ``_OPTUNA_LABEL_BEST_PARAMS_SCHEMA_VERSION``,
+    missing or invalid ``selection_metadata``, missing or invalid
+    ``selection_metadata.schema_version``, schema-version mismatch with
+    ``_OPTUNA_LABEL_SELECTION_SCHEMA_VERSION``, missing or invalid
+    ``label_period_candles`` / ``label_natr_multiplier`` /
+    ``label_horizon_candles``, and -- when ``expected_selection_metadata``
+    is provided -- any drift between the stored and the caller's current
+    ``selection_metadata``. Every rejection emits a ``[<pair>]``-prefixed
+    warning when ``logger`` is provided.
+    """
     if not isinstance(best_params, dict):
         if logger is not None:
             logger.warning(f"[{pair}] Ignoring Optuna label best params: not a dict")
-- 
2.53.0