From 3db520560a889089e9c0fe06c20d23dc239fc251 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Mon, 22 Jun 2026 10:36:14 +0200 Subject: [PATCH] refactor(quickadapter): state-form prose and harmonization follow-up (#100) Consolidates content + harmonization + Python-idiom follow-up findings from the 4-axis review of `add1fb7..7c8197b` (PRs #90, #94, #95, #96, #97, #98, #99 + 4 style commits + 1 chore version-constant update). Prose state-form (`Utils.py`, `QuickAdapterRegressorV3.py`): - `_normalize_label_column_name` docstring: ``Raises ValueError when the result contains `&` or `%` after sigil strip``. - Deprecated-config-key warning aligns with the sibling pattern at the adjacent branch: ``f"{old_path!r} is deprecated, use {new_path!r} instead"``. - `sanitize_and_renormalize` docstring states ``mean(out) == 1`` as the rescale invariant. - Optuna-label throttle log reads ``callback throttled, {N} candles until next emission``. - Fit-live-predictions warmup log reads ``Fit live predictions not warmed up: {N} candles until warmup completion``. Docstrings on validator/composer helpers (3 functions lacking a docstring at HEAD): - `_apply_support_policy`: documents the ``policy='raise'`` / ``policy='fallback'`` dispatch contract. - `_compose_train_weights_with_support`: documents the support-gating flow (None-label-weights branch routes through ``_apply_support_policy`` when ``strategy != 'none'``; main branch composes and validates the summary against three thresholds). - `_validate_optuna_label_best_params`: enumerates the rejection paths and the optional ``expected_selection_metadata`` drift gate. Harmonization (post-merge carry-over): - `LABEL_WEIGHT_SUFFIX` renamed to `_LABEL_WEIGHT_SUFFIX` (no external consumer; symmetric with `_LABEL_KNOWN_AT_LOOKAHEAD_SUFFIX`). 
- `safe_distribution_fit` call-site contexts harmonized with the PR #97 / PR #99 ``[<pair>] <context>`` convention: `f"[{pair}] di_values_weibull_fit"` and `f"[{pair}] label_norm_fit:{label_col}"`. Python idioms: - `_adapt_label_generator` rejects any 3rd positional parameter whose name is not ``logger``, regardless of whether the parameter is required or has a default. A defaulted non-``logger`` 3rd positional raises ``ValueError`` at registration. The 3-arg pass-through is reached only when ``positional[2].name == "logger"``. - `_build_sample_weight_inputs` switches the two `logger.debug` calls to lazy ``%r`` formatting so the f-string body is not materialized when the debug level is disabled. --- .../freqaimodels/QuickAdapterRegressorV3.py | 34 +++++++++-- quickadapter/user_data/strategies/Utils.py | 57 ++++++++++++------- 2 files changed, 64 insertions(+), 27 deletions(-) diff --git a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py index 3ef17ef..ba715a4 100644 --- a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py +++ b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py @@ -579,6 +579,13 @@ class QuickAdapterRegressorV3(BaseRegressionModel): policy: LabelWeightSupportPolicy, reasons: list[str], ) -> NDArray[np.floating]: + """Apply the configured ``support_policy`` to ``base_weights``. + + ``policy='raise'`` raises ``ValueError`` with ``context`` and + ``reasons``. ``policy='fallback'`` logs a ``WARNING`` and returns + the sanitized base weights via + ``compose_sample_weights(base_weights, None, ...)``. + """ reason_text = "; ".join(reasons) match policy: case "raise": @@ -607,6 +614,20 @@ class QuickAdapterRegressorV3(BaseRegressionModel): *, context: str, ) -> NDArray[np.floating]: + """Compose training-set weights with label-weight support gating. 
+ + When ``label_weights`` is ``None`` and + ``label_weighting_config['strategy'] != 'none'``, routes through + ``_apply_support_policy`` so the configured ``support_policy`` + governs the outcome. Otherwise composes the label-weighted + product via ``compose_sample_weights`` and validates the + ``summarize_label_weight_support`` summary against + ``min_pivot_equivalent_count``, + ``min_positive_label_weight_fraction``, and + ``min_effective_sample_size``; any failure routes through + ``_apply_support_policy``. Returns the composed weights on + success or the fallback weights from the policy on failure. + """ policy = cast( LabelWeightSupportPolicy, label_weighting_config["support_policy"] ) @@ -2073,11 +2094,12 @@ class QuickAdapterRegressorV3(BaseRegressionModel): label_weights = unfiltered_df.loc[ features_filtered.index, weight_col ].to_numpy(dtype=float) - logger.debug(f"label weight column active: {weight_col!r}") + logger.debug("label weight column active: %r", weight_col) else: label_weights = None logger.debug( - f"label weight column absent ({weight_col!r}); using base weights only" + "label weight column absent (%r); using base weights only", + weight_col, ) return SampleWeightInputs( base=base_weights, @@ -2512,7 +2534,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): self._optuna_label_candles[pair] = 0 else: logger.debug( - f"[{pair}] Optuna {namespace} callback throttled, still {optuna_label_remaining_candles} candles to go" + f"[{pair}] Optuna {namespace} callback throttled, {optuna_label_remaining_candles} candles until next emission" ) if len(self._optuna_label_incremented_pairs) >= len(self.pairs): self._optuna_label_incremented_pairs = [] @@ -2559,7 +2581,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): ) if candles_diff < 0: logger.warning( - f"[{pair}] Fit live predictions not warmed up yet, still {abs(candles_diff)} candles to go" + f"[{pair}] Fit live predictions not warmed up: {abs(candles_diff)} candles until warmup 
completion" ) warmed_up = False @@ -2614,7 +2636,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): # ``np.isfinite(cutoff)`` guard substitutes # ``_DI_CUTOFF_DEFAULT``. fallback=(0.0, 0.0, 0.0), - context=f"di_values_weibull_fit:{pair}", + context=f"[{pair}] di_values_weibull_fit", logger=logger, min_count=2, require_variance=True, @@ -2668,7 +2690,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): sample, sp.stats.norm.fit, fallback=fallback, - context=f"label_norm_fit:{pair}:{label_col}", + context=f"[{pair}] label_norm_fit:{label_col}", logger=logger, min_count=2, require_variance=True, diff --git a/quickadapter/user_data/strategies/Utils.py b/quickadapter/user_data/strategies/Utils.py index 6cd5a88..0189303 100644 --- a/quickadapter/user_data/strategies/Utils.py +++ b/quickadapter/user_data/strategies/Utils.py @@ -532,7 +532,7 @@ EXTREMA_DIRECTION_SMOOTHED_COLUMN: Final[str] = "extrema_direction_smoothed" EXTREMA_WEIGHT_COLUMN: Final[str] = "extrema_weight" EXTREMA_WEIGHT_SMOOTHED_COLUMN: Final[str] = "extrema_weight_smoothed" -LABEL_WEIGHT_SUFFIX: Final[str] = "_weight" +_LABEL_WEIGHT_SUFFIX: Final[str] = "_weight" _LABEL_KNOWN_AT_LOOKAHEAD_SUFFIX: Final[str] = "_known_at_lookahead" LABEL_COLUMNS: Final[tuple[str, ...]] = (EXTREMA_COLUMN,) @@ -550,7 +550,8 @@ def _label_aux_column_name(label_col: str, suffix: str) -> str: nor with ``find_features`` (which selects columns containing ``%``). Preserves the project convention where a leading ``s`` denotes a smoothed target series (e.g. ``&s-extrema``); no ``s`` denotes a raw target. - Raises ``ValueError`` if the result still contains ``&`` or ``%``. + Raises ``ValueError`` when the result contains ``&`` or ``%`` after + sigil strip. 
Examples: ``("&s-extrema", "_weight")`` -> ``"s-extrema_weight"`` @@ -575,7 +576,7 @@ def _label_aux_column_name(label_col: str, suffix: str) -> str: def label_weight_column_name(label_col: str) -> str: """Return the weight column name for a label column.""" - return _label_aux_column_name(label_col, LABEL_WEIGHT_SUFFIX) + return _label_aux_column_name(label_col, _LABEL_WEIGHT_SUFFIX) def label_known_at_lookahead_column_name(label_col: str) -> str: @@ -616,14 +617,15 @@ def _adapt_label_generator( Detects the canonical ``(dataframe, params, logger) -> LabelData`` shape by a positional parameter named ``logger`` at index 2 (with or - without a default). Generators without such a parameter are wrapped - to drop the logger argument at dispatch; defaulted positionals after - index 1 stay at their defaults. ``*args``, ``**kwargs``, keyword-only - ``logger``, fewer than 2 required positionals, more than 3 required - positionals, and 3 required positionals whose third name is not - ``logger`` raise ``ValueError`` at registration. Inspection runs - once at registration; dispatch in ``generate_label_data`` is a - direct call. + without a default). Generators with exactly 2 positional parameters + are wrapped to drop the logger argument at dispatch. Generators with + a 3rd positional parameter whose name is not ``logger`` raise + ``ValueError`` at registration, regardless of whether that parameter + is required or has a default. ``*args``, ``**kwargs``, keyword-only + ``logger``, fewer than 2 required positionals, and more than 3 + required positionals also raise ``ValueError``. Inspection runs once + at registration; dispatch in ``generate_label_data`` is a direct + call. 
""" sig = inspect.signature(generator) params = list(sig.parameters.values()) @@ -670,15 +672,14 @@ def _adapt_label_generator( f"required positional parameter(s); expected 2 " f"(dataframe, params) or 3 (dataframe, params, logger)" ) - has_logger_at_third = n_total >= 3 and positional[2].name == "logger" - if has_logger_at_third: + if n_total >= 3: + if positional[2].name != "logger": + raise ValueError( + f"Invalid label generator {generator!r}: third positional " + f"parameter is named {positional[2].name!r}, expected " + f"``logger``" + ) return cast(LabelGenerator, generator) - if n_required == 3: - raise ValueError( - f"Invalid label generator {generator!r}: third positional " - f"parameter is named {positional[2].name!r}, expected " - f"``logger``" - ) @functools.wraps(generator) def adapted( @@ -993,7 +994,7 @@ def migrate_config(config: dict[str, Any], logger: Logger) -> None: if old_section == new_section: logger.warning(f"{old_path!r} is deprecated, use {new_key!r} instead") else: - logger.warning(f"{old_path!r} has moved to {new_path!r}") + logger.warning(f"{old_path!r} is deprecated, use {new_path!r} instead") else: _delete_path(config, old_path) if old_section == new_section: @@ -1214,7 +1215,7 @@ def sanitize_and_renormalize( Non-finite or non-positive entries are treated as ``0``; rows in ``drop_mask`` are forced to ``0``. On collapse (no positive finite entry survives), returns ones on surviving rows and zeros on dropped - rows, rescaled so ``mean(out) == 1`` still holds. + rows, rescaled so ``mean(out) == 1``. ``context`` is the caller-supplied prefix attached to every warning and error emitted from this helper. @@ -3757,6 +3758,20 @@ def _validate_optuna_label_best_params( *, expected_selection_metadata: dict[str, Any] | None = None, ) -> dict[str, Any] | None: + """Validate an Optuna ``label`` best-params payload against the v2 schema. + + Returns the inner ``params`` dict on success; returns ``None`` on + rejection. 
Rejects non-dict input, missing or invalid ``schema_version``, + schema-version mismatch with ``_OPTUNA_LABEL_BEST_PARAMS_SCHEMA_VERSION``, + missing or invalid ``selection_metadata``, missing or invalid + ``selection_metadata.schema_version``, schema-version mismatch with + ``_OPTUNA_LABEL_SELECTION_SCHEMA_VERSION``, missing or invalid + ``label_period_candles`` / ``label_natr_multiplier`` / + ``label_horizon_candles``, and -- when ``expected_selection_metadata`` + is provided -- any drift between the stored and the caller's current + ``selection_metadata``. Every rejection emits a ``[<pair>]``-prefixed + warning when ``logger`` is provided. + """ if not isinstance(best_params, dict): if logger is not None: logger.warning(f"[{pair}] Ignoring Optuna label best params: not a dict") -- 2.53.0