``label_weight_column_name(LABEL_COLUMNS[0])``. Alignment runs before
any shuffle/split on ``features_filtered.index`` (a subset of
``unfiltered_df.index``) to avoid post-hoc reindex against shuffled
- data. When the weight column is absent, ``label_weights=None`` is
- forwarded to ``compose_sample_weights`` and only the base weights
- contribute.
+ data. The weight column is absent when ``label_weighting.strategy``
+ is ``'none'`` or when the label data has no indices (no per-label
+ importance applied); in that case ``label_weights=None`` is forwarded
+ to ``compose_sample_weights`` and only the base weights contribute.
"""
if not unfiltered_df.index.is_unique:
raise ValueError(
label_weights = unfiltered_df.loc[
features_filtered.index, weight_col
].to_numpy(dtype=float)
- logger.debug(f"label weight column active: {weight_col}")
+ logger.debug(f"label weight column active: {weight_col!r}")
else:
label_weights = None
- logger.warning(
- f"label weight column not found ({weight_col!r}); "
- f"falling back to base weights only"
+ logger.debug(
+ f"label weight column absent ({weight_col!r}); using base weights only"
)
return compose_sample_weights(
base_weights,
label_col, label_weighting["default"], label_weighting["columns"]
)
- label_weights = compute_label_weights(
- n_values=len(label_data.series),
- indices=label_data.indices,
- metrics=label_data.metrics,
- weighting_config=col_weighting_config,
+ # When weighting is inactive the weight column is never written, so
+ # downstream code falls back to base weights only.
+ is_weighting_active = (
+ col_weighting_config["strategy"] != WEIGHT_STRATEGIES[0] # "none"
+ and len(label_data.indices) > 0
)
- label_weight_col = label_weight_column_name(label_col)
-
dataframe[label_col] = label_data.series
- dataframe[label_weight_col] = label_weights
+
+ label_weight_col = label_weight_column_name(label_col)
+ if is_weighting_active:
+ dataframe[label_weight_col] = compute_label_weights(
+ n_values=len(label_data.series),
+ indices=label_data.indices,
+ metrics=label_data.metrics,
+ weighting_config=col_weighting_config,
+ )
if label_col == EXTREMA_COLUMN:
dataframe[EXTREMA_DIRECTION_COLUMN] = dataframe[label_col]
- dataframe[EXTREMA_WEIGHT_COLUMN] = dataframe[label_weight_col]
+ if is_weighting_active:
+ dataframe[EXTREMA_WEIGHT_COLUMN] = dataframe[label_weight_col]
col_smoothing_config = get_label_column_config(
label_col, label_smoothing["default"], label_smoothing["columns"]
)
dataframe[label_col] = smooth(dataframe[label_col], **col_smoothing_config)
- smoothed_label_weights = smooth(
- dataframe[label_weight_col], **col_smoothing_config
- )
- dataframe[label_weight_col] = smoothed_label_weights.where(
- np.isfinite(smoothed_label_weights) & smoothed_label_weights.gt(0),
- 0.0,
- )
+ if is_weighting_active:
+ smoothed_label_weights = smooth(
+ dataframe[label_weight_col], **col_smoothing_config
+ )
+ dataframe[label_weight_col] = smoothed_label_weights.where(
+ np.isfinite(smoothed_label_weights) & smoothed_label_weights.gt(0),
+ 0.0,
+ )
if label_col == EXTREMA_COLUMN:
dataframe[EXTREMA_DIRECTION_SMOOTHED_COLUMN] = dataframe[label_col]
- dataframe[EXTREMA_WEIGHT_SMOOTHED_COLUMN] = dataframe[label_weight_col]
+ if is_weighting_active:
+ dataframe[EXTREMA_WEIGHT_SMOOTHED_COLUMN] = dataframe[
+ label_weight_col
+ ]
return dataframe
}
-EXTREMA_COLUMN: Final = "&s-extrema"
-EXTREMA_DIRECTION_COLUMN: Final = "extrema_direction"
-EXTREMA_DIRECTION_SMOOTHED_COLUMN: Final = "extrema_direction_smoothed"
-EXTREMA_WEIGHT_COLUMN: Final = "extrema_weight"
-EXTREMA_WEIGHT_SMOOTHED_COLUMN: Final = "extrema_weight_smoothed"
+EXTREMA_COLUMN: Final[str] = "&s-extrema"
+EXTREMA_DIRECTION_COLUMN: Final[str] = "extrema_direction"
+EXTREMA_DIRECTION_SMOOTHED_COLUMN: Final[str] = "extrema_direction_smoothed"
+EXTREMA_WEIGHT_COLUMN: Final[str] = "extrema_weight"
+EXTREMA_WEIGHT_SMOOTHED_COLUMN: Final[str] = "extrema_weight_smoothed"
LABEL_WEIGHT_SUFFIX: Final[str] = "_weight"
LABEL_COLUMNS: Final[tuple[str, ...]] = (EXTREMA_COLUMN,)
-_FREQAI_LABEL_SIGIL_PATTERN: Final = re.compile(r"^&-?")
+_FREQAI_LABEL_SIGIL_PATTERN: Final[re.Pattern[str]] = re.compile(r"^&-?")
@lru_cache(maxsize=16)
)
-DEFAULT_LABEL_WEIGHT: Final[float] = 1.0
-
SPARSE_TRAINING_MASS_THRESHOLD: Final[float] = 0.05
DEFAULT_FIT_LIVE_PREDICTIONS_CANDLES: Final[int] = 100
)
fallback = np.ones(n, dtype=float)
if drop_mask is not None:
- fallback = np.where(drop_mask, 0.0, fallback)
- fb_total = fallback.sum()
+ masked = np.where(drop_mask, 0.0, fallback)
+ fb_total = masked.sum()
if fb_total > 0.0:
- fallback = fallback * (n / fb_total)
+ return masked * (n / fb_total)
+ if logger is not None:
+ logger.warning(
+ "sanitize_and_renormalize: drop_mask covers all rows in "
+ "fallback; ignoring mask to preserve mean=1 (context=%s)",
+ context or "unspecified",
+ )
return fallback
def _impute_weights(
weights: NDArray[np.floating],
- default_weight: float = DEFAULT_LABEL_WEIGHT,
+ default_weight: float = 1.0,
) -> NDArray[np.floating]:
weights = weights.astype(float, copy=True)
n_values: int,
indices: list[int],
weights: NDArray[np.floating],
- default_weight: float = DEFAULT_LABEL_WEIGHT,
+ default_weight: float,
) -> NDArray[np.floating]:
+ """Scatter per-pivot weights into a full-length array.
+
+ Non-pivot rows are filled with ``default_weight``. Callers pass ``0.0``
+ to exclude non-pivot rows from training (pivot-only weighting), or a
+ positive value to give them a baseline weight.
+ """
if len(indices) == 0 or weights.size == 0:
return np.full(n_values, default_weight, dtype=float)
coefficients = _parse_metric_coefficients(metric_coefficients)
if len(coefficients) == 0:
- coefficients = {k: DEFAULT_LABEL_WEIGHT for k in metrics.keys()}
+ coefficients = {k: 1.0 for k in metrics.keys()}
imputed_metrics: list[NDArray[np.floating]] = []
coefficients_list: list[float] = []
metrics: dict[str, list[float]],
weighting_config: dict[str, Any],
) -> NDArray[np.floating]:
+ """Compute per-row label importance weights.
+
+ Returns an array with positive values at pivot ``indices`` (scaled by
+ strategy) and ``0.0`` elsewhere. Callers must skip invocation when
+ strategy is ``'none'``; calling it with that strategy raises ValueError.
+ """
label_weighting = {**DEFAULTS_LABEL_WEIGHTING, **weighting_config}
strategy = label_weighting["strategy"]
- if len(indices) == 0 or strategy == WEIGHT_STRATEGIES[0]: # "none"
- return np.full(n_values, DEFAULT_LABEL_WEIGHT, dtype=float)
+ if strategy == WEIGHT_STRATEGIES[0]: # "none"
+ raise ValueError(
+ "compute_label_weights must not be called with strategy='none'; "
+ "callers must skip invocation when weighting is disabled"
+ )
weights: Optional[NDArray[np.floating]] = None