logger.info(
f" keep_fraction: {format_number(col_prediction['keep_fraction'])}"
)
+ if col_prediction["method"] == PREDICTION_METHODS[0]: # "none"
+ logger.warning(
+ f" Prediction method is 'none' for label [{label_col}]: "
+ f"minima_threshold/maxima_threshold will not be computed and "
+ f"entry signals based on them will never trigger."
+ )
default_label_period_candles, default_label_natr_multiplier = (
self._label_defaults
)
)
- train_weights = sanitize_and_renormalize(train_weights)
+ train_weights = sanitize_and_renormalize(
+ train_weights, logger=logger, context="train_test_split:train"
+ )
if test_size != 0:
- test_weights = sanitize_and_renormalize(test_weights)
+ test_weights = sanitize_and_renormalize(
+ test_weights, logger=logger, context="train_test_split:test"
+ )
if feat_dict.get("reverse_train_test_order", False):
return dk.build_data_dictionary(
dd["train_features"], dd["train_labels"], dd["train_weights"]
)
)
- dd["train_weights"] = sanitize_and_renormalize(dd["train_weights"])
+ dd["train_weights"] = sanitize_and_renormalize(
+ dd["train_weights"],
+ logger=logger,
+ context="post_feature_pipeline:train",
+ )
dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"])
if (
dd["test_features"], dd["test_labels"], dd["test_weights"]
)
)
- dd["test_weights"] = sanitize_and_renormalize(dd["test_weights"])
+ dd["test_weights"] = sanitize_and_renormalize(
+ dd["test_weights"],
+ logger=logger,
+ context="post_feature_pipeline:test",
+ )
dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"])
return dd
test_features = filtered_dataframe.iloc[test_idx]
train_labels = labels.iloc[train_idx]
test_labels = labels.iloc[test_idx]
- train_weights = sanitize_and_renormalize(weights[train_idx])
- test_weights = sanitize_and_renormalize(weights[test_idx])
+ train_weights = sanitize_and_renormalize(
+ weights[train_idx], logger=logger, context="timeseries_split:train"
+ )
+ test_weights = sanitize_and_renormalize(
+ weights[test_idx], logger=logger, context="timeseries_split:test"
+ )
if feat_dict.get("reverse_train_test_order", False):
return dk.build_data_dictionary(
dataframe[label_weight_col], **col_smoothing_config
)
dataframe[label_weight_col] = smoothed_label_weights.where(
- smoothed_label_weights.gt(0) & smoothed_label_weights.notna(), 0.0
+ np.isfinite(smoothed_label_weights) & smoothed_label_weights.gt(0),
+ 0.0,
)
if label_col == EXTREMA_COLUMN:
def sanitize_and_renormalize(
arr: NDArray[np.floating],
drop_mask: NDArray[np.bool_] | None = None,
+ *,
+ logger: Logger | None = None,
+ context: str | None = None,
) -> NDArray[np.floating]:
arr = np.asarray(arr, dtype=float)
if arr.size == 0:
total = safe.sum()
if total > 0 and np.isfinite(total):
return safe * (len(safe) / total)
+ if logger is not None:
+ logger.warning(
+ "sanitize_and_renormalize: weights collapsed (context=%s, total=%r, "
+ "n=%d); falling back to uniform weights",
+ context or "unspecified",
+ total,
+ len(arr),
+ )
fallback = np.ones_like(arr)
if drop_mask is not None:
fallback[drop_mask] = 0.0
if weights.size == 0:
return np.full_like(weights, default_weight, dtype=float)
- # Weights computed by `zigzag` can be NaN on boundary pivots
+ # Zigzag emits NaN at unconfirmed boundary pivots; zero them out and
+ # exclude from the median so they don't drag interior imputation.
+ boundary_mask = np.zeros(weights.size, dtype=bool)
if not np.isfinite(weights[0]):
- weights[0] = 0.0
+ boundary_mask[0] = True
if not np.isfinite(weights[-1]):
- weights[-1] = 0.0
+ boundary_mask[-1] = True
finite_mask = np.isfinite(weights)
- if not finite_mask.any():
- return np.full_like(weights, default_weight, dtype=float)
+ interior_finite_mask = finite_mask & ~boundary_mask
+ if not interior_finite_mask.any():
+ weights[~finite_mask] = default_weight
+ weights[boundary_mask] = 0.0
+ return weights
- median_weight = np.nanmedian(weights[finite_mask])
+ median_weight = np.nanmedian(weights[interior_finite_mask])
if not np.isfinite(median_weight):
median_weight = default_weight
weights[~finite_mask] = median_weight
+ weights[boundary_mask] = 0.0
return weights