From 1db36f00f31f9a1148a556c82ba81c1b3f0e1d54 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Sat, 20 Jun 2026 19:31:46 +0200 Subject: [PATCH] feat(label_weighting): add epsilon_gaussian fill_method, fix sub-floor pivot-row dip (#84) Add a fourth off-pivot weighting mode that superposes the epsilon floor and the gaussian bumps additively, and fix a related defect in _scatter_weights that allowed pivot rows to sit *below* the off-pivot field whenever that field at the pivot's index could legitimately exceed the pivot's raw weight. Math ---- Define the off-pivot field f(i) = phi + max_{p in P} w_p * exp(-(i - p)^2 / (2 * sigma_p^2)) with phi = eps * B(W) (B mean or median, eps in [0, 1]; phi = 0 on empty pivots or non-finite baseline), and per-pivot sigma_p from _compute_pivot_sigmas (fixed or k-NN). The combined formulation reuses both existing closed forms verbatim: fill_method = 'zero' -> f(i) = 0 fill_method = 'epsilon' -> f(i) = phi (constant in i) fill_method = 'gaussian' -> f(i) = max_p w_p * exp(...) fill_method = 'epsilon_gaussian' -> f(i) = phi + max_p w_p * exp(...) Bound: phi <= f(i) <= phi + max_p w_p. The new mode reduces to pure gaussian when eps = 0 (bit-identical). The reduction is a per-row max over per-pivot Gaussian bumps; phi is the epsilon floor. Sub-floor / sub-bump pivot-row dip (bug fix) -------------------------------------------- Before this change _scatter_weights wrote out[p] = w_p unconditionally, so a pivot whose raw weight was below the off-pivot field at its index appeared as a sharp dip relative to its neighbors. Two manifestations of the same defect class: - 'epsilon' / 'epsilon_gaussian' (sub-floor): a pivot with w_p < phi (e.g. W = (0.001, 1.0, 1.0) with eps = 0.5 and B = median, phi = 0.5) sat at 0.001 while neighbor rows sat at phi. - 'gaussian' / 'epsilon_gaussian' (sub-bump): a weak pivot with a strong neighbor (e.g. 
W = (0.001, 1.0) at indices (0, 1), sigma = 1) sat at 0.001 while the off-pivot field at the pivot's own index was 1.0 * exp(-0.5) ~= 0.6065 (the neighbor's gaussian bump). Both cases are corrected by a single uniform change: _scatter_weights now writes out[p] = max(w_p, fill[p]) so pivot rows are never written below the off-pivot field. 'zero' is bit-identical (fill is always 0, so max(w_p, 0) = w_p when w_p >= 0). 'gaussian' in the sparse-pivot regime (the typical configuration, especially with k-NN bandwidth) is also bit-identical because fill[p] equals w_p when no neighbor's bump at p exceeds w_p. Implementation -------------- - _scatter_weights: pivot rows take np.maximum(weights, fill_weights) unconditionally. Off-pivot rows unchanged. - _compute_epsilon_floor (renamed from _epsilon_floor): extracted helper that returns phi (mean / median / fallback). Reused by 'epsilon' and 'epsilon_gaussian'. Parameter baseline narrowed to the FillEpsilonBaseline Literal type. - _compute_gaussian_bumps (renamed from _gaussian_bumps): extracted adapter over _gaussian_fill_weights. Reused by 'gaussian' and 'epsilon_gaussian'. logger is kwarg-only. - compute_label_weights: dispatcher gains the FILL_METHODS[3] branch. The combined branch computes bumps once and adds phi in-place via np.add(out=fill_weights), keeping peak memory at the existing (chunk, M) buffer; phi is constant in p so the post-reduction add is algebraically identical to adding inside the chunk loop while saving O(chunk * M) writes. ValueError messages tightened to include 'supported values are ...' for parity with _compute_pivot_sigmas and _aggregate_metrics. - LabelTransformer.py: extends FillMethod Literal and FILL_METHODS tuple with 'epsilon_gaussian' at index 3. No new tunables, no new validators (the existing _EnumValidator(FILL_METHODS) picks up the new value automatically; existing range / type validators on fill_epsilon / fill_sigma_* / fill_bandwidth_* apply unchanged). 
- QuickAdapterV3.py: logging block refactored from if/elif chain to parallel if blocks keyed on tuple membership so epsilon and sigma parameter groups emit independently for each mode that uses them. Documentation ------------- README cells updated with set-membership 'Ignored when ...' clauses matching the new index sets (epsilon | epsilon_gaussian for the floor parameters, gaussian | epsilon_gaussian for the kernel parameters). The fill_method description names the additive composition explicitly and the pivot-row lift invariant (out[p] = max(w_p, f(p))). Verified manually on the host via AST extraction harness (no automated test infrastructure exists in quickadapter/): - zero mode: bit-exact with prior code (fill is 0, max(w_p, 0) = w_p). - gaussian mode, sparse pivots: bit-identical to prior code (no neighbor's bump at p exceeds w_p, so the lift is a no-op). - gaussian mode, neighbor-dominated regime: pivot rows lifted to the local field max, fixing the sub-bump dip. Verified with the counterexample W = (0.001, 1.0) at indices (0, 1), sigma = 1: legacy out[0] = 0.001, fixed out[0] = 1.0 * exp(-0.5) ~= 0.6065. - epsilon back-compat (above-floor pivots): phi = eps * mean(W) reproduced; pivots above phi unchanged. - epsilon pivot-dip fix: W = (0.001, 1.0, 1.0), eps = 0.5, baseline = median; legacy out[0] = 0.001, fixed out[0] = phi = 0.5. - epsilon_gaussian with eps = 0: bit-identical to pure gaussian. - epsilon_gaussian additive decomposition: out_eg - out_g = phi at every off-pivot row. - epsilon_gaussian pivot-row lifted: W = (0.001, 1.0, 1.0) at well-separated indices (e.g. (0, 100, 200)), eps = 0.5, baseline = median, sigma = 2.0; out[0] = phi + 0.001 ~= 0.501 (was 0.001 before the scatter fix). - empty pivots: all four modes return all-zero. - negative pivot weights still rejected by _gaussian_fill_weights. - knn bandwidth + epsilon_gaussian: finite, bounded below by phi. 
- ValueError messages on invalid fill_method / fill_epsilon_baseline include 'supported values are ...'. --- README.md | 204 +++++++++--------- .../user_data/strategies/LabelTransformer.py | 7 +- .../user_data/strategies/QuickAdapterV3.py | 13 +- quickadapter/user_data/strategies/Utils.py | 130 ++++++++--- 4 files changed, 215 insertions(+), 139 deletions(-) diff --git a/README.md b/README.md index 6aedc15..3f25d15 100644 --- a/README.md +++ b/README.md @@ -37,108 +37,108 @@ docker compose up -d --build ### Configuration tunables -| Path | Default | Type / Range | Description | -| -------------------------------------------------------------- | ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| _Protections_ | | | | -| custom_protections.trade_duration_candles | 72 | int >= 1 | Estimated trade duration in candles. Scales protections stop duration candles and trade limit. | -| custom_protections.lookback_period_fraction | 0.5 | float (0,1] | Fraction of `fit_live_predictions_candles` used to calculate `lookback_period_candles` for _MaxDrawdown_ and _StoplossGuard_ protections. | -| custom_protections.cooldown.enabled | true | bool | Enable/disable _CooldownPeriod_ protection. 
| -| custom_protections.cooldown.stop_duration_candles | 4 | int >= 1 | Number of candles to wait before allowing new trades after a trade is closed. | -| custom_protections.drawdown.enabled | true | bool | Enable/disable _MaxDrawdown_ protection. | -| custom_protections.drawdown.max_allowed_drawdown | 0.2 | float (0,1) | Maximum allowed drawdown. | -| custom_protections.stoploss.enabled | true | bool | Enable/disable _StoplossGuard_ protection. | -| _Leverage_ | | | | -| leverage | `proposed_leverage` | float [1.0, max_leverage] | Leverage. Fallback to `proposed_leverage` for the pair. | -| _Exit pricing_ | | | | -| exit_pricing.trade_price_target_method | `moving_average` | enum {`moving_average`,`quantile_interpolation`,`weighted_average`} | Trade NATR computation method. | -| exit_pricing.thresholds_calibration.decline_quantile | 0.5 | float (0,1) | PnL decline quantile threshold. | -| _Reversal confirmation_ | | | | -| reversal_confirmation.lookback_period_candles | 0 | int >= 0 | Prior confirming candles; 0 = none. | -| reversal_confirmation.decay_fraction | 0.5 | float (0,1] | Geometric per-candle volatility adjusted reversal threshold relaxation factor. | -| reversal_confirmation.min_natr_multiplier_fraction | 0.0095 | float [0,1] | Lower bound fraction for volatility adjusted reversal threshold. | -| reversal_confirmation.max_natr_multiplier_fraction | 0.0125 | float [0,1] | Upper bound fraction (>= lower bound) for volatility adjusted reversal threshold. | -| _Regressor model_ | | | | -| freqai.regressor | `xgboost` | enum {`xgboost`,`lightgbm`,`histgradientboostingregressor`,`ngboost`,`catboost`} | Machine learning regressor algorithm. | -| _Model training parameters_ | | | | -| freqai.model_training_parameters.gpu_vram_gb | 80 | enum {8,10,12,16,24,32,40,48,64,80} | Available GPU VRAM (GB) for CatBoost, not total. Constrains `depth`, `border_count`, and `max_ctr_complexity` ranges. 
| -| _Data split parameters_ | | | | -| freqai.data_split_parameters.method | `train_test_split` | enum {`train_test_split`,`timeseries_split`} | Data splitting strategy. `train_test_split` for sequential split, `timeseries_split` for chronological split with configurable gap. | -| freqai.data_split_parameters.test_size | 0.1 / None | float (0,1) \| int >= 1 \| None | Test set size. Float for fraction, int for count. Default: 0.1 for `train_test_split`, None for `timeseries_split` (sklearn dynamic sizing). | -| freqai.data_split_parameters.n_splits | 5 | int >= 2 | Controls train/test proportions for `timeseries_split` (higher = larger train set). | -| freqai.data_split_parameters.gap | 0 | int >= 0 | Samples to exclude between train/test for `timeseries_split`. When 0, auto-calculated from `label_period_candles` to prevent look-ahead bias. | -| freqai.data_split_parameters.max_train_size | None | int >= 1 \| None | Maximum training set size for `timeseries_split`. When set, creates a sliding window instead of expanding train set. None = no limit. | -| _Label smoothing_ | | | | -| freqai.label_smoothing.method | `gaussian` | enum {`none`,`gaussian`,`kaiser`,`kaiser_bessel_derived`,`triang`,`smm`,`sma`,`savgol`,`gaussian_filter1d`} | Label smoothing method (`kaiser_bessel_derived` uses an even-length Kaiser-Bessel-derived zero-phase kernel; `smm`=median, `sma`=mean, `savgol`=Savitzky–Golay). | -| freqai.label_smoothing.window_candles | 5 | int >= 3 | Smoothing window length (candles). | -| freqai.label_smoothing.beta | 8.0 | float > 0 | Shape parameter for `kaiser` and `kaiser_bessel_derived` kernels. | -| freqai.label_smoothing.polyorder | 3 | int >= 0 | Polynomial order for `savgol` smoothing. | -| freqai.label_smoothing.mode | `mirror` | enum {`mirror`,`constant`,`nearest`,`wrap`,`interp`} | Boundary mode for `savgol` and `gaussian_filter1d`. | -| freqai.label_smoothing.sigma | 1.0 | float > 0 | Gaussian `sigma` for `gaussian_filter1d` smoothing. 
| -| _Label weighting_ | | | | -| freqai.label_weighting.strategy | `none` | enum {`none`,`uniform`,`amplitude`,`amplitude_threshold_ratio`,`volume_rate`,`speed`,`efficiency_ratio`,`volume_weighted_efficiency_ratio`,`combined`} | Label weighting metric: none (`none`), uniform unit weight on every detected pivot (`uniform`), swing amplitude (`amplitude`), swing amplitude / median volatility-threshold ratio (`amplitude_threshold_ratio`), swing volume per candle (`volume_rate`), swing speed (`speed`), swing efficiency ratio (`efficiency_ratio`), swing volume-weighted efficiency ratio (`volume_weighted_efficiency_ratio`), or combined metrics aggregation (`combined`). Switching between `none` and any other strategy requires deleting trained models to realign training emphasis. | -| freqai.label_weighting.metric_coefficients | {} | dict[str, float] | Per-metric coefficients for `combined` strategy. Keys: `amplitude`, `amplitude_threshold_ratio`, `volume_rate`, `speed`, `efficiency_ratio`, `volume_weighted_efficiency_ratio`. | -| freqai.label_weighting.aggregation | `arithmetic_mean` | enum {`arithmetic_mean`,`geometric_mean`,`harmonic_mean`,`quadratic_mean`,`weighted_median`,`softmax`} | Metric aggregation method for `combined` strategy. `arithmetic_mean`=(Σ(w·m)/Σ(w)), `geometric_mean`=(∏(m^w))^(1/Σw), `harmonic_mean`=Σ(w)/(Σ(w/m)), `quadratic_mean`=(Σ(w·m²)/Σ(w))^(1/2), `weighted_median`=Q₀.₅(m,w), `softmax`=Σ(m·s_i) where s_i=w_i·exp(m_i/T)/Σ(w_j·exp(m_j/T)). | -| freqai.label_weighting.softmax_temperature | 1.0 | float > 0 | Temperature T for `softmax` aggregation, controls distribution sharpness. | -| freqai.label_weighting.fill_method | `zero` | enum {`zero`,`epsilon`,`gaussian`} | Off-pivot weighting scheme. `zero` hard-zeros off-pivot rows; `epsilon` applies a flat baseline `fill_epsilon * (pivot_weights)`; `gaussian` applies heatmap-style decay around each pivot. 
Switching away from `zero` may require retuning tree-leaf regularization (`min_child_weight`, `lambda`) and resetting any prior Optuna study. Changing this parameter requires deleting trained models. | -| freqai.label_weighting.fill_epsilon | 0.000001 | float [0,1] | Off-pivot fraction of the pivot baseline. Ignored when `fill_method != "epsilon"`. | -| freqai.label_weighting.fill_epsilon_baseline | `mean` | enum {`mean`,`median`} | Pivot baseline statistic. `mean` tracks central tendency; `median` is robust against pivot-weight skew. Ignored when `fill_method != "epsilon"`. | -| freqai.label_weighting.fill_sigma_candles | 10.0 | float >= 0.5 | Gaussian standard deviation in candles for `fill_method == "gaussian"`. Acts as the upper bound on per-pivot sigma when `fill_bandwidth == "knn"`. Lower bound 0.5 prevents severe underflow in the Gaussian tail. Ignored when `fill_method != "gaussian"`. | -| freqai.label_weighting.fill_sigma_min_candles | 0.5 | float >= 0.5 | Lower bound on per-pivot sigma in candles when `fill_bandwidth == "knn"`. Clipped to `fill_sigma_candles` when larger. Ignored when `fill_method != "gaussian"` or `fill_bandwidth != "knn"`. | -| freqai.label_weighting.fill_bandwidth | `fixed` | enum {`fixed`,`knn`} | Per-pivot Gaussian bandwidth selector. `fixed` applies a constant `fill_sigma_candles` to every pivot (legacy behavior). `knn` adapts each pivot's sigma to local pivot density via `sigma_p = clip(fill_bandwidth_alpha * d_k(p), fill_sigma_min_candles, fill_sigma_candles)` where `d_k(p)` is the index distance to the `k`-th nearest pivot neighbor (Loftsgaarden & Quesenberry 1965; Silverman 1986, §5.2). Mitigates the crushing of weaker pivots by stronger neighbors in dense clusters. Ignored when `fill_method != "gaussian"`. | -| freqai.label_weighting.fill_bandwidth_neighbors | 1 | int >= 1 | `k` for the k-nearest-neighbor bandwidth selector. Ignored when `fill_method != "gaussian"` or `fill_bandwidth != "knn"`. 
| -| freqai.label_weighting.fill_bandwidth_alpha | 0.5 | float > 0 | Multiplicative factor on the k-th neighbor distance. Smaller values produce sharper, more separated Gaussians; larger values approach the `fixed` behavior. Ignored when `fill_method != "gaussian"` or `fill_bandwidth != "knn"`. | -| _Label pipeline_ | | | | -| freqai.label_pipeline.standardization | `none` | enum {`none`,`zscore`,`robust`,`mmad`,`power_yj`} | Standardization method applied to labels before normalization. `none`=w, `zscore`=(w-μ)/σ, `robust`=(w-median)/(Q₃-Q₁), `mmad`=(w-median)/(MAD·k), `power_yj`=YJ(w). | -| freqai.label_pipeline.robust_quantiles | [0.25, 0.75] | list[float] where 0 <= Q1 < Q3 <= 1 | Quantile range for robust standardization, Q1 and Q3. | -| freqai.label_pipeline.mmad_scaling_factor | 1.4826 | float > 0 | Scaling factor for MMAD standardization. | -| freqai.label_pipeline.normalization | `maxabs` | enum {`maxabs`,`minmax`,`sigmoid`,`none`} | Normalization method applied to labels. `maxabs`=w/max(\|w\|), `minmax`=low+(w-min)/(max-min)·(high-low), `sigmoid`=2·σ(scale·w)-1, `none`=w. | -| freqai.label_pipeline.minmax_range | [-1.0, 1.0] | list[float] | Target range for `minmax` normalization, min and max. | -| freqai.label_pipeline.sigmoid_scale | 1.0 | float > 0 | Scale parameter for `sigmoid` normalization, controls steepness. | -| freqai.label_pipeline.gamma | 1.0 | float (0,10] | Contrast exponent applied to labels after normalization: >1 emphasizes extrema, values between 0 and 1 soften. | -| _Feature parameters_ | | | | -| freqai.feature_parameters.label_period_candles | min/max midpoint | int >= 1 | Zigzag labeling NATR horizon. | -| freqai.feature_parameters.min_label_period_candles | 12 | int >= 1 | Minimum labeling NATR horizon used for reversals labeling HPO. | -| freqai.feature_parameters.max_label_period_candles | 24 | int >= 1 | Maximum labeling NATR horizon used for reversals labeling HPO. 
| -| freqai.feature_parameters.label_natr_multiplier | min/max midpoint | float > 0 | Zigzag labeling NATR multiplier. | -| freqai.feature_parameters.min_label_natr_multiplier | 9.0 | float > 0 | Minimum labeling NATR multiplier used for reversals labeling HPO. | -| freqai.feature_parameters.max_label_natr_multiplier | 12.0 | float > 0 | Maximum labeling NATR multiplier used for reversals labeling HPO. | -| freqai.feature_parameters.label_frequency_candles | `auto` | int >= 2 \| `auto` | Reversals labeling frequency. `auto` = max(2, 2 \* number of whitelisted pairs). | -| freqai.feature_parameters.label_weights | [1/7,1/7,1/7,1/7,1/7,1/7,1/7] | list[float] | Per-objective weights for trial selection methods. Objectives: (1) number of detected reversals, (2) median swing amplitude, (3) median (swing amplitude / median volatility-threshold ratio), (4) median swing volume per candle, (5) median swing speed, (6) median swing efficiency ratio, (7) median swing volume-weighted efficiency ratio. | -| freqai.feature_parameters.label_p_order | None | float \| None | Lp exponent for parameterized metrics. Used by `minkowski` distance (default 2.0) and `power_mean` aggregation (default 1.0). Ignored by other metrics. | -| freqai.feature_parameters.label_method | `compromise_programming` | enum {`compromise_programming`,`topsis`,`kmeans`,`kmeans2`,`kmedoids`,`knn`,`medoid`} | HPO `label` Pareto front trial selection method. | -| freqai.feature_parameters.label_distance_metric | `euclidean` | string | Distance metric for `compromise_programming` and `topsis` methods. | -| freqai.feature_parameters.label_cluster_metric | `euclidean` | string | Distance metric for `kmeans`, `kmeans2`, and `kmedoids` methods. | -| freqai.feature_parameters.label_cluster_selection_method | `topsis` | enum {`compromise_programming`,`topsis`} | Cluster selection method for clustering-based label methods. 
| -| freqai.feature_parameters.label_cluster_trial_selection_method | `topsis` | enum {`compromise_programming`,`topsis`} | Best cluster trial selection method for clustering-based label methods. | -| freqai.feature_parameters.label_density_metric | method-dependent | string | Distance metric for `knn` and `medoid` methods. | -| freqai.feature_parameters.label_density_aggregation | `power_mean` | enum {`power_mean`,`quantile`,`min`,`max`} | Aggregation method for KNN neighbor distances. | -| freqai.feature_parameters.label_density_n_neighbors | 5 | int >= 1 | Number of neighbors for KNN. | -| freqai.feature_parameters.label_density_aggregation_param | aggregation-dependent | float \| None | Tunable for KNN neighbor distance aggregation: Lp exponent (`power_mean`) or quantile value (`quantile`). | -| freqai.feature_parameters.scaler | `minmax` | enum {`minmax`,`maxabs`,`standard`,`robust`} | Feature scaling method. `minmax`=MinMaxScaler, `maxabs`=MaxAbsScaler, `standard`=StandardScaler, `robust`=RobustScaler. Changing this parameter requires deleting trained models. | -| freqai.feature_parameters.range | [-1.0, 1.0] | list[float] | Target range for `minmax` scaler, min and max. Changing this parameter requires deleting trained models. | -| _Label prediction_ | | | | -| freqai.label_prediction.method | `thresholding` | enum {`none`,`thresholding`} | Prediction method. `none` disables threshold computation, `thresholding` enables adaptive threshold calculation. | -| freqai.label_prediction.selection_method | `rank_extrema` | enum {`rank_extrema`,`rank_peaks`,`partition`} | Extrema selection method. `rank_extrema` ranks extrema values, `rank_peaks` ranks detected peak values, `partition` uses sign-based partitioning. | -| freqai.label_prediction.threshold_method | `mean` | enum {`mean`,`isodata`,`li`,`minimum`,`otsu`,`triangle`,`yen`,`median`,`soft_extremum`} | Thresholding method for prediction thresholds. 
| -| freqai.label_prediction.soft_extremum_alpha | 12.0 | float >= 0 | Alpha for `soft_extremum` threshold method. | -| freqai.label_prediction.outlier_quantile | 0.999 | float (0,1) | Quantile threshold for predictions outlier filtering. | -| freqai.label_prediction.keep_fraction | 0.0075 | float (0,1] | Fraction of extrema used for thresholds. 1 uses all, lower values keep only most significant. Applies to `rank_extrema` and `rank_peaks`; ignored for `partition`. | -| _Optuna / HPO_ | | | | -| freqai.optuna_hyperopt.enabled | false | bool | Enables HPO. | -| freqai.optuna_hyperopt.sampler | `tpe` | enum {`tpe`,`auto`} | HPO sampler algorithm for `hp` namespace. `tpe` uses [TPESampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.TPESampler.html) with multivariate, group, and constant_liar (when multiple workers), `auto` uses [AutoSampler](https://hub.optuna.org/samplers/auto_sampler). | -| freqai.optuna_hyperopt.label_sampler | `auto` | enum {`auto`,`tpe`,`nsgaii`,`nsgaiii`} | HPO sampler algorithm for multi-objective `label` namespace. `nsgaii` uses [NSGAIISampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.NSGAIISampler.html), `nsgaiii` uses [NSGAIIISampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.NSGAIIISampler.html). | -| freqai.optuna_hyperopt.storage | `file` | enum {`file`,`sqlite`} | HPO storage backend. | -| freqai.optuna_hyperopt.continuous | true | bool | Continuous HPO. | -| freqai.optuna_hyperopt.warm_start | true | bool | Warm start HPO with previous best value(s). | -| freqai.optuna_hyperopt.n_startup_trials | 15 | int >= 0 | HPO startup trials. | -| freqai.optuna_hyperopt.n_trials | 50 | int >= 1 | Maximum HPO trials. | -| freqai.optuna_hyperopt.n_jobs | CPU threads / 4 | int >= 1 | Parallel HPO workers. | -| freqai.optuna_hyperopt.timeout | 7200 | int >= 0 | HPO wall-clock timeout in seconds. 
| -| freqai.optuna_hyperopt.label_candles_step | 1 | int >= 1 | Step for Zigzag NATR horizon `label` search space. | -| freqai.optuna_hyperopt.space_reduction | false | bool | Enable/disable `hp` search space reduction based on previous best parameters. | -| freqai.optuna_hyperopt.space_fraction | 0.4 | float [0,1] | Fraction of the `hp` search space to use with `space_reduction`. Lower values create narrower search ranges around the best parameters. | -| freqai.optuna_hyperopt.min_resource | 3 | int >= 1 | Minimum resource per [HyperbandPruner](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.HyperbandPruner.html) rung. | -| freqai.optuna_hyperopt.seed | 1 | int >= 0 | HPO RNG seed. | +| Path | Default | Type / Range | Description | +| -------------------------------------------------------------- | ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| _Protections_ | | | | +| custom_protections.trade_duration_candles | 72 | int >= 1 | Estimated trade duration in candles. Scales protections stop duration candles and trade limit. | +| custom_protections.lookback_period_fraction | 0.5 | float (0,1] | Fraction of `fit_live_predictions_candles` used to calculate `lookback_period_candles` for _MaxDrawdown_ and _StoplossGuard_ protections. 
| +| custom_protections.cooldown.enabled | true | bool | Enable/disable _CooldownPeriod_ protection. | +| custom_protections.cooldown.stop_duration_candles | 4 | int >= 1 | Number of candles to wait before allowing new trades after a trade is closed. | +| custom_protections.drawdown.enabled | true | bool | Enable/disable _MaxDrawdown_ protection. | +| custom_protections.drawdown.max_allowed_drawdown | 0.2 | float (0,1) | Maximum allowed drawdown. | +| custom_protections.stoploss.enabled | true | bool | Enable/disable _StoplossGuard_ protection. | +| _Leverage_ | | | | +| leverage | `proposed_leverage` | float [1.0, max_leverage] | Leverage. Fallback to `proposed_leverage` for the pair. | +| _Exit pricing_ | | | | +| exit_pricing.trade_price_target_method | `moving_average` | enum {`moving_average`,`quantile_interpolation`,`weighted_average`} | Trade NATR computation method. | +| exit_pricing.thresholds_calibration.decline_quantile | 0.5 | float (0,1) | PnL decline quantile threshold. | +| _Reversal confirmation_ | | | | +| reversal_confirmation.lookback_period_candles | 0 | int >= 0 | Prior confirming candles; 0 = none. | +| reversal_confirmation.decay_fraction | 0.5 | float (0,1] | Geometric per-candle volatility adjusted reversal threshold relaxation factor. | +| reversal_confirmation.min_natr_multiplier_fraction | 0.0095 | float [0,1] | Lower bound fraction for volatility adjusted reversal threshold. | +| reversal_confirmation.max_natr_multiplier_fraction | 0.0125 | float [0,1] | Upper bound fraction (>= lower bound) for volatility adjusted reversal threshold. | +| _Regressor model_ | | | | +| freqai.regressor | `xgboost` | enum {`xgboost`,`lightgbm`,`histgradientboostingregressor`,`ngboost`,`catboost`} | Machine learning regressor algorithm. | +| _Model training parameters_ | | | | +| freqai.model_training_parameters.gpu_vram_gb | 80 | enum {8,10,12,16,24,32,40,48,64,80} | Available GPU VRAM (GB) for CatBoost, not total. 
Constrains `depth`, `border_count`, and `max_ctr_complexity` ranges. | +| _Data split parameters_ | | | | +| freqai.data_split_parameters.method | `train_test_split` | enum {`train_test_split`,`timeseries_split`} | Data splitting strategy. `train_test_split` for sequential split, `timeseries_split` for chronological split with configurable gap. | +| freqai.data_split_parameters.test_size | 0.1 / None | float (0,1) \| int >= 1 \| None | Test set size. Float for fraction, int for count. Default: 0.1 for `train_test_split`, None for `timeseries_split` (sklearn dynamic sizing). | +| freqai.data_split_parameters.n_splits | 5 | int >= 2 | Controls train/test proportions for `timeseries_split` (higher = larger train set). | +| freqai.data_split_parameters.gap | 0 | int >= 0 | Samples to exclude between train/test for `timeseries_split`. When 0, auto-calculated from `label_period_candles` to prevent look-ahead bias. | +| freqai.data_split_parameters.max_train_size | None | int >= 1 \| None | Maximum training set size for `timeseries_split`. When set, creates a sliding window instead of expanding train set. None = no limit. | +| _Label smoothing_ | | | | +| freqai.label_smoothing.method | `gaussian` | enum {`none`,`gaussian`,`kaiser`,`kaiser_bessel_derived`,`triang`,`smm`,`sma`,`savgol`,`gaussian_filter1d`} | Label smoothing method (`kaiser_bessel_derived` uses an even-length Kaiser-Bessel-derived zero-phase kernel; `smm`=median, `sma`=mean, `savgol`=Savitzky–Golay). | +| freqai.label_smoothing.window_candles | 5 | int >= 3 | Smoothing window length (candles). | +| freqai.label_smoothing.beta | 8.0 | float > 0 | Shape parameter for `kaiser` and `kaiser_bessel_derived` kernels. | +| freqai.label_smoothing.polyorder | 3 | int >= 0 | Polynomial order for `savgol` smoothing. | +| freqai.label_smoothing.mode | `mirror` | enum {`mirror`,`constant`,`nearest`,`wrap`,`interp`} | Boundary mode for `savgol` and `gaussian_filter1d`. 
| +| freqai.label_smoothing.sigma | 1.0 | float > 0 | Gaussian `sigma` for `gaussian_filter1d` smoothing. | +| _Label weighting_ | | | | +| freqai.label_weighting.strategy | `none` | enum {`none`,`uniform`,`amplitude`,`amplitude_threshold_ratio`,`volume_rate`,`speed`,`efficiency_ratio`,`volume_weighted_efficiency_ratio`,`combined`} | Label weighting metric: none (`none`), uniform unit weight on every detected pivot (`uniform`), swing amplitude (`amplitude`), swing amplitude / median volatility-threshold ratio (`amplitude_threshold_ratio`), swing volume per candle (`volume_rate`), swing speed (`speed`), swing efficiency ratio (`efficiency_ratio`), swing volume-weighted efficiency ratio (`volume_weighted_efficiency_ratio`), or combined metrics aggregation (`combined`). Switching between `none` and any other strategy requires deleting trained models to realign training emphasis. | +| freqai.label_weighting.metric_coefficients | {} | dict[str, float] | Per-metric coefficients for `combined` strategy. Keys: `amplitude`, `amplitude_threshold_ratio`, `volume_rate`, `speed`, `efficiency_ratio`, `volume_weighted_efficiency_ratio`. | +| freqai.label_weighting.aggregation | `arithmetic_mean` | enum {`arithmetic_mean`,`geometric_mean`,`harmonic_mean`,`quadratic_mean`,`weighted_median`,`softmax`} | Metric aggregation method for `combined` strategy. `arithmetic_mean`=(Σ(w·m)/Σ(w)), `geometric_mean`=(∏(m^w))^(1/Σw), `harmonic_mean`=Σ(w)/(Σ(w/m)), `quadratic_mean`=(Σ(w·m²)/Σ(w))^(1/2), `weighted_median`=Q₀.₅(m,w), `softmax`=Σ(m·s_i) where s_i=w_i·exp(m_i/T)/Σ(w_j·exp(m_j/T)). | +| freqai.label_weighting.softmax_temperature | 1.0 | float > 0 | Temperature T for `softmax` aggregation, controls distribution sharpness. | +| freqai.label_weighting.fill_method | `zero` | enum {`zero`,`epsilon`,`gaussian`,`epsilon_gaussian`} | Off-pivot weighting scheme. 
`zero` hard-zeros off-pivot rows; `epsilon` applies the epsilon floor `fill_epsilon * baseline(pivot_weights)`; `gaussian` applies per-pivot Gaussian bumps; `epsilon_gaussian` sums the `epsilon` floor and the `gaussian` bumps. Pivot rows take the max of their raw weight and the off-pivot field at their index (no-op for `zero`). Switching away from `zero` may require retuning tree-leaf regularization (`min_child_weight`, `lambda`) and resetting any prior Optuna study. Changing this parameter requires deleting trained models. | +| freqai.label_weighting.fill_epsilon | 0.000001 | float [0,1] | Off-pivot fraction of the pivot baseline. Ignored when `fill_method` not in {`epsilon`,`epsilon_gaussian`}. | +| freqai.label_weighting.fill_epsilon_baseline | `mean` | enum {`mean`,`median`} | Pivot baseline statistic. `mean` tracks central tendency; `median` is robust against pivot-weight skew. Ignored when `fill_method` not in {`epsilon`,`epsilon_gaussian`}. | +| freqai.label_weighting.fill_sigma_candles | 10.0 | float >= 0.5 | Gaussian standard deviation in candles for the per-pivot bumps. Acts as the upper bound on per-pivot sigma when `fill_bandwidth == "knn"`. Lower bound 0.5 prevents severe underflow in the Gaussian tail. Ignored when `fill_method` not in {`gaussian`,`epsilon_gaussian`}. | +| freqai.label_weighting.fill_sigma_min_candles | 0.5 | float >= 0.5 | Lower bound on per-pivot sigma in candles when `fill_bandwidth == "knn"`. Clipped to `fill_sigma_candles` when larger. Ignored when `fill_method` not in {`gaussian`,`epsilon_gaussian`} or `fill_bandwidth != "knn"`. | +| freqai.label_weighting.fill_bandwidth | `fixed` | enum {`fixed`,`knn`} | Per-pivot Gaussian bandwidth selector. `fixed` applies a constant `fill_sigma_candles` to every pivot (legacy behavior). 
`knn` adapts each pivot's sigma to local pivot density via `sigma_p = clip(fill_bandwidth_alpha * d_k(p), fill_sigma_min_candles, fill_sigma_candles)` where `d_k(p)` is the index distance to the `k`-th nearest pivot neighbor (Loftsgaarden & Quesenberry 1965; Silverman 1986, §5.2). Mitigates the crushing of weaker pivots by stronger neighbors in dense clusters. Ignored when `fill_method` not in {`gaussian`,`epsilon_gaussian`}. | +| freqai.label_weighting.fill_bandwidth_neighbors | 1 | int >= 1 | `k` for the k-nearest-neighbor bandwidth selector. Ignored when `fill_method` not in {`gaussian`,`epsilon_gaussian`} or `fill_bandwidth != "knn"`. | +| freqai.label_weighting.fill_bandwidth_alpha | 0.5 | float > 0 | Multiplicative factor on the k-th neighbor distance. Smaller values produce sharper, more separated Gaussians; larger values approach the `fixed` behavior. Ignored when `fill_method` not in {`gaussian`,`epsilon_gaussian`} or `fill_bandwidth != "knn"`. | +| _Label pipeline_ | | | | +| freqai.label_pipeline.standardization | `none` | enum {`none`,`zscore`,`robust`,`mmad`,`power_yj`} | Standardization method applied to labels before normalization. `none`=w, `zscore`=(w-μ)/σ, `robust`=(w-median)/(Q₃-Q₁), `mmad`=(w-median)/(MAD·k), `power_yj`=YJ(w). | +| freqai.label_pipeline.robust_quantiles | [0.25, 0.75] | list[float] where 0 <= Q1 < Q3 <= 1 | Quantile range for robust standardization, Q1 and Q3. | +| freqai.label_pipeline.mmad_scaling_factor | 1.4826 | float > 0 | Scaling factor for MMAD standardization. | +| freqai.label_pipeline.normalization | `maxabs` | enum {`maxabs`,`minmax`,`sigmoid`,`none`} | Normalization method applied to labels. `maxabs`=w/max(\|w\|), `minmax`=low+(w-min)/(max-min)·(high-low), `sigmoid`=2·σ(scale·w)-1, `none`=w. | +| freqai.label_pipeline.minmax_range | [-1.0, 1.0] | list[float] | Target range for `minmax` normalization, min and max. 
| +| freqai.label_pipeline.sigmoid_scale | 1.0 | float > 0 | Scale parameter for `sigmoid` normalization, controls steepness. | +| freqai.label_pipeline.gamma | 1.0 | float (0,10] | Contrast exponent applied to labels after normalization: >1 emphasizes extrema, values between 0 and 1 soften. | +| _Feature parameters_ | | | | +| freqai.feature_parameters.label_period_candles | min/max midpoint | int >= 1 | Zigzag labeling NATR horizon. | +| freqai.feature_parameters.min_label_period_candles | 12 | int >= 1 | Minimum labeling NATR horizon used for reversals labeling HPO. | +| freqai.feature_parameters.max_label_period_candles | 24 | int >= 1 | Maximum labeling NATR horizon used for reversals labeling HPO. | +| freqai.feature_parameters.label_natr_multiplier | min/max midpoint | float > 0 | Zigzag labeling NATR multiplier. | +| freqai.feature_parameters.min_label_natr_multiplier | 9.0 | float > 0 | Minimum labeling NATR multiplier used for reversals labeling HPO. | +| freqai.feature_parameters.max_label_natr_multiplier | 12.0 | float > 0 | Maximum labeling NATR multiplier used for reversals labeling HPO. | +| freqai.feature_parameters.label_frequency_candles | `auto` | int >= 2 \| `auto` | Reversals labeling frequency. `auto` = max(2, 2 \* number of whitelisted pairs). | +| freqai.feature_parameters.label_weights | [1/7,1/7,1/7,1/7,1/7,1/7,1/7] | list[float] | Per-objective weights for trial selection methods. Objectives: (1) number of detected reversals, (2) median swing amplitude, (3) median (swing amplitude / median volatility-threshold ratio), (4) median swing volume per candle, (5) median swing speed, (6) median swing efficiency ratio, (7) median swing volume-weighted efficiency ratio. | +| freqai.feature_parameters.label_p_order | None | float \| None | Lp exponent for parameterized metrics. Used by `minkowski` distance (default 2.0) and `power_mean` aggregation (default 1.0). Ignored by other metrics. 
| +| freqai.feature_parameters.label_method | `compromise_programming` | enum {`compromise_programming`,`topsis`,`kmeans`,`kmeans2`,`kmedoids`,`knn`,`medoid`} | HPO `label` Pareto front trial selection method. | +| freqai.feature_parameters.label_distance_metric | `euclidean` | string | Distance metric for `compromise_programming` and `topsis` methods. | +| freqai.feature_parameters.label_cluster_metric | `euclidean` | string | Distance metric for `kmeans`, `kmeans2`, and `kmedoids` methods. | +| freqai.feature_parameters.label_cluster_selection_method | `topsis` | enum {`compromise_programming`,`topsis`} | Cluster selection method for clustering-based label methods. | +| freqai.feature_parameters.label_cluster_trial_selection_method | `topsis` | enum {`compromise_programming`,`topsis`} | Best cluster trial selection method for clustering-based label methods. | +| freqai.feature_parameters.label_density_metric | method-dependent | string | Distance metric for `knn` and `medoid` methods. | +| freqai.feature_parameters.label_density_aggregation | `power_mean` | enum {`power_mean`,`quantile`,`min`,`max`} | Aggregation method for KNN neighbor distances. | +| freqai.feature_parameters.label_density_n_neighbors | 5 | int >= 1 | Number of neighbors for KNN. | +| freqai.feature_parameters.label_density_aggregation_param | aggregation-dependent | float \| None | Tunable for KNN neighbor distance aggregation: Lp exponent (`power_mean`) or quantile value (`quantile`). | +| freqai.feature_parameters.scaler | `minmax` | enum {`minmax`,`maxabs`,`standard`,`robust`} | Feature scaling method. `minmax`=MinMaxScaler, `maxabs`=MaxAbsScaler, `standard`=StandardScaler, `robust`=RobustScaler. Changing this parameter requires deleting trained models. | +| freqai.feature_parameters.range | [-1.0, 1.0] | list[float] | Target range for `minmax` scaler, min and max. Changing this parameter requires deleting trained models. 
| +| _Label prediction_ | | | | +| freqai.label_prediction.method | `thresholding` | enum {`none`,`thresholding`} | Prediction method. `none` disables threshold computation, `thresholding` enables adaptive threshold calculation. | +| freqai.label_prediction.selection_method | `rank_extrema` | enum {`rank_extrema`,`rank_peaks`,`partition`} | Extrema selection method. `rank_extrema` ranks extrema values, `rank_peaks` ranks detected peak values, `partition` uses sign-based partitioning. | +| freqai.label_prediction.threshold_method | `mean` | enum {`mean`,`isodata`,`li`,`minimum`,`otsu`,`triangle`,`yen`,`median`,`soft_extremum`} | Thresholding method for prediction thresholds. | +| freqai.label_prediction.soft_extremum_alpha | 12.0 | float >= 0 | Alpha for `soft_extremum` threshold method. | +| freqai.label_prediction.outlier_quantile | 0.999 | float (0,1) | Quantile threshold for predictions outlier filtering. | +| freqai.label_prediction.keep_fraction | 0.0075 | float (0,1] | Fraction of extrema used for thresholds. 1 uses all, lower values keep only most significant. Applies to `rank_extrema` and `rank_peaks`; ignored for `partition`. | +| _Optuna / HPO_ | | | | +| freqai.optuna_hyperopt.enabled | false | bool | Enables HPO. | +| freqai.optuna_hyperopt.sampler | `tpe` | enum {`tpe`,`auto`} | HPO sampler algorithm for `hp` namespace. `tpe` uses [TPESampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.TPESampler.html) with multivariate, group, and constant_liar (when multiple workers), `auto` uses [AutoSampler](https://hub.optuna.org/samplers/auto_sampler). | +| freqai.optuna_hyperopt.label_sampler | `auto` | enum {`auto`,`tpe`,`nsgaii`,`nsgaiii`} | HPO sampler algorithm for multi-objective `label` namespace. 
`nsgaii` uses [NSGAIISampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.NSGAIISampler.html), `nsgaiii` uses [NSGAIIISampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.NSGAIIISampler.html). | +| freqai.optuna_hyperopt.storage | `file` | enum {`file`,`sqlite`} | HPO storage backend. | +| freqai.optuna_hyperopt.continuous | true | bool | Continuous HPO. | +| freqai.optuna_hyperopt.warm_start | true | bool | Warm start HPO with previous best value(s). | +| freqai.optuna_hyperopt.n_startup_trials | 15 | int >= 0 | HPO startup trials. | +| freqai.optuna_hyperopt.n_trials | 50 | int >= 1 | Maximum HPO trials. | +| freqai.optuna_hyperopt.n_jobs | CPU threads / 4 | int >= 1 | Parallel HPO workers. | +| freqai.optuna_hyperopt.timeout | 7200 | int >= 0 | HPO wall-clock timeout in seconds. | +| freqai.optuna_hyperopt.label_candles_step | 1 | int >= 1 | Step for Zigzag NATR horizon `label` search space. | +| freqai.optuna_hyperopt.space_reduction | false | bool | Enable/disable `hp` search space reduction based on previous best parameters. | +| freqai.optuna_hyperopt.space_fraction | 0.4 | float [0,1] | Fraction of the `hp` search space to use with `space_reduction`. Lower values create narrower search ranges around the best parameters. | +| freqai.optuna_hyperopt.min_resource | 3 | int >= 1 | Minimum resource per [HyperbandPruner](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.HyperbandPruner.html) rung. | +| freqai.optuna_hyperopt.seed | 1 | int >= 0 | HPO RNG seed. 
| ## ReforceXY diff --git a/quickadapter/user_data/strategies/LabelTransformer.py b/quickadapter/user_data/strategies/LabelTransformer.py index 6e9a17b..e2673a7 100644 --- a/quickadapter/user_data/strategies/LabelTransformer.py +++ b/quickadapter/user_data/strategies/LabelTransformer.py @@ -64,11 +64,12 @@ WEIGHT_STRATEGIES: Final[tuple[WeightStrategy, ...]] = ( "combined", ) -FillMethod = Literal["zero", "epsilon", "gaussian"] +FillMethod = Literal["zero", "epsilon", "gaussian", "epsilon_gaussian"] FILL_METHODS: Final[tuple[FillMethod, ...]] = ( "zero", # 0 - hard zero (default) - "epsilon", # 1 - flat fraction of pivot baseline - "gaussian", # 2 - per-row Gaussian decay around each pivot + "epsilon", # 1 - epsilon floor + "gaussian", # 2 - per-pivot Gaussian bumps + "epsilon_gaussian", # 3 - additive: epsilon floor + per-pivot Gaussian bumps ) FillEpsilonBaseline = Literal["mean", "median"] diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py index 4f9334d..455aeff 100644 --- a/quickadapter/user_data/strategies/QuickAdapterV3.py +++ b/quickadapter/user_data/strategies/QuickAdapterV3.py @@ -500,15 +500,22 @@ class QuickAdapterV3(IStrategy): logger.info( f" softmax_temperature: {format_number(col_weighting['softmax_temperature'])}" ) - logger.info(f" fill_method: {col_weighting['fill_method']}") - if col_weighting["fill_method"] == FILL_METHODS[1]: # "epsilon" + fill_method = col_weighting["fill_method"] + logger.info(f" fill_method: {fill_method}") + if fill_method in ( + FILL_METHODS[1], # "epsilon" + FILL_METHODS[3], # "epsilon_gaussian" + ): logger.info( f" fill_epsilon: {format_number(col_weighting['fill_epsilon'])}" ) logger.info( f" fill_epsilon_baseline: {col_weighting['fill_epsilon_baseline']}" ) - elif col_weighting["fill_method"] == FILL_METHODS[2]: # "gaussian" + if fill_method in ( + FILL_METHODS[2], # "gaussian" + FILL_METHODS[3], # "epsilon_gaussian" + ): logger.info( f" 
fill_sigma_candles: {format_number(col_weighting['fill_sigma_candles'])}" ) diff --git a/quickadapter/user_data/strategies/Utils.py b/quickadapter/user_data/strategies/Utils.py index 265dbbd..a313a7b 100644 --- a/quickadapter/user_data/strategies/Utils.py +++ b/quickadapter/user_data/strategies/Utils.py @@ -45,6 +45,7 @@ from LabelTransformer import ( WEIGHT_STRATEGIES, CombinedAggregation, CombinedMetric, + FillEpsilonBaseline, SmoothingMethod, SmoothingMode, ) @@ -1182,7 +1183,7 @@ def _gaussian_fill_weights( sigma_min_candles: float = 0.5, logger: Logger | None = None, ) -> NDArray[np.floating]: - """Per-row max of Gaussian-decayed pivot weights. + """Per-row max of per-pivot Gaussian bumps. Out[i] = max over p of ``w_p * exp(-(i - p)**2 / (2 * sigma_p**2))``. @@ -1266,9 +1267,15 @@ def _scatter_weights( ) -> NDArray[np.floating]: """Scatter per-pivot weights into a full-length array. - Pivot rows (validated via ``valid_mask``) receive ``weights``; off-pivot - rows receive the corresponding entry of ``fill_weights`` (shape - ``(n_values,)``). + Pivot rows (validated via ``valid_mask``) take + ``max(weights, fill_weights)`` so a pivot row is never written below + the off-pivot field at its index. Off-pivot rows receive the + corresponding entry of ``fill_weights`` (shape ``(n_values,)``). The + ``max`` fixes the sub-floor / sub-bump pivot-row dip that arises when + the off-pivot field exceeds the pivot's raw weight: via the floor for + ``epsilon`` / ``epsilon_gaussian``, via a stronger neighbor's bump for + ``gaussian`` / ``epsilon_gaussian``. ``zero`` is bit-identical to a + plain assignment because its fill is 0. 
""" if fill_weights.shape != (n_values,): raise ValueError( @@ -1283,7 +1290,8 @@ def _scatter_weights( f"got {indices_array.size} indices but {weights.size} weights" ) weights_array = fill_weights.astype(float, copy=True) - weights_array[indices_array[valid_mask]] = weights[valid_mask] + pivot_idx = indices_array[valid_mask] + weights_array[pivot_idx] = np.maximum(weights[valid_mask], weights_array[pivot_idx]) return weights_array @@ -1389,6 +1397,62 @@ def _compute_combined_label_weights( ) +def _compute_epsilon_floor( + weights: NDArray[np.floating], + valid_mask: NDArray[np.bool_], + eps: float, + baseline: FillEpsilonBaseline, +) -> float: + """Flat off-pivot weight value ``phi = eps * B(W)``. + + ``B(W)`` is the mean or median of valid pivot weights, selected by + ``baseline`` (``FILL_EPSILON_BASELINES``). Returns ``0.0`` on degenerate + inputs (no valid pivots, non-finite baseline). + """ + if not valid_mask.any(): + return 0.0 + pivot_values = weights[valid_mask] + if baseline == FILL_EPSILON_BASELINES[0]: # "mean" + b = float(np.nanmean(pivot_values)) + elif baseline == FILL_EPSILON_BASELINES[1]: # "median" + b = float(np.nanmedian(pivot_values)) + else: + raise ValueError( + f"Invalid fill_epsilon_baseline value {baseline!r}: " + f"supported values are {', '.join(FILL_EPSILON_BASELINES)}" + ) + if not np.isfinite(b): + b = 0.0 + return float(eps) * b + + +def _compute_gaussian_bumps( + n_values: int, + indices_array: NDArray[np.integer], + valid_mask: NDArray[np.bool_], + weights: NDArray[np.floating], + label_weighting: dict[str, Any], + *, + logger: Logger | None, +) -> NDArray[np.floating]: + """Per-row max of per-pivot Gaussian bumps. + + Adapter over ``_gaussian_fill_weights`` that pulls tunables from + ``label_weighting`` and applies the ``valid_mask``. 
+ """ + return _gaussian_fill_weights( + n_values=n_values, + pivot_indices=indices_array[valid_mask], + pivot_weights=weights[valid_mask], + sigma_candles=label_weighting["fill_sigma_candles"], + bandwidth=label_weighting["fill_bandwidth"], + bandwidth_neighbors=label_weighting["fill_bandwidth_neighbors"], + bandwidth_alpha=label_weighting["fill_bandwidth_alpha"], + sigma_min_candles=label_weighting["fill_sigma_min_candles"], + logger=logger, + ) + + def compute_label_weights( n_values: int, indices: Sequence[int] | NDArray[np.integer], @@ -1454,35 +1518,39 @@ def compute_label_weights( if fill_method == FILL_METHODS[0]: # "zero" fill_weights = np.zeros(n_values, dtype=float) elif fill_method == FILL_METHODS[1]: # "epsilon" - eps = label_weighting["fill_epsilon"] - baseline = label_weighting["fill_epsilon_baseline"] - if valid_mask.any(): - pivot_values = weights[valid_mask] - if baseline == FILL_EPSILON_BASELINES[0]: # "mean" - pivot_baseline = float(np.nanmean(pivot_values)) - elif baseline == FILL_EPSILON_BASELINES[1]: # "median" - pivot_baseline = float(np.nanmedian(pivot_values)) - else: - raise ValueError(f"Invalid fill_epsilon_baseline value {baseline!r}") - if not np.isfinite(pivot_baseline): - pivot_baseline = 0.0 - else: - pivot_baseline = 0.0 - fill_weights = np.full(n_values, eps * pivot_baseline, dtype=float) + fill_weights = np.full( + n_values, + _compute_epsilon_floor( + weights, + valid_mask, + label_weighting["fill_epsilon"], + label_weighting["fill_epsilon_baseline"], + ), + dtype=float, + ) elif fill_method == FILL_METHODS[2]: # "gaussian" - fill_weights = _gaussian_fill_weights( - n_values=n_values, - pivot_indices=indices_array[valid_mask], - pivot_weights=weights[valid_mask], - sigma_candles=label_weighting["fill_sigma_candles"], - bandwidth=label_weighting["fill_bandwidth"], - bandwidth_neighbors=label_weighting["fill_bandwidth_neighbors"], - bandwidth_alpha=label_weighting["fill_bandwidth_alpha"], - 
sigma_min_candles=label_weighting["fill_sigma_min_candles"], - logger=logger, + fill_weights = _compute_gaussian_bumps( + n_values, indices_array, valid_mask, weights, label_weighting, logger=logger + ) + elif fill_method == FILL_METHODS[3]: # "epsilon_gaussian" + fill_weights = _compute_gaussian_bumps( + n_values, indices_array, valid_mask, weights, label_weighting, logger=logger + ) + np.add( + fill_weights, + _compute_epsilon_floor( + weights, + valid_mask, + label_weighting["fill_epsilon"], + label_weighting["fill_epsilon_baseline"], + ), + out=fill_weights, ) else: - raise ValueError(f"Invalid fill_method value {fill_method!r}") + raise ValueError( + f"Invalid fill_method value {fill_method!r}: " + f"supported values are {', '.join(FILL_METHODS)}" + ) return _scatter_weights( n_values=n_values, -- 2.53.0