From 3881591e3ccbc0a22c3837bfd0b811767502dfeb Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Sun, 25 Jan 2026 23:55:46 +0100 Subject: [PATCH] refactor(quickadapter): orthogonal multi-target label processing pipeline (#45) MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit * refactor(quickadapter): orthogonal multi-target label processing pipeline - Refactor label processing into 4 orthogonal phases: 1. Weighting: apply weights to raw label values per column 2. Smoothing: smooth weighted values per column 3. Pipeline: LabelTransformer standardization per column 4. Prediction: threshold calculation per column - Loop over LABEL_COLUMNS for weighting and smoothing in set_freqai_targets() - Loop over dk.label_list for thresholds in fit_live_predictions() - All config helpers return {default, columns} structure with glob pattern support - Rename ExtremaWeightingTransformer to LabelTransformer - Harmonize namespace: label_weighting, label_smoothing, label_prediction - Backward compatible with flat configs and legacy column names * refactor: remove deprecated internal APIs Remove unused deprecated functions that were replaced by the orthogonal label processing architecture: - get_label_transformer_config() from Utils.py - get_label_transformer_config import from QuickAdapterV3.py - extrema_smoothing property from QuickAdapterV3.py * refactor: remove deprecated predictions_extrema property * refactor: use DEFAULTS_LABEL_PREDICTION for outlier_quantile fallback * refactor: make label weighting generic with metrics dict - compute_label_weights() takes generic metrics dict instead of hardcoded params - _compute_combined_weights() takes generic metrics dict - apply_label_weighting() takes generic metrics dict - Caller builds metrics dict, making weighting truly transverse to any label * refactor: centralize deprecation handling with PARAM_DEPRECATIONS table * refactor: call resolve_deprecated_params once at 
startup - Change resolve_deprecated_params to modify dict in-place (returns None) - Centralize all deprecation calls in bot_start() and __init__() - Remove calls from properties and utility functions that run multiple times - This ensures deprecation warnings are logged once, not repeatedly * fix: resolve deprecations in __init__ before regressor loads - Move deprecation resolution from bot_start() to Strategy.__init__() so it runs before FreqaiModel.__init__() (which reads same config) - Remove label_transformer legacy support (never released) - Simplify label_weighting/label_pipeline properties - Keep regressor-specific deprecations in regressor __init__ * fix: address PR review comments - Fix label_weighting['strategy'] KeyError by using ['default']['strategy'] - Respect label_prediction.method='none' in min_max_pred() - Use float('inf') specificity for exact matches in get_column_config - Reuse Utils.get_column_config in LabelTransformer - Default label_smoothing method to 'gaussian' * refactor: unify threshold column naming and soft_extremum_alpha - Remove MINIMA_THRESHOLD_COLUMN/MAXIMA_THRESHOLD_COLUMN constants - Use uniform {label}_minima_threshold/{label}_maxima_threshold for all labels - Rename internal soft_alpha to soft_extremum_alpha for consistency with config - Remove redundant docstrings from LabelTransformer (code is self-documenting) * refactor: cleanup docstrings and rename internal functions * refactor: make label_pipeline orthogonal from label_weighting * refactor: rename get_column_config to get_label_column_config * fix: add missing method field to label_prediction logging * refactor: per-column logging and deprecate label_smoothing.window - Update logging in QuickAdapterV3 and QuickAdapterRegressorV3 to show resolved per-column configs instead of just defaults with override keys - Move get_label_column_config() to LabelTransformer.py (re-export from Utils) - Add deprecation mapping for label_smoothing.window -> window_candles - Fix 
extrema_direction undefined variable bug in populate_any_indicators * fix: correct deprecation mappings for label_prediction params * refactor: move label_pipeline property and logging to regressor - Move label_pipeline property from QuickAdapterV3 strategy to QuickAdapterRegressorV3 - Move Pipeline configuration logging from _log_strategy_configuration() to _log_model_configuration() - Simplify define_label_pipeline() to use self.label_pipeline property - Remove unused get_label_pipeline_config import from strategy - Rename local variable label_weighting to label_weighting_raw for consistency * fix: import get_label_column_config from LabelTransformer Signed-off-by: Jérôme Benoit * refactor(quickadapter): replace string literals with constant references in LabelTransformer * refactor(quickadapter): use per-column prediction config in regressor and strategy * fix: reference correct config paths for label processing Signed-off-by: Jérôme Benoit * fix(quickadapter): warn when column doesn't match any config pattern * feat(deprecation): support cross-section parameter moves Extend PARAM_DEPRECATIONS to handle parameters that moved between config sections, not just renames within the same section. 
- Add tuple[str, str] value type for (old_section, old_key) moves - Add root_config parameter to resolve_deprecated_params() - Add deprecation entries for 7 params moved from label_weighting to label_pipeline: standardization, robust_quantiles, mmad_scaling_factor, normalization, minmax_range, sigmoid_scale, gamma - Add call sites in QuickAdapterV3 and QuickAdapterRegressorV3 * refactor(quickadapter): replace imperative deprecation handling with declarative path-based migrations - Replace PARAM_DEPRECATIONS dict and resolve_deprecated_params() with CONFIG_MIGRATIONS tuple and migrate_config() - Single migrate_config() call in __init__ replaces 6+ resolve_deprecated_params() calls - Fix bug in set_freqai_targets: move maxima/minima column creation after weighting - Fix DI_value_param assignment to only occur when Weibull fit succeeds * refactor(validation): replace imperative validation with declarative system - Add dataclass-based validators (_EnumValidator, _NumericValidator, etc.) - Replace ~240 lines of repetitive validation code with _validate_params() - Consolidate type aliases in LabelTransformer.py (avoid duplicates) - Fix pyright errors: float() casts, np.asarray() for pmean returns - Use np.nan as default for optuna .get() (proper 'no value' sentinel) - Add pyright to requirements-dev.txt * chore(ReforceXY): add pyright to dev dependencies * refactor(quickadapter): simplify fit_live_predictions thresholding flow - Move DI_value stats computation before label loop - Unify warmed_up conditional to single if/else block - Always set threshold values (defaults when not warmed up) * refactor(quickadapter): add OPTUNA_*_DEFAULT constants and fix static member access - Add OPTUNA_*_DEFAULT class constants for n_jobs, n_trials, timeout, n_startup_trials, min_resource, label_candles_step, space_reduction, space_fraction, and seed - Update _optuna_config property to use constants instead of hardcoded values - Update all .get() calls to use constants as defaults for 
type safety - Fix static method/property access: use QuickAdapterRegressorV3.method() instead of self.method() for static members - Add assertions for narrowing Optional types (weights) - Fix min_max_pred signature to accept Optional[int] for label_period_candles Reduces pyright errors from 174 to 158 (-16) * fix(quickadapter): default label_prediction method to 'thresholding' for backward compatibility DEFAULTS_LABEL_PREDICTION['method'] was 'none', which broke backward compatibility: legacy configs without an explicit method would skip threshold computation. Changed to 'thresholding' to preserve the historical behavior, where thresholds were always computed by default. * chore: refine log message Signed-off-by: Jérôme Benoit * docs(quickadapter): update README for orthogonal label processing refactor Rename config sections: extrema_weighting → label_weighting, extrema_smoothing → label_smoothing, predictions_extrema → label_prediction. Split label_weighting into label_weighting + label_pipeline. Update parameter names and defaults. Remove deprecated alias mentions. 
* chore(quickadapter): bump version to 3.11.0 --------- Signed-off-by: Jérôme Benoit --- README.md | 170 +-- ReforceXY/.devcontainer/requirements-dev.txt | 1 + .../.devcontainer/requirements-dev.txt | 1 + quickadapter/user_data/config-template.json | 64 +- .../freqaimodels/QuickAdapterRegressorV3.py | 614 +++++----- .../strategies/ExtremaWeightingTransformer.py | 433 ------- .../user_data/strategies/LabelTransformer.py | 610 ++++++++++ .../user_data/strategies/QuickAdapterV3.py | 400 +++---- quickadapter/user_data/strategies/Utils.py | 1037 +++++++++++------ 9 files changed, 1859 insertions(+), 1471 deletions(-) delete mode 100644 quickadapter/user_data/strategies/ExtremaWeightingTransformer.py create mode 100644 quickadapter/user_data/strategies/LabelTransformer.py diff --git a/README.md b/README.md index 3cec2fb..09b35b4 100644 --- a/README.md +++ b/README.md @@ -37,90 +37,92 @@ docker compose up -d --build ### Configuration tunables -| Path | Default | Type / Range | Description | -| -------------------------------------------------------------- | ----------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| _Protections_ | | | | -| custom_protections.trade_duration_candles | 72 | int >= 1 | Estimated trade duration in candles. Scales protections stop duration candles and trade limit. 
| -| custom_protections.lookback_period_fraction | 0.5 | float (0,1] | Fraction of `fit_live_predictions_candles` used to calculate `lookback_period_candles` for _MaxDrawdown_ and _StoplossGuard_ protections. | -| custom_protections.cooldown.enabled | true | bool | Enable/disable _CooldownPeriod_ protection. | -| custom_protections.cooldown.stop_duration_candles | 4 | int >= 1 | Number of candles to wait before allowing new trades after a trade is closed. | -| custom_protections.drawdown.enabled | true | bool | Enable/disable _MaxDrawdown_ protection. | -| custom_protections.drawdown.max_allowed_drawdown | 0.2 | float (0,1) | Maximum allowed drawdown. | -| custom_protections.stoploss.enabled | true | bool | Enable/disable _StoplossGuard_ protection. | -| _Leverage_ | | | | -| leverage | `proposed_leverage` | float [1.0, max_leverage] | Leverage. Fallback to `proposed_leverage` for the pair. | -| _Exit pricing_ | | | | -| exit_pricing.trade_price_target_method | `moving_average` | enum {`moving_average`,`quantile_interpolation`,`weighted_average`} | Trade NATR computation method. (Deprecated alias: `exit_pricing.trade_price_target`) | -| exit_pricing.thresholds_calibration.decline_quantile | 0.75 | float (0,1) | PnL decline quantile threshold. | -| _Reversal confirmation_ | | | | -| reversal_confirmation.lookback_period_candles | 0 | int >= 0 | Prior confirming candles; 0 = none. (Deprecated alias: `reversal_confirmation.lookback_period`) | -| reversal_confirmation.decay_fraction | 0.5 | float (0,1] | Geometric per-candle volatility adjusted reversal threshold relaxation factor. (Deprecated alias: `reversal_confirmation.decay_ratio`) | -| reversal_confirmation.min_natr_multiplier_fraction | 0.0095 | float [0,1] | Lower bound fraction for volatility adjusted reversal threshold. 
(Deprecated alias: `reversal_confirmation.min_natr_ratio_percent`) | -| reversal_confirmation.max_natr_multiplier_fraction | 0.075 | float [0,1] | Upper bound fraction (>= lower bound) for volatility adjusted reversal threshold. (Deprecated alias: `reversal_confirmation.max_natr_ratio_percent`) | -| _Regressor model_ | | | | -| freqai.regressor | `xgboost` | enum {`xgboost`,`lightgbm`,`histgradientboostingregressor`,`ngboost`,`catboost`} | Machine learning regressor algorithm. | -| _Extrema smoothing_ | | | | -| freqai.extrema_smoothing.method | `gaussian` | enum {`gaussian`,`kaiser`,`triang`,`smm`,`sma`,`savgol`,`gaussian_filter1d`} | Extrema smoothing method (`smm`=median, `sma`=mean, `savgol`=Savitzky–Golay). | -| freqai.extrema_smoothing.window_candles | 5 | int >= 3 | Smoothing window length (candles). (Deprecated alias: `freqai.extrema_smoothing.window`) | -| freqai.extrema_smoothing.beta | 8.0 | float > 0 | Shape parameter for `kaiser` kernel. | -| freqai.extrema_smoothing.polyorder | 3 | int >= 1 | Polynomial order for `savgol` smoothing. | -| freqai.extrema_smoothing.mode | `mirror` | enum {`mirror`,`constant`,`nearest`,`wrap`,`interp`} | Boundary mode for `savgol` and `gaussian_filter1d`. | -| freqai.extrema_smoothing.sigma | 1.0 | float > 0 | Gaussian `sigma` for `gaussian_filter1d` smoothing. | -| _Extrema weighting_ | | | | -| freqai.extrema_weighting.strategy | `none` | enum {`none`,`amplitude`,`amplitude_threshold_ratio`,`volume_rate`,`speed`,`efficiency_ratio`,`volume_weighted_efficiency_ratio`,`combined`} | Extrema weighting metric: none (`none`), swing amplitude (`amplitude`), swing amplitude / median volatility-threshold ratio (`amplitude_threshold_ratio`), swing volume per candle (`volume_rate`), swing speed (`speed`), swing efficiency ratio (`efficiency_ratio`), swing volume-weighted efficiency ratio (`volume_weighted_efficiency_ratio`), or combined metrics aggregation (`combined`). 
Switching between `none` and any other strategy requires deleting trained models. | -| freqai.extrema_weighting.metric_coefficients | {} | dict[str, float] | Per-metric coefficients for `combined` strategy. Keys: `amplitude`, `amplitude_threshold_ratio`, `volume_rate`, `speed`, `efficiency_ratio`, `volume_weighted_efficiency_ratio`. | -| freqai.extrema_weighting.aggregation | `arithmetic_mean` | enum {`arithmetic_mean`,`geometric_mean`,`harmonic_mean`,`quadratic_mean`,`weighted_median`,`softmax`} | Metric aggregation method for `combined` strategy. `arithmetic_mean`=(Σ(w·m)/Σ(w)), `geometric_mean`=(∏(m^w))^(1/Σw), `harmonic_mean`=Σ(w)/(Σ(w/m)), `quadratic_mean`=(Σ(w·m²)/Σ(w))^(1/2), `weighted_median`=Q₀.₅(m,w), `softmax`=Σ(m·s_i) where s_i=w_i·exp(m_i/T)/Σ(w_j·exp(m_j/T)). | -| freqai.extrema_weighting.softmax_temperature | 1.0 | float > 0 | Temperature T for `softmax` aggregation, controls distribution sharpness. | -| freqai.extrema_weighting.standardization | `none` | enum {`none`,`zscore`,`robust`,`mmad`,`power_yj`} | Standardization method applied to smoothed weighted extrema before normalization. `none`=w, `zscore`=(w-μ)/σ, `robust`=(w-median)/IQR, `mmad`=(w-median)/(MAD·k), `power_yj`=YJ(w). | -| freqai.extrema_weighting.robust_quantiles | [0.25, 0.75] | list[float] where 0 <= Q1 < Q3 <= 1 | Quantile range for robust standardization, Q1 and Q3. | -| freqai.extrema_weighting.mmad_scaling_factor | 1.4826 | float > 0 | Scaling factor for MMAD standardization. | -| freqai.extrema_weighting.normalization | `maxabs` | enum {`maxabs`,`minmax`,`sigmoid`,`none`} | Normalization method applied to smoothed weighted extrema. `maxabs`=w/max(\|w\|), `minmax`=low+(w-min)/(max-min)·(high-low), `sigmoid`=2·σ(scale·w)-1, `none`=w. | -| freqai.extrema_weighting.minmax_range | [-1.0, 1.0] | list[float] | Target range for `minmax` normalization, min and max. 
| -| freqai.extrema_weighting.sigmoid_scale | 1.0 | float > 0 | Scale parameter for `sigmoid` normalization, controls steepness. | -| freqai.extrema_weighting.gamma | 1.0 | float (0,10] | Contrast exponent applied to smoothed weighted extrema after normalization: >1 emphasizes extrema, values between 0 and 1 soften. | -| _Feature parameters_ | | | | -| freqai.feature_parameters.label_period_candles | min/max midpoint | int >= 1 | Zigzag labeling NATR horizon. | -| freqai.feature_parameters.min_label_period_candles | 12 | int >= 1 | Minimum labeling NATR horizon used for reversals labeling HPO. | -| freqai.feature_parameters.max_label_period_candles | 24 | int >= 1 | Maximum labeling NATR horizon used for reversals labeling HPO. | -| freqai.feature_parameters.label_natr_multiplier | min/max midpoint | float > 0 | Zigzag labeling NATR multiplier. (Deprecated alias: `freqai.feature_parameters.label_natr_ratio`) | -| freqai.feature_parameters.min_label_natr_multiplier | 9.0 | float > 0 | Minimum labeling NATR multiplier used for reversals labeling HPO. (Deprecated alias: `freqai.feature_parameters.min_label_natr_ratio`) | -| freqai.feature_parameters.max_label_natr_multiplier | 12.0 | float > 0 | Maximum labeling NATR multiplier used for reversals labeling HPO. (Deprecated alias: `freqai.feature_parameters.max_label_natr_ratio`) | -| freqai.feature_parameters.label_frequency_candles | `auto` | int >= 2 \| `auto` | Reversals labeling frequency. `auto` = max(2, 2 \* number of whitelisted pairs). | -| freqai.feature_parameters.label_weights | [1/7,1/7,1/7,1/7,1/7,1/7,1/7] | list[float] | Per-objective weights used in distance calculations to ideal point. Objectives: (1) number of detected reversals, (2) median swing amplitude, (3) median (swing amplitude / median volatility-threshold ratio), (4) median swing volume per candle, (5) median swing speed, (6) median swing efficiency ratio, (7) median swing volume-weighted efficiency ratio. 
| -| freqai.feature_parameters.label_p_order | `None` | float \| None | p-order parameter for distance metrics. Used by `minkowski` (default 2.0) and `power_mean` (default 1.0). Ignored by other metrics. | -| freqai.feature_parameters.label_method | `compromise_programming` | enum {`compromise_programming`,`topsis`,`kmeans`,`kmeans2`,`kmedoids`,`knn`,`medoid`} | HPO `label` Pareto front trial selection method. | -| freqai.feature_parameters.label_distance_metric | `euclidean` | string | Distance metric for `compromise_programming` and `topsis` methods. | -| freqai.feature_parameters.label_cluster_metric | `euclidean` | string | Distance metric for `kmeans`, `kmeans2`, and `kmedoids` methods. | -| freqai.feature_parameters.label_cluster_selection_method | `topsis` | enum {`compromise_programming`,`topsis`} | Cluster selection method for clustering-based label methods. | -| freqai.feature_parameters.label_cluster_trial_selection_method | `topsis` | enum {`compromise_programming`,`topsis`} | Best cluster trial selection method for clustering-based label methods. | -| freqai.feature_parameters.label_density_metric | method-dependent | string | Distance metric for `knn` and `medoid` methods. | -| freqai.feature_parameters.label_density_aggregation | `power_mean` | enum {`power_mean`,`quantile`,`min`,`max`} | Aggregation method for KNN neighbor distances. | -| freqai.feature_parameters.label_density_n_neighbors | 5 | int >= 1 | Number of neighbors for KNN. | -| freqai.feature_parameters.label_density_aggregation_param | aggregation-dependent | float \| None | Tunable for KNN neighbor distance aggregation: p-order (`power_mean`) or quantile value (`quantile`). | -| freqai.feature_parameters.scaler | `minmax` | enum {`minmax`,`maxabs`,`standard`,`robust`} | Feature scaling method. `minmax`=MinMaxScaler, `maxabs`=MaxAbsScaler, `standard`=StandardScaler, `robust`=RobustScaler. Changing this parameter requires deleting trained models. 
| -| freqai.feature_parameters.range | [-1.0, 1.0] | list[float] | Target range for `minmax` scaler, min and max. Changing this parameter requires deleting trained models. | -| _Predictions extrema_ | | | | -| freqai.predictions_extrema.selection_method | `rank_extrema` | enum {`rank_extrema`,`rank_peaks`,`partition`} | Extrema selection method. `rank_extrema` ranks extrema values, `rank_peaks` ranks detected peak values, `partition` uses sign-based partitioning. | -| freqai.predictions_extrema.threshold_smoothing_method | `mean` | enum {`mean`,`isodata`,`li`,`minimum`,`otsu`,`triangle`,`yen`,`median`,`soft_extremum`} | Thresholding method for prediction thresholds smoothing. (Deprecated alias: `freqai.predictions_extrema.thresholds_smoothing`) | -| freqai.predictions_extrema.soft_extremum_alpha | 12.0 | float >= 0 | Alpha for `soft_extremum` thresholds smoothing. (Deprecated alias: `freqai.predictions_extrema.thresholds_alpha`) | -| freqai.predictions_extrema.outlier_threshold_quantile | 0.999 | float (0,1) | Quantile threshold for predictions outlier filtering. (Deprecated alias: `freqai.predictions_extrema.threshold_outlier`) | -| freqai.predictions_extrema.keep_extrema_fraction | 1.0 | float (0,1] | Fraction of extrema used for thresholds. `1.0` uses all, lower values keep only most significant. Applies to `rank_extrema` and `rank_peaks`; ignored for `partition`. (Deprecated alias: `freqai.predictions_extrema.extrema_fraction`) | -| _Optuna / HPO_ | | | | -| freqai.optuna_hyperopt.enabled | false | bool | Enables HPO. | -| freqai.optuna_hyperopt.sampler | `tpe` | enum {`tpe`,`auto`} | HPO sampler algorithm for `hp` namespace. `tpe` uses [TPESampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.TPESampler.html) with multivariate, group, and constant_liar (when multiple workers), `auto` uses [AutoSampler](https://hub.optuna.org/samplers/auto_sampler). 
| -| freqai.optuna_hyperopt.label_sampler | `auto` | enum {`auto`,`tpe`,`nsgaii`,`nsgaiii`} | HPO sampler algorithm for multi-objective `label` namespace. `nsgaii` uses [NSGAIISampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.NSGAIISampler.html), `nsgaiii` uses [NSGAIIISampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.NSGAIIISampler.html). | -| freqai.optuna_hyperopt.storage | `file` | enum {`file`,`sqlite`} | HPO storage backend. | -| freqai.optuna_hyperopt.continuous | true | bool | Continuous HPO. | -| freqai.optuna_hyperopt.warm_start | true | bool | Warm start HPO with previous best value(s). | -| freqai.optuna_hyperopt.n_startup_trials | 15 | int >= 0 | HPO startup trials. | -| freqai.optuna_hyperopt.n_trials | 50 | int >= 1 | Maximum HPO trials. | -| freqai.optuna_hyperopt.n_jobs | CPU threads / 4 | int >= 1 | Parallel HPO workers. | -| freqai.optuna_hyperopt.timeout | 7200 | int >= 0 | HPO wall-clock timeout in seconds. | -| freqai.optuna_hyperopt.label_candles_step | 1 | int >= 1 | Step for Zigzag NATR horizon `label` search space. | -| freqai.optuna_hyperopt.space_reduction | false | bool | Enable/disable `hp` search space reduction based on previous best parameters. | -| freqai.optuna_hyperopt.space_fraction | 0.4 | float [0,1] | Fraction of the `hp` search space to use with `space_reduction`. Lower values create narrower search ranges around the best parameters. (Deprecated alias: `freqai.optuna_hyperopt.expansion_ratio`) | -| freqai.optuna_hyperopt.min_resource | 3 | int >= 1 | Minimum resource per [HyperbandPruner](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.HyperbandPruner.html) rung. | -| freqai.optuna_hyperopt.seed | 1 | int >= 0 | HPO RNG seed. 
| +| Path | Default | Type / Range | Description | +| -------------------------------------------------------------- | ----------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| _Protections_ | | | | +| custom_protections.trade_duration_candles | 72 | int >= 1 | Estimated trade duration in candles. Scales protections stop duration candles and trade limit. | +| custom_protections.lookback_period_fraction | 0.5 | float (0,1] | Fraction of `fit_live_predictions_candles` used to calculate `lookback_period_candles` for _MaxDrawdown_ and _StoplossGuard_ protections. | +| custom_protections.cooldown.enabled | true | bool | Enable/disable _CooldownPeriod_ protection. | +| custom_protections.cooldown.stop_duration_candles | 4 | int >= 1 | Number of candles to wait before allowing new trades after a trade is closed. | +| custom_protections.drawdown.enabled | true | bool | Enable/disable _MaxDrawdown_ protection. | +| custom_protections.drawdown.max_allowed_drawdown | 0.2 | float (0,1) | Maximum allowed drawdown. | +| custom_protections.stoploss.enabled | true | bool | Enable/disable _StoplossGuard_ protection. | +| _Leverage_ | | | | +| leverage | `proposed_leverage` | float [1.0, max_leverage] | Leverage. Fallback to `proposed_leverage` for the pair. 
| +| _Exit pricing_ | | | | +| exit_pricing.trade_price_target_method | `moving_average` | enum {`moving_average`,`quantile_interpolation`,`weighted_average`} | Trade NATR computation method. | +| exit_pricing.thresholds_calibration.decline_quantile | 0.75 | float (0,1) | PnL decline quantile threshold. | +| _Reversal confirmation_ | | | | +| reversal_confirmation.lookback_period_candles | 0 | int >= 0 | Prior confirming candles; 0 = none. | +| reversal_confirmation.decay_fraction | 0.5 | float (0,1] | Geometric per-candle volatility adjusted reversal threshold relaxation factor. | +| reversal_confirmation.min_natr_multiplier_fraction | 0.0095 | float [0,1] | Lower bound fraction for volatility adjusted reversal threshold. | +| reversal_confirmation.max_natr_multiplier_fraction | 0.075 | float [0,1] | Upper bound fraction (>= lower bound) for volatility adjusted reversal threshold. | +| _Regressor model_ | | | | +| freqai.regressor | `xgboost` | enum {`xgboost`,`lightgbm`,`histgradientboostingregressor`,`ngboost`,`catboost`} | Machine learning regressor algorithm. | +| _Label smoothing_ | | | | +| freqai.label_smoothing.method | `gaussian` | enum {`none`,`gaussian`,`kaiser`,`triang`,`smm`,`sma`,`savgol`,`gaussian_filter1d`} | Label smoothing method (`smm`=median, `sma`=mean, `savgol`=Savitzky–Golay). | +| freqai.label_smoothing.window_candles | 5 | int >= 3 | Smoothing window length (candles). | +| freqai.label_smoothing.beta | 8.0 | float > 0 | Shape parameter for `kaiser` kernel. | +| freqai.label_smoothing.polyorder | 3 | int >= 1 | Polynomial order for `savgol` smoothing. | +| freqai.label_smoothing.mode | `mirror` | enum {`mirror`,`constant`,`nearest`,`wrap`,`interp`} | Boundary mode for `savgol` and `gaussian_filter1d`. | +| freqai.label_smoothing.sigma | 1.0 | float > 0 | Gaussian `sigma` for `gaussian_filter1d` smoothing. 
| +| _Label weighting_ | | | | +| freqai.label_weighting.strategy | `none` | enum {`none`,`amplitude`,`amplitude_threshold_ratio`,`volume_rate`,`speed`,`efficiency_ratio`,`volume_weighted_efficiency_ratio`,`combined`} | Label weighting metric: none (`none`), swing amplitude (`amplitude`), swing amplitude / median volatility-threshold ratio (`amplitude_threshold_ratio`), swing volume per candle (`volume_rate`), swing speed (`speed`), swing efficiency ratio (`efficiency_ratio`), swing volume-weighted efficiency ratio (`volume_weighted_efficiency_ratio`), or combined metrics aggregation (`combined`). Switching between `none` and any other strategy requires deleting trained models. | +| freqai.label_weighting.metric_coefficients | {} | dict[str, float] | Per-metric coefficients for `combined` strategy. Keys: `amplitude`, `amplitude_threshold_ratio`, `volume_rate`, `speed`, `efficiency_ratio`, `volume_weighted_efficiency_ratio`. | +| freqai.label_weighting.aggregation | `arithmetic_mean` | enum {`arithmetic_mean`,`geometric_mean`,`harmonic_mean`,`quadratic_mean`,`weighted_median`,`softmax`} | Metric aggregation method for `combined` strategy. `arithmetic_mean`=(Σ(w·m)/Σ(w)), `geometric_mean`=(∏(m^w))^(1/Σw), `harmonic_mean`=Σ(w)/(Σ(w/m)), `quadratic_mean`=(Σ(w·m²)/Σ(w))^(1/2), `weighted_median`=Q₀.₅(m,w), `softmax`=Σ(m·s_i) where s_i=w_i·exp(m_i/T)/Σ(w_j·exp(m_j/T)). | +| freqai.label_weighting.softmax_temperature | 1.0 | float > 0 | Temperature T for `softmax` aggregation, controls distribution sharpness. | +| _Label pipeline_ | | | | +| freqai.label_pipeline.standardization | `none` | enum {`none`,`zscore`,`robust`,`mmad`,`power_yj`} | Standardization method applied to labels before normalization. `none`=w, `zscore`=(w-μ)/σ, `robust`=(w-median)/IQR, `mmad`=(w-median)/(MAD·k), `power_yj`=YJ(w). | +| freqai.label_pipeline.robust_quantiles | [0.25, 0.75] | list[float] where 0 <= Q1 < Q3 <= 1 | Quantile range for robust standardization, Q1 and Q3. 
| +| freqai.label_pipeline.mmad_scaling_factor | 1.4826 | float > 0 | Scaling factor for MMAD standardization. | +| freqai.label_pipeline.normalization | `maxabs` | enum {`maxabs`,`minmax`,`sigmoid`,`none`} | Normalization method applied to labels. `maxabs`=w/max(\|w\|), `minmax`=low+(w-min)/(max-min)·(high-low), `sigmoid`=2·σ(scale·w)-1, `none`=w. | +| freqai.label_pipeline.minmax_range | [-1.0, 1.0] | list[float] | Target range for `minmax` normalization, min and max. | +| freqai.label_pipeline.sigmoid_scale | 1.0 | float > 0 | Scale parameter for `sigmoid` normalization, controls steepness. | +| freqai.label_pipeline.gamma | 1.0 | float (0,10] | Contrast exponent applied to labels after normalization: >1 emphasizes extrema, values between 0 and 1 soften. | +| _Feature parameters_ | | | | +| freqai.feature_parameters.label_period_candles | min/max midpoint | int >= 1 | Zigzag labeling NATR horizon. | +| freqai.feature_parameters.min_label_period_candles | 12 | int >= 1 | Minimum labeling NATR horizon used for reversals labeling HPO. | +| freqai.feature_parameters.max_label_period_candles | 24 | int >= 1 | Maximum labeling NATR horizon used for reversals labeling HPO. | +| freqai.feature_parameters.label_natr_multiplier | min/max midpoint | float > 0 | Zigzag labeling NATR multiplier. | +| freqai.feature_parameters.min_label_natr_multiplier | 9.0 | float > 0 | Minimum labeling NATR multiplier used for reversals labeling HPO. | +| freqai.feature_parameters.max_label_natr_multiplier | 12.0 | float > 0 | Maximum labeling NATR multiplier used for reversals labeling HPO. | +| freqai.feature_parameters.label_frequency_candles | `auto` | int >= 2 \| `auto` | Reversals labeling frequency. `auto` = max(2, 2 \* number of whitelisted pairs). | +| freqai.feature_parameters.label_weights | [1/7,1/7,1/7,1/7,1/7,1/7,1/7] | list[float] | Per-objective weights used in distance calculations to ideal point. 
Objectives: (1) number of detected reversals, (2) median swing amplitude, (3) median (swing amplitude / median volatility-threshold ratio), (4) median swing volume per candle, (5) median swing speed, (6) median swing efficiency ratio, (7) median swing volume-weighted efficiency ratio. | +| freqai.feature_parameters.label_p_order | `None` | float \| None | p-order parameter for distance metrics. Used by `minkowski` (default 2.0) and `power_mean` (default 1.0). Ignored by other metrics. | +| freqai.feature_parameters.label_method | `compromise_programming` | enum {`compromise_programming`,`topsis`,`kmeans`,`kmeans2`,`kmedoids`,`knn`,`medoid`} | HPO `label` Pareto front trial selection method. | +| freqai.feature_parameters.label_distance_metric | `euclidean` | string | Distance metric for `compromise_programming` and `topsis` methods. | +| freqai.feature_parameters.label_cluster_metric | `euclidean` | string | Distance metric for `kmeans`, `kmeans2`, and `kmedoids` methods. | +| freqai.feature_parameters.label_cluster_selection_method | `topsis` | enum {`compromise_programming`,`topsis`} | Cluster selection method for clustering-based label methods. | +| freqai.feature_parameters.label_cluster_trial_selection_method | `topsis` | enum {`compromise_programming`,`topsis`} | Best cluster trial selection method for clustering-based label methods. | +| freqai.feature_parameters.label_density_metric | method-dependent | string | Distance metric for `knn` and `medoid` methods. | +| freqai.feature_parameters.label_density_aggregation | `power_mean` | enum {`power_mean`,`quantile`,`min`,`max`} | Aggregation method for KNN neighbor distances. | +| freqai.feature_parameters.label_density_n_neighbors | 5 | int >= 1 | Number of neighbors for KNN. | +| freqai.feature_parameters.label_density_aggregation_param | aggregation-dependent | float \| None | Tunable for KNN neighbor distance aggregation: p-order (`power_mean`) or quantile value (`quantile`). 
| +| freqai.feature_parameters.scaler | `minmax` | enum {`minmax`,`maxabs`,`standard`,`robust`} | Feature scaling method. `minmax`=MinMaxScaler, `maxabs`=MaxAbsScaler, `standard`=StandardScaler, `robust`=RobustScaler. Changing this parameter requires deleting trained models. | +| freqai.feature_parameters.range | [-1.0, 1.0] | list[float] | Target range for `minmax` scaler, min and max. Changing this parameter requires deleting trained models. | +| _Label prediction_ | | | | +| freqai.label_prediction.method | `thresholding` | enum {`none`,`thresholding`} | Prediction method. `none` disables threshold computation, `thresholding` enables adaptive threshold calculation. | +| freqai.label_prediction.selection_method | `rank_extrema` | enum {`rank_extrema`,`rank_peaks`,`partition`} | Extrema selection method. `rank_extrema` ranks extrema values, `rank_peaks` ranks detected peak values, `partition` uses sign-based partitioning. | +| freqai.label_prediction.threshold_method | `mean` | enum {`mean`,`isodata`,`li`,`minimum`,`otsu`,`triangle`,`yen`,`median`,`soft_extremum`} | Thresholding method for prediction thresholds. | +| freqai.label_prediction.soft_extremum_alpha | 12.0 | float >= 0 | Alpha for `soft_extremum` threshold method. | +| freqai.label_prediction.outlier_quantile | 0.999 | float (0,1) | Quantile threshold for predictions outlier filtering. | +| freqai.label_prediction.keep_fraction | 0.5 | float (0,1] | Fraction of extrema used for thresholds. `1.0` uses all, lower values keep only most significant. Applies to `rank_extrema` and `rank_peaks`; ignored for `partition`. | +| _Optuna / HPO_ | | | | +| freqai.optuna_hyperopt.enabled | false | bool | Enables HPO. | +| freqai.optuna_hyperopt.sampler | `tpe` | enum {`tpe`,`auto`} | HPO sampler algorithm for `hp` namespace. 
`tpe` uses [TPESampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.TPESampler.html) with multivariate, group, and constant_liar (when multiple workers), `auto` uses [AutoSampler](https://hub.optuna.org/samplers/auto_sampler). | +| freqai.optuna_hyperopt.label_sampler | `auto` | enum {`auto`,`tpe`,`nsgaii`,`nsgaiii`} | HPO sampler algorithm for multi-objective `label` namespace. `nsgaii` uses [NSGAIISampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.NSGAIISampler.html), `nsgaiii` uses [NSGAIIISampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.NSGAIIISampler.html). | +| freqai.optuna_hyperopt.storage | `file` | enum {`file`,`sqlite`} | HPO storage backend. | +| freqai.optuna_hyperopt.continuous | true | bool | Continuous HPO. | +| freqai.optuna_hyperopt.warm_start | true | bool | Warm start HPO with previous best value(s). | +| freqai.optuna_hyperopt.n_startup_trials | 15 | int >= 0 | HPO startup trials. | +| freqai.optuna_hyperopt.n_trials | 50 | int >= 1 | Maximum HPO trials. | +| freqai.optuna_hyperopt.n_jobs | CPU threads / 4 | int >= 1 | Parallel HPO workers. | +| freqai.optuna_hyperopt.timeout | 7200 | int >= 0 | HPO wall-clock timeout in seconds. | +| freqai.optuna_hyperopt.label_candles_step | 1 | int >= 1 | Step for Zigzag NATR horizon `label` search space. | +| freqai.optuna_hyperopt.space_reduction | false | bool | Enable/disable `hp` search space reduction based on previous best parameters. | +| freqai.optuna_hyperopt.space_fraction | 0.4 | float [0,1] | Fraction of the `hp` search space to use with `space_reduction`. Lower values create narrower search ranges around the best parameters. | +| freqai.optuna_hyperopt.min_resource | 3 | int >= 1 | Minimum resource per [HyperbandPruner](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.HyperbandPruner.html) rung. 
| +| freqai.optuna_hyperopt.seed | 1 | int >= 0 | HPO RNG seed. | ## ReforceXY diff --git a/ReforceXY/.devcontainer/requirements-dev.txt b/ReforceXY/.devcontainer/requirements-dev.txt index 73e7736..d8ad979 100644 --- a/ReforceXY/.devcontainer/requirements-dev.txt +++ b/ReforceXY/.devcontainer/requirements-dev.txt @@ -2,3 +2,4 @@ pandas-stubs scipy-stubs uv ruff +pyright diff --git a/quickadapter/.devcontainer/requirements-dev.txt b/quickadapter/.devcontainer/requirements-dev.txt index 73e7736..d8ad979 100644 --- a/quickadapter/.devcontainer/requirements-dev.txt +++ b/quickadapter/.devcontainer/requirements-dev.txt @@ -2,3 +2,4 @@ pandas-stubs scipy-stubs uv ruff +pyright diff --git a/quickadapter/user_data/config-template.json b/quickadapter/user_data/config-template.json index a08388b..895f77e 100644 --- a/quickadapter/user_data/config-template.json +++ b/quickadapter/user_data/config-template.json @@ -124,21 +124,64 @@ "fit_live_predictions_candles": 864, "data_kitchen_thread_count": 6, // set to number of CPU threads / 4 "track_performance": false, - "extrema_weighting": { + "label_weighting": { "strategy": "none" + // Per-label format: + // "default": { + // "strategy": "none" + // }, + // "columns": { + // "&s-extrema": { + // "strategy": "amplitude", + // } + // } }, - // "extrema_weighting": { - // "strategy": "amplitude", - // "gamma": 1.5 - // }, - "extrema_smoothing": { + "label_smoothing": { "method": "kaiser", "window_candles": 5, "beta": 14.0 + // Per-label format: + // "default": { + // "method": "none" + // }, + // "columns": { + // "&s-extrema": { + // "method": "kaiser", + // "window_candles": 5, + // "beta": 14.0 + // } + // } }, - "predictions_extrema": { - "threshold_smoothing_method": "isodata", - "keep_extrema_fraction": 0.5 + "label_pipeline": { + // Per-label format: + // "default": { + // "standardization": "none", + // "normalization": "minmax", + // "gamma": 1.0 + // }, + // "columns": { + // "&s-extrema": { + // "standardization": "none", + // 
"normalization": "maxabs", + // "gamma": 1.5 + // } + // } + }, + "label_prediction": { + "method": "thresholding", + "threshold_method": "isodata", + "keep_fraction": 0.5 + // Per-label format: + // "default": { + // "method": "thresholding", + // "threshold_method": "mean" + // }, + // "columns": { + // "&s-extrema": { + // "threshold_method": "isodata", + // "keep_fraction": 0.5 + // } + // } }, "optuna_hyperopt": { "enabled": true, @@ -153,8 +195,8 @@ "DI_value_param2": 0, "DI_value_param3": 0, "DI_cutoff": 2, - "&s-minima_threshold": -2, - "&s-maxima_threshold": 2, + "&s-extrema_minima_threshold": -2, + "&s-extrema_maxima_threshold": 2, "label_period_candles": 18, "label_natr_multiplier": 10.5, "hp_rmse": -1 diff --git a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py index afd50a6..ddac8dc 100644 --- a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py +++ b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py @@ -29,36 +29,42 @@ from sklearn.preprocessing import ( ) from sklearn_extra.cluster import KMedoids -from ExtremaWeightingTransformer import ExtremaWeightingTransformer +from LabelTransformer import ( + CUSTOM_THRESHOLD_METHODS, + EXTREMA_SELECTION_METHODS, + PREDICTION_METHODS, + SKIMAGE_THRESHOLD_METHODS, + THRESHOLD_METHODS, + CustomThresholdMethod, + ExtremaSelectionMethod, + LabelTransformer, + SkimageThresholdMethod, + ThresholdMethod, + get_label_column_config, +) + from Utils import ( DEFAULT_FIT_LIVE_PREDICTIONS_CANDLES, - EXTREMA_COLUMN, - MAXIMA_THRESHOLD_COLUMN, - MINIMA_THRESHOLD_COLUMN, + DEFAULTS_LABEL_PREDICTION, + LABEL_COLUMNS, REGRESSORS, - WEIGHT_STRATEGIES, Regressor, eval_set_and_weights, fit_regressor, format_number, - get_extrema_weighting_config, get_label_defaults, + get_label_pipeline_config, + get_label_prediction_config, get_min_max_label_period_candles, get_optuna_study_model_parameters, + migrate_config, soft_extremum, - 
update_config_value, zigzag, ) -ExtremaSelectionMethod = Literal["rank_extrema", "rank_peaks", "partition"] OptunaSampler = Literal["tpe", "auto", "nsgaii", "nsgaiii"] OptunaNamespace = Literal["hp", "label"] ScalerType = Literal["minmax", "maxabs", "standard", "robust"] -CustomThresholdMethod = Literal["median", "soft_extremum"] -SkimageThresholdMethod = Literal[ - "mean", "isodata", "li", "minimum", "otsu", "triangle", "yen" -] -ThresholdMethod = Union[SkimageThresholdMethod, CustomThresholdMethod] DensityAggregation = Literal["power_mean", "quantile", "min", "max"] DistanceMethod = Literal["compromise_programming", "topsis"] ClusterMethod = Literal["kmeans", "kmeans2", "kmedoids"] @@ -87,40 +93,16 @@ class QuickAdapterRegressorV3(BaseRegressionModel): https://github.com/sponsors/robcaulk """ - version = "3.10.11" + version = "3.11.0" _TEST_SIZE: Final[float] = 0.1 _SQRT_2: Final[float] = np.sqrt(2.0) - _EXTREMA_SELECTION_METHODS: Final[tuple[ExtremaSelectionMethod, ...]] = ( - "rank_extrema", - "rank_peaks", - "partition", - ) - _CUSTOM_THRESHOLD_METHODS: Final[tuple[CustomThresholdMethod, ...]] = ( - "median", - "soft_extremum", - ) - _SKIMAGE_THRESHOLD_METHODS: Final[tuple[SkimageThresholdMethod, ...]] = ( - "mean", - "isodata", - "li", - "minimum", - "otsu", - "triangle", - "yen", - ) - _THRESHOLD_METHODS: Final[tuple[ThresholdMethod, ...]] = ( - *_SKIMAGE_THRESHOLD_METHODS, - *_CUSTOM_THRESHOLD_METHODS, - ) - _OPTUNA_LABEL_N_OBJECTIVES: Final[int] = 7 _OPTUNA_LABEL_DIRECTIONS: Final[tuple[optuna.study.StudyDirection, ...]] = ( optuna.study.StudyDirection.MAXIMIZE, ) * _OPTUNA_LABEL_N_OBJECTIVES - _OPTUNA_STORAGE_BACKENDS: Final[tuple[str, ...]] = ("file", "sqlite") _OPTUNA_SAMPLERS: Final[tuple[OptunaSampler, ...]] = ( "tpe", @@ -211,10 +193,6 @@ class QuickAdapterRegressorV3(BaseRegressionModel): "cubic_mean": 3.0, } - PREDICTIONS_EXTREMA_OUTLIER_THRESHOLD_QUANTILE_DEFAULT: Final[float] = 0.999 - PREDICTIONS_EXTREMA_SOFT_EXTREMUM_ALPHA_DEFAULT: Final[float] 
= 12.0 - PREDICTIONS_EXTREMA_KEEP_EXTREMA_FRACTION_DEFAULT: Final[float] = 1.0 - FIT_LIVE_PREDICTIONS_CANDLES_DEFAULT: Final[int] = ( DEFAULT_FIT_LIVE_PREDICTIONS_CANDLES ) @@ -240,25 +218,35 @@ class QuickAdapterRegressorV3(BaseRegressionModel): _DENSITY_AGGREGATIONS[0] # "power_mean" ) + OPTUNA_N_JOBS_DEFAULT: Final[int] = 1 + OPTUNA_N_STARTUP_TRIALS_DEFAULT: Final[int] = 15 + OPTUNA_N_TRIALS_DEFAULT: Final[int] = 50 + OPTUNA_TIMEOUT_DEFAULT: Final[int] = 7200 + OPTUNA_MIN_RESOURCE_DEFAULT: Final[int] = 3 + OPTUNA_LABEL_CANDLES_STEP_DEFAULT: Final[int] = 1 + OPTUNA_SPACE_REDUCTION_DEFAULT: Final[bool] = False + OPTUNA_SPACE_FRACTION_DEFAULT: Final[float] = 0.4 + OPTUNA_SEED_DEFAULT: Final[int] = 1 + @staticmethod @lru_cache(maxsize=None) def _extrema_selection_methods_set() -> set[ExtremaSelectionMethod]: - return set(QuickAdapterRegressorV3._EXTREMA_SELECTION_METHODS) + return set(EXTREMA_SELECTION_METHODS) @staticmethod @lru_cache(maxsize=None) def _custom_threshold_methods_set() -> set[CustomThresholdMethod]: - return set(QuickAdapterRegressorV3._CUSTOM_THRESHOLD_METHODS) + return set(CUSTOM_THRESHOLD_METHODS) @staticmethod @lru_cache(maxsize=None) def _skimage_threshold_methods_set() -> set[SkimageThresholdMethod]: - return set(QuickAdapterRegressorV3._SKIMAGE_THRESHOLD_METHODS) + return set(SKIMAGE_THRESHOLD_METHODS) @staticmethod @lru_cache(maxsize=None) def _threshold_methods_set() -> set[ThresholdMethod]: - return set(QuickAdapterRegressorV3._THRESHOLD_METHODS) + return set(THRESHOLD_METHODS) @staticmethod @lru_cache(maxsize=None) @@ -620,7 +608,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): def _resolve_label_method_config(self, label_method: str) -> dict[str, Any]: QuickAdapterRegressorV3._validate_enum_value( label_method, - self._selection_methods_set(), + QuickAdapterRegressorV3._selection_methods_set(), QuickAdapterRegressorV3._SELECTION_METHODS, ctx="label_method", ) @@ -777,35 +765,26 @@ class 
QuickAdapterRegressorV3(BaseRegressionModel): "n_jobs": min( self.config.get("freqai", {}) .get("optuna_hyperopt", {}) - .get("n_jobs", 1), + .get("n_jobs", QuickAdapterRegressorV3.OPTUNA_N_JOBS_DEFAULT), max(int(self.max_system_threads / 4), 1), ), "sampler": QuickAdapterRegressorV3._OPTUNA_HPO_SAMPLERS[0], # "tpe" "storage": QuickAdapterRegressorV3._OPTUNA_STORAGE_BACKENDS[0], # "file" "continuous": True, "warm_start": True, - "n_startup_trials": 15, - "n_trials": 50, - "timeout": 7200, + "n_startup_trials": QuickAdapterRegressorV3.OPTUNA_N_STARTUP_TRIALS_DEFAULT, + "n_trials": QuickAdapterRegressorV3.OPTUNA_N_TRIALS_DEFAULT, + "timeout": QuickAdapterRegressorV3.OPTUNA_TIMEOUT_DEFAULT, "label_sampler": QuickAdapterRegressorV3._OPTUNA_LABEL_SAMPLERS[ 0 ], # "auto" - "label_candles_step": 1, - "space_reduction": False, - "space_fraction": 0.4, - "min_resource": 3, - "seed": 1, + "label_candles_step": QuickAdapterRegressorV3.OPTUNA_LABEL_CANDLES_STEP_DEFAULT, + "space_reduction": QuickAdapterRegressorV3.OPTUNA_SPACE_REDUCTION_DEFAULT, + "space_fraction": QuickAdapterRegressorV3.OPTUNA_SPACE_FRACTION_DEFAULT, + "min_resource": QuickAdapterRegressorV3.OPTUNA_MIN_RESOURCE_DEFAULT, + "seed": QuickAdapterRegressorV3.OPTUNA_SEED_DEFAULT, } optuna_hyperopt = self.config.get("freqai", {}).get("optuna_hyperopt", {}) - update_config_value( - optuna_hyperopt, - new_key="space_fraction", - old_key="expansion_ratio", - default=optuna_default_config["space_fraction"], - logger=logger, - new_path="freqai.optuna_hyperopt.space_fraction", - old_path="freqai.optuna_hyperopt.expansion_ratio", - ) return { **optuna_default_config, **optuna_hyperopt, @@ -874,99 +853,18 @@ class QuickAdapterRegressorV3(BaseRegressionModel): return label_frequency_candles @property - def predictions_extrema(self) -> dict[str, Any]: - predictions_extrema = self.freqai_info.get("predictions_extrema", {}) - if not isinstance(predictions_extrema, dict): - predictions_extrema = {} - - 
outlier_threshold_quantile = update_config_value( - predictions_extrema, - new_key="outlier_threshold_quantile", - old_key="threshold_outlier", - default=QuickAdapterRegressorV3.PREDICTIONS_EXTREMA_OUTLIER_THRESHOLD_QUANTILE_DEFAULT, - logger=logger, - new_path="freqai.predictions_extrema.outlier_threshold_quantile", - old_path="freqai.predictions_extrema.threshold_outlier", - ) - if ( - not isinstance(outlier_threshold_quantile, (int, float)) - or not np.isfinite(outlier_threshold_quantile) - or not (0 < outlier_threshold_quantile < 1) - ): - outlier_threshold_quantile = QuickAdapterRegressorV3.PREDICTIONS_EXTREMA_OUTLIER_THRESHOLD_QUANTILE_DEFAULT + def label_pipeline(self) -> dict[str, Any]: + label_pipeline_raw = self.freqai_info.get("label_pipeline") + if not isinstance(label_pipeline_raw, dict): + label_pipeline_raw = {} + return get_label_pipeline_config(label_pipeline_raw, logger) - selection_method = str( - predictions_extrema.get( - "selection_method", - QuickAdapterRegressorV3._EXTREMA_SELECTION_METHODS[0], # "rank_extrema" - ) - ) - if ( - selection_method - not in QuickAdapterRegressorV3._extrema_selection_methods_set() - ): - selection_method = QuickAdapterRegressorV3._EXTREMA_SELECTION_METHODS[ - 0 - ] # "rank_extrema" - - threshold_smoothing_method = str( - update_config_value( - predictions_extrema, - new_key="threshold_smoothing_method", - old_key="thresholds_smoothing", - default=QuickAdapterRegressorV3._THRESHOLD_METHODS[0], # "mean" - logger=logger, - new_path="freqai.predictions_extrema.threshold_smoothing_method", - old_path="freqai.predictions_extrema.thresholds_smoothing", - ) - ) - if ( - threshold_smoothing_method - not in QuickAdapterRegressorV3._threshold_methods_set() - ): - threshold_smoothing_method = QuickAdapterRegressorV3._THRESHOLD_METHODS[ - 0 - ] # "mean" - - soft_extremum_alpha = update_config_value( - predictions_extrema, - new_key="soft_extremum_alpha", - old_key="thresholds_alpha", - 
default=QuickAdapterRegressorV3.PREDICTIONS_EXTREMA_SOFT_EXTREMUM_ALPHA_DEFAULT, - logger=logger, - new_path="freqai.predictions_extrema.soft_extremum_alpha", - old_path="freqai.predictions_extrema.thresholds_alpha", - ) - if ( - not isinstance(soft_extremum_alpha, (int, float)) - or not np.isfinite(soft_extremum_alpha) - or soft_extremum_alpha < 0 - ): - soft_extremum_alpha = ( - QuickAdapterRegressorV3.PREDICTIONS_EXTREMA_SOFT_EXTREMUM_ALPHA_DEFAULT - ) - - keep_extrema_fraction = update_config_value( - predictions_extrema, - new_key="keep_extrema_fraction", - old_key="extrema_fraction", - default=QuickAdapterRegressorV3.PREDICTIONS_EXTREMA_KEEP_EXTREMA_FRACTION_DEFAULT, - logger=logger, - new_path="freqai.predictions_extrema.keep_extrema_fraction", - old_path="freqai.predictions_extrema.extrema_fraction", - ) - if not isinstance(keep_extrema_fraction, (int, float)) or not ( - 0 < keep_extrema_fraction <= 1 - ): - keep_extrema_fraction = QuickAdapterRegressorV3.PREDICTIONS_EXTREMA_KEEP_EXTREMA_FRACTION_DEFAULT - - return { - "outlier_threshold_quantile": float(outlier_threshold_quantile), - "selection_method": selection_method, - "threshold_smoothing_method": threshold_smoothing_method, - "soft_extremum_alpha": float(soft_extremum_alpha), - "keep_extrema_fraction": float(keep_extrema_fraction), - } + @property + def label_prediction(self) -> dict[str, Any]: + label_prediction_raw = self.freqai_info.get("label_prediction") + if not isinstance(label_prediction_raw, dict): + label_prediction_raw = {} + return get_label_prediction_config(label_prediction_raw, logger) @property def _label_defaults(self) -> tuple[int, float]: @@ -988,6 +886,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + migrate_config(self.config, logger) self.pairs: list[str] = self.config.get("exchange", {}).get("pair_whitelist") if not self.pairs: raise ValueError( @@ -1013,7 +912,9 @@ class 
QuickAdapterRegressorV3(BaseRegressionModel): self._optuna_hp_params: dict[str, dict[str, Any]] = {} self._optuna_label_params: dict[str, dict[str, Any]] = {} self._optuna_label_candle_pool_full_cache: dict[int, list[int]] = {} - self._optuna_label_shuffle_rng = random.Random(self._optuna_config.get("seed")) + self._optuna_label_shuffle_rng = random.Random( + self._optuna_config.get("seed", QuickAdapterRegressorV3.OPTUNA_SEED_DEFAULT) + ) self.init_optuna_label_candle_pool() self._optuna_label_candle: dict[str, int] = {} self._optuna_label_candles: dict[str, int] = {} @@ -1030,19 +931,13 @@ class QuickAdapterRegressorV3(BaseRegressionModel): self.optuna_load_best_params( pair, QuickAdapterRegressorV3._OPTUNA_NAMESPACES[0] ) # "hp" - if self.optuna_load_best_params( - pair, QuickAdapterRegressorV3._OPTUNA_NAMESPACES[0] - ) - else {} + or {} ) self._optuna_label_params[pair] = ( self.optuna_load_best_params( pair, QuickAdapterRegressorV3._OPTUNA_NAMESPACES[1] ) # "label" - if self.optuna_load_best_params( - pair, QuickAdapterRegressorV3._OPTUNA_NAMESPACES[1] - ) - else { + or { "label_period_candles": self.ft_params.get( "label_period_candles", default_label_period_candles, @@ -1101,7 +996,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): logger.info(f" label_method: {label_method}") label_config = self._resolve_label_method_config(label_method) - self._log_label_method_config(label_config) + QuickAdapterRegressorV3._log_label_method_config(label_config) label_weights = self.ft_params.get("label_weights") if label_weights is not None: @@ -1132,23 +1027,47 @@ class QuickAdapterRegressorV3(BaseRegressionModel): f" label_p_order: {format_number(label_p_order_default)} (default for {distance_metric})" ) - logger.info("Predictions Extrema Configuration:") - predictions_extrema = self.predictions_extrema - logger.info( - f" selection_method: {predictions_extrema.get('selection_method')}" - ) - logger.info( - f" threshold_smoothing_method: 
{predictions_extrema.get('threshold_smoothing_method')}" - ) - logger.info( - f" outlier_threshold_quantile: {format_number(predictions_extrema.get('outlier_threshold_quantile'))}" - ) - logger.info( - f" soft_extremum_alpha: {format_number(predictions_extrema.get('soft_extremum_alpha'))}" - ) - logger.info( - f" keep_extrema_fraction: {format_number(predictions_extrema.get('keep_extrema_fraction'))}" - ) + label_pipeline = self.label_pipeline + label_prediction = self.label_prediction + for label_col in LABEL_COLUMNS: + logger.info(f"Label Configuration [{label_col}]:") + + col_pipeline = get_label_column_config( + label_col, label_pipeline["default"], label_pipeline["columns"] + ) + logger.info(" Pipeline:") + logger.info(f" standardization: {col_pipeline['standardization']}") + logger.info( + f" robust_quantiles: ({format_number(col_pipeline['robust_quantiles'][0])}, {format_number(col_pipeline['robust_quantiles'][1])})" + ) + logger.info( + f" mmad_scaling_factor: {format_number(col_pipeline['mmad_scaling_factor'])}" + ) + logger.info(f" normalization: {col_pipeline['normalization']}") + logger.info( + f" minmax_range: ({format_number(col_pipeline['minmax_range'][0])}, {format_number(col_pipeline['minmax_range'][1])})" + ) + logger.info( + f" sigmoid_scale: {format_number(col_pipeline['sigmoid_scale'])}" + ) + logger.info(f" gamma: {format_number(col_pipeline['gamma'])}") + + col_prediction = get_label_column_config( + label_col, label_prediction["default"], label_prediction["columns"] + ) + logger.info(" Prediction:") + logger.info(f" method: {col_prediction['method']}") + logger.info(f" selection_method: {col_prediction['selection_method']}") + logger.info(f" threshold_method: {col_prediction['threshold_method']}") + logger.info( + f" outlier_quantile: {format_number(col_prediction['outlier_quantile'])}" + ) + logger.info( + f" soft_extremum_alpha: {format_number(col_prediction['soft_extremum_alpha'])}" + ) + logger.info( + f" keep_fraction: 
{format_number(col_prediction['keep_fraction'])}" + ) default_label_period_candles, default_label_natr_multiplier = ( self._label_defaults @@ -1159,7 +1078,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): label_natr_multiplier = float( self.ft_params.get("label_natr_multiplier", default_label_natr_multiplier) ) - logger.info("Label Configuration:") + logger.info("Label Hyperparameters:") logger.info( f" fit_live_predictions_candles: {self.freqai_info.get('fit_live_predictions_candles', QuickAdapterRegressorV3.FIT_LIVE_PREDICTIONS_CANDLES_DEFAULT)}" ) @@ -1210,9 +1129,9 @@ class QuickAdapterRegressorV3(BaseRegressionModel): self, pair: str, namespace: OptunaNamespace ) -> dict[str, Any]: if namespace == QuickAdapterRegressorV3._OPTUNA_NAMESPACES[0]: # "hp" - params = self._optuna_hp_params.get(pair) + params = self._optuna_hp_params.get(pair, {}) elif namespace == QuickAdapterRegressorV3._OPTUNA_NAMESPACES[1]: # "label" - params = self._optuna_label_params.get(pair) + params = self._optuna_label_params.get(pair, {}) else: raise ValueError( f"Invalid namespace value {namespace!r}: " @@ -1235,7 +1154,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): def get_optuna_value(self, pair: str, namespace: OptunaNamespace) -> float: if namespace == QuickAdapterRegressorV3._OPTUNA_NAMESPACES[0]: # "hp" - value = self._optuna_hp_value.get(pair) + value = self._optuna_hp_value.get(pair, np.nan) else: raise ValueError( f"Invalid namespace value {namespace!r}: " @@ -1258,7 +1177,9 @@ class QuickAdapterRegressorV3(BaseRegressionModel): self, pair: str, namespace: OptunaNamespace ) -> list[float | int]: if namespace == QuickAdapterRegressorV3._OPTUNA_NAMESPACES[1]: # "label" - values = self._optuna_label_values.get(pair) + values = self._optuna_label_values.get( + pair, [np.nan] * QuickAdapterRegressorV3._OPTUNA_LABEL_N_OBJECTIVES + ) else: raise ValueError( f"Invalid namespace value {namespace!r}: " @@ -1384,23 +1305,11 @@ class 
QuickAdapterRegressorV3(BaseRegressionModel): return Pipeline(steps) def define_label_pipeline(self, threads: int = -1) -> Pipeline: - extrema_weighting = self.freqai_info.get("extrema_weighting", {}) - if not isinstance(extrema_weighting, dict): - extrema_weighting = {} - extrema_weighting_config = get_extrema_weighting_config( - extrema_weighting, logger - ) - - if extrema_weighting_config["strategy"] == WEIGHT_STRATEGIES[0]: # "none" - return super().define_label_pipeline(threads) - return Pipeline( [ ( - "extrema_weighting", - ExtremaWeightingTransformer( - extrema_weighting=extrema_weighting_config - ), + "label_transformer", + LabelTransformer(label_transformer=self.label_pipeline), ), ] ) @@ -1446,8 +1355,14 @@ class QuickAdapterRegressorV3(BaseRegressionModel): dk.pair, QuickAdapterRegressorV3._OPTUNA_NAMESPACES[0] ), # "hp" model_training_parameters, - self._optuna_config.get("space_reduction"), - self._optuna_config.get("space_fraction"), + self._optuna_config.get( + "space_reduction", + QuickAdapterRegressorV3.OPTUNA_SPACE_REDUCTION_DEFAULT, + ), + self._optuna_config.get( + "space_fraction", + QuickAdapterRegressorV3.OPTUNA_SPACE_FRACTION_DEFAULT, + ), dk.data_path, ), direction=optuna.study.StudyDirection.MINIMIZE, @@ -1506,8 +1421,8 @@ class QuickAdapterRegressorV3(BaseRegressionModel): if pair not in self._optuna_label_incremented_pairs: self._optuna_label_incremented_pairs.append(pair) optuna_label_remaining_candles = self._optuna_label_candle.get( - pair - ) - self._optuna_label_candles.get(pair) + pair, 0 + ) - self._optuna_label_candles.get(pair, 0) if optuna_label_remaining_candles <= 0: try: callback() @@ -1547,7 +1462,10 @@ class QuickAdapterRegressorV3(BaseRegressionModel): pair=pair, timeframe=self.config.get("timeframe") ), fit_live_predictions_candles, - self._optuna_config.get("label_candles_step"), + self._optuna_config.get( + "label_candles_step", + QuickAdapterRegressorV3.OPTUNA_LABEL_CANDLES_STEP_DEFAULT, + ), 
min_label_period_candles=self._min_label_period_candles, max_label_period_candles=self._max_label_period_candles, min_label_natr_multiplier=self._min_label_natr_multiplier, @@ -1575,19 +1493,51 @@ class QuickAdapterRegressorV3(BaseRegressionModel): .reset_index(drop=True) ) - if not warmed_up: - dk.data["extra_returns_per_train"][MINIMA_THRESHOLD_COLUMN] = -2 - dk.data["extra_returns_per_train"][MAXIMA_THRESHOLD_COLUMN] = 2 - else: - min_pred, max_pred = self.min_max_pred( - pred_df, - fit_live_predictions_candles, - self.get_optuna_params( - pair, QuickAdapterRegressorV3._OPTUNA_NAMESPACES[1] - ).get("label_period_candles"), # "label" + di_values = pred_df.get("DI_values") + dk.data["DI_value_mean"] = di_values.mean() + dk.data["DI_value_std"] = di_values.std(ddof=1) + + label_prediction = self.label_prediction + for label_col in dk.label_list: + col_prediction_config = get_label_column_config( + label_col, label_prediction["default"], label_prediction["columns"] ) - dk.data["extra_returns_per_train"][MINIMA_THRESHOLD_COLUMN] = min_pred - dk.data["extra_returns_per_train"][MAXIMA_THRESHOLD_COLUMN] = max_pred + method = col_prediction_config.get("method") + if method == PREDICTION_METHODS[0]: # "none" + continue + elif method == PREDICTION_METHODS[1]: # "thresholding" + if not warmed_up: + min_pred, max_pred = -2.0, 2.0 + f = [0.0, 0.0, 0.0] + cutoff = 2.0 + else: + min_pred, max_pred = self.min_max_pred( + label_col, + col_prediction_config, + pred_df, + fit_live_predictions_candles, + self.get_optuna_params( + pair, QuickAdapterRegressorV3._OPTUNA_NAMESPACES[1] + ).get("label_period_candles"), # "label" + ) + f = sp.stats.weibull_min.fit( + pd.to_numeric(di_values, errors="coerce").dropna(), floc=0 + ) + outlier_quantile = col_prediction_config.get( + "outlier_quantile", + DEFAULTS_LABEL_PREDICTION["outlier_quantile"], + ) + cutoff = sp.stats.weibull_min.ppf(outlier_quantile, *f) + dk.data["extra_returns_per_train"][f"{label_col}_minima_threshold"] = ( + min_pred 
+ ) + dk.data["extra_returns_per_train"][f"{label_col}_maxima_threshold"] = ( + max_pred + ) + dk.data["extra_returns_per_train"]["DI_value_param1"] = f[0] + dk.data["extra_returns_per_train"]["DI_value_param2"] = f[1] + dk.data["extra_returns_per_train"]["DI_value_param3"] = f[2] + dk.data["extra_returns_per_train"]["DI_cutoff"] = cutoff dk.data["labels_mean"], dk.data["labels_std"] = {}, {} for label in dk.label_list + dk.unique_class_list: @@ -1600,27 +1550,6 @@ class QuickAdapterRegressorV3(BaseRegressionModel): f = sp.stats.norm.fit(pred_df_label) dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1] - di_values = pred_df.get("DI_values") - - # Fit DI_value cutoff - if not warmed_up: - f = [0.0, 0.0, 0.0] - cutoff = 2.0 - else: - f = sp.stats.weibull_min.fit( - pd.to_numeric(di_values, errors="coerce").dropna(), floc=0 - ) - cutoff = sp.stats.weibull_min.ppf( - self.predictions_extrema["outlier_threshold_quantile"], *f - ) - - dk.data["DI_value_mean"] = di_values.mean() - dk.data["DI_value_std"] = di_values.std(ddof=1) - dk.data["extra_returns_per_train"]["DI_value_param1"] = f[0] - dk.data["extra_returns_per_train"]["DI_value_param2"] = f[1] - dk.data["extra_returns_per_train"]["DI_value_param3"] = f[2] - dk.data["extra_returns_per_train"]["DI_cutoff"] = cutoff - dk.data["extra_returns_per_train"]["label_period_candles"] = ( self.get_optuna_params( pair, QuickAdapterRegressorV3._OPTUNA_NAMESPACES[1] @@ -1633,7 +1562,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): ).get("label_natr_multiplier") ) - hp_rmse = self.optuna_validate_value( + hp_rmse = QuickAdapterRegressorV3.optuna_validate_value( self.get_optuna_value(pair, QuickAdapterRegressorV3._OPTUNA_NAMESPACES[0]) ) # "hp" dk.data["extra_returns_per_train"]["hp_rmse"] = ( @@ -1646,52 +1575,51 @@ class QuickAdapterRegressorV3(BaseRegressionModel): def min_max_pred( self, + label_col: str, + col_prediction_config: dict[str, Any], pred_df: pd.DataFrame, fit_live_predictions_candles: 
int, - label_period_candles: int, + label_period_candles: Optional[int], ) -> tuple[float, float]: - if not isinstance(label_period_candles, int) or label_period_candles <= 0: - label_period_candles = self.ft_params.get( - "label_period_candles", self._label_defaults[0] + if label_period_candles is None or label_period_candles <= 0: + label_period_candles = int( + self.ft_params.get("label_period_candles", self._label_defaults[0]) ) thresholds_candles = ( max(2, int(fit_live_predictions_candles / label_period_candles)) * label_period_candles ) - pred_extrema = pred_df.get(EXTREMA_COLUMN).iloc[-thresholds_candles:].copy() + pred_label = pred_df.get(label_col) + if pred_label is None: + return -2.0, 2.0 + pred_label = pred_label.iloc[-thresholds_candles:].copy() - extrema_selection = self.predictions_extrema["selection_method"] - threshold_smoothing_method = self.predictions_extrema[ - "threshold_smoothing_method" - ] - keep_extrema_fraction = self.predictions_extrema["keep_extrema_fraction"] + extrema_selection = col_prediction_config["selection_method"] + threshold_method = col_prediction_config["threshold_method"] + keep_fraction = col_prediction_config["keep_fraction"] - if ( - threshold_smoothing_method == QuickAdapterRegressorV3._THRESHOLD_METHODS[7] - ): # "median" + if threshold_method == CUSTOM_THRESHOLD_METHODS[0]: # "median" return QuickAdapterRegressorV3.median_min_max( - pred_extrema, extrema_selection, keep_extrema_fraction + pred_label, extrema_selection, keep_fraction ) - elif ( - threshold_smoothing_method == QuickAdapterRegressorV3._THRESHOLD_METHODS[8] - ): # "soft_extremum" + elif threshold_method == CUSTOM_THRESHOLD_METHODS[1]: # "soft_extremum" return QuickAdapterRegressorV3.soft_extremum_min_max( - pred_extrema, - self.predictions_extrema["soft_extremum_alpha"], + pred_label, + col_prediction_config["soft_extremum_alpha"], extrema_selection, - keep_extrema_fraction, + keep_fraction, ) elif ( - threshold_smoothing_method - in 
QuickAdapterRegressorV3._skimage_threshold_methods_set() + threshold_method in QuickAdapterRegressorV3._skimage_threshold_methods_set() ): return QuickAdapterRegressorV3.skimage_min_max( - pred_extrema, - threshold_smoothing_method, + pred_label, + threshold_method, extrema_selection, - keep_extrema_fraction, + keep_fraction, ) + return -2.0, 2.0 @staticmethod def _get_extrema_indices( @@ -1715,13 +1643,13 @@ class QuickAdapterRegressorV3(BaseRegressionModel): pred_extrema: pd.Series, minima_indices: NDArray[np.intp], maxima_indices: NDArray[np.intp], - keep_extrema_fraction: float = 1.0, + keep_fraction: float = 1.0, ) -> tuple[pd.Series, pd.Series]: n_kept_minima = QuickAdapterRegressorV3._calculate_n_kept_extrema( - minima_indices.size, keep_extrema_fraction + minima_indices.size, keep_fraction ) n_kept_maxima = QuickAdapterRegressorV3._calculate_n_kept_extrema( - maxima_indices.size, keep_extrema_fraction + maxima_indices.size, keep_fraction ) pred_minima = ( @@ -1741,7 +1669,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): logger.debug( f"Extrema filtering | rank_peaks: kept {n_kept_minima}/{minima_indices.size} minima, " - f"{n_kept_maxima}/{maxima_indices.size} maxima with keep_fraction={keep_extrema_fraction}" + f"{n_kept_maxima}/{maxima_indices.size} maxima with keep_fraction={keep_fraction}" ) return pred_minima, pred_maxima @@ -1750,13 +1678,13 @@ class QuickAdapterRegressorV3(BaseRegressionModel): pred_extrema: pd.Series, n_minima: int, n_maxima: int, - keep_extrema_fraction: float = 1.0, + keep_fraction: float = 1.0, ) -> tuple[pd.Series, pd.Series]: n_kept_minima = QuickAdapterRegressorV3._calculate_n_kept_extrema( - n_minima, keep_extrema_fraction + n_minima, keep_fraction ) n_kept_maxima = QuickAdapterRegressorV3._calculate_n_kept_extrema( - n_maxima, keep_extrema_fraction + n_maxima, keep_fraction ) pred_minima = ( @@ -1772,7 +1700,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): logger.debug( f"Extrema filtering | rank_extrema: 
kept {n_kept_minima}/{n_minima} minima, " - f"{n_kept_maxima}/{n_maxima} maxima with keep_fraction={keep_extrema_fraction}" + f"{n_kept_maxima}/{n_maxima} maxima with keep_fraction={keep_fraction}" ) return pred_minima, pred_maxima @@ -1780,7 +1708,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): def get_pred_min_max( pred_extrema: pd.Series, extrema_selection: ExtremaSelectionMethod, - keep_extrema_fraction: float = 1.0, + keep_fraction: float = 1.0, ) -> tuple[pd.Series, pd.Series]: pred_extrema = ( pd.to_numeric(pred_extrema, errors="coerce") @@ -1790,9 +1718,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): if pred_extrema.empty: return pd.Series(dtype=float), pd.Series(dtype=float) - if ( - extrema_selection == QuickAdapterRegressorV3._EXTREMA_SELECTION_METHODS[0] - ): # "rank_extrema" + if extrema_selection == EXTREMA_SELECTION_METHODS[0]: # "rank_extrema" minima_indices, maxima_indices = ( QuickAdapterRegressorV3._get_extrema_indices(pred_extrema) ) @@ -1800,22 +1726,18 @@ class QuickAdapterRegressorV3(BaseRegressionModel): pred_extrema, minima_indices.size, maxima_indices.size, - keep_extrema_fraction, + keep_fraction, ) - elif ( - extrema_selection == QuickAdapterRegressorV3._EXTREMA_SELECTION_METHODS[1] - ): # "rank_peaks" + elif extrema_selection == EXTREMA_SELECTION_METHODS[1]: # "rank_peaks" minima_indices, maxima_indices = ( QuickAdapterRegressorV3._get_extrema_indices(pred_extrema) ) pred_minima, pred_maxima = QuickAdapterRegressorV3._get_ranked_peaks( - pred_extrema, minima_indices, maxima_indices, keep_extrema_fraction + pred_extrema, minima_indices, maxima_indices, keep_fraction ) - elif ( - extrema_selection == QuickAdapterRegressorV3._EXTREMA_SELECTION_METHODS[2] - ): # "partition" + elif extrema_selection == EXTREMA_SELECTION_METHODS[2]: # "partition" eps = np.finfo(float).eps pred_maxima = pred_extrema[pred_extrema > eps] @@ -1823,7 +1745,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): else: raise ValueError( f"Invalid 
extrema_selection value {extrema_selection!r}: " - f"supported values are {', '.join(QuickAdapterRegressorV3._EXTREMA_SELECTION_METHODS)}" + f"supported values are {', '.join(EXTREMA_SELECTION_METHODS)}" ) return pred_minima, pred_maxima @@ -1839,7 +1761,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): and isinstance(pred_minimum, (int, float, np.number)) and np.isfinite(pred_minimum) ): - return pred_minimum + return float(pred_minimum) return -2.0 @staticmethod @@ -1853,7 +1775,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): and isinstance(pred_maximum, (int, float, np.number)) and np.isfinite(pred_maximum) ): - return pred_maximum + return float(pred_maximum) return 2.0 @staticmethod @@ -1861,12 +1783,12 @@ class QuickAdapterRegressorV3(BaseRegressionModel): pred_extrema: pd.Series, alpha: float, extrema_selection: ExtremaSelectionMethod, - keep_extrema_fraction: float = 1.0, + keep_fraction: float = 1.0, ) -> tuple[float, float]: if alpha < 0: raise ValueError(f"Invalid alpha value {alpha!r}: must be >= 0") pred_minima, pred_maxima = QuickAdapterRegressorV3.get_pred_min_max( - pred_extrema, extrema_selection, keep_extrema_fraction + pred_extrema, extrema_selection, keep_fraction ) soft_minimum = soft_extremum(pred_minima, alpha=-alpha) if not np.isfinite(soft_minimum): @@ -1880,10 +1802,10 @@ class QuickAdapterRegressorV3(BaseRegressionModel): def median_min_max( pred_extrema: pd.Series, extrema_selection: ExtremaSelectionMethod, - keep_extrema_fraction: float = 1.0, + keep_fraction: float = 1.0, ) -> tuple[float, float]: pred_minima, pred_maxima = QuickAdapterRegressorV3.get_pred_min_max( - pred_extrema, extrema_selection, keep_extrema_fraction + pred_extrema, extrema_selection, keep_fraction ) if pred_minima.empty: @@ -1907,10 +1829,10 @@ class QuickAdapterRegressorV3(BaseRegressionModel): pred_extrema: pd.Series, method: SkimageThresholdMethod, extrema_selection: ExtremaSelectionMethod, - keep_extrema_fraction: float = 1.0, + keep_fraction: 
float = 1.0, ) -> tuple[float, float]: pred_minima, pred_maxima = QuickAdapterRegressorV3.get_pred_min_max( - pred_extrema, extrema_selection, keep_extrema_fraction + pred_extrema, extrema_selection, keep_fraction ) try: @@ -1918,7 +1840,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): except AttributeError: raise ValueError( f"Invalid skimage threshold method value {method!r}: " - f"supported values are {', '.join(QuickAdapterRegressorV3._SKIMAGE_THRESHOLD_METHODS)}" + f"supported values are {', '.join(SKIMAGE_THRESHOLD_METHODS)}" ) min_func = QuickAdapterRegressorV3.apply_skimage_threshold @@ -2088,6 +2010,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): if ( distance_metric == QuickAdapterRegressorV3._DISTANCE_METRICS[16] ): # "weighted_sum" + assert weights is not None return QuickAdapterRegressorV3._weighted_sum_distance( normalized_matrix, ideal_point, @@ -2243,6 +2166,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): elif ( distance_metric == QuickAdapterRegressorV3._DISTANCE_METRICS[16] ): # "weighted_sum" + assert weights is not None dist_to_ideal = np.abs( QuickAdapterRegressorV3._weighted_sum_distance( normalized_matrix, @@ -2310,13 +2234,15 @@ class QuickAdapterRegressorV3(BaseRegressionModel): ) -> tuple[int, float]: if best_cluster_indices.size == 1: best_trial_index = best_cluster_indices[0] - best_trial_distance = self._calculate_trial_distance_to_ideal( - normalized_matrix, - best_trial_index, - ideal_point_2d, - distance_metric, - weights=weights, - p=p, + best_trial_distance = ( + QuickAdapterRegressorV3._calculate_trial_distance_to_ideal( + normalized_matrix, + best_trial_index, + ideal_point_2d, + distance_metric, + weights=weights, + p=p, + ) ) return best_trial_index, best_trial_distance @@ -2346,13 +2272,15 @@ class QuickAdapterRegressorV3(BaseRegressionModel): min_score_position = np.nanargmin(scores) best_trial_index = best_cluster_indices[min_score_position] - best_trial_distance = 
self._calculate_trial_distance_to_ideal( - normalized_matrix, - best_trial_index, - ideal_point_2d, - distance_metric, - weights=weights, - p=p, + best_trial_distance = ( + QuickAdapterRegressorV3._calculate_trial_distance_to_ideal( + normalized_matrix, + best_trial_index, + ideal_point_2d, + distance_metric, + weights=weights, + p=p, + ) ) return best_trial_index, best_trial_distance @@ -2556,7 +2484,8 @@ class QuickAdapterRegressorV3(BaseRegressionModel): power, ctx="label_density_aggregation_param", ) - return sp.stats.pmean(neighbor_distances, p=power, axis=1) + assert power is not None + return np.asarray(sp.stats.pmean(neighbor_distances, p=power, axis=1)) elif ( aggregation == QuickAdapterRegressorV3._DENSITY_AGGREGATIONS[1] ): # "quantile" @@ -2573,7 +2502,8 @@ class QuickAdapterRegressorV3(BaseRegressionModel): quantile, ctx="label_density_aggregation_param", ) - return np.nanquantile(neighbor_distances, quantile, axis=1) + assert quantile is not None + return np.asarray(np.nanquantile(neighbor_distances, quantile, axis=1)) elif aggregation == QuickAdapterRegressorV3._DENSITY_AGGREGATIONS[2]: # "min" return np.nanmin(neighbor_distances, axis=1) elif aggregation == QuickAdapterRegressorV3._DENSITY_AGGREGATIONS[3]: # "max" @@ -2914,9 +2844,15 @@ class QuickAdapterRegressorV3(BaseRegressionModel): try: study.optimize( objective, - n_trials=self._optuna_config.get("n_trials"), - n_jobs=self._optuna_config.get("n_jobs"), - timeout=self._optuna_config.get("timeout"), + n_trials=self._optuna_config.get( + "n_trials", QuickAdapterRegressorV3.OPTUNA_N_TRIALS_DEFAULT + ), + n_jobs=self._optuna_config.get( + "n_jobs", QuickAdapterRegressorV3.OPTUNA_N_JOBS_DEFAULT + ), + timeout=self._optuna_config.get( + "timeout", QuickAdapterRegressorV3.OPTUNA_TIMEOUT_DEFAULT + ), gc_after_trial=True, ) except Exception as e: @@ -2964,7 +2900,9 @@ class QuickAdapterRegressorV3(BaseRegressionModel): **self.get_optuna_params(pair, namespace), } label_config = 
self._resolve_label_method_config( - self.ft_params.get("label_method", self.LABEL_METHOD_DEFAULT) + self.ft_params.get( + "label_method", QuickAdapterRegressorV3.LABEL_METHOD_DEFAULT + ) ) metric_log_msg = f" ({QuickAdapterRegressorV3._format_label_method_config(label_config)})" logger.info( @@ -3029,7 +2967,9 @@ class QuickAdapterRegressorV3(BaseRegressionModel): ) -> optuna.pruners.BasePruner: if is_single_objective: return optuna.pruners.HyperbandPruner( - min_resource=self._optuna_config.get("min_resource") + min_resource=self._optuna_config.get( + "min_resource", QuickAdapterRegressorV3.OPTUNA_MIN_RESOURCE_DEFAULT + ) ) else: return optuna.pruners.NopPruner() @@ -3043,23 +2983,37 @@ class QuickAdapterRegressorV3(BaseRegressionModel): ) if sampler == QuickAdapterRegressorV3._OPTUNA_SAMPLERS[0]: # "tpe" return optuna.samplers.TPESampler( - n_startup_trials=self._optuna_config.get("n_startup_trials"), + n_startup_trials=self._optuna_config.get( + "n_startup_trials", + QuickAdapterRegressorV3.OPTUNA_N_STARTUP_TRIALS_DEFAULT, + ), multivariate=True, group=True, - constant_liar=self._optuna_config.get("n_jobs") > 1, - seed=self._optuna_config.get("seed"), + constant_liar=self._optuna_config.get( + "n_jobs", QuickAdapterRegressorV3.OPTUNA_N_JOBS_DEFAULT + ) + > 1, + seed=self._optuna_config.get( + "seed", QuickAdapterRegressorV3.OPTUNA_SEED_DEFAULT + ), ) elif sampler == QuickAdapterRegressorV3._OPTUNA_SAMPLERS[1]: # "auto" return optunahub.load_module("samplers/auto_sampler").AutoSampler( - seed=self._optuna_config.get("seed") + seed=self._optuna_config.get( + "seed", QuickAdapterRegressorV3.OPTUNA_SEED_DEFAULT + ) ) elif sampler == QuickAdapterRegressorV3._OPTUNA_SAMPLERS[2]: # "nsgaii" return optuna.samplers.NSGAIISampler( - seed=self._optuna_config.get("seed"), + seed=self._optuna_config.get( + "seed", QuickAdapterRegressorV3.OPTUNA_SEED_DEFAULT + ), ) elif sampler == QuickAdapterRegressorV3._OPTUNA_SAMPLERS[3]: # "nsgaiii" return optuna.samplers.NSGAIIISampler( 
- seed=self._optuna_config.get("seed"), + seed=self._optuna_config.get( + "seed", QuickAdapterRegressorV3.OPTUNA_SEED_DEFAULT + ), ) else: raise ValueError( @@ -3074,12 +3028,16 @@ class QuickAdapterRegressorV3(BaseRegressionModel): if namespace == QuickAdapterRegressorV3._OPTUNA_NAMESPACES[0]: # "hp" return ( QuickAdapterRegressorV3._optuna_hpo_samplers_set(), - self._optuna_config.get("sampler"), + self._optuna_config.get( + "sampler", QuickAdapterRegressorV3._OPTUNA_HPO_SAMPLERS[0] + ), ) elif namespace == QuickAdapterRegressorV3._OPTUNA_NAMESPACES[1]: # "label" return ( QuickAdapterRegressorV3._optuna_label_samplers_set(), - self._optuna_config.get("label_sampler"), + self._optuna_config.get( + "label_sampler", QuickAdapterRegressorV3._OPTUNA_LABEL_SAMPLERS[0] + ), ) else: raise ValueError( @@ -3160,13 +3118,13 @@ class QuickAdapterRegressorV3(BaseRegressionModel): isinstance(best_values, list) and len(best_values) == n_objectives and all( - self.optuna_validate_value(value) is not None + QuickAdapterRegressorV3.optuna_validate_value(value) is not None for value in best_values ) ) else: best_value = self.get_optuna_value(pair, namespace) - return self.optuna_validate_value(best_value) is not None + return QuickAdapterRegressorV3.optuna_validate_value(best_value) is not None def optuna_enqueue_previous_best_params( self, pair: str, namespace: OptunaNamespace, study: Optional[optuna.study.Study] diff --git a/quickadapter/user_data/strategies/ExtremaWeightingTransformer.py b/quickadapter/user_data/strategies/ExtremaWeightingTransformer.py deleted file mode 100644 index ce4523c..0000000 --- a/quickadapter/user_data/strategies/ExtremaWeightingTransformer.py +++ /dev/null @@ -1,433 +0,0 @@ -import logging -from typing import Any, Final, Literal - -import numpy as np -import scipy as sp -from datasieve.transforms.base_transform import ( - ArrayOrNone, - BaseTransform, - ListOrNone, -) -from numpy.typing import ArrayLike, NDArray -from sklearn.preprocessing import ( - 
MaxAbsScaler, - MinMaxScaler, - PowerTransformer, - RobustScaler, - StandardScaler, -) - -logger = logging.getLogger(__name__) - -WeightStrategy = Literal[ - "none", - "amplitude", - "amplitude_threshold_ratio", - "volume_rate", - "speed", - "efficiency_ratio", - "volume_weighted_efficiency_ratio", - "combined", -] - -CombinedMetric = Literal[ - "amplitude", - "amplitude_threshold_ratio", - "volume_rate", - "speed", - "efficiency_ratio", - "volume_weighted_efficiency_ratio", -] -COMBINED_METRICS: Final[tuple[CombinedMetric, ...]] = ( - "amplitude", - "amplitude_threshold_ratio", - "volume_rate", - "speed", - "efficiency_ratio", - "volume_weighted_efficiency_ratio", -) - -CombinedAggregation = Literal[ - "arithmetic_mean", - "geometric_mean", - "harmonic_mean", - "quadratic_mean", - "weighted_median", - "softmax", -] -COMBINED_AGGREGATIONS: Final[tuple[CombinedAggregation, ...]] = ( - "arithmetic_mean", - "geometric_mean", - "harmonic_mean", - "quadratic_mean", - "weighted_median", - "softmax", -) - -WEIGHT_STRATEGIES: Final[tuple[WeightStrategy, ...]] = ( - "none", - "amplitude", - "amplitude_threshold_ratio", - "volume_rate", - "speed", - "efficiency_ratio", - "volume_weighted_efficiency_ratio", - "combined", -) - -StandardizationType = Literal["none", "zscore", "robust", "mmad", "power_yj"] -STANDARDIZATION_TYPES: Final[tuple[StandardizationType, ...]] = ( - "none", # 0 - w - "zscore", # 1 - (w - μ) / σ - "robust", # 2 - (w - median) / IQR - "mmad", # 3 - (w - median) / (MAD · k) - "power_yj", # 4 - YJ(w) (standardized) -) - -NormalizationType = Literal["maxabs", "minmax", "sigmoid", "none"] -NORMALIZATION_TYPES: Final[tuple[NormalizationType, ...]] = ( - "maxabs", # 0 - w / max(|w|) - "minmax", # 1 - low + (w - min) / (max - min) · (high - low) - "sigmoid", # 2 - 2·σ(scale · w) - 1 - "none", # 3 - w -) - -DEFAULTS_EXTREMA_WEIGHTING: Final[dict[str, Any]] = { - "strategy": WEIGHT_STRATEGIES[0], # "none" - "metric_coefficients": {}, - "aggregation": 
COMBINED_AGGREGATIONS[0], # "arithmetic_mean" - "softmax_temperature": 1.0, - # Phase 1: Standardization - "standardization": STANDARDIZATION_TYPES[0], # "none" - "robust_quantiles": (0.25, 0.75), - "mmad_scaling_factor": 1.4826, - # Phase 2: Normalization - "normalization": NORMALIZATION_TYPES[0], # "maxabs" - "minmax_range": (-1.0, 1.0), - "sigmoid_scale": 1.0, - # Phase 3: Post-processing - "gamma": 1.0, -} - - -class ExtremaWeightingTransformer(BaseTransform): - _STANDARDIZATION_SCALERS: dict[str, str] = { - "zscore": "_standard_scaler", - "robust": "_robust_scaler", - "power_yj": "_power_transformer", - } - _NORMALIZATION_SCALERS: dict[str, str] = { - "maxabs": "_maxabs_scaler", - "minmax": "_minmax_scaler", - } - - def __init__(self, *, extrema_weighting: dict[str, Any]) -> None: - super().__init__(name="ExtremaWeightingTransformer") - self.extrema_weighting = {**DEFAULTS_EXTREMA_WEIGHTING, **extrema_weighting} - self._fitted = False - # Phase 1: Standardization - self._standard_scaler: StandardScaler | None = None - self._robust_scaler: RobustScaler | None = None - self._power_transformer: PowerTransformer | None = None - self._median = 0.0 - self._mad = 1.0 - # Phase 2: Normalization - self._minmax_scaler: MinMaxScaler | None = None - self._maxabs_scaler: MaxAbsScaler | None = None - - def _apply_scaler( - self, - values: NDArray[np.floating], - mask: NDArray[np.bool_], - scaler: Any, - inverse: bool = False, - ) -> NDArray[np.floating]: - if values[mask].size == 0: - return values - out = values.copy() - method = scaler.inverse_transform if inverse else scaler.transform - out[mask] = method(values[mask].reshape(-1, 1)).flatten() - return out - - def _apply_mmad( - self, - values: NDArray[np.floating], - mask: NDArray[np.bool_], - inverse: bool = False, - ) -> NDArray[np.floating]: - if values[mask].size == 0: - return values - out = values.copy() - k = self.extrema_weighting["mmad_scaling_factor"] - if inverse: - out[mask] = values[mask] * (self._mad * k) 
+ self._median - else: - out[mask] = (values[mask] - self._median) / (self._mad * k) - return out - - def _apply_sigmoid( - self, - values: NDArray[np.floating], - mask: NDArray[np.bool_], - inverse: bool = False, - ) -> NDArray[np.floating]: - if values[mask].size == 0: - return values - scale = self.extrema_weighting["sigmoid_scale"] - if not np.isfinite(scale) or np.isclose(scale, 0.0): - return values - out = values.copy() - if inverse: - out[mask] = sp.special.logit((values[mask] + 1.0) / 2.0) / scale - else: - out[mask] = 2.0 * sp.special.expit(scale * values[mask]) - 1.0 - return out - - def _standardize( - self, - values: NDArray[np.floating], - mask: NDArray[np.bool_], - ) -> NDArray[np.floating]: - method = self.extrema_weighting["standardization"] - if method == STANDARDIZATION_TYPES[0]: # "none" - return values - if method == STANDARDIZATION_TYPES[3]: # "mmad" - return self._apply_mmad(values, mask, inverse=False) - - scaler_attr = self._STANDARDIZATION_SCALERS.get(method) - if scaler_attr is None: - raise ValueError( - f"Invalid standardization value {method!r}: " - f"supported values are {', '.join(STANDARDIZATION_TYPES)}" - ) - scaler = getattr(self, scaler_attr, None) - if scaler is None: - raise RuntimeError(f"{scaler_attr[1:]} not fitted") - return self._apply_scaler(values, mask, scaler, inverse=False) - - def _normalize( - self, - values: NDArray[np.floating], - mask: NDArray[np.bool_], - ) -> NDArray[np.floating]: - method = self.extrema_weighting["normalization"] - if method == NORMALIZATION_TYPES[2]: # "sigmoid" - return self._apply_sigmoid(values, mask, inverse=False) - if method == NORMALIZATION_TYPES[3]: # "none" - return values - - scaler_attr = self._NORMALIZATION_SCALERS.get(method) - if scaler_attr is None: - raise ValueError( - f"Invalid normalization value {method!r}: " - f"supported values are {', '.join(NORMALIZATION_TYPES)}" - ) - scaler = getattr(self, scaler_attr, None) - if scaler is None: - raise 
RuntimeError(f"{scaler_attr[1:]} not fitted") - return self._apply_scaler(values, mask, scaler, inverse=False) - - def _apply_gamma( - self, - values: NDArray[np.floating], - mask: NDArray[np.bool_], - ) -> NDArray[np.floating]: - gamma = self.extrema_weighting["gamma"] - if np.isclose(gamma, 1.0) or not np.isfinite(gamma) or gamma <= 0: - return values - out = values.copy() - out[mask] = np.sign(values[mask]) * np.power(np.abs(values[mask]), gamma) - return out - - def _inverse_standardize( - self, - values: NDArray[np.floating], - mask: NDArray[np.bool_], - ) -> NDArray[np.floating]: - method = self.extrema_weighting["standardization"] - if method == STANDARDIZATION_TYPES[0]: # "none" - return values - if method == STANDARDIZATION_TYPES[3]: # "mmad" - return self._apply_mmad(values, mask, inverse=True) - - scaler_attr = self._STANDARDIZATION_SCALERS.get(method) - if scaler_attr is None: - raise ValueError( - f"Invalid standardization value {method!r}: " - f"supported values are {', '.join(STANDARDIZATION_TYPES)}" - ) - scaler = getattr(self, scaler_attr, None) - if scaler is None: - raise RuntimeError(f"{scaler_attr[1:]} not fitted") - return self._apply_scaler(values, mask, scaler, inverse=True) - - def _inverse_normalize( - self, - values: NDArray[np.floating], - mask: NDArray[np.bool_], - ) -> NDArray[np.floating]: - method = self.extrema_weighting["normalization"] - if method == NORMALIZATION_TYPES[2]: # "sigmoid" - return self._apply_sigmoid(values, mask, inverse=True) - if method == NORMALIZATION_TYPES[3]: # "none" - return values - - scaler_attr = self._NORMALIZATION_SCALERS.get(method) - if scaler_attr is None: - raise ValueError( - f"Invalid normalization value {method!r}: " - f"supported values are {', '.join(NORMALIZATION_TYPES)}" - ) - scaler = getattr(self, scaler_attr, None) - if scaler is None: - raise RuntimeError(f"{scaler_attr[1:]} not fitted") - return self._apply_scaler(values, mask, scaler, inverse=True) - - def _inverse_gamma( - self, - 
values: NDArray[np.floating], - mask: NDArray[np.bool_], - ) -> NDArray[np.floating]: - gamma = self.extrema_weighting["gamma"] - if np.isclose(gamma, 1.0) or not np.isfinite(gamma) or gamma <= 0: - return values - out = values.copy() - out[mask] = np.sign(values[mask]) * np.power(np.abs(values[mask]), 1.0 / gamma) - return out - - def _fit_standardization(self, values: NDArray[np.floating]) -> None: - method = self.extrema_weighting["standardization"] - if method == STANDARDIZATION_TYPES[0]: # "none" - return - if method == STANDARDIZATION_TYPES[1]: # "zscore" - self._standard_scaler = StandardScaler() - self._standard_scaler.fit(values.reshape(-1, 1)) - return - if method == STANDARDIZATION_TYPES[2]: # "robust" - q = self.extrema_weighting["robust_quantiles"] - self._robust_scaler = RobustScaler(quantile_range=(q[0] * 100, q[1] * 100)) - self._robust_scaler.fit(values.reshape(-1, 1)) - return - if method == STANDARDIZATION_TYPES[3]: # "mmad" - self._median = float(np.median(values)) - mad = np.median(np.abs(values - self._median)) - self._mad = ( - float(mad) if np.isfinite(mad) and not np.isclose(mad, 0.0) else 1.0 - ) - return - if method == STANDARDIZATION_TYPES[4]: # "power_yj" - self._power_transformer = PowerTransformer( - method="yeo-johnson", standardize=True - ) - self._power_transformer.fit(values.reshape(-1, 1)) - return - - raise ValueError( - f"Invalid standardization value {method!r}: supported values are {', '.join(STANDARDIZATION_TYPES)}" - ) - - def _fit_normalization(self, values: NDArray[np.floating]) -> None: - method = self.extrema_weighting["normalization"] - if method == NORMALIZATION_TYPES[0]: # "maxabs" - self._maxabs_scaler = MaxAbsScaler() - self._maxabs_scaler.fit(values.reshape(-1, 1)) - return - if method == NORMALIZATION_TYPES[1]: # "minmax" - self._minmax_scaler = MinMaxScaler( - feature_range=self.extrema_weighting["minmax_range"] - ) - self._minmax_scaler.fit(values.reshape(-1, 1)) - return - if method == NORMALIZATION_TYPES[2]: 
# "sigmoid" - return - if method == NORMALIZATION_TYPES[3]: # "none" - return - - raise ValueError( - f"Invalid normalization value {method!r}: supported values are {', '.join(NORMALIZATION_TYPES)}" - ) - - def fit( - self, - X: ArrayLike, - y: ArrayOrNone = None, - sample_weight: ArrayOrNone = None, - feature_list: ListOrNone = None, - **kwargs, - ) -> tuple[ArrayLike, ArrayOrNone, ArrayOrNone, ListOrNone]: - values = np.asarray(X, dtype=float) - finite_values = values[np.isfinite(values)] - - if finite_values.size == 0: - logger.warning( - "Invalid fit data: no finite values found in X, " - "using default fallback [0.0, 1.0]" - ) - fit_values = np.array([0.0, 1.0]) - else: - fit_values = finite_values - - self._fit_standardization(fit_values) - - finite_mask = np.ones(len(fit_values), dtype=bool) - standardized_fit_values = self._standardize(fit_values, finite_mask) - self._fit_normalization(standardized_fit_values) - - self._fitted = True - return X, y, sample_weight, feature_list - - def transform( - self, - X: ArrayLike, - y: ArrayOrNone = None, - sample_weight: ArrayOrNone = None, - feature_list: ListOrNone = None, - outlier_check: bool = False, - **kwargs, - ) -> tuple[ArrayLike, ArrayOrNone, ArrayOrNone, ListOrNone]: - if not self._fitted: - raise RuntimeError( - "ExtremaWeightingTransformer must be fitted before transform" - ) - - arr = np.asarray(X, dtype=float) - mask = np.isfinite(arr) - - standardized = self._standardize(arr, mask) - normalized = self._normalize(standardized, mask) - gammaized = self._apply_gamma(normalized, mask) - - return gammaized, y, sample_weight, feature_list - - def fit_transform( - self, - X: ArrayLike, - y: ArrayOrNone = None, - sample_weight: ArrayOrNone = None, - feature_list: ListOrNone = None, - **kwargs, - ) -> tuple[ArrayLike, ArrayOrNone, ArrayOrNone, ListOrNone]: - self.fit(X, y, sample_weight, feature_list, **kwargs) - return self.transform(X, y, sample_weight, feature_list, **kwargs) - - def inverse_transform( - 
self, - X: ArrayLike, - y: ArrayOrNone = None, - sample_weight: ArrayOrNone = None, - feature_list: ListOrNone = None, - **kwargs, - ) -> tuple[ArrayLike, ArrayOrNone, ArrayOrNone, ListOrNone]: - if not self._fitted: - raise RuntimeError( - "ExtremaWeightingTransformer must be fitted before inverse_transform" - ) - - arr = np.asarray(X, dtype=float) - mask = np.isfinite(arr) - - degammaized = self._inverse_gamma(arr, mask) - denormalized = self._inverse_normalize(degammaized, mask) - destandardized = self._inverse_standardize(denormalized, mask) - - return destandardized, y, sample_weight, feature_list diff --git a/quickadapter/user_data/strategies/LabelTransformer.py b/quickadapter/user_data/strategies/LabelTransformer.py new file mode 100644 index 0000000..fc02ee5 --- /dev/null +++ b/quickadapter/user_data/strategies/LabelTransformer.py @@ -0,0 +1,610 @@ +import copy +import fnmatch +import logging +from dataclasses import dataclass, field +from typing import Any, Final, Literal + +import numpy as np +import scipy as sp +from datasieve.transforms.base_transform import ( + ArrayOrNone, + BaseTransform, + ListOrNone, +) +from numpy.typing import ArrayLike, NDArray +from sklearn.preprocessing import ( + MaxAbsScaler, + MinMaxScaler, + PowerTransformer, + RobustScaler, + StandardScaler, +) + +logger = logging.getLogger(__name__) + +CombinedMetric = Literal[ + "amplitude", + "amplitude_threshold_ratio", + "volume_rate", + "speed", + "efficiency_ratio", + "volume_weighted_efficiency_ratio", +] +COMBINED_METRICS: Final[tuple[CombinedMetric, ...]] = ( + "amplitude", + "amplitude_threshold_ratio", + "volume_rate", + "speed", + "efficiency_ratio", + "volume_weighted_efficiency_ratio", +) + +CombinedAggregation = Literal[ + "arithmetic_mean", + "geometric_mean", + "harmonic_mean", + "quadratic_mean", + "weighted_median", + "softmax", +] +COMBINED_AGGREGATIONS: Final[tuple[CombinedAggregation, ...]] = ( + "arithmetic_mean", + "geometric_mean", + "harmonic_mean", + 
"quadratic_mean", + "weighted_median", + "softmax", +) + +WeightStrategy = Literal["none", "combined"] | CombinedMetric +WEIGHT_STRATEGIES: Final[tuple[WeightStrategy, ...]] = ( + "none", + *COMBINED_METRICS, + "combined", +) + +StandardizationType = Literal["none", "zscore", "robust", "mmad", "power_yj"] +STANDARDIZATION_TYPES: Final[tuple[StandardizationType, ...]] = ( + "none", # 0 - w + "zscore", # 1 - (w - μ) / σ + "robust", # 2 - (w - median) / IQR + "mmad", # 3 - (w - median) / (MAD · k) + "power_yj", # 4 - YJ(w) (standardized) +) + +NormalizationType = Literal["maxabs", "minmax", "sigmoid", "none"] +NORMALIZATION_TYPES: Final[tuple[NormalizationType, ...]] = ( + "maxabs", # 0 - w / max(|w|) + "minmax", # 1 - low + (w - min) / (max - min) · (high - low) + "sigmoid", # 2 - 2·σ(scale · w) - 1 + "none", # 3 - w +) + +DEFAULTS_LABEL_WEIGHTING: Final[dict[str, Any]] = { + "strategy": WEIGHT_STRATEGIES[0], # "none" + "metric_coefficients": {}, + "aggregation": COMBINED_AGGREGATIONS[0], # "arithmetic_mean" + "softmax_temperature": 1.0, +} + +DEFAULTS_LABEL_PIPELINE: Final[dict[str, Any]] = { + "standardization": STANDARDIZATION_TYPES[0], # "none" + "robust_quantiles": (0.25, 0.75), + "mmad_scaling_factor": 1.4826, + "normalization": NORMALIZATION_TYPES[0], # "maxabs" + "minmax_range": (-1.0, 1.0), + "sigmoid_scale": 1.0, + "gamma": 1.0, +} + + +SmoothingMethod = Literal[ + "none", "gaussian", "kaiser", "triang", "smm", "sma", "savgol", "gaussian_filter1d" +] +SMOOTHING_METHODS: Final[tuple[SmoothingMethod, ...]] = ( + "none", + "gaussian", + "kaiser", + "triang", + "smm", + "sma", + "savgol", + "gaussian_filter1d", +) + +SmoothingMode = Literal["mirror", "constant", "nearest", "wrap", "interp"] +SMOOTHING_MODES: Final[tuple[SmoothingMode, ...]] = ( + "mirror", + "constant", + "nearest", + "wrap", + "interp", +) + +DEFAULTS_LABEL_SMOOTHING: Final[dict[str, Any]] = { + "method": SMOOTHING_METHODS[1], # "gaussian" + "window_candles": 5, + "beta": 8.0, + "polyorder": 
3, + "mode": SMOOTHING_MODES[0], # "mirror" + "sigma": 1.0, +} + +PredictionMethod = Literal["none", "thresholding"] +PREDICTION_METHODS: Final[tuple[PredictionMethod, ...]] = ( + "none", + "thresholding", +) + +ExtremaSelectionMethod = Literal["rank_extrema", "rank_peaks", "partition"] +EXTREMA_SELECTION_METHODS: Final[tuple[ExtremaSelectionMethod, ...]] = ( + "rank_extrema", + "rank_peaks", + "partition", +) + +SkimageThresholdMethod = Literal[ + "mean", "isodata", "li", "minimum", "otsu", "triangle", "yen" +] +SKIMAGE_THRESHOLD_METHODS: Final[tuple[SkimageThresholdMethod, ...]] = ( + "mean", + "isodata", + "li", + "minimum", + "otsu", + "triangle", + "yen", +) +CustomThresholdMethod = Literal["median", "soft_extremum"] +CUSTOM_THRESHOLD_METHODS: Final[tuple[CustomThresholdMethod, ...]] = ( + "median", + "soft_extremum", +) +ThresholdMethod = SkimageThresholdMethod | CustomThresholdMethod +THRESHOLD_METHODS: Final[tuple[ThresholdMethod, ...]] = ( + *SKIMAGE_THRESHOLD_METHODS, + *CUSTOM_THRESHOLD_METHODS, +) + +DEFAULTS_LABEL_PREDICTION: Final[dict[str, Any]] = { + "method": PREDICTION_METHODS[1], # "thresholding" + "selection_method": EXTREMA_SELECTION_METHODS[0], # "rank_extrema" + "threshold_method": SKIMAGE_THRESHOLD_METHODS[0], # "mean" + "outlier_quantile": 0.999, + "soft_extremum_alpha": 12.0, + "keep_fraction": 0.5, +} + + +def get_label_column_config( + column_name: str, + default_config: dict[str, Any], + columns_config: dict[str, dict[str, Any]], +) -> dict[str, Any]: + result = copy.deepcopy(default_config) + + matches: list[tuple[float, str, dict[str, Any]]] = [] + for pattern, col_config in columns_config.items(): + if fnmatch.fnmatch(column_name, pattern): + if "*" not in pattern and "?" 
not in pattern and "[" not in pattern: + specificity = float("inf") + else: + specificity = float(sum(1 for c in pattern if c not in "*?[]")) + matches.append((specificity, pattern, col_config)) + + if columns_config and not matches: + logger.warning( + f"Column '{column_name}' did not match any pattern in columns config. " + f"Available patterns: {list(columns_config.keys())}" + ) + + matches.sort(key=lambda x: x[0]) + + for _, _, col_config in matches: + result.update(col_config) + + return result + + +@dataclass +class _ColumnState: + config: dict[str, Any] + standard_scaler: StandardScaler | None = None + robust_scaler: RobustScaler | None = None + power_transformer: PowerTransformer | None = None + median: float = 0.0 + mad: float = 1.0 + minmax_scaler: MinMaxScaler | None = None + maxabs_scaler: MaxAbsScaler | None = None + + +@dataclass +class _LabelTransformerConfig: + default: dict[str, Any] = field( + default_factory=lambda: DEFAULTS_LABEL_PIPELINE.copy() + ) + columns: dict[str, dict[str, Any]] = field(default_factory=dict) + + @classmethod + def from_dict(cls, config: dict[str, Any]) -> "_LabelTransformerConfig": + if "default" in config or "columns" in config: + default = {**DEFAULTS_LABEL_PIPELINE, **config.get("default", {})} + columns = config.get("columns", {}) + return cls(default=default, columns=columns) + else: + pipeline_keys = set(DEFAULTS_LABEL_PIPELINE.keys()) + filtered_config = {k: v for k, v in config.items() if k in pipeline_keys} + default = {**DEFAULTS_LABEL_PIPELINE, **filtered_config} + return cls(default=default, columns={}) + + def get_column_config(self, column_name: str) -> dict[str, Any]: + return get_label_column_config(column_name, self.default, self.columns) + + +class LabelTransformer(BaseTransform): + _STANDARDIZATION_SCALERS: dict[str, str] = { + STANDARDIZATION_TYPES[1]: "standard_scaler", # zscore + STANDARDIZATION_TYPES[2]: "robust_scaler", # robust + STANDARDIZATION_TYPES[4]: "power_transformer", # power_yj + } + 
_NORMALIZATION_SCALERS: dict[str, str] = { + NORMALIZATION_TYPES[0]: "maxabs_scaler", # maxabs + NORMALIZATION_TYPES[1]: "minmax_scaler", # minmax + } + + def __init__(self, *, label_transformer: dict[str, Any]) -> None: + super().__init__(name="LabelTransformer") + self._config = _LabelTransformerConfig.from_dict(label_transformer) + self._column_states: dict[str, _ColumnState] = {} + self._fitted_columns: list[str] = [] + self._fitted = False + + @staticmethod + def _apply_scaler( + values: NDArray[np.floating], + mask: NDArray[np.bool_], + scaler: Any, + inverse: bool = False, + ) -> NDArray[np.floating]: + if values[mask].size == 0: + return values + out = values.copy() + method = scaler.inverse_transform if inverse else scaler.transform + out[mask] = method(values[mask].reshape(-1, 1)).flatten() + return out + + @staticmethod + def _apply_mmad( + values: NDArray[np.floating], + mask: NDArray[np.bool_], + median: float, + mad: float, + k: float, + inverse: bool = False, + ) -> NDArray[np.floating]: + if values[mask].size == 0: + return values + out = values.copy() + if inverse: + out[mask] = values[mask] * (mad * k) + median + else: + out[mask] = (values[mask] - median) / (mad * k) + return out + + @staticmethod + def _apply_sigmoid( + values: NDArray[np.floating], + mask: NDArray[np.bool_], + scale: float, + inverse: bool = False, + ) -> NDArray[np.floating]: + if values[mask].size == 0: + return values + if not np.isfinite(scale) or np.isclose(scale, 0.0): + return values + out = values.copy() + if inverse: + out[mask] = sp.special.logit((values[mask] + 1.0) / 2.0) / scale + else: + out[mask] = 2.0 * sp.special.expit(scale * values[mask]) - 1.0 + return out + + @staticmethod + def _apply_gamma( + values: NDArray[np.floating], + mask: NDArray[np.bool_], + gamma: float, + inverse: bool = False, + ) -> NDArray[np.floating]: + if np.isclose(gamma, 1.0) or not np.isfinite(gamma) or gamma <= 0: + return values + out = values.copy() + exp = 1.0 / gamma if inverse 
else gamma + out[mask] = np.sign(values[mask]) * np.power(np.abs(values[mask]), exp) + return out + + def _standardize( + self, + values: NDArray[np.floating], + mask: NDArray[np.bool_], + state: _ColumnState, + inverse: bool = False, + ) -> NDArray[np.floating]: + method = state.config["standardization"] + if method == STANDARDIZATION_TYPES[0]: # none + return values + if method == STANDARDIZATION_TYPES[3]: # mmad + return self._apply_mmad( + values, + mask, + state.median, + state.mad, + state.config["mmad_scaling_factor"], + inverse=inverse, + ) + + scaler_attr = self._STANDARDIZATION_SCALERS.get(method) + if scaler_attr is None: + raise ValueError( + f"Invalid standardization value {method!r}: " + f"supported values are {', '.join(STANDARDIZATION_TYPES)}" + ) + scaler = getattr(state, scaler_attr, None) + if scaler is None: + raise RuntimeError(f"{scaler_attr} not fitted") + return self._apply_scaler(values, mask, scaler, inverse=inverse) + + def _normalize( + self, + values: NDArray[np.floating], + mask: NDArray[np.bool_], + state: _ColumnState, + inverse: bool = False, + ) -> NDArray[np.floating]: + method = state.config["normalization"] + if method == NORMALIZATION_TYPES[2]: # sigmoid + return self._apply_sigmoid( + values, mask, state.config["sigmoid_scale"], inverse=inverse + ) + if method == NORMALIZATION_TYPES[3]: # none + return values + + scaler_attr = self._NORMALIZATION_SCALERS.get(method) + if scaler_attr is None: + raise ValueError( + f"Invalid normalization value {method!r}: " + f"supported values are {', '.join(NORMALIZATION_TYPES)}" + ) + scaler = getattr(state, scaler_attr, None) + if scaler is None: + raise RuntimeError(f"{scaler_attr} not fitted") + return self._apply_scaler(values, mask, scaler, inverse=inverse) + + def _fit_standardization( + self, values: NDArray[np.floating], state: _ColumnState + ) -> None: + method = state.config["standardization"] + if method == STANDARDIZATION_TYPES[0]: # none + return + if method == 
STANDARDIZATION_TYPES[1]: # zscore + state.standard_scaler = StandardScaler() + state.standard_scaler.fit(values.reshape(-1, 1)) + return + if method == STANDARDIZATION_TYPES[2]: # robust + q = state.config["robust_quantiles"] + state.robust_scaler = RobustScaler(quantile_range=(q[0] * 100, q[1] * 100)) + state.robust_scaler.fit(values.reshape(-1, 1)) + return + if method == STANDARDIZATION_TYPES[3]: # mmad + state.median = float(np.median(values)) + mad = np.median(np.abs(values - state.median)) + state.mad = ( + float(mad) if np.isfinite(mad) and not np.isclose(mad, 0.0) else 1.0 + ) + return + if method == STANDARDIZATION_TYPES[4]: # power_yj + state.power_transformer = PowerTransformer( + method="yeo-johnson", standardize=True + ) + state.power_transformer.fit(values.reshape(-1, 1)) + return + + raise ValueError( + f"Invalid standardization value {method!r}: " + f"supported values are {', '.join(STANDARDIZATION_TYPES)}" + ) + + def _fit_normalization( + self, values: NDArray[np.floating], state: _ColumnState + ) -> None: + method = state.config["normalization"] + if method == NORMALIZATION_TYPES[0]: # maxabs + state.maxabs_scaler = MaxAbsScaler() + state.maxabs_scaler.fit(values.reshape(-1, 1)) + return + if method == NORMALIZATION_TYPES[1]: # minmax + state.minmax_scaler = MinMaxScaler( + feature_range=state.config["minmax_range"] + ) + state.minmax_scaler.fit(values.reshape(-1, 1)) + return + if method in (NORMALIZATION_TYPES[2], NORMALIZATION_TYPES[3]): # sigmoid, none + return + + raise ValueError( + f"Invalid normalization value {method!r}: " + f"supported values are {', '.join(NORMALIZATION_TYPES)}" + ) + + def _fit_column( + self, column_name: str, values: NDArray[np.floating] + ) -> _ColumnState: + config = self._config.get_column_config(column_name) + state = _ColumnState(config=config) + + finite_values = values[np.isfinite(values)] + if finite_values.size == 0: + logger.warning( + f"Column {column_name!r}: no finite values found, using fallback [0.0, 
1.0]" + ) + fit_values = np.array([0.0, 1.0]) + else: + fit_values = finite_values + + self._fit_standardization(fit_values, state) + + finite_mask = np.ones(len(fit_values), dtype=bool) + standardized = self._standardize(fit_values, finite_mask, state, inverse=False) + + self._fit_normalization(standardized, state) + + return state + + def _transform_column( + self, + values: NDArray[np.floating], + state: _ColumnState, + inverse: bool = False, + ) -> NDArray[np.floating]: + mask = np.isfinite(values) + + if inverse: + degamma = self._apply_gamma( + values, mask, state.config["gamma"], inverse=True + ) + denorm = self._normalize(degamma, mask, state, inverse=True) + return self._standardize(denorm, mask, state, inverse=True) + else: + standardized = self._standardize(values, mask, state, inverse=False) + normalized = self._normalize(standardized, mask, state, inverse=False) + return self._apply_gamma( + normalized, mask, state.config["gamma"], inverse=False + ) + + def fit( + self, + X: ArrayLike, + y: ArrayOrNone = None, + sample_weight: ArrayOrNone = None, + feature_list: ListOrNone = None, + **kwargs, + ) -> tuple[ArrayLike, ArrayOrNone, ArrayOrNone, ListOrNone]: + arr = np.asarray(X, dtype=float) + + if arr.ndim == 1: + arr = arr.reshape(-1, 1) + + n_columns = arr.shape[1] + + if feature_list is not None and len(feature_list) == n_columns: + column_names = list(feature_list) + else: + column_names = [f"column_{i}" for i in range(n_columns)] + + self._column_states = {} + for i, col_name in enumerate(column_names): + col_values = arr[:, i] + self._column_states[col_name] = self._fit_column(col_name, col_values) + + self._fitted_columns = column_names + self._fitted = True + + return X, y, sample_weight, feature_list + + def transform( + self, + X: ArrayLike, + y: ArrayOrNone = None, + sample_weight: ArrayOrNone = None, + feature_list: ListOrNone = None, + outlier_check: bool = False, + **kwargs, + ) -> tuple[ArrayLike, ArrayOrNone, ArrayOrNone, ListOrNone]: + 
if not self._fitted: + raise RuntimeError("LabelTransformer must be fitted before transform") + + arr = np.asarray(X, dtype=float) + was_1d = arr.ndim == 1 + if was_1d: + arr = arr.reshape(-1, 1) + + n_columns = arr.shape[1] + + if feature_list is not None and len(feature_list) == n_columns: + column_names = list(feature_list) + else: + column_names = self._fitted_columns + + if len(column_names) != n_columns: + raise ValueError( + f"Column count mismatch: fitted on {len(self._fitted_columns)} columns, " + f"got {n_columns}" + ) + + result = np.empty_like(arr) + for i, col_name in enumerate(column_names): + if col_name not in self._column_states: + raise ValueError(f"Column {col_name!r} was not present during fitting") + result[:, i] = self._transform_column( + arr[:, i], self._column_states[col_name] + ) + + if was_1d: + result = result.flatten() + + return result, y, sample_weight, feature_list + + def fit_transform( + self, + X: ArrayLike, + y: ArrayOrNone = None, + sample_weight: ArrayOrNone = None, + feature_list: ListOrNone = None, + **kwargs, + ) -> tuple[ArrayLike, ArrayOrNone, ArrayOrNone, ListOrNone]: + self.fit(X, y, sample_weight, feature_list, **kwargs) + return self.transform(X, y, sample_weight, feature_list, **kwargs) + + def inverse_transform( + self, + X: ArrayLike, + y: ArrayOrNone = None, + sample_weight: ArrayOrNone = None, + feature_list: ListOrNone = None, + **kwargs, + ) -> tuple[ArrayLike, ArrayOrNone, ArrayOrNone, ListOrNone]: + if not self._fitted: + raise RuntimeError( + "LabelTransformer must be fitted before inverse_transform" + ) + + arr = np.asarray(X, dtype=float) + was_1d = arr.ndim == 1 + if was_1d: + arr = arr.reshape(-1, 1) + + n_columns = arr.shape[1] + + if feature_list is not None and len(feature_list) == n_columns: + column_names = list(feature_list) + else: + column_names = self._fitted_columns + + if len(column_names) != n_columns: + raise ValueError( + f"Column count mismatch: fitted on {len(self._fitted_columns)} 
columns, " + f"got {n_columns}" + ) + + result = np.empty_like(arr) + for i, col_name in enumerate(column_names): + if col_name not in self._column_states: + raise ValueError(f"Column {col_name!r} was not present during fitting") + result[:, i] = self._transform_column( + arr[:, i], self._column_states[col_name], inverse=True + ) + + if was_1d: + result = result.flatten() + + return result, y, sample_weight, feature_list diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py index 5ca0956..340671f 100644 --- a/quickadapter/user_data/strategies/QuickAdapterV3.py +++ b/quickadapter/user_data/strategies/QuickAdapterV3.py @@ -26,39 +26,37 @@ from pandas import DataFrame, Series, isna from scipy.stats import pearsonr, t from technical.pivots_points import pivots_points -from ExtremaWeightingTransformer import COMBINED_AGGREGATIONS +from LabelTransformer import COMBINED_AGGREGATIONS, get_label_column_config + from Utils import ( DEFAULT_FIT_LIVE_PREDICTIONS_CANDLES, - DEFAULTS_EXTREMA_SMOOTHING, EXTREMA_COLUMN, + LABEL_COLUMNS, MAXIMA_COLUMN, - MAXIMA_THRESHOLD_COLUMN, MINIMA_COLUMN, - MINIMA_THRESHOLD_COLUMN, SMOOTHED_EXTREMA_COLUMN, - SMOOTHING_METHODS, - SMOOTHING_MODES, TRADE_PRICE_TARGETS, alligator, + apply_label_weighting, bottom_log_return, calculate_quantile, ewo, format_number, + generate_label_data, get_callable_sha256, get_distance, - get_extrema_weighting_config, get_label_defaults, - get_weighted_extrema, + get_label_smoothing_config, + get_label_weighting_config, get_zl_ma_fn, + migrate_config, nan_average, non_zero_diff, price_retracement_percent, - smooth_extrema, + smooth_label, top_log_return, - update_config_value, validate_range, vwapb, - zigzag, zlema, ) @@ -110,7 +108,7 @@ class QuickAdapterV3(IStrategy): _PLOT_EXTREMA_MIN_EPS: Final[float] = 0.01 def version(self) -> str: - return "3.10.11" + return "3.11.0" timeframe = "5m" timeframe_minutes = timeframe_to_minutes(timeframe) @@ 
-165,6 +163,10 @@ class QuickAdapterV3(IStrategy): process_only_new_candles = True + def __init__(self, config: dict[str, Any], *args, **kwargs) -> None: + super().__init__(config, *args, **kwargs) + migrate_config(self.config, logger) + @staticmethod @lru_cache(maxsize=None) def _trade_directions_set() -> set[TradeDirection]: @@ -192,8 +194,14 @@ class QuickAdapterV3(IStrategy): "hp_rmse": {"color": "violet", "type": "line"}, }, "extrema": { - MAXIMA_THRESHOLD_COLUMN: {"color": "blue", "type": "line"}, - MINIMA_THRESHOLD_COLUMN: {"color": "cyan", "type": "line"}, + f"{EXTREMA_COLUMN}_maxima_threshold": { + "color": "blue", + "type": "line", + }, + f"{EXTREMA_COLUMN}_minima_threshold": { + "color": "cyan", + "type": "line", + }, EXTREMA_COLUMN: {"color": "orange", "type": "line"}, }, "min_max": { @@ -299,89 +307,27 @@ class QuickAdapterV3(IStrategy): return max_open_trades @property - def extrema_weighting(self) -> dict[str, Any]: - extrema_weighting = self.freqai_info.get("extrema_weighting", {}) - if not isinstance(extrema_weighting, dict): - extrema_weighting = {} - return get_extrema_weighting_config(extrema_weighting, logger) + def label_weighting(self) -> dict[str, Any]: + label_weighting_raw = self.freqai_info.get("label_weighting") + if not isinstance(label_weighting_raw, dict): + label_weighting_raw = {} + return get_label_weighting_config(label_weighting_raw, logger) @property - def extrema_smoothing(self) -> dict[str, Any]: - extrema_smoothing = self.freqai_info.get("extrema_smoothing", {}) - if not isinstance(extrema_smoothing, dict): - extrema_smoothing = {} - method = extrema_smoothing.get("method", DEFAULTS_EXTREMA_SMOOTHING["method"]) - if method not in set(SMOOTHING_METHODS): - logger.warning( - f"Invalid extrema_smoothing method value {method!r}: supported values are {', '.join(SMOOTHING_METHODS)}, using default {SMOOTHING_METHODS[0]!r}" - ) - method = SMOOTHING_METHODS[0] - - window_candles = update_config_value( - extrema_smoothing, - 
new_key="window_candles", - old_key="window", - default=DEFAULTS_EXTREMA_SMOOTHING["window_candles"], - logger=logger, - new_path="freqai.extrema_smoothing.window_candles", - old_path="freqai.extrema_smoothing.window", - ) - if not isinstance(window_candles, int) or window_candles < 3: - logger.warning( - f"Invalid extrema_smoothing window_candles value {window_candles!r}: must be an integer >= 3, using default {DEFAULTS_EXTREMA_SMOOTHING['window_candles']!r}" - ) - window_candles = int(DEFAULTS_EXTREMA_SMOOTHING["window_candles"]) - - beta = extrema_smoothing.get("beta", DEFAULTS_EXTREMA_SMOOTHING["beta"]) - if not isinstance(beta, (int, float)) or not np.isfinite(beta) or beta <= 0: - logger.warning( - f"Invalid extrema_smoothing beta value {beta!r}: must be a finite number > 0, using default {DEFAULTS_EXTREMA_SMOOTHING['beta']!r}" - ) - beta = DEFAULTS_EXTREMA_SMOOTHING["beta"] - - polyorder = extrema_smoothing.get( - "polyorder", DEFAULTS_EXTREMA_SMOOTHING["polyorder"] - ) - if not isinstance(polyorder, int) or polyorder < 1: - logger.warning( - f"Invalid extrema_smoothing polyorder value {polyorder!r}: must be an integer >= 1, using default {DEFAULTS_EXTREMA_SMOOTHING['polyorder']!r}" - ) - polyorder = DEFAULTS_EXTREMA_SMOOTHING["polyorder"] - - mode = str(extrema_smoothing.get("mode", DEFAULTS_EXTREMA_SMOOTHING["mode"])) - if mode not in set(SMOOTHING_MODES): - logger.warning( - f"Invalid extrema_smoothing mode value {mode!r}: supported values are {', '.join(SMOOTHING_MODES)}, using default {SMOOTHING_MODES[0]!r}" - ) - mode = SMOOTHING_MODES[0] - - sigma = extrema_smoothing.get("sigma", DEFAULTS_EXTREMA_SMOOTHING["sigma"]) - if not isinstance(sigma, (int, float)) or sigma <= 0 or not np.isfinite(sigma): - logger.warning( - f"Invalid extrema_smoothing sigma value {sigma!r}: must be a finite number > 0, using default {DEFAULTS_EXTREMA_SMOOTHING['sigma']!r}" - ) - sigma = DEFAULTS_EXTREMA_SMOOTHING["sigma"] - - return { - "method": method, - "window_candles": 
window_candles, - "beta": beta, - "polyorder": polyorder, - "mode": mode, - "sigma": sigma, - } + def label_smoothing(self) -> dict[str, Any]: + label_smoothing_raw = self.freqai_info.get("label_smoothing", {}) + if not isinstance(label_smoothing_raw, dict): + label_smoothing_raw = {} + return get_label_smoothing_config(label_smoothing_raw, logger) @property def trade_price_target_method(self) -> str: - exit_pricing = self.config.get("exit_pricing", {}) - trade_price_target_method = update_config_value( - exit_pricing, - new_key="trade_price_target_method", - old_key="trade_price_target", - default=TRADE_PRICE_TARGETS[0], # "moving_average" - logger=logger, - new_path="exit_pricing.trade_price_target_method", - old_path="exit_pricing.trade_price_target", + exit_pricing = self.config.get("exit_pricing") + if not isinstance(exit_pricing, dict): + exit_pricing = {} + trade_price_target_method = exit_pricing.get( + "trade_price_target_method", + TRADE_PRICE_TARGETS[0], # "moving_average" ) if trade_price_target_method not in set(TRADE_PRICE_TARGETS): logger.warning( @@ -394,50 +340,22 @@ class QuickAdapterV3(IStrategy): @property def reversal_confirmation(self) -> dict[str, int | float]: - reversal_confirmation = self.config.get("reversal_confirmation", {}) + reversal_confirmation = self.config.get("reversal_confirmation") + if not isinstance(reversal_confirmation, dict): + reversal_confirmation = {} + defaults = QuickAdapterV3.default_reversal_confirmation - lookback_period_candles = update_config_value( - reversal_confirmation, - new_key="lookback_period_candles", - old_key="lookback_period", - default=QuickAdapterV3.default_reversal_confirmation[ - "lookback_period_candles" - ], - logger=logger, - new_path="reversal_confirmation.lookback_period_candles", - old_path="reversal_confirmation.lookback_period", - ) - decay_fraction = update_config_value( - reversal_confirmation, - new_key="decay_fraction", - old_key="decay_ratio", - 
default=QuickAdapterV3.default_reversal_confirmation["decay_fraction"], - logger=logger, - new_path="reversal_confirmation.decay_fraction", - old_path="reversal_confirmation.decay_ratio", - ) - - min_natr_multiplier_fraction = update_config_value( - reversal_confirmation, - new_key="min_natr_multiplier_fraction", - old_key="min_natr_ratio_percent", - default=QuickAdapterV3.default_reversal_confirmation[ - "min_natr_multiplier_fraction" - ], - logger=logger, - new_path="reversal_confirmation.min_natr_multiplier_fraction", - old_path="reversal_confirmation.min_natr_ratio_percent", - ) - max_natr_multiplier_fraction = update_config_value( - reversal_confirmation, - new_key="max_natr_multiplier_fraction", - old_key="max_natr_ratio_percent", - default=QuickAdapterV3.default_reversal_confirmation[ - "max_natr_multiplier_fraction" - ], - logger=logger, - new_path="reversal_confirmation.max_natr_multiplier_fraction", - old_path="reversal_confirmation.max_natr_ratio_percent", + lookback_period_candles = reversal_confirmation.get( + "lookback_period_candles", defaults["lookback_period_candles"] + ) + decay_fraction = reversal_confirmation.get( + "decay_fraction", defaults["decay_fraction"] + ) + min_natr_multiplier_fraction = reversal_confirmation.get( + "min_natr_multiplier_fraction", defaults["min_natr_multiplier_fraction"] + ) + max_natr_multiplier_fraction = reversal_confirmation.get( + "max_natr_multiplier_fraction", defaults["max_natr_multiplier_fraction"] ) if not isinstance(lookback_period_candles, int) or lookback_period_candles < 0: @@ -548,41 +466,35 @@ class QuickAdapterV3(IStrategy): logger.info("QuickAdapter Strategy Configuration") logger.info("=" * 60) - logger.info("Extrema Weighting:") - logger.info(f" strategy: {self.extrema_weighting['strategy']}") - logger.info( - f" metric_coefficients: {self.extrema_weighting['metric_coefficients']}" - ) - logger.info(f" aggregation: {self.extrema_weighting['aggregation']}") - logger.info(f" standardization: 
{self.extrema_weighting['standardization']}") - logger.info( - f" robust_quantiles: ({format_number(self.extrema_weighting['robust_quantiles'][0])}, {format_number(self.extrema_weighting['robust_quantiles'][1])})" - ) - logger.info( - f" mmad_scaling_factor: {format_number(self.extrema_weighting['mmad_scaling_factor'])}" - ) - logger.info(f" normalization: {self.extrema_weighting['normalization']}") - logger.info( - f" minmax_range: ({format_number(self.extrema_weighting['minmax_range'][0])}, {format_number(self.extrema_weighting['minmax_range'][1])})" - ) - logger.info( - f" sigmoid_scale: {format_number(self.extrema_weighting['sigmoid_scale'])}" - ) - logger.info(f" gamma: {format_number(self.extrema_weighting['gamma'])}") - if ( - self.extrema_weighting["aggregation"] == COMBINED_AGGREGATIONS[5] - ): # "softmax" + label_weighting = self.label_weighting + label_smoothing = self.label_smoothing + for label_col in LABEL_COLUMNS: + logger.info(f"Label Configuration [{label_col}]:") + + col_weighting = get_label_column_config( + label_col, label_weighting["default"], label_weighting["columns"] + ) + logger.info(" Weighting:") + logger.info(f" strategy: {col_weighting['strategy']}") logger.info( - f" softmax_temperature: {format_number(self.extrema_weighting['softmax_temperature'])}" + f" metric_coefficients: {col_weighting['metric_coefficients']}" ) + logger.info(f" aggregation: {col_weighting['aggregation']}") + if col_weighting["aggregation"] == COMBINED_AGGREGATIONS[5]: # "softmax" + logger.info( + f" softmax_temperature: {format_number(col_weighting['softmax_temperature'])}" + ) - logger.info("Extrema Smoothing:") - logger.info(f" method: {self.extrema_smoothing['method']}") - logger.info(f" window_candles: {self.extrema_smoothing['window_candles']}") - logger.info(f" beta: {format_number(self.extrema_smoothing['beta'])}") - logger.info(f" polyorder: {self.extrema_smoothing['polyorder']}") - logger.info(f" mode: {self.extrema_smoothing['mode']}") - logger.info(f" 
sigma: {format_number(self.extrema_smoothing['sigma'])}") + col_smoothing = get_label_column_config( + label_col, label_smoothing["default"], label_smoothing["columns"] + ) + logger.info(" Smoothing:") + logger.info(f" method: {col_smoothing['method']}") + logger.info(f" window_candles: {col_smoothing['window_candles']}") + logger.info(f" beta: {format_number(col_smoothing['beta'])}") + logger.info(f" polyorder: {col_smoothing['polyorder']}") + logger.info(f" mode: {col_smoothing['mode']}") + logger.info(f" sigma: {format_number(col_smoothing['sigma'])}") logger.info("Reversal Confirmation:") logger.info( @@ -859,6 +771,14 @@ class QuickAdapterV3(IStrategy): ) return self.get_label_natr_multiplier(pair) * fraction + def get_label_params(self, pair: str, label_col: str) -> dict[str, Any]: + if label_col == EXTREMA_COLUMN: + return { + "natr_period": self.get_label_period_candles(pair), + "natr_multiplier": self.get_label_natr_multiplier(pair), + } + return {} + @staticmethod @lru_cache(maxsize=128) def _td_format( @@ -882,81 +802,81 @@ class QuickAdapterV3(IStrategy): self, dataframe: DataFrame, metadata: dict[str, Any], **kwargs ) -> DataFrame: pair = str(metadata.get("pair")) - label_period_candles = self.get_label_period_candles(pair) - label_natr_multiplier = self.get_label_natr_multiplier(pair) - ( - pivots_indices, - _, - pivots_directions, - pivots_amplitudes, - pivots_amplitude_threshold_ratios, - pivots_volume_rates, - pivots_speeds, - pivots_efficiency_ratios, - pivots_volume_weighted_efficiency_ratios, - ) = zigzag( - dataframe, - natr_period=label_period_candles, - natr_multiplier=label_natr_multiplier, - ) label_period = datetime.timedelta( minutes=len(dataframe) * self.get_timeframe_minutes() ) - dataframe[EXTREMA_COLUMN] = 0.0 - dataframe[MINIMA_COLUMN] = 0.0 - dataframe[MAXIMA_COLUMN] = 0.0 - if len(pivots_indices) == 0: - logger.warning( - f"[{pair}] No extrema to label | label_period: {QuickAdapterV3._td_format(label_period)} | 
label_period_candles: {label_period_candles} | label_natr_multiplier: {format_number(label_natr_multiplier)}" + label_weighting = self.label_weighting + label_smoothing = self.label_smoothing + + for label_col in LABEL_COLUMNS: + label_params = self.get_label_params(pair, label_col) + label_data = generate_label_data(dataframe, label_col, label_params) + + if len(label_data.indices) == 0: + logger.warning( + f"[{pair}] No {label_col} labels | label_period: {QuickAdapterV3._td_format(label_period)} | params: {label_params!r}" + ) + else: + logger.info( + f"[{pair}] {len(label_data.indices)} {label_col} labels | label_period: {QuickAdapterV3._td_format(label_period)} | params: {label_params!r}" + ) + + col_weighting_config = get_label_column_config( + label_col, label_weighting["default"], label_weighting["columns"] ) - else: - logger.info( - f"[{pair}] Labeled {len(pivots_indices)} extrema | label_period: {QuickAdapterV3._td_format(label_period)} | label_period_candles: {label_period_candles} | label_natr_multiplier: {format_number(label_natr_multiplier)}" + + weighted_label, _ = apply_label_weighting( + label=label_data.series, + indices=label_data.indices, + metrics=label_data.metrics, + weighting_config=col_weighting_config, + ) + + dataframe[label_col] = weighted_label + + if label_col == EXTREMA_COLUMN: + extrema = dataframe[label_col] + extrema_direction = label_data.series + plot_eps = extrema.abs().where(extrema.ne(0.0)).min() + if not np.isfinite(plot_eps): + plot_eps = 0.0 + plot_eps = max( + float(plot_eps) * 0.5, QuickAdapterV3._PLOT_EXTREMA_MIN_EPS + ) + dataframe[MAXIMA_COLUMN] = ( + extrema.where(extrema_direction.gt(0), 0.0) + .clip(lower=0.0) + .mask( + extrema_direction.gt(0) & extrema.eq(0.0), + plot_eps, + ) + ) + dataframe[MINIMA_COLUMN] = ( + extrema.where(extrema_direction.lt(0), 0.0) + .clip(upper=0.0) + .mask( + extrema_direction.lt(0) & extrema.eq(0.0), + -plot_eps, + ) + ) + + col_smoothing_config = get_label_column_config( + label_col, 
label_smoothing["default"], label_smoothing["columns"] ) - dataframe.loc[pivots_indices, EXTREMA_COLUMN] = pivots_directions - - extrema_direction = dataframe[EXTREMA_COLUMN] - - weighted_extrema, _ = get_weighted_extrema( - extrema=extrema_direction, - indices=pivots_indices, - amplitudes=pivots_amplitudes, - amplitude_threshold_ratios=pivots_amplitude_threshold_ratios, - volume_rates=pivots_volume_rates, - speeds=pivots_speeds, - efficiency_ratios=pivots_efficiency_ratios, - volume_weighted_efficiency_ratios=pivots_volume_weighted_efficiency_ratios, - extrema_weighting=self.extrema_weighting, - ) - - plot_eps = weighted_extrema.abs().where(weighted_extrema.ne(0.0)).min() - if not np.isfinite(plot_eps): - plot_eps = 0.0 - plot_eps = max(float(plot_eps) * 0.5, QuickAdapterV3._PLOT_EXTREMA_MIN_EPS) - dataframe[MAXIMA_COLUMN] = ( - weighted_extrema.where(extrema_direction.gt(0), 0.0) - .clip(lower=0.0) - .mask(extrema_direction.gt(0) & weighted_extrema.eq(0.0), plot_eps) - ) - dataframe[MINIMA_COLUMN] = ( - weighted_extrema.where(extrema_direction.lt(0), 0.0) - .clip(upper=0.0) - .mask(extrema_direction.lt(0) & weighted_extrema.eq(0.0), -plot_eps) - ) - - smoothed_extrema = smooth_extrema( - weighted_extrema, - self.extrema_smoothing["method"], - self.extrema_smoothing["window_candles"], - self.extrema_smoothing["beta"], - self.extrema_smoothing["polyorder"], - self.extrema_smoothing["mode"], - self.extrema_smoothing["sigma"], - ) - - dataframe[EXTREMA_COLUMN] = smoothed_extrema - dataframe[SMOOTHED_EXTREMA_COLUMN] = smoothed_extrema + + dataframe[label_col] = smooth_label( + dataframe[label_col], + col_smoothing_config["method"], + col_smoothing_config["window_candles"], + col_smoothing_config["beta"], + col_smoothing_config["polyorder"], + col_smoothing_config["mode"], + col_smoothing_config["sigma"], + ) + + if label_col == EXTREMA_COLUMN: + dataframe[SMOOTHED_EXTREMA_COLUMN] = dataframe[label_col] return dataframe @@ -985,8 +905,12 @@ class 
QuickAdapterV3(IStrategy): dataframe, timeperiod=self.get_label_period_candles(pair) ) - dataframe["minima_threshold"] = dataframe.get(MINIMA_THRESHOLD_COLUMN) - dataframe["maxima_threshold"] = dataframe.get(MAXIMA_THRESHOLD_COLUMN) + dataframe["minima_threshold"] = dataframe.get( + f"{EXTREMA_COLUMN}_minima_threshold", np.nan + ) + dataframe["maxima_threshold"] = dataframe.get( + f"{EXTREMA_COLUMN}_maxima_threshold", np.nan + ) return dataframe diff --git a/quickadapter/user_data/strategies/Utils.py b/quickadapter/user_data/strategies/Utils.py index 414a608..4a8b64e 100644 --- a/quickadapter/user_data/strategies/Utils.py +++ b/quickadapter/user_data/strategies/Utils.py @@ -2,6 +2,7 @@ import copy import functools import hashlib import math +from dataclasses import dataclass from enum import IntEnum from functools import lru_cache from logging import Logger @@ -22,15 +23,25 @@ import optuna import pandas as pd import scipy as sp import talib.abstract as ta -from ExtremaWeightingTransformer import ( +from LabelTransformer import ( COMBINED_AGGREGATIONS, COMBINED_METRICS, - DEFAULTS_EXTREMA_WEIGHTING, + DEFAULTS_LABEL_PIPELINE, + DEFAULTS_LABEL_PREDICTION, + DEFAULTS_LABEL_SMOOTHING, + DEFAULTS_LABEL_WEIGHTING, + EXTREMA_SELECTION_METHODS, NORMALIZATION_TYPES, + PREDICTION_METHODS, + SMOOTHING_METHODS, + SMOOTHING_MODES, STANDARDIZATION_TYPES, + THRESHOLD_METHODS, WEIGHT_STRATEGIES, CombinedAggregation, CombinedMetric, + SmoothingMethod, + SmoothingMode, ) from numpy.typing import NDArray from scipy.ndimage import gaussian_filter1d @@ -45,9 +56,272 @@ else: T = TypeVar("T", pd.Series, float) +@dataclass(frozen=True, slots=True) +class _EnumValidator: + valid_values: tuple[str, ...] 
+ + def __call__(self, value: Any) -> bool: + return value in self.valid_values + + def message(self, param: str) -> str: + return f"supported values are {', '.join(self.valid_values)}" + + +@dataclass(frozen=True, slots=True) +class _NumericValidator: + min_value: float | None = None + max_value: float | None = None + min_exclusive: bool = False + max_exclusive: bool = False + require_int: bool = False + + def __call__(self, value: Any) -> bool: + if self.require_int and not isinstance(value, int): + return False + if not isinstance(value, (int, float)) or not np.isfinite(value): + return False + if self.min_value is not None: + if self.min_exclusive and value <= self.min_value: + return False + if not self.min_exclusive and value < self.min_value: + return False + if self.max_value is not None: + if self.max_exclusive and value >= self.max_value: + return False + if not self.max_exclusive and value > self.max_value: + return False + return True + + def message(self, param: str) -> str: + parts = [] + if self.require_int: + parts.append("must be an integer") + else: + parts.append("must be a finite number") + if self.min_value is not None: + op = ">" if self.min_exclusive else ">=" + parts.append(f"{op} {self.min_value}") + if self.max_value is not None: + op = "<" if self.max_exclusive else "<=" + parts.append(f"{op} {self.max_value}") + return " ".join(parts) + + +@dataclass(frozen=True, slots=True) +class _RangeValidator: + min_bound: float | None = None + max_bound: float | None = None + + def __call__(self, value: Any) -> bool: + if not isinstance(value, (list, tuple)) or len(value) != 2: + return False + if not all(isinstance(x, (int, float)) and np.isfinite(x) for x in value): + return False + if value[0] >= value[1]: + return False + if self.min_bound is not None and value[0] < self.min_bound: + return False + if self.max_bound is not None and value[1] > self.max_bound: + return False + return True + + def message(self, param: str) -> str: + if 
self.min_bound is not None and self.max_bound is not None: + return f"must be (low, high) with {self.min_bound} <= low < high <= {self.max_bound}" + return "must be (low, high) with low < high" + + +@dataclass(frozen=True, slots=True) +class _DictValidator: + valid_keys: tuple[str, ...] | None = None + + def __call__(self, value: Any) -> bool: + return isinstance(value, dict) + + def message(self, param: str) -> str: + return "must be a mapping" + + +_Validator = _EnumValidator | _NumericValidator | _RangeValidator | _DictValidator + + +@dataclass(frozen=True, slots=True) +class _ParamSpec: + validator: _Validator + output_type: type | None = None + + +def _validate_params( + config: dict[str, Any], + logger: Logger, + config_name: str, + specs: dict[str, _ParamSpec], + defaults: dict[str, Any], +) -> dict[str, Any]: + result: dict[str, Any] = {} + for param, spec in specs.items(): + value = config.get(param, defaults[param]) + if not spec.validator(value): + logger.warning( + f"Invalid {config_name} {param} value {value!r}: " + f"{spec.validator.message(param)}, using default {defaults[param]!r}" + ) + value = defaults[param] + elif isinstance(spec.validator, _DictValidator) and spec.validator.valid_keys: + invalid_keys = set(value.keys()) - set(spec.validator.valid_keys) + if invalid_keys: + logger.warning( + f"Invalid {config_name} {param} keys {sorted(invalid_keys)!r}, " + f"valid keys: {', '.join(spec.validator.valid_keys)}" + ) + value = { + k: v for k, v in value.items() if k in spec.validator.valid_keys + } + if spec.output_type is not None: + if spec.output_type is tuple and isinstance(value, (list, tuple)): + value = (value[0], value[1]) + else: + value = spec.output_type(value) + result[param] = value + return result + + +_WEIGHTING_SPECS: Final[dict[str, _ParamSpec]] = { + "strategy": _ParamSpec(_EnumValidator(WEIGHT_STRATEGIES)), + "metric_coefficients": _ParamSpec(_DictValidator(COMBINED_METRICS)), + "aggregation": 
_ParamSpec(_EnumValidator(COMBINED_AGGREGATIONS)), + "softmax_temperature": _ParamSpec( + _NumericValidator(min_value=0, min_exclusive=True) + ), +} + +_PIPELINE_SPECS: Final[dict[str, _ParamSpec]] = { + "standardization": _ParamSpec(_EnumValidator(STANDARDIZATION_TYPES)), + "robust_quantiles": _ParamSpec( + _RangeValidator(min_bound=0, max_bound=1), output_type=tuple + ), + "mmad_scaling_factor": _ParamSpec( + _NumericValidator(min_value=0, min_exclusive=True) + ), + "normalization": _ParamSpec(_EnumValidator(NORMALIZATION_TYPES)), + "minmax_range": _ParamSpec(_RangeValidator(), output_type=tuple), + "sigmoid_scale": _ParamSpec(_NumericValidator(min_value=0, min_exclusive=True)), + "gamma": _ParamSpec( + _NumericValidator(min_value=0, max_value=10, min_exclusive=True) + ), +} + +_SMOOTHING_SPECS: Final[dict[str, _ParamSpec]] = { + "method": _ParamSpec(_EnumValidator(SMOOTHING_METHODS)), + "window_candles": _ParamSpec( + _NumericValidator(min_value=1, require_int=True), output_type=int + ), + "beta": _ParamSpec( + _NumericValidator(min_value=0, min_exclusive=True), output_type=float + ), + "polyorder": _ParamSpec( + _NumericValidator(min_value=0, require_int=True), output_type=int + ), + "mode": _ParamSpec(_EnumValidator(SMOOTHING_MODES)), + "sigma": _ParamSpec( + _NumericValidator(min_value=0, min_exclusive=True), output_type=float + ), +} + +_PREDICTION_SPECS: Final[dict[str, _ParamSpec]] = { + "method": _ParamSpec(_EnumValidator(PREDICTION_METHODS)), + "selection_method": _ParamSpec(_EnumValidator(EXTREMA_SELECTION_METHODS)), + "threshold_method": _ParamSpec(_EnumValidator(THRESHOLD_METHODS)), + "outlier_quantile": _ParamSpec( + _NumericValidator( + min_value=0, max_value=1, min_exclusive=True, max_exclusive=True + ), + output_type=float, + ), + "soft_extremum_alpha": _ParamSpec( + _NumericValidator(min_value=0), output_type=float + ), + "keep_fraction": _ParamSpec( + _NumericValidator(min_value=0, max_value=1, min_exclusive=True), + output_type=float, + ), +} + 
+ EXTREMA_COLUMN: Final = "&s-extrema" -MAXIMA_THRESHOLD_COLUMN: Final = "&s-maxima_threshold" -MINIMA_THRESHOLD_COLUMN: Final = "&s-minima_threshold" +LABEL_COLUMNS: Final[tuple[str, ...]] = (EXTREMA_COLUMN,) + + +@dataclass +class LabelData: + series: pd.Series + indices: list[int] + metrics: dict[str, list[float]] + + +LabelGenerator = Callable[[pd.DataFrame, dict[str, Any]], LabelData] +_LABEL_GENERATORS: dict[str, LabelGenerator] = {} + + +def register_label_generator(label_column: str, generator: LabelGenerator) -> None: + _LABEL_GENERATORS[label_column] = generator + + +def _generate_extrema_label( + dataframe: pd.DataFrame, + params: dict[str, Any], +) -> LabelData: + natr_period = params.get("natr_period", 14) + natr_multiplier = params.get("natr_multiplier", 9.0) + + ( + pivots_indices, + _, + pivots_directions, + pivots_amplitudes, + pivots_amplitude_threshold_ratios, + pivots_volume_rates, + pivots_speeds, + pivots_efficiency_ratios, + pivots_volume_weighted_efficiency_ratios, + ) = zigzag( + dataframe, + natr_period=natr_period, + natr_multiplier=natr_multiplier, + ) + + series = pd.Series(0.0, index=dataframe.index) + if pivots_indices: + series.loc[pivots_indices] = pivots_directions + + metrics: dict[str, list[float]] = { + "amplitude": pivots_amplitudes, + "amplitude_threshold_ratio": pivots_amplitude_threshold_ratios, + "volume_rate": pivots_volume_rates, + "speed": pivots_speeds, + "efficiency_ratio": pivots_efficiency_ratios, + "volume_weighted_efficiency_ratio": pivots_volume_weighted_efficiency_ratios, + } + + return LabelData(series=series, indices=pivots_indices, metrics=metrics) + + +register_label_generator(EXTREMA_COLUMN, _generate_extrema_label) + + +def generate_label_data( + dataframe: pd.DataFrame, + label_column: str, + params: dict[str, Any], +) -> LabelData: + generator = _LABEL_GENERATORS.get(label_column) + if generator is None: + raise KeyError( + f"No label generator registered for column '{label_column}'. 
" + f"Available columns: {list(_LABEL_GENERATORS.keys())}" + ) + return generator(dataframe, params) + MAXIMA_COLUMN: Final = "maxima" MINIMA_COLUMN: Final = "minima" @@ -60,28 +334,6 @@ SMOOTHING_KERNELS: Final[tuple[SmoothingKernel, ...]] = ( "triang", ) -SmoothingMethod = Union[ - SmoothingKernel, Literal["smm", "sma", "savgol", "gaussian_filter1d"] -] -SMOOTHING_METHODS: Final[tuple[SmoothingMethod, ...]] = ( - "gaussian", - "kaiser", - "triang", - "smm", - "sma", - "savgol", - "gaussian_filter1d", -) - -SmoothingMode = Literal["mirror", "constant", "nearest", "wrap", "interp"] -SMOOTHING_MODES: Final[tuple[SmoothingMode, ...]] = ( - "mirror", - "constant", - "nearest", - "wrap", - "interp", -) - TradePriceTarget = Literal[ "moving_average", "quantile_interpolation", "weighted_average" ] @@ -92,194 +344,304 @@ TRADE_PRICE_TARGETS: Final[tuple[TradePriceTarget, ...]] = ( ) -DEFAULTS_EXTREMA_SMOOTHING: Final[dict[str, Any]] = { - "method": SMOOTHING_METHODS[0], # "gaussian" - "window_candles": 5, - "beta": 8.0, - "polyorder": 3, - "mode": SMOOTHING_MODES[0], # "mirror" - "sigma": 1.0, -} - -DEFAULT_EXTREMA_WEIGHT: Final[float] = 1.0 +DEFAULT_LABEL_WEIGHT: Final[float] = 1.0 DEFAULT_FIT_LIVE_PREDICTIONS_CANDLES: Final[int] = 100 -def get_extrema_weighting_config( - extrema_weighting: dict[str, Any], +ValidateParamsFn = Callable[[dict[str, Any], Logger, str], dict[str, Any]] + + +_MISSING: Final = object() + + +def _get_path(config: dict[str, Any], path: str) -> Any: + keys = path.split(".") + current = config + for key in keys: + if not isinstance(current, dict) or key not in current: + return _MISSING + current = current[key] + return current + + +def _set_path(config: dict[str, Any], path: str, value: Any) -> None: + keys = path.split(".") + current = config + for key in keys[:-1]: + if key not in current: + current[key] = {} + current = current[key] + current[keys[-1]] = value + + +def _delete_path(config: dict[str, Any], path: str) -> bool: + keys = 
path.split(".") + current = config + for key in keys[:-1]: + if not isinstance(current, dict) or key not in current: + return False + current = current[key] + if isinstance(current, dict) and keys[-1] in current: + del current[keys[-1]] + return True + return False + + +# Order matters: section renames before key moves (e.g. extrema_weighting.gamma -> label_weighting.gamma -> label_pipeline.gamma) +CONFIG_MIGRATIONS: Final[tuple[tuple[str, str], ...]] = ( + ("freqai.extrema_weighting", "freqai.label_weighting"), + ("freqai.extrema_smoothing", "freqai.label_smoothing"), + ("freqai.predictions_extrema", "freqai.label_prediction"), + ("freqai.label_smoothing.window", "freqai.label_smoothing.window_candles"), + ( + "freqai.label_prediction.thresholds_smoothing", + "freqai.label_prediction.threshold_smoothing_method", + ), + ( + "freqai.label_prediction.threshold_smoothing_method", + "freqai.label_prediction.threshold_method", + ), + ( + "freqai.label_prediction.threshold_outlier", + "freqai.label_prediction.outlier_threshold_quantile", + ), + ( + "freqai.label_prediction.outlier_threshold_quantile", + "freqai.label_prediction.outlier_quantile", + ), + ( + "freqai.label_prediction.extrema_fraction", + "freqai.label_prediction.keep_extrema_fraction", + ), + ( + "freqai.label_prediction.keep_extrema_fraction", + "freqai.label_prediction.keep_fraction", + ), + ( + "freqai.label_prediction.thresholds_alpha", + "freqai.label_prediction.soft_extremum_alpha", + ), + ("exit_pricing.trade_price_target", "exit_pricing.trade_price_target_method"), + ( + "reversal_confirmation.lookback_period", + "reversal_confirmation.lookback_period_candles", + ), + ("reversal_confirmation.decay_ratio", "reversal_confirmation.decay_fraction"), + ( + "reversal_confirmation.min_natr_ratio_percent", + "reversal_confirmation.min_natr_multiplier_fraction", + ), + ( + "reversal_confirmation.max_natr_ratio_percent", + "reversal_confirmation.max_natr_multiplier_fraction", + ), + ( + 
"freqai.feature_parameters.min_label_natr_ratio", + "freqai.feature_parameters.min_label_natr_multiplier", + ), + ( + "freqai.feature_parameters.max_label_natr_ratio", + "freqai.feature_parameters.max_label_natr_multiplier", + ), + ( + "freqai.feature_parameters.label_natr_ratio", + "freqai.feature_parameters.label_natr_multiplier", + ), + ("freqai.optuna_hyperopt.expansion_ratio", "freqai.optuna_hyperopt.space_fraction"), + ( + "freqai.label_weighting.standardization", + "freqai.label_pipeline.standardization", + ), + ( + "freqai.label_weighting.robust_quantiles", + "freqai.label_pipeline.robust_quantiles", + ), + ( + "freqai.label_weighting.mmad_scaling_factor", + "freqai.label_pipeline.mmad_scaling_factor", + ), + ("freqai.label_weighting.normalization", "freqai.label_pipeline.normalization"), + ("freqai.label_weighting.minmax_range", "freqai.label_pipeline.minmax_range"), + ("freqai.label_weighting.sigmoid_scale", "freqai.label_pipeline.sigmoid_scale"), + ("freqai.label_weighting.gamma", "freqai.label_pipeline.gamma"), +) + + +def migrate_config(config: dict[str, Any], logger: Logger) -> None: + for old_path, new_path in CONFIG_MIGRATIONS: + old_value = _get_path(config, old_path) + if old_value is _MISSING: + continue + + old_section = old_path.rsplit(".", 1)[0] if "." in old_path else "" + new_section = new_path.rsplit(".", 1)[0] if "." 
in new_path else "" + new_key = new_path.rsplit(".", 1)[-1] + + new_value = _get_path(config, new_path) + if new_value is _MISSING: + _set_path(config, new_path, old_value) + _delete_path(config, old_path) + if old_section == new_section: + logger.warning(f"{old_path} is deprecated, use {new_key} instead") + else: + logger.warning(f"{old_path} has moved to {new_path}") + else: + _delete_path(config, old_path) + if old_section == new_section: + logger.warning( + f"{new_section} has both {new_key} and deprecated {old_path.rsplit('.', 1)[-1]}, using {new_key}" + ) + else: + logger.warning( + f"{new_section} has {new_key} and deprecated {old_path}, using {new_path}" + ) + + +def _get_label_config( + config: dict[str, Any], logger: Logger, + config_name: str, + validate_fn: ValidateParamsFn, + defaults_dict: dict[str, Any], ) -> dict[str, Any]: - strategy = extrema_weighting.get("strategy", DEFAULTS_EXTREMA_WEIGHTING["strategy"]) - if strategy not in set(WEIGHT_STRATEGIES): - logger.warning( - f"Invalid extrema_weighting strategy value {strategy!r}: supported values are {', '.join(WEIGHT_STRATEGIES)}, using default {WEIGHT_STRATEGIES[0]!r}" - ) - strategy = WEIGHT_STRATEGIES[0] - metric_coefficients = extrema_weighting.get( - "metric_coefficients", DEFAULTS_EXTREMA_WEIGHTING["metric_coefficients"] - ) - if not isinstance(metric_coefficients, dict): - logger.warning( - f"Invalid extrema_weighting metric_coefficients value value {metric_coefficients!r}: must be a mapping, using default {DEFAULTS_EXTREMA_WEIGHTING['metric_coefficients']!r}" - ) - metric_coefficients = DEFAULTS_EXTREMA_WEIGHTING["metric_coefficients"] - elif invalid_keys := set(metric_coefficients.keys()) - set(COMBINED_METRICS): - logger.warning( - f"Invalid extrema_weighting metric_coefficients keys {sorted(invalid_keys)!r}, valid keys: {', '.join(COMBINED_METRICS)}" - ) - metric_coefficients = { - k: v for k, v in metric_coefficients.items() if k in set(COMBINED_METRICS) - } + if "default" in config or 
"columns" in config: + default_config = config.get("default", {}) + if not isinstance(default_config, dict): + logger.warning( + f"Invalid {config_name} default value {default_config!r}: must be a mapping, using defaults" + ) + default_config = {} - aggregation: CombinedAggregation = extrema_weighting.get( - "aggregation", DEFAULTS_EXTREMA_WEIGHTING["aggregation"] - ) - if aggregation not in set(COMBINED_AGGREGATIONS): - logger.warning( - f"Invalid extrema_weighting aggregation value {aggregation!r}: supported values are {', '.join(COMBINED_AGGREGATIONS)}, using default {DEFAULTS_EXTREMA_WEIGHTING['aggregation']!r}" + validated_default = validate_fn( + default_config, logger, f"{config_name}.default" ) - aggregation = DEFAULTS_EXTREMA_WEIGHTING["aggregation"] - softmax_temperature = extrema_weighting.get( - "softmax_temperature", DEFAULTS_EXTREMA_WEIGHTING["softmax_temperature"] + columns_config = config.get("columns", {}) + if not isinstance(columns_config, dict): + logger.warning( + f"Invalid {config_name} columns value {columns_config!r}: must be a mapping, ignoring" + ) + columns_config = {} + + validated_columns: dict[str, dict[str, Any]] = {} + for col_pattern, col_config in columns_config.items(): + if not isinstance(col_config, dict): + logger.warning( + f"Invalid {config_name} columns[{col_pattern!r}] value {col_config!r}: must be a mapping, ignoring" + ) + continue + validated_col: dict[str, Any] = {} + for key, value in col_config.items(): + if key in defaults_dict: + temp = {key: value} + validated = validate_fn( + temp, logger, f"{config_name}.columns[{col_pattern!r}]" + ) + validated_col[key] = validated[key] + else: + logger.warning( + f"Unknown {config_name}.columns[{col_pattern!r}] key {key!r}, ignoring" + ) + if validated_col: + validated_columns[col_pattern] = validated_col + + return {"default": validated_default, "columns": validated_columns} + else: + validated_default = validate_fn(config, logger, config_name) + return {"default": 
validated_default, "columns": {}} + + +def _validate_weighting_params( + config: dict[str, Any], + logger: Logger, + config_name: str = "label_weighting", +) -> dict[str, Any]: + return _validate_params( + config, logger, config_name, _WEIGHTING_SPECS, DEFAULTS_LABEL_WEIGHTING ) - if ( - not isinstance(softmax_temperature, (int, float)) - or not np.isfinite(softmax_temperature) - or softmax_temperature <= 0 - ): - logger.warning( - f"Invalid extrema_weighting softmax_temperature value {softmax_temperature!r}: must be a finite number > 0, using default {DEFAULTS_EXTREMA_WEIGHTING['softmax_temperature']!r}" - ) - softmax_temperature = DEFAULTS_EXTREMA_WEIGHTING["softmax_temperature"] - # Phase 1: Standardization - standardization = extrema_weighting.get( - "standardization", DEFAULTS_EXTREMA_WEIGHTING["standardization"] + +def get_label_weighting_config( + config: dict[str, Any], + logger: Logger, +) -> dict[str, Any]: + return _get_label_config( + config, + logger, + "label_weighting", + _validate_weighting_params, + DEFAULTS_LABEL_WEIGHTING, ) - if standardization not in set(STANDARDIZATION_TYPES): - logger.warning( - f"Invalid extrema_weighting standardization value {standardization!r}: supported values are {', '.join(STANDARDIZATION_TYPES)}, using default {STANDARDIZATION_TYPES[0]!r}" - ) - standardization = STANDARDIZATION_TYPES[0] - robust_quantiles = extrema_weighting.get( - "robust_quantiles", DEFAULTS_EXTREMA_WEIGHTING["robust_quantiles"] + +def _validate_pipeline_params( + config: dict[str, Any], + logger: Logger, + config_name: str = "label_pipeline", +) -> dict[str, Any]: + return _validate_params( + config, logger, config_name, _PIPELINE_SPECS, DEFAULTS_LABEL_PIPELINE ) - if ( - not isinstance(robust_quantiles, (list, tuple)) - or len(robust_quantiles) != 2 - or not all( - isinstance(q, (int, float)) and np.isfinite(q) and 0 <= q <= 1 - for q in robust_quantiles - ) - or robust_quantiles[0] >= robust_quantiles[1] - ): - logger.warning( - f"Invalid 
extrema_weighting robust_quantiles value {robust_quantiles!r}: must be (q1, q3) with 0 <= q1 < q3 <= 1, using default {DEFAULTS_EXTREMA_WEIGHTING['robust_quantiles']!r}" - ) - robust_quantiles = DEFAULTS_EXTREMA_WEIGHTING["robust_quantiles"] - else: - robust_quantiles = ( - robust_quantiles[0], - robust_quantiles[1], - ) - mmad_scaling_factor = extrema_weighting.get( - "mmad_scaling_factor", DEFAULTS_EXTREMA_WEIGHTING["mmad_scaling_factor"] + +def get_label_pipeline_config( + config: dict[str, Any], + logger: Logger, +) -> dict[str, Any]: + return _get_label_config( + config, + logger, + "label_pipeline", + _validate_pipeline_params, + DEFAULTS_LABEL_PIPELINE, ) - if ( - not isinstance(mmad_scaling_factor, (int, float)) - or not np.isfinite(mmad_scaling_factor) - or mmad_scaling_factor <= 0 - ): - logger.warning( - f"Invalid extrema_weighting mmad_scaling_factor value {mmad_scaling_factor!r}: must be a finite number > 0, using default {DEFAULTS_EXTREMA_WEIGHTING['mmad_scaling_factor']!r}" - ) - mmad_scaling_factor = DEFAULTS_EXTREMA_WEIGHTING["mmad_scaling_factor"] - # Phase 2: Normalization - normalization = extrema_weighting.get( - "normalization", DEFAULTS_EXTREMA_WEIGHTING["normalization"] + +def _validate_smoothing_params( + config: dict[str, Any], + logger: Logger, + config_name: str = "label_smoothing", +) -> dict[str, Any]: + return _validate_params( + config, logger, config_name, _SMOOTHING_SPECS, DEFAULTS_LABEL_SMOOTHING ) - if normalization not in set(NORMALIZATION_TYPES): - logger.warning( - f"Invalid extrema_weighting normalization value {normalization!r}: supported values are {', '.join(NORMALIZATION_TYPES)}, using default {NORMALIZATION_TYPES[0]!r}" - ) - normalization = NORMALIZATION_TYPES[0] - if ( - strategy != WEIGHT_STRATEGIES[0] # "none" - and standardization != STANDARDIZATION_TYPES[0] # "none" - and normalization == NORMALIZATION_TYPES[3] # "none" - ): - logger.warning( - f"extrema_weighting standardization={standardization!r} with 
normalization={normalization!r} can shift/flip ternary extrema labels. " - f"Consider using normalization in {{{NORMALIZATION_TYPES[0]!r},{NORMALIZATION_TYPES[1]!r},{NORMALIZATION_TYPES[2]!r}}} " - f"or set standardization={STANDARDIZATION_TYPES[0]!r}" - ) - minmax_range = extrema_weighting.get( - "minmax_range", DEFAULTS_EXTREMA_WEIGHTING["minmax_range"] +def get_label_smoothing_config( + config: dict[str, Any], + logger: Logger, +) -> dict[str, Any]: + return _get_label_config( + config, + logger, + "label_smoothing", + _validate_smoothing_params, + DEFAULTS_LABEL_SMOOTHING, ) - if ( - not isinstance(minmax_range, (list, tuple)) - or len(minmax_range) != 2 - or not all(isinstance(x, (int, float)) and np.isfinite(x) for x in minmax_range) - or minmax_range[0] >= minmax_range[1] - ): - logger.warning( - f"Invalid extrema_weighting minmax_range value {minmax_range!r}: must be (min, max) with min < max, using default {DEFAULTS_EXTREMA_WEIGHTING['minmax_range']!r}" - ) - minmax_range = DEFAULTS_EXTREMA_WEIGHTING["minmax_range"] - else: - minmax_range = ( - minmax_range[0], - minmax_range[1], - ) - sigmoid_scale = extrema_weighting.get( - "sigmoid_scale", DEFAULTS_EXTREMA_WEIGHTING["sigmoid_scale"] + +def _validate_prediction_params( + config: dict[str, Any], + logger: Logger, + config_name: str = "label_prediction", +) -> dict[str, Any]: + return _validate_params( + config, logger, config_name, _PREDICTION_SPECS, DEFAULTS_LABEL_PREDICTION ) - if ( - not isinstance(sigmoid_scale, (int, float)) - or not np.isfinite(sigmoid_scale) - or sigmoid_scale <= 0 - ): - logger.warning( - f"Invalid extrema_weighting sigmoid_scale value {sigmoid_scale!r}: must be a finite number > 0, using default {DEFAULTS_EXTREMA_WEIGHTING['sigmoid_scale']!r}" - ) - sigmoid_scale = DEFAULTS_EXTREMA_WEIGHTING["sigmoid_scale"] - # Phase 3: Post-processing - gamma = extrema_weighting.get("gamma", DEFAULTS_EXTREMA_WEIGHTING["gamma"]) - if ( - not isinstance(gamma, (int, float)) - or not 
np.isfinite(gamma) - or not (0 < gamma <= 10.0) - ): - logger.warning( - f"Invalid extrema_weighting gamma value {gamma!r}: must be in range (0, 10], using default {DEFAULTS_EXTREMA_WEIGHTING['gamma']!r}" - ) - gamma = DEFAULTS_EXTREMA_WEIGHTING["gamma"] - - return { - "strategy": strategy, - "metric_coefficients": metric_coefficients, - "aggregation": aggregation, - "softmax_temperature": softmax_temperature, - # Phase 1: Standardization - "standardization": standardization, - "robust_quantiles": robust_quantiles, - "mmad_scaling_factor": mmad_scaling_factor, - # Phase 2: Normalization - "normalization": normalization, - "minmax_range": minmax_range, - "sigmoid_scale": sigmoid_scale, - # Phase 3: Post-processing - "gamma": gamma, - } + +def get_label_prediction_config( + config: dict[str, Any], + logger: Logger, +) -> dict[str, Any]: + return _get_label_config( + config, + logger, + "label_prediction", + _validate_prediction_params, + DEFAULTS_LABEL_PREDICTION, + ) def get_distance(p1: T, p2: T) -> T: @@ -300,14 +662,14 @@ def nan_average( return np.nan if weights is None: - return np.nanmean(values) + return float(np.nanmean(values)) weights = np.asarray(weights, dtype=float) mask = np.isfinite(values) & np.isfinite(weights) if not mask.any(): return np.nan - return np.average(values[mask], weights=weights[mask]) + return float(np.average(values[mask], weights=weights[mask])) def non_zero_diff(s1: pd.Series, s2: pd.Series) -> pd.Series: @@ -345,11 +707,11 @@ def _calculate_coeffs( std: float, beta: float, ) -> NDArray[np.floating]: - if win_type == SMOOTHING_METHODS[0]: # "gaussian" + if win_type == SMOOTHING_KERNELS[0]: # "gaussian" coeffs = sp.signal.windows.gaussian(M=window, std=std, sym=True) - elif win_type == SMOOTHING_METHODS[1]: # "kaiser" + elif win_type == SMOOTHING_KERNELS[1]: # "kaiser" coeffs = sp.signal.windows.kaiser(M=window, beta=beta, sym=True) - elif win_type == SMOOTHING_METHODS[2]: # "triang" + elif win_type == SMOOTHING_KERNELS[2]: # 
"triang" coeffs = sp.signal.windows.triang(M=window, sym=True) else: raise ValueError( @@ -382,14 +744,14 @@ def zero_phase_filter( return pd.Series(filtered_values, index=series.index) -def smooth_extrema( +def smooth_label( series: pd.Series, - method: SmoothingMethod = DEFAULTS_EXTREMA_SMOOTHING["method"], - window_candles: int = DEFAULTS_EXTREMA_SMOOTHING["window_candles"], - beta: float = DEFAULTS_EXTREMA_SMOOTHING["beta"], - polyorder: int = DEFAULTS_EXTREMA_SMOOTHING["polyorder"], - mode: SmoothingMode = DEFAULTS_EXTREMA_SMOOTHING["mode"], - sigma: float = DEFAULTS_EXTREMA_SMOOTHING["sigma"], + method: SmoothingMethod = DEFAULTS_LABEL_SMOOTHING["method"], + window_candles: int = DEFAULTS_LABEL_SMOOTHING["window_candles"], + beta: float = DEFAULTS_LABEL_SMOOTHING["beta"], + polyorder: int = DEFAULTS_LABEL_SMOOTHING["polyorder"], + mode: SmoothingMode = DEFAULTS_LABEL_SMOOTHING["mode"], + sigma: float = DEFAULTS_LABEL_SMOOTHING["sigma"], ) -> pd.Series: n = len(series) if n == 0: @@ -405,35 +767,37 @@ def smooth_extrema( odd_window = get_odd_window(window_candles) std = get_gaussian_std(odd_window) - if method == SMOOTHING_METHODS[0]: # "gaussian" + if method == SMOOTHING_METHODS[0]: # "none" + return series + elif method == SMOOTHING_METHODS[1]: # "gaussian" return zero_phase_filter( series=series, window=odd_window, - win_type=SMOOTHING_METHODS[0], + win_type=SMOOTHING_KERNELS[0], # "gaussian" std=std, beta=beta, ) - elif method == SMOOTHING_METHODS[1]: # "kaiser" + elif method == SMOOTHING_METHODS[2]: # "kaiser" return zero_phase_filter( series=series, window=odd_window, - win_type=SMOOTHING_METHODS[1], + win_type=SMOOTHING_KERNELS[1], # "kaiser" std=std, beta=beta, ) - elif method == SMOOTHING_METHODS[2]: # "triang" + elif method == SMOOTHING_METHODS[3]: # "triang" return zero_phase_filter( series=series, window=odd_window, - win_type=SMOOTHING_METHODS[2], + win_type=SMOOTHING_KERNELS[2], # "triang" std=std, beta=beta, ) - elif method == 
SMOOTHING_METHODS[3]: # "smm" (Simple Moving Median) + elif method == SMOOTHING_METHODS[4]: # "smm" (Simple Moving Median) return series.rolling(window=odd_window, center=True, min_periods=1).median() - elif method == SMOOTHING_METHODS[4]: # "sma" (Simple Moving Average) + elif method == SMOOTHING_METHODS[5]: # "sma" (Simple Moving Average) return series.rolling(window=odd_window, center=True, min_periods=1).mean() - elif method == SMOOTHING_METHODS[5]: # "savgol" (Savitzky-Golay) + elif method == SMOOTHING_METHODS[6]: # "savgol" (Savitzky-Golay) w, p, m = get_savgol_params(odd_window, polyorder, mode) if n < w: return series @@ -446,7 +810,7 @@ def smooth_extrema( ), index=series.index, ) - elif method == SMOOTHING_METHODS[6]: # "gaussian_filter1d" + elif method == SMOOTHING_METHODS[7]: # "gaussian_filter1d" return pd.Series( gaussian_filter1d( series.to_numpy(), @@ -459,7 +823,7 @@ def smooth_extrema( return zero_phase_filter( series=series, window=odd_window, - win_type=SMOOTHING_METHODS[0], + win_type=SMOOTHING_KERNELS[0], # "gaussian" std=std, beta=beta, ) @@ -468,7 +832,7 @@ def smooth_extrema( def _impute_weights( weights: NDArray[np.floating], *, - default_weight: float = DEFAULT_EXTREMA_WEIGHT, + default_weight: float = DEFAULT_LABEL_WEIGHT, ) -> NDArray[np.floating]: weights = weights.astype(float, copy=True) @@ -498,10 +862,10 @@ def _build_weights_array( n_extrema: int, indices: list[int], weights: NDArray[np.floating], - default_weight: float = DEFAULT_EXTREMA_WEIGHT, + default_weight: float = DEFAULT_LABEL_WEIGHT, ) -> NDArray[np.floating]: if len(indices) == 0 or weights.size == 0: - return np.full(n_extrema, DEFAULT_EXTREMA_WEIGHT, dtype=float) + return np.full(n_extrema, DEFAULT_LABEL_WEIGHT, dtype=float) if len(indices) != weights.size: raise ValueError( @@ -543,13 +907,21 @@ def _aggregate_metrics( softmax_temperature: float, ) -> NDArray[np.floating]: if aggregation == COMBINED_AGGREGATIONS[0]: # "arithmetic_mean" - return 
sp.stats.pmean(stacked_metrics.T, p=1.0, weights=coefficients, axis=1) + return np.asarray( + sp.stats.pmean(stacked_metrics.T, p=1.0, weights=coefficients, axis=1) + ) elif aggregation == COMBINED_AGGREGATIONS[1]: # "geometric_mean" - return sp.stats.pmean(stacked_metrics.T, p=0.0, weights=coefficients, axis=1) + return np.asarray( + sp.stats.pmean(stacked_metrics.T, p=0.0, weights=coefficients, axis=1) + ) elif aggregation == COMBINED_AGGREGATIONS[2]: # "harmonic_mean" - return sp.stats.pmean(stacked_metrics.T, p=-1.0, weights=coefficients, axis=1) + return np.asarray( + sp.stats.pmean(stacked_metrics.T, p=-1.0, weights=coefficients, axis=1) + ) elif aggregation == COMBINED_AGGREGATIONS[3]: # "quadratic_mean" - return sp.stats.pmean(stacked_metrics.T, p=2.0, weights=coefficients, axis=1) + return np.asarray( + sp.stats.pmean(stacked_metrics.T, p=2.0, weights=coefficients, axis=1) + ) elif aggregation == COMBINED_AGGREGATIONS[4]: # "weighted_median" return np.array( [ @@ -576,49 +948,30 @@ def _aggregate_metrics( ) -def _compute_combined_weights( - indices: list[int], - amplitudes: list[float], - amplitude_threshold_ratios: list[float], - volume_rates: list[float], - speeds: list[float], - efficiency_ratios: list[float], - volume_weighted_efficiency_ratios: list[float], +def _compute_combined_label_weights( + metrics: dict[str, list[float]], metric_coefficients: dict[str, Any], aggregation: CombinedAggregation, softmax_temperature: float, ) -> NDArray[np.floating]: - if len(indices) == 0: + if len(metrics) == 0: return np.asarray([], dtype=float) coefficients = _parse_metric_coefficients(metric_coefficients) if len(coefficients) == 0: - coefficients = dict.fromkeys(COMBINED_METRICS, DEFAULT_EXTREMA_WEIGHT) - - metrics: dict[CombinedMetric, NDArray[np.floating]] = { - "amplitude": np.asarray(amplitudes, dtype=float), - "amplitude_threshold_ratio": np.asarray( - amplitude_threshold_ratios, dtype=float - ), - "volume_rate": np.asarray(volume_rates, dtype=float), - 
"speed": np.asarray(speeds, dtype=float), - "efficiency_ratio": np.asarray(efficiency_ratios, dtype=float), - "volume_weighted_efficiency_ratio": np.asarray( - volume_weighted_efficiency_ratios, dtype=float - ), - } + coefficients = {k: DEFAULT_LABEL_WEIGHT for k in metrics.keys()} imputed_metrics: list[NDArray[np.floating]] = [] coefficients_list: list[float] = [] - for metric_name in COMBINED_METRICS: + for metric_name, metric_values in metrics.items(): if metric_name not in coefficients: continue coefficient = coefficients[metric_name] - metric_values = metrics[metric_name] - if metric_values.size == 0: + values_array = np.asarray(metric_values, dtype=float) + if values_array.size == 0: continue - imputed_metrics.append(_impute_weights(weights=metric_values)) + imputed_metrics.append(_impute_weights(weights=values_array)) coefficients_list.append(float(coefficient)) if len(imputed_metrics) == 0: @@ -632,55 +985,33 @@ def _compute_combined_weights( ) -def compute_extrema_weights( - n_extrema: int, +def compute_label_weights( + n_values: int, indices: list[int], - amplitudes: list[float], - amplitude_threshold_ratios: list[float], - volume_rates: list[float], - speeds: list[float], - efficiency_ratios: list[float], - volume_weighted_efficiency_ratios: list[float], - extrema_weighting: dict[str, Any], + metrics: dict[str, list[float]], + weighting_config: dict[str, Any], ) -> NDArray[np.floating]: - extrema_weighting = {**DEFAULTS_EXTREMA_WEIGHTING, **extrema_weighting} - strategy = extrema_weighting["strategy"] + label_weighting = {**DEFAULTS_LABEL_WEIGHTING, **weighting_config} + strategy = label_weighting["strategy"] if len(indices) == 0 or strategy == WEIGHT_STRATEGIES[0]: # "none" - return np.full(n_extrema, DEFAULT_EXTREMA_WEIGHT, dtype=float) + return np.full(n_values, DEFAULT_LABEL_WEIGHT, dtype=float) weights: Optional[NDArray[np.floating]] = None - if strategy == WEIGHT_STRATEGIES[1]: # "amplitude" - weights = np.asarray(amplitudes, dtype=float) - elif 
strategy == WEIGHT_STRATEGIES[2]: # "amplitude_threshold_ratio" - weights = np.asarray(amplitude_threshold_ratios, dtype=float) - elif strategy == WEIGHT_STRATEGIES[3]: # "volume_rate" - weights = np.asarray(volume_rates, dtype=float) - elif strategy == WEIGHT_STRATEGIES[4]: # "speed" - weights = np.asarray(speeds, dtype=float) - elif strategy == WEIGHT_STRATEGIES[5]: # "efficiency_ratio" - weights = np.asarray(efficiency_ratios, dtype=float) - elif strategy == WEIGHT_STRATEGIES[6]: # "volume_weighted_efficiency_ratio" - weights = np.asarray(volume_weighted_efficiency_ratios, dtype=float) + if strategy in metrics: + weights = np.asarray(metrics[strategy], dtype=float) elif strategy == WEIGHT_STRATEGIES[7]: # "combined" - weights = _compute_combined_weights( - indices=indices, - amplitudes=amplitudes, - amplitude_threshold_ratios=amplitude_threshold_ratios, - volume_rates=volume_rates, - speeds=speeds, - efficiency_ratios=efficiency_ratios, - volume_weighted_efficiency_ratios=volume_weighted_efficiency_ratios, - metric_coefficients=extrema_weighting["metric_coefficients"], - aggregation=extrema_weighting["aggregation"], - softmax_temperature=extrema_weighting["softmax_temperature"], + weights = _compute_combined_label_weights( + metrics=metrics, + metric_coefficients=label_weighting["metric_coefficients"], + aggregation=label_weighting["aggregation"], + softmax_temperature=label_weighting["softmax_temperature"], ) - else: raise ValueError( - f"Invalid extrema weighting strategy value {strategy!r}: " - f"supported values are {', '.join(WEIGHT_STRATEGIES)}" + f"Invalid weighting strategy value {strategy!r}: " + f"supported values are {', '.join(WEIGHT_STRATEGIES)} or metric names {', '.join(metrics.keys())}" ) weights = _impute_weights( @@ -688,61 +1019,51 @@ def compute_extrema_weights( ) return _build_weights_array( - n_extrema=n_extrema, + n_extrema=n_values, indices=indices, weights=weights, default_weight=float(np.nanmedian(weights)), ) -def _apply_weights( - 
extrema: NDArray[np.floating], weights: NDArray[np.floating] +def _apply_label_weights( + values: NDArray[np.floating], weights: NDArray[np.floating] ) -> NDArray[np.floating]: if weights.size == 0: - return extrema + return values if not np.isfinite(weights).all(): - return extrema + return values if np.allclose(weights, weights[0]): - return extrema + return values - if np.allclose(weights, DEFAULT_EXTREMA_WEIGHT): - return extrema + if np.allclose(weights, DEFAULT_LABEL_WEIGHT): + return values - return extrema * weights + return values * weights -def get_weighted_extrema( - extrema: pd.Series, +def apply_label_weighting( + label: pd.Series, indices: list[int], - amplitudes: list[float], - amplitude_threshold_ratios: list[float], - volume_rates: list[float], - speeds: list[float], - efficiency_ratios: list[float], - volume_weighted_efficiency_ratios: list[float], - extrema_weighting: dict[str, Any], + metrics: dict[str, list[float]], + weighting_config: dict[str, Any], ) -> tuple[pd.Series, pd.Series]: - extrema_values = extrema.to_numpy(dtype=float) - extrema_index = extrema.index - n_extrema = len(extrema_values) + label_values = label.to_numpy(dtype=float) + label_index = label.index + n_values = len(label_values) - weights = compute_extrema_weights( - n_extrema=n_extrema, + weights = compute_label_weights( + n_values=n_values, indices=indices, - amplitudes=amplitudes, - amplitude_threshold_ratios=amplitude_threshold_ratios, - volume_rates=volume_rates, - speeds=speeds, - efficiency_ratios=efficiency_ratios, - volume_weighted_efficiency_ratios=volume_weighted_efficiency_ratios, - extrema_weighting=extrema_weighting, + metrics=metrics, + weighting_config=weighting_config, ) return pd.Series( - _apply_weights(extrema_values, weights), index=extrema_index - ), pd.Series(weights, index=extrema_index) + _apply_label_weights(label_values, weights), index=label_index + ), pd.Series(weights, index=label_index) def get_callable_sha256(fn: Callable[..., Any]) -> str: 
@@ -1684,7 +2005,12 @@ def fit_regressor( """Fit a regressor model.""" fit_callbacks = list(callbacks) if callbacks else [] - has_eval_set = eval_set is not None and len(eval_set) > 0 + has_eval_set = ( + eval_set is not None + and len(eval_set) > 0 + and eval_weights is not None + and len(eval_weights) > 0 + ) if not has_eval_set: eval_set = None eval_weights = None @@ -1947,13 +2273,15 @@ def fit_regressor( model.fit( Pool(data=X, label=y, weight=train_weights), - eval_set=Pool( - data=eval_set[0][0], - label=eval_set[0][1], - weight=eval_weights[0] if eval_weights else None, - ) - if has_eval_set - else None, + eval_set=( + Pool( + data=eval_set[0][0], + label=eval_set[0][1], + weight=eval_weights[0], + ) + if has_eval_set and eval_set is not None and eval_weights is not None + else None + ), early_stopping_rounds=early_stopping_rounds if early_stopping_rounds is not None and has_eval_set else None, @@ -2664,7 +2992,7 @@ def soft_extremum(series: pd.Series, alpha: float) -> float: if not finite_mask.any(): return np.nan if np.isclose(alpha, 0.0): - return np.nanmean(values) + return float(np.nanmean(values)) scaled_values = alpha * values max_scaled_values = np.nanmax(scaled_values) if not np.isfinite(max_scaled_values): @@ -2784,33 +3112,6 @@ def floor_to_step(value: float | int, step: int) -> int: return int(math.floor(float(value) / step) * step) -def update_config_value( - config: Any, - *, - new_key: str, - old_key: str, - default: Any, - logger: Logger, - new_path: str, - old_path: str, -) -> Any: - if not isinstance(config, dict): - return default - - if new_key in config: - return config[new_key] - - if old_key in config: - logger.warning( - f"Deprecated config key {old_path} detected; use {new_path} instead" - ) - config[new_key] = config.pop(old_key) - return config[new_key] - - config[new_key] = default - return default - - def validate_range( min_val: float | int, max_val: float | int, @@ -2894,23 +3195,11 @@ def get_label_defaults( 
default_min_label_natr_multiplier: float = 9.0, default_max_label_natr_multiplier: float = 12.0, ) -> tuple[int, float]: - min_label_natr_multiplier = update_config_value( - feature_parameters, - new_key="min_label_natr_multiplier", - old_key="min_label_natr_ratio", - default=default_min_label_natr_multiplier, - logger=logger, - new_path="freqai.feature_parameters.min_label_natr_multiplier", - old_path="freqai.feature_parameters.min_label_natr_ratio", + min_label_natr_multiplier = feature_parameters.get( + "min_label_natr_multiplier", default_min_label_natr_multiplier ) - max_label_natr_multiplier = update_config_value( - feature_parameters, - new_key="max_label_natr_multiplier", - old_key="max_label_natr_ratio", - default=default_max_label_natr_multiplier, - logger=logger, - new_path="freqai.feature_parameters.max_label_natr_multiplier", - old_path="freqai.feature_parameters.max_label_natr_ratio", + max_label_natr_multiplier = feature_parameters.get( + "max_label_natr_multiplier", default_max_label_natr_multiplier ) min_label_natr_multiplier, max_label_natr_multiplier = validate_range( min_label_natr_multiplier, @@ -2926,14 +3215,8 @@ def get_label_defaults( default_label_natr_multiplier = float( midpoint(min_label_natr_multiplier, max_label_natr_multiplier) ) - update_config_value( - feature_parameters, - new_key="label_natr_multiplier", - old_key="label_natr_ratio", - default=default_label_natr_multiplier, - logger=logger, - new_path="freqai.feature_parameters.label_natr_multiplier", - old_path="freqai.feature_parameters.label_natr_ratio", + feature_parameters.setdefault( + "label_natr_multiplier", default_label_natr_multiplier ) min_label_period_candles = feature_parameters.get( -- 2.53.0