From fef729aa2d9f75aa968444b06b9eaeb76f664b83 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Fri, 2 Jan 2026 00:01:19 +0100 Subject: [PATCH] refactor!: reorganize label selection with distance, cluster and density methods (#29) MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit * refactor: reorganize label selection with distance, cluster and density methods * refactor: import lru_cache directly instead of functools module * chore: remove unused imports in Utils.py Signed-off-by: Jérôme Benoit * refactor: cleanup n_neighbors adjustment in QuickAdapterRegressorV3 Signed-off-by: Jérôme Benoit * fix: use unbounded cache for constant-returning helper methods Replace @lru_cache(maxsize=1) with @lru_cache(maxsize=None) for all static methods that return constant sets. Using maxsize=None is more idiomatic and efficient for parameterless functions that always return the same value. * refactor: add _prepare_distance_kwargs to centralize distance kwargs preparation Signed-off-by: Jérôme Benoit * refactor: cleanup extrema weighting API Signed-off-by: Jérôme Benoit * refactor: cleanup extrema smoothing API Signed-off-by: Jérôme Benoit * refactor: align namespace Signed-off-by: Jérôme Benoit * refactor: add more tunables validations Signed-off-by: Jérôme Benoit * refactor: simplify cluster-based label selection - Remove ClusterSelectionMethod type and related constants - Unify selection methods to use DistanceMethod for both cluster and trial selection - Add separate trial_selection_method parameter for within-cluster selection - Change power_mean default from 2.0 to 1.0 for internal consistency - Add validation for selection_method and trial_selection_method parameters * fix: add missing validations for label_distance_metric and label_density_aggregation_param - Add validation for label_distance_metric parameter at configuration time - Add early validation for label_density_aggregation_param (quantile and power_mean) - Ensures invalid configuration values fail fast with clear error messages - Harmonizes error messages with existing validation patterns in the codebase * fix: add validation for label_cluster_metric and custom metrics support in topsis - Add validation that label_cluster_metric is in _distance_metrics_set() - Implement custom metrics support in _topsis_scores (hellinger, shellinger, harmonic/geometric/arithmetic/quadratic/cubic/power_mean, weighted_sum) matching _compromise_programming_scores implementation * docs: update README.md with refactored label selection methods Signed-off-by: Jérôme Benoit * docs: fix config parameter and bump to v3.9.0 - Fix config-template.json: label_metric -> label_method - Bump version from 3.8.5 to 3.9.0 in model and strategy Parameter names now match QuickAdapterRegressorV3.py implementation. * docs: refine README label selection methods descriptions Signed-off-by: Jérôme Benoit * refactor: refine error message Signed-off-by: Jérôme Benoit --------- Signed-off-by: Jérôme Benoit --- README.md | 171 +- quickadapter/user_data/config-template.json | 2 +- .../freqaimodels/QuickAdapterRegressorV3.py | 1909 ++++++++++------- .../user_data/strategies/QuickAdapterV3.py | 241 +-- quickadapter/user_data/strategies/Utils.py | 82 +- 5 files changed, 1351 insertions(+), 1054 deletions(-) diff --git a/README.md b/README.md index 788f911..fe96964 100644 --- a/README.md +++ b/README.md @@ -37,92 +37,91 @@ docker compose up -d --build ### Configuration tunables -| Path | Default | Type / Range | Description | -| ----------------------------------------------------- | ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| _Protections_ | | | | -| custom_protections.trade_duration_candles | 72 | int >= 1 | Estimated trade duration in candles. Scales protections stop duration candles and trade limit. | -| custom_protections.lookback_period_fraction | 0.5 | float (0,1] | Fraction of `fit_live_predictions_candles` used to calculate `lookback_period_candles` for _MaxDrawdown_ and _StoplossGuard_ protections. | -| custom_protections.cooldown.enabled | true | bool | Enable/disable _CooldownPeriod_ protection. | -| custom_protections.cooldown.stop_duration_candles | 4 | int >= 1 | Number of candles to wait before allowing new trades after a trade is closed. | -| custom_protections.drawdown.enabled | true | bool | Enable/disable _MaxDrawdown_ protection. | -| custom_protections.drawdown.max_allowed_drawdown | 0.2 | float (0,1) | Maximum allowed drawdown. | -| custom_protections.stoploss.enabled | true | bool | Enable/disable _StoplossGuard_ protection. | -| _Leverage_ | | | | -| leverage | `proposed_leverage` | float [1.0, max_leverage] | Leverage. Fallback to `proposed_leverage` for the pair. | -| _Exit pricing_ | | | | -| exit_pricing.trade_price_target_method | `moving_average` | enum {`moving_average`,`quantile_interpolation`,`weighted_average`} | Trade NATR computation method. (Deprecated alias: `exit_pricing.trade_price_target`) | -| exit_pricing.thresholds_calibration.decline_quantile | 0.75 | float (0,1) | PnL decline quantile threshold. | -| _Reversal confirmation_ | | | | -| reversal_confirmation.lookback_period_candles | 0 | int >= 0 | Prior confirming candles; 0 = none. (Deprecated alias: `reversal_confirmation.lookback_period`) | -| reversal_confirmation.decay_fraction | 0.5 | float (0,1] | Geometric per-candle volatility adjusted reversal threshold relaxation factor. (Deprecated alias: `reversal_confirmation.decay_ratio`) | -| reversal_confirmation.min_natr_multiplier_fraction | 0.0095 | float [0,1] | Lower bound fraction for volatility adjusted reversal threshold. (Deprecated alias: `reversal_confirmation.min_natr_ratio_percent`) | -| reversal_confirmation.max_natr_multiplier_fraction | 0.075 | float [0,1] | Upper bound fraction (>= lower bound) for volatility adjusted reversal threshold. (Deprecated alias: `reversal_confirmation.max_natr_ratio_percent`) | -| _Regressor model_ | | | | -| freqai.regressor | `xgboost` | enum {`xgboost`,`lightgbm`,`histgradientboostingregressor`} | Machine learning regressor algorithm. | -| _Extrema smoothing_ | | | | -| freqai.extrema_smoothing.method | `gaussian` | enum {`gaussian`,`kaiser`,`triang`,`smm`,`sma`,`savgol`,`gaussian_filter1d`} | Extrema smoothing method (`smm`=median, `sma`=mean, `savgol`=Savitzky–Golay). | -| freqai.extrema_smoothing.window_candles | 5 | int >= 3 | Smoothing window length (candles). (Deprecated alias: `freqai.extrema_smoothing.window`) | -| freqai.extrema_smoothing.beta | 8.0 | float > 0 | Shape parameter for `kaiser` kernel. | -| freqai.extrema_smoothing.polyorder | 3 | int >= 1 | Polynomial order for `savgol` smoothing. | -| freqai.extrema_smoothing.mode | `mirror` | enum {`mirror`,`constant`,`nearest`,`wrap`,`interp`} | Boundary mode for `savgol` and `gaussian_filter1d`. | -| freqai.extrema_smoothing.sigma | 1.0 | float > 0 | Gaussian `sigma` for `gaussian_filter1d` smoothing. | -| _Extrema weighting_ | | | | -| freqai.extrema_weighting.strategy | `none` | enum {`none`,`amplitude`,`amplitude_threshold_ratio`,`volume_rate`,`speed`,`efficiency_ratio`,`volume_weighted_efficiency_ratio`,`hybrid`} | Extrema weighting source: unweighted (`none`), swing amplitude (`amplitude`), swing amplitude / median volatility-threshold ratio (`amplitude_threshold_ratio`), swing volume per candle (`volume_rate`), swing speed (`speed`), swing efficiency ratio (`efficiency_ratio`), swing volume-weighted efficiency ratio (`volume_weighted_efficiency_ratio`), or `hybrid`. | -| freqai.extrema_weighting.source_weights | `{}` | dict[str, float] | Weights on extrema weighting sources for `hybrid`. | -| freqai.extrema_weighting.aggregation | `weighted_sum` | enum {`weighted_sum`,`geometric_mean`} | Aggregation method applied to weighted extrema weighting sources for `hybrid`. | -| freqai.extrema_weighting.aggregation_normalization | `none` | enum {`minmax`,`sigmoid`,`softmax`,`l1`,`l2`,`rank`,`none`} | Normalization method applied to the aggregated extrema weighting source for `hybrid`. | -| freqai.extrema_weighting.standardization | `none` | enum {`none`,`zscore`,`robust`,`mmad`} | Standardization method applied to weights before normalization. `none`=no standardization, `zscore`=(w-μ)/σ, `robust`=(w-median)/IQR, `mmad`=(w-median)/MAD. | -| freqai.extrema_weighting.robust_quantiles | [0.25, 0.75] | list[float] where 0 <= Q1 < Q3 <= 1 | Quantile range for robust standardization, Q1 and Q3. | -| freqai.extrema_weighting.mmad_scaling_factor | 1.4826 | float > 0 | Scaling factor for MMAD standardization. | -| freqai.extrema_weighting.normalization | `minmax` | enum {`minmax`,`sigmoid`,`softmax`,`l1`,`l2`,`rank`,`none`} | Normalization method applied to weights. | -| freqai.extrema_weighting.minmax_range | [0.0, 1.0] | list[float] | Target range for `minmax` normalization, min and max. | -| freqai.extrema_weighting.sigmoid_scale | 1.0 | float > 0 | Scale parameter for `sigmoid` normalization, controls steepness. | -| freqai.extrema_weighting.softmax_temperature | 1.0 | float > 0 | Temperature parameter for `softmax` normalization: lower values sharpen distribution, higher values flatten it. | -| freqai.extrema_weighting.rank_method | `average` | enum {`average`,`min`,`max`,`dense`,`ordinal`} | Ranking method for `rank` normalization. | -| freqai.extrema_weighting.gamma | 1.0 | float (0,10] | Contrast exponent applied after normalization: >1 emphasizes extrema, values between 0 and 1 soften. | -| _Feature parameters_ | | | | -| freqai.feature_parameters.label_period_candles | min/max midpoint | int >= 1 | Zigzag labeling NATR horizon. | -| freqai.feature_parameters.min_label_period_candles | 12 | int >= 1 | Minimum labeling NATR horizon used for reversals labeling HPO. | -| freqai.feature_parameters.max_label_period_candles | 24 | int >= 1 | Maximum labeling NATR horizon used for reversals labeling HPO. | -| freqai.feature_parameters.label_natr_multiplier | min/max midpoint | float > 0 | Zigzag labeling NATR multiplier. (Deprecated alias: `freqai.feature_parameters.label_natr_ratio`) | -| freqai.feature_parameters.min_label_natr_multiplier | 9.0 | float > 0 | Minimum labeling NATR multiplier used for reversals labeling HPO. (Deprecated alias: `freqai.feature_parameters.min_label_natr_ratio`) | -| freqai.feature_parameters.max_label_natr_multiplier | 12.0 | float > 0 | Maximum labeling NATR multiplier used for reversals labeling HPO. (Deprecated alias: `freqai.feature_parameters.max_label_natr_ratio`) | -| freqai.feature_parameters.label_frequency_candles | `auto` | int >= 2 \| `auto` | Reversals labeling frequency. `auto` = max(2, 2 \* number of whitelisted pairs). | -| freqai.feature_parameters.label_metric | `euclidean` | string | Metric for Pareto front trial selection (SciPy distance metrics or selection metrics like `topsis`, `medoid`, `kmeans`, `kmedoids`, ...). | -| freqai.feature_parameters.label_weights | [1/7,1/7,1/7,1/7,1/7,1/7,1/7] | list[float] | Per-objective weights used in distance calculations to ideal point. Objectives: (1) number of detected reversals, (2) median swing amplitude, (3) median (swing amplitude / median volatility-threshold ratio), (4) median swing volume per candle, (5) median swing speed, (6) median swing efficiency ratio, (7) median swing volume-weighted efficiency ratio. | -| freqai.feature_parameters.label_p_order | `None` | float \| None | p-order for Minkowski distance. Used by `minkowski`, `power_mean`, `medoid`, `kmeans`, `kmedoids`, `knn`, `topsis` when their sub-metric is `minkowski`. | -| freqai.feature_parameters.label_medoid_metric | `euclidean` | string | Distance metric used with `medoid`. | -| freqai.feature_parameters.label_kmeans_metric | `euclidean` | string | Distance metric used for k-means clustering. | -| freqai.feature_parameters.label_kmeans_selection | `min` | enum {`min`,`medoid`,`topsis`} | Strategy to select trial in the best k-means cluster. | -| freqai.feature_parameters.label_kmedoids_metric | `euclidean` | string | Distance metric used for k-medoids clustering. | -| freqai.feature_parameters.label_kmedoids_selection | `min` | enum {`min`,`medoid`,`topsis`} | Strategy to select trial in the best k-medoids cluster. | -| freqai.feature_parameters.label_topsis_metric | `euclidean` | string | Distance metric for TOPSIS ideal/anti-ideal point calculations. | -| freqai.feature_parameters.label_knn_metric | `minkowski` | string | Distance metric for KNN. | -| freqai.feature_parameters.label_knn_p_order | `None` | float \| None | Tunable for KNN neighbor distances aggregation methods: p-order (`knn_power_mean`, default: 1.0) or quantile (`knn_quantile`, default: 0.5). | -| freqai.feature_parameters.label_knn_n_neighbors | 5 | int >= 1 | Number of neighbors for KNN. | -| _Predictions extrema_ | | | | -| freqai.predictions_extrema.selection_method | `rank_extrema` | enum {`rank_extrema`,`rank_peaks`,`partition`} | Extrema selection method. `rank_extrema` ranks extrema values, `rank_peaks` ranks detected peak values, `partition` uses sign-based partitioning. | -| freqai.predictions_extrema.threshold_smoothing_method | `mean` | enum {`mean`,`isodata`,`li`,`minimum`,`otsu`,`triangle`,`yen`,`median`,`soft_extremum`} | Thresholding method for prediction thresholds smoothing. (Deprecated alias: `freqai.predictions_extrema.thresholds_smoothing`) | -| freqai.predictions_extrema.soft_extremum_alpha | 12.0 | float >= 0 | Alpha for `soft_extremum` thresholds smoothing. (Deprecated alias: `freqai.predictions_extrema.thresholds_alpha`) | -| freqai.predictions_extrema.outlier_threshold_quantile | 0.999 | float (0,1) | Quantile threshold for predictions outlier filtering. (Deprecated alias: `freqai.predictions_extrema.threshold_outlier`) | -| freqai.predictions_extrema.keep_extrema_fraction | 1.0 | float (0,1] | Fraction of extrema used for thresholds. `1.0` uses all, lower values keep only most significant. Applies to `rank_extrema` and `rank_peaks`; ignored for `partition`. (Deprecated alias: `freqai.predictions_extrema.extrema_fraction`) | -| _Optuna / HPO_ | | | | -| freqai.optuna_hyperopt.enabled | false | bool | Enables HPO. | -| freqai.optuna_hyperopt.sampler | `tpe` | enum {`tpe`,`auto`} | HPO sampler algorithm for `hp` and `train` namespaces. `tpe` uses [TPESampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.TPESampler.html) with multivariate and group, `auto` uses [AutoSampler](https://hub.optuna.org/samplers/auto_sampler). | -| freqai.optuna_hyperopt.label_sampler | `auto` | enum {`auto`,`tpe`,`nsgaii`,`nsgaiii`} | HPO sampler algorithm for multi-objective `label` namespace. `nsgaii` uses [NSGAIISampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.NSGAIISampler.html), `nsgaiii` uses [NSGAIIISampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.NSGAIIISampler.html). | -| freqai.optuna_hyperopt.storage | `file` | enum {`file`,`sqlite`} | HPO storage backend. | -| freqai.optuna_hyperopt.continuous | true | bool | Continuous HPO. | -| freqai.optuna_hyperopt.warm_start | true | bool | Warm start HPO with previous best value(s). | -| freqai.optuna_hyperopt.n_startup_trials | 15 | int >= 0 | HPO startup trials. | -| freqai.optuna_hyperopt.n_trials | 50 | int >= 1 | Maximum HPO trials. | -| freqai.optuna_hyperopt.n_jobs | CPU threads / 4 | int >= 1 | Parallel HPO workers. | -| freqai.optuna_hyperopt.timeout | 7200 | int >= 0 | HPO wall-clock timeout in seconds. | -| freqai.optuna_hyperopt.label_candles_step | 1 | int >= 1 | Step for Zigzag NATR horizon `label` search space. | -| freqai.optuna_hyperopt.train_candles_step | 10 | int >= 1 | Step for training sets size `train` search space. | -| freqai.optuna_hyperopt.space_reduction | false | bool | Enable/disable `hp` search space reduction based on previous best parameters. | -| freqai.optuna_hyperopt.space_fraction | 0.4 | float [0,1] | Fraction of the `hp` search space to use with `space_reduction`. Lower values create narrower search ranges around the best parameters. (Deprecated alias: `freqai.optuna_hyperopt.expansion_ratio`) | -| freqai.optuna_hyperopt.min_resource | 3 | int >= 1 | Minimum resource per [HyperbandPruner](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.HyperbandPruner.html) rung. | -| freqai.optuna_hyperopt.seed | 1 | int >= 0 | HPO RNG seed. | +| Path | Default | Type / Range | Description | +| -------------------------------------------------------------- | ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| _Protections_ | | | | +| custom_protections.trade_duration_candles | 72 | int >= 1 | Estimated trade duration in candles. Scales protections stop duration candles and trade limit. | +| custom_protections.lookback_period_fraction | 0.5 | float (0,1] | Fraction of `fit_live_predictions_candles` used to calculate `lookback_period_candles` for _MaxDrawdown_ and _StoplossGuard_ protections. | +| custom_protections.cooldown.enabled | true | bool | Enable/disable _CooldownPeriod_ protection. | +| custom_protections.cooldown.stop_duration_candles | 4 | int >= 1 | Number of candles to wait before allowing new trades after a trade is closed. | +| custom_protections.drawdown.enabled | true | bool | Enable/disable _MaxDrawdown_ protection. | +| custom_protections.drawdown.max_allowed_drawdown | 0.2 | float (0,1) | Maximum allowed drawdown. | +| custom_protections.stoploss.enabled | true | bool | Enable/disable _StoplossGuard_ protection. | +| _Leverage_ | | | | +| leverage | `proposed_leverage` | float [1.0, max_leverage] | Leverage. Fallback to `proposed_leverage` for the pair. | +| _Exit pricing_ | | | | +| exit_pricing.trade_price_target_method | `moving_average` | enum {`moving_average`,`quantile_interpolation`,`weighted_average`} | Trade NATR computation method. (Deprecated alias: `exit_pricing.trade_price_target`) | +| exit_pricing.thresholds_calibration.decline_quantile | 0.75 | float (0,1) | PnL decline quantile threshold. | +| _Reversal confirmation_ | | | | +| reversal_confirmation.lookback_period_candles | 0 | int >= 0 | Prior confirming candles; 0 = none. (Deprecated alias: `reversal_confirmation.lookback_period`) | +| reversal_confirmation.decay_fraction | 0.5 | float (0,1] | Geometric per-candle volatility adjusted reversal threshold relaxation factor. (Deprecated alias: `reversal_confirmation.decay_ratio`) | +| reversal_confirmation.min_natr_multiplier_fraction | 0.0095 | float [0,1] | Lower bound fraction for volatility adjusted reversal threshold. (Deprecated alias: `reversal_confirmation.min_natr_ratio_percent`) | +| reversal_confirmation.max_natr_multiplier_fraction | 0.075 | float [0,1] | Upper bound fraction (>= lower bound) for volatility adjusted reversal threshold. (Deprecated alias: `reversal_confirmation.max_natr_ratio_percent`) | +| _Regressor model_ | | | | +| freqai.regressor | `xgboost` | enum {`xgboost`,`lightgbm`,`histgradientboostingregressor`} | Machine learning regressor algorithm. | +| _Extrema smoothing_ | | | | +| freqai.extrema_smoothing.method | `gaussian` | enum {`gaussian`,`kaiser`,`triang`,`smm`,`sma`,`savgol`,`gaussian_filter1d`} | Extrema smoothing method (`smm`=median, `sma`=mean, `savgol`=Savitzky–Golay). | +| freqai.extrema_smoothing.window_candles | 5 | int >= 3 | Smoothing window length (candles). (Deprecated alias: `freqai.extrema_smoothing.window`) | +| freqai.extrema_smoothing.beta | 8.0 | float > 0 | Shape parameter for `kaiser` kernel. | +| freqai.extrema_smoothing.polyorder | 3 | int >= 1 | Polynomial order for `savgol` smoothing. | +| freqai.extrema_smoothing.mode | `mirror` | enum {`mirror`,`constant`,`nearest`,`wrap`,`interp`} | Boundary mode for `savgol` and `gaussian_filter1d`. | +| freqai.extrema_smoothing.sigma | 1.0 | float > 0 | Gaussian `sigma` for `gaussian_filter1d` smoothing. | +| _Extrema weighting_ | | | | +| freqai.extrema_weighting.strategy | `none` | enum {`none`,`amplitude`,`amplitude_threshold_ratio`,`volume_rate`,`speed`,`efficiency_ratio`,`volume_weighted_efficiency_ratio`,`hybrid`} | Extrema weighting source: unweighted (`none`), swing amplitude (`amplitude`), swing amplitude / median volatility-threshold ratio (`amplitude_threshold_ratio`), swing volume per candle (`volume_rate`), swing speed (`speed`), swing efficiency ratio (`efficiency_ratio`), swing volume-weighted efficiency ratio (`volume_weighted_efficiency_ratio`), or `hybrid`. | +| freqai.extrema_weighting.source_weights | `{}` | dict[str, float] | Weights on extrema weighting sources for `hybrid`. | +| freqai.extrema_weighting.aggregation | `weighted_sum` | enum {`weighted_sum`,`geometric_mean`} | Aggregation method applied to weighted extrema weighting sources for `hybrid`. | +| freqai.extrema_weighting.aggregation_normalization | `none` | enum {`minmax`,`sigmoid`,`softmax`,`l1`,`l2`,`rank`,`none`} | Normalization method applied to the aggregated extrema weighting source for `hybrid`. | +| freqai.extrema_weighting.standardization | `none` | enum {`none`,`zscore`,`robust`,`mmad`} | Standardization method applied to weights before normalization. `none`=no standardization, `zscore`=(w-μ)/σ, `robust`=(w-median)/IQR, `mmad`=(w-median)/MAD. | +| freqai.extrema_weighting.robust_quantiles | [0.25, 0.75] | list[float] where 0 <= Q1 < Q3 <= 1 | Quantile range for robust standardization, Q1 and Q3. | +| freqai.extrema_weighting.mmad_scaling_factor | 1.4826 | float > 0 | Scaling factor for MMAD standardization. | +| freqai.extrema_weighting.normalization | `minmax` | enum {`minmax`,`sigmoid`,`softmax`,`l1`,`l2`,`rank`,`none`} | Normalization method applied to weights. | +| freqai.extrema_weighting.minmax_range | [0.0, 1.0] | list[float] | Target range for `minmax` normalization, min and max. | +| freqai.extrema_weighting.sigmoid_scale | 1.0 | float > 0 | Scale parameter for `sigmoid` normalization, controls steepness. | +| freqai.extrema_weighting.softmax_temperature | 1.0 | float > 0 | Temperature parameter for `softmax` normalization: lower values sharpen distribution, higher values flatten it. | +| freqai.extrema_weighting.rank_method | `average` | enum {`average`,`min`,`max`,`dense`,`ordinal`} | Ranking method for `rank` normalization. | +| freqai.extrema_weighting.gamma | 1.0 | float (0,10] | Contrast exponent applied after normalization: >1 emphasizes extrema, values between 0 and 1 soften. | +| _Feature parameters_ | | | | +| freqai.feature_parameters.label_period_candles | min/max midpoint | int >= 1 | Zigzag labeling NATR horizon. | +| freqai.feature_parameters.min_label_period_candles | 12 | int >= 1 | Minimum labeling NATR horizon used for reversals labeling HPO. | +| freqai.feature_parameters.max_label_period_candles | 24 | int >= 1 | Maximum labeling NATR horizon used for reversals labeling HPO. | +| freqai.feature_parameters.label_natr_multiplier | min/max midpoint | float > 0 | Zigzag labeling NATR multiplier. (Deprecated alias: `freqai.feature_parameters.label_natr_ratio`) | +| freqai.feature_parameters.min_label_natr_multiplier | 9.0 | float > 0 | Minimum labeling NATR multiplier used for reversals labeling HPO. (Deprecated alias: `freqai.feature_parameters.min_label_natr_ratio`) | +| freqai.feature_parameters.max_label_natr_multiplier | 12.0 | float > 0 | Maximum labeling NATR multiplier used for reversals labeling HPO. (Deprecated alias: `freqai.feature_parameters.max_label_natr_ratio`) | +| freqai.feature_parameters.label_frequency_candles | `auto` | int >= 2 \| `auto` | Reversals labeling frequency. `auto` = max(2, 2 \* number of whitelisted pairs). | +| freqai.feature_parameters.label_weights | [1/7,1/7,1/7,1/7,1/7,1/7,1/7] | list[float] | Per-objective weights used in distance calculations to ideal point. Objectives: (1) number of detected reversals, (2) median swing amplitude, (3) median (swing amplitude / median volatility-threshold ratio), (4) median swing volume per candle, (5) median swing speed, (6) median swing efficiency ratio, (7) median swing volume-weighted efficiency ratio. | +| freqai.feature_parameters.label_p_order | `None` | float \| None | p-order parameter for distance metrics. Used by minkowski (default 2.0) and power_mean (default 1.0). Ignored by other metrics. | +| freqai.feature_parameters.label_method | `compromise_programming` | enum {`compromise_programming`,`topsis`,`kmeans`,`kmeans2`,`kmedoids`,`knn`,`medoid`} | HPO `label` Pareto front trial selection method. | +| freqai.feature_parameters.label_distance_metric | `euclidean` | string | Distance metric for `compromise_programming` and `topsis` methods. | +| freqai.feature_parameters.label_cluster_metric | `euclidean` | string | Distance metric for `kmeans`, `kmeans2`, and `kmedoids` methods. | +| freqai.feature_parameters.label_cluster_selection_method | `topsis` | enum {`compromise_programming`,`topsis`} | Cluster selection method for clustering-based label methods. | +| freqai.feature_parameters.label_cluster_trial_selection_method | `topsis` | enum {`compromise_programming`,`topsis`} | Best cluster trial selection method for clustering-based label methods. | +| freqai.feature_parameters.label_density_metric | method-dependent | string | Distance metric for `knn` and `medoid` methods. | +| freqai.feature_parameters.label_density_aggregation | `power_mean` | enum {`power_mean`,`quantile`,`min`,`max`} | Aggregation method for KNN neighbor distances. | +| freqai.feature_parameters.label_density_n_neighbors | 5 | int >= 1 | Number of neighbors for KNN. | +| freqai.feature_parameters.label_density_aggregation_param | aggregation-dependent | float \| None | Tunable for KNN neighbor distance aggregation: p-order (`power_mean`) or quantile value (`quantile`). | +| _Predictions extrema_ | | | | +| freqai.predictions_extrema.selection_method | `rank_extrema` | enum {`rank_extrema`,`rank_peaks`,`partition`} | Extrema selection method. `rank_extrema` ranks extrema values, `rank_peaks` ranks detected peak values, `partition` uses sign-based partitioning. | +| freqai.predictions_extrema.threshold_smoothing_method | `mean` | enum {`mean`,`isodata`,`li`,`minimum`,`otsu`,`triangle`,`yen`,`median`,`soft_extremum`} | Thresholding method for prediction thresholds smoothing. (Deprecated alias: `freqai.predictions_extrema.thresholds_smoothing`) | +| freqai.predictions_extrema.soft_extremum_alpha | 12.0 | float >= 0 | Alpha for `soft_extremum` thresholds smoothing. (Deprecated alias: `freqai.predictions_extrema.thresholds_alpha`) | +| freqai.predictions_extrema.outlier_threshold_quantile | 0.999 | float (0,1) | Quantile threshold for predictions outlier filtering. (Deprecated alias: `freqai.predictions_extrema.threshold_outlier`) | +| freqai.predictions_extrema.keep_extrema_fraction | 1.0 | float (0,1] | Fraction of extrema used for thresholds. `1.0` uses all, lower values keep only most significant. Applies to `rank_extrema` and `rank_peaks`; ignored for `partition`. (Deprecated alias: `freqai.predictions_extrema.extrema_fraction`) | +| _Optuna / HPO_ | | | | +| freqai.optuna_hyperopt.enabled | false | bool | Enables HPO. | +| freqai.optuna_hyperopt.sampler | `tpe` | enum {`tpe`,`auto`} | HPO sampler algorithm for `hp` and `train` namespaces. `tpe` uses [TPESampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.TPESampler.html) with multivariate and group, `auto` uses [AutoSampler](https://hub.optuna.org/samplers/auto_sampler). | +| freqai.optuna_hyperopt.label_sampler | `auto` | enum {`auto`,`tpe`,`nsgaii`,`nsgaiii`} | HPO sampler algorithm for multi-objective `label` namespace. `nsgaii` uses [NSGAIISampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.NSGAIISampler.html), `nsgaiii` uses [NSGAIIISampler](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.NSGAIIISampler.html). | +| freqai.optuna_hyperopt.storage | `file` | enum {`file`,`sqlite`} | HPO storage backend. | +| freqai.optuna_hyperopt.continuous | true | bool | Continuous HPO. | +| freqai.optuna_hyperopt.warm_start | true | bool | Warm start HPO with previous best value(s). | +| freqai.optuna_hyperopt.n_startup_trials | 15 | int >= 0 | HPO startup trials. | +| freqai.optuna_hyperopt.n_trials | 50 | int >= 1 | Maximum HPO trials. | +| freqai.optuna_hyperopt.n_jobs | CPU threads / 4 | int >= 1 | Parallel HPO workers. | +| freqai.optuna_hyperopt.timeout | 7200 | int >= 0 | HPO wall-clock timeout in seconds. | +| freqai.optuna_hyperopt.label_candles_step | 1 | int >= 1 | Step for Zigzag NATR horizon `label` search space. | +| freqai.optuna_hyperopt.train_candles_step | 10 | int >= 1 | Step for training sets size `train` search space. | +| freqai.optuna_hyperopt.space_reduction | false | bool | Enable/disable `hp` search space reduction based on previous best parameters. | +| freqai.optuna_hyperopt.space_fraction | 0.4 | float [0,1] | Fraction of the `hp` search space to use with `space_reduction`. Lower values create narrower search ranges around the best parameters. (Deprecated alias: `freqai.optuna_hyperopt.expansion_ratio`) | +| freqai.optuna_hyperopt.min_resource | 3 | int >= 1 | Minimum resource per [HyperbandPruner](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.HyperbandPruner.html) rung. | +| freqai.optuna_hyperopt.seed | 1 | int >= 0 | HPO RNG seed. | ## ReforceXY diff --git a/quickadapter/user_data/config-template.json b/quickadapter/user_data/config-template.json index e5b93d7..f5acd8d 100644 --- a/quickadapter/user_data/config-template.json +++ b/quickadapter/user_data/config-template.json @@ -175,7 +175,7 @@ ], "label_period_candles": 18, "label_natr_multiplier": 10.5, - "label_metric": "kmedoids", + "label_method": "kmedoids", "include_shifted_candles": 6, "DI_threshold": 10, "weight_factor": 0.9, diff --git a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py index c2bfdb9..cffad8d 100644 --- a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py +++ b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py @@ -4,8 +4,9 @@ import logging import random import time import warnings +from functools import lru_cache from pathlib import Path -from typing import Any, Callable, Final, Literal, Optional, Union +from typing import Any, Callable, Final, Literal, Optional, Union, cast import numpy as np import optuna @@ -43,13 +44,16 @@ from Utils import ( ExtremaSelectionMethod = Literal["rank_extrema", "rank_peaks", "partition"] OptunaNamespace = Literal["hp", "train", "label"] OptunaSampler = Literal["tpe", "auto", "nsgaii", "nsgaiii"] -ClusterSelectionMethod = Literal["medoid", "min", "topsis"] CustomThresholdMethod = Literal["median", "soft_extremum"] SkimageThresholdMethod = Literal[ "mean", "isodata", "li", "minimum", "otsu", "triangle", "yen" ] ThresholdMethod = Union[SkimageThresholdMethod, CustomThresholdMethod] - +DensityAggregation = Literal["power_mean", "quantile", "min", "max"] +DistanceMethod = Literal["compromise_programming", "topsis"] +ClusterMethod = Literal["kmeans", "kmeans2", "kmedoids"] +DensityMethod = Literal["knn", "medoid"] +SelectionMethod = Union[DistanceMethod, ClusterMethod, DensityMethod] warnings.simplefilter(action="ignore", category=FutureWarning) logger = logging.getLogger(__name__) @@ -72,7 +76,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): https://github.com/sponsors/robcaulk """ - version = "3.8.5" + version = "3.9.0" _TEST_SIZE: Final[float] = 0.1 @@ -101,59 +105,59 @@ class QuickAdapterRegressorV3(BaseRegressionModel): *_CUSTOM_THRESHOLD_METHODS, ) - _CLUSTER_SELECTION_METHODS: Final[tuple[ClusterSelectionMethod, ...]] = ( - "medoid", - "min", - "topsis", - ) - _OPTUNA_LABEL_N_OBJECTIVES: Final[int] = 7 _OPTUNA_LABEL_DIRECTIONS: Final[tuple[optuna.study.StudyDirection, ...]] = ( optuna.study.StudyDirection.MAXIMIZE, ) * _OPTUNA_LABEL_N_OBJECTIVES _OPTUNA_STORAGE_BACKENDS: Final[tuple[str, ...]] = ("file", "sqlite") - _OPTUNA_HPO_SAMPLERS: Final[tuple[OptunaSampler, ...]] = ("tpe", "auto") - _OPTUNA_LABEL_SAMPLERS: Final[tuple[OptunaSampler, ...]] = ( - "auto", - "tpe", - "nsgaii", - "nsgaiii", - ) _OPTUNA_SAMPLERS: Final[tuple[OptunaSampler, ...]] = ( "tpe", "auto", "nsgaii", "nsgaiii", ) + _OPTUNA_HPO_SAMPLERS: Final[tuple[OptunaSampler, ...]] = _OPTUNA_SAMPLERS[:2] + _OPTUNA_LABEL_SAMPLERS: Final[tuple[OptunaSampler, ...]] = ( + _OPTUNA_SAMPLERS[1], # "auto" + _OPTUNA_SAMPLERS[0], # "tpe" + _OPTUNA_SAMPLERS[2], # "nsgaii" + _OPTUNA_SAMPLERS[3], # "nsgaiii" + ) _OPTUNA_NAMESPACES: Final[tuple[OptunaNamespace, ...]] = ("hp", "train", "label") - _SCIPY_METRICS: Final[tuple[str, ...]] = ( - # "braycurtis", - # "canberra", - "chebyshev", - "cityblock", - # "correlation", - # "cosine", - # "dice", + _DISTANCE_METHODS: Final[tuple[DistanceMethod, ...]] = ( + "compromise_programming", + "topsis", + ) + _CLUSTER_METHODS: Final[tuple[ClusterMethod, ...]] = ( + "kmeans", + "kmeans2", + "kmedoids", + ) + _DENSITY_METHODS: Final[tuple[DensityMethod, ...]] = ("knn", "medoid") + + _SELECTION_CATEGORIES: Final[dict[str, tuple[SelectionMethod, ...]]] = { + "distance": _DISTANCE_METHODS, + "cluster": _CLUSTER_METHODS, + "density": _DENSITY_METHODS, + } + + _SELECTION_METHODS: Final[tuple[SelectionMethod, ...]] = ( + *_DISTANCE_METHODS, + *_CLUSTER_METHODS, + *_DENSITY_METHODS, + ) + + _DISTANCE_METRICS: Final[tuple[str, ...]] = ( "euclidean", - # "hamming", - # "jaccard", - "jensenshannon", - # "kulczynski1", # Deprecated in SciPy ≥ 1.15.0; do not use. - "mahalanobis", - # "matching", "minkowski", - # "rogerstanimoto", - # "russellrao", - "seuclidean", - # "sokalmichener", # Deprecated in SciPy ≥ 1.15.0; do not use. - # "sokalsneath", + "chebyshev", + "cityblock", "sqeuclidean", - # "yule", - ) - - _CUSTOM_METRICS: Final[tuple[str, ...]] = ( + "seuclidean", + "mahalanobis", + "jensenshannon", "hellinger", "shellinger", "harmonic_mean", @@ -163,26 +167,19 @@ class QuickAdapterRegressorV3(BaseRegressionModel): "cubic_mean", "power_mean", "weighted_sum", - "kmeans", - "kmeans2", - "kmedoids", - "knn_power_mean", - "knn_quantile", - "knn_min", - "knn_max", - "medoid", - "topsis", ) - _METRICS: Final[tuple[str, ...]] = ( - *_SCIPY_METRICS, - *_CUSTOM_METRICS, + _UNSUPPORTED_CLUSTER_METRICS: Final[tuple[str, ...]] = ( + _DISTANCE_METRICS[6], # "mahalanobis" + _DISTANCE_METRICS[5], # "seuclidean" + _DISTANCE_METRICS[7], # "jensenshannon" ) - _UNSUPPORTED_CLUSTER_METRICS: Final[tuple[str, ...]] = ( - "mahalanobis", - "seuclidean", - "jensenshannon", + _DENSITY_AGGREGATIONS: Final[tuple[DensityAggregation, ...]] = ( + "power_mean", + "quantile", + "min", + "max", ) PREDICTIONS_EXTREMA_OUTLIER_THRESHOLD_QUANTILE_DEFAULT: Final[float] = 0.999 @@ -196,120 +193,355 @@ class QuickAdapterRegressorV3(BaseRegressionModel): MAX_LABEL_PERIOD_CANDLES_DEFAULT: Final[int] = 24 MIN_LABEL_NATR_MULTIPLIER_DEFAULT: Final[float] = 9.0 MAX_LABEL_NATR_MULTIPLIER_DEFAULT: Final[float] = 12.0 - LABEL_KNN_N_NEIGHBORS_DEFAULT: Final[int] = 5 + + LABEL_METHOD_DEFAULT: Final[str] = _SELECTION_METHODS[0] # "compromise_programming" + + LABEL_DISTANCE_METRIC_DEFAULT: Final[str] = _DISTANCE_METRICS[0] # "euclidean" + + LABEL_CLUSTER_METRIC_DEFAULT: Final[str] = _DISTANCE_METRICS[0] # "euclidean" + LABEL_CLUSTER_SELECTION_METHOD_DEFAULT: Final[DistanceMethod] = _DISTANCE_METHODS[ + 1 + ] # "topsis" + LABEL_CLUSTER_TRIAL_SELECTION_METHOD_DEFAULT: Final[DistanceMethod] = ( + _DISTANCE_METHODS[1] # "topsis" + ) + + LABEL_DENSITY_N_NEIGHBORS_DEFAULT: Final[int] = 5 + LABEL_DENSITY_AGGREGATION_DEFAULT: Final[DensityAggregation] = ( + _DENSITY_AGGREGATIONS[0] # "power_mean" + ) @staticmethod + @lru_cache(maxsize=None) def _extrema_selection_methods_set() -> set[ExtremaSelectionMethod]: return set(QuickAdapterRegressorV3._EXTREMA_SELECTION_METHODS) @staticmethod + @lru_cache(maxsize=None) def _custom_threshold_methods_set() -> set[CustomThresholdMethod]: return set(QuickAdapterRegressorV3._CUSTOM_THRESHOLD_METHODS) @staticmethod + @lru_cache(maxsize=None) def _skimage_threshold_methods_set() -> set[SkimageThresholdMethod]: return set(QuickAdapterRegressorV3._SKIMAGE_THRESHOLD_METHODS) @staticmethod + @lru_cache(maxsize=None) def _threshold_methods_set() -> set[ThresholdMethod]: return set(QuickAdapterRegressorV3._THRESHOLD_METHODS) @staticmethod + @lru_cache(maxsize=None) def _optuna_namespaces_set() -> set[OptunaNamespace]: return set(QuickAdapterRegressorV3._OPTUNA_NAMESPACES) @staticmethod + @lru_cache(maxsize=None) def _scipy_metrics_set() -> set[str]: - return set(QuickAdapterRegressorV3._SCIPY_METRICS) + return set(QuickAdapterRegressorV3._DISTANCE_METRICS[:8]) + + @staticmethod + @lru_cache(maxsize=None) + def _unsupported_cluster_metrics_set() -> set[str]: + return set(QuickAdapterRegressorV3._UNSUPPORTED_CLUSTER_METRICS) @staticmethod - def _custom_metrics_set() -> set[str]: - return set(QuickAdapterRegressorV3._CUSTOM_METRICS) + @lru_cache(maxsize=None) + def _distance_methods_set() -> set[DistanceMethod]: + return set(QuickAdapterRegressorV3._DISTANCE_METHODS) @staticmethod - def _metrics_set() -> set[str]: - return set(QuickAdapterRegressorV3._METRICS) + @lru_cache(maxsize=None) + def _selection_methods_set() -> set[str]: + return set(QuickAdapterRegressorV3._SELECTION_METHODS) @staticmethod - def _unsupported_cluster_metrics_set() -> set[str]: - return set(QuickAdapterRegressorV3._UNSUPPORTED_CLUSTER_METRICS) + @lru_cache(maxsize=None) + def _distance_metrics_set() -> set[str]: + return set(QuickAdapterRegressorV3._DISTANCE_METRICS) @staticmethod - def _cluster_selection_methods_set() -> set[ClusterSelectionMethod]: - return set(QuickAdapterRegressorV3._CLUSTER_SELECTION_METHODS) + @lru_cache(maxsize=None) + def _density_aggregations_set() -> set[str]: + return set(QuickAdapterRegressorV3._DENSITY_AGGREGATIONS) @staticmethod - def _get_label_p_order_default(metric: str) -> Optional[float]: - if metric == QuickAdapterRegressorV3._SCIPY_METRICS[5]: # "minkowski" + def _get_selection_category(method: str) -> Optional[str]: + for ( + category, + methods, + ) in QuickAdapterRegressorV3._SELECTION_CATEGORIES.items(): + if method in methods: + return category + return None + + @staticmethod + def _get_label_p_order_default(distance_metric: str) -> Optional[float]: + if ( + distance_metric == QuickAdapterRegressorV3._DISTANCE_METRICS[1] + ): # "minkowski" return 2.0 - elif metric == QuickAdapterRegressorV3._CUSTOM_METRICS[7]: # "power_mean" + elif ( + distance_metric == QuickAdapterRegressorV3._DISTANCE_METRICS[15] + ): # "power_mean" return 1.0 return None @staticmethod - def _get_label_knn_p_order_default(metric: str) -> Optional[float]: - if metric == QuickAdapterRegressorV3._CUSTOM_METRICS[12]: # "knn_power_mean" + def _get_label_density_metric_default(method: DensityMethod) -> Optional[str]: + if method == QuickAdapterRegressorV3._DENSITY_METHODS[1]: # "medoid" + return QuickAdapterRegressorV3._DISTANCE_METRICS[0] # "euclidean" + elif method == QuickAdapterRegressorV3._DENSITY_METHODS[0]: # "knn" + return QuickAdapterRegressorV3._DISTANCE_METRICS[1] # "minkowski" + return None + + @staticmethod + def _get_label_density_aggregation_param_default( + aggregation: DensityAggregation, + ) -> Optional[float]: + if ( + aggregation == QuickAdapterRegressorV3._DENSITY_AGGREGATIONS[0] + ): # "power_mean" return 1.0 - elif metric == QuickAdapterRegressorV3._CUSTOM_METRICS[13]: # "knn_quantile" + elif ( + aggregation == QuickAdapterRegressorV3._DENSITY_AGGREGATIONS[1] + ): # "quantile" return 0.5 return None - def _get_distance_metric(self, label_metric: str) -> tuple[str, str, str]: - """Resolve distance metric for composite label metrics. + @staticmethod + def _validate_minkowski_p(p: Optional[float], *, ctx: str) -> Optional[float]: + if p is None: + return None + if not np.isfinite(p): + raise ValueError(f"Invalid {ctx} p {p!r}: must be finite") + if p <= 0: + raise ValueError(f"Invalid {ctx} p {p!r}: must be > 0") + return float(p) - Args: - label_metric: Label metric name. + @staticmethod + def _prepare_distance_kwargs( + distance_metric: str, + weights: Optional[NDArray[np.floating]] = None, + p: Optional[float] = None, + validate_p: bool = True, + check_unsupported_metrics: bool = False, + validation_context: str = "distance calculation", + ) -> dict[str, Any]: + kwargs: dict[str, Any] = {} - Returns: - Tuple (distance_metric, param_name, default_metric). - Returns (label_metric, "", "") when label_metric is not composite. - """ - # Mapping: label_metric -> (param_name, default_metric) - composite_metrics: dict[str, tuple[str, str]] = { - QuickAdapterRegressorV3._CUSTOM_METRICS[16]: ( # "medoid" - "label_medoid_metric", - QuickAdapterRegressorV3._SCIPY_METRICS[2], # "euclidean" - ), - QuickAdapterRegressorV3._CUSTOM_METRICS[9]: ( # "kmeans" - "label_kmeans_metric", - QuickAdapterRegressorV3._SCIPY_METRICS[2], # "euclidean" - ), - QuickAdapterRegressorV3._CUSTOM_METRICS[10]: ( # "kmeans2" - "label_kmeans_metric", - QuickAdapterRegressorV3._SCIPY_METRICS[2], # "euclidean" - ), - QuickAdapterRegressorV3._CUSTOM_METRICS[11]: ( # "kmedoids" - "label_kmedoids_metric", - QuickAdapterRegressorV3._SCIPY_METRICS[2], # "euclidean" - ), - QuickAdapterRegressorV3._CUSTOM_METRICS[12]: ( # "knn_power_mean" - "label_knn_metric", - QuickAdapterRegressorV3._SCIPY_METRICS[5], # "minkowski" - ), - QuickAdapterRegressorV3._CUSTOM_METRICS[13]: ( # "knn_quantile" - "label_knn_metric", - QuickAdapterRegressorV3._SCIPY_METRICS[5], # "minkowski" - ), - QuickAdapterRegressorV3._CUSTOM_METRICS[14]: ( # "knn_min" - "label_knn_metric", - QuickAdapterRegressorV3._SCIPY_METRICS[5], # "minkowski" - ), - QuickAdapterRegressorV3._CUSTOM_METRICS[15]: ( # "knn_max" - "label_knn_metric", - QuickAdapterRegressorV3._SCIPY_METRICS[5], # "minkowski" - ), - QuickAdapterRegressorV3._CUSTOM_METRICS[17]: ( # "topsis" - "label_topsis_metric", - QuickAdapterRegressorV3._SCIPY_METRICS[2], # "euclidean" - ), + if weights is not None: + if check_unsupported_metrics: + if ( + distance_metric + not in QuickAdapterRegressorV3._unsupported_cluster_metrics_set() + ): + kwargs["w"] = weights + else: + kwargs["w"] = weights + + if ( + distance_metric == QuickAdapterRegressorV3._DISTANCE_METRICS[1] + ): # "minkowski" + if p is not None and np.isfinite(p): + if validate_p: + kwargs["p"] = QuickAdapterRegressorV3._validate_minkowski_p( + p, ctx=validation_context + ) + else: + kwargs["p"] = p + + return kwargs + + @staticmethod + def _validate_quantile_q(q: Optional[float], *, ctx: str) -> Optional[float]: + if q is None: + return None + if not np.isfinite(q): + raise ValueError(f"Invalid {ctx} q {q!r}: must be finite") + if q < 0.0 or q > 1.0: + raise ValueError(f"Invalid {ctx} q {q!r}: must be in [0, 1]") + return float(q) + + @staticmethod + def _validate_metric_supported(metric: str, *, category: str) -> None: + if metric in QuickAdapterRegressorV3._unsupported_cluster_metrics_set(): + supported_metrics = [ + m + for m in QuickAdapterRegressorV3._DISTANCE_METRICS + if m not in QuickAdapterRegressorV3._UNSUPPORTED_CLUSTER_METRICS + ] + raise ValueError( + f"Invalid label_{category}_metric {metric!r}. " + f"This metric does not support weighted distance calculations. " + f"Supported: {', '.join(supported_metrics)}" + ) + + @staticmethod + def _resolve_p_order( + distance_metric: str, + label_p_order: Optional[float], + *, + ctx: str, + ) -> Optional[float]: + p = ( + label_p_order + if label_p_order is not None + else QuickAdapterRegressorV3._get_label_p_order_default(distance_metric) + ) + if ( + distance_metric == QuickAdapterRegressorV3._DISTANCE_METRICS[1] + ): # "minkowski" + p = QuickAdapterRegressorV3._validate_minkowski_p(p, ctx=ctx) + return p + + def _resolve_label_method_config(self, label_method: str) -> dict[str, Any]: + if label_method not in self._selection_methods_set(): + raise ValueError( + f"Invalid label_method {label_method!r}. " + f"Supported: {', '.join(QuickAdapterRegressorV3._SELECTION_METHODS)}" + ) + + category = QuickAdapterRegressorV3._get_selection_category(label_method) + config: dict[str, Any] = { + "category": category, + "method": label_method, } - if label_metric not in composite_metrics: - return (label_metric, "", "") + if category == "distance": + distance_metric = self.ft_params.get( + "label_distance_metric", + QuickAdapterRegressorV3.LABEL_DISTANCE_METRIC_DEFAULT, + ) + if distance_metric not in QuickAdapterRegressorV3._distance_metrics_set(): + raise ValueError( + f"Invalid label_distance_metric {distance_metric!r}. " + f"Supported: {', '.join(QuickAdapterRegressorV3._DISTANCE_METRICS)}" + ) + config["distance_metric"] = distance_metric + elif category == "cluster": + distance_metric = self.ft_params.get( + "label_cluster_metric", + QuickAdapterRegressorV3.LABEL_CLUSTER_METRIC_DEFAULT, + ) + if distance_metric not in QuickAdapterRegressorV3._distance_metrics_set(): + raise ValueError( + f"Invalid label_cluster_metric {distance_metric!r}. " + f"Supported: {', '.join(QuickAdapterRegressorV3._DISTANCE_METRICS)}" + ) + config["distance_metric"] = distance_metric + selection_method = self.ft_params.get( + "label_cluster_selection_method", + QuickAdapterRegressorV3.LABEL_CLUSTER_SELECTION_METHOD_DEFAULT, + ) + if selection_method not in QuickAdapterRegressorV3._distance_methods_set(): + raise ValueError( + f"Invalid label_cluster_selection_method {selection_method!r}. " + f"Supported: {', '.join(QuickAdapterRegressorV3._DISTANCE_METHODS)}" + ) + config["selection_method"] = selection_method + + trial_selection_method = self.ft_params.get( + "label_cluster_trial_selection_method", + QuickAdapterRegressorV3.LABEL_CLUSTER_TRIAL_SELECTION_METHOD_DEFAULT, + ) + if ( + trial_selection_method + not in QuickAdapterRegressorV3._distance_methods_set() + ): + raise ValueError( + f"Invalid label_cluster_trial_selection_method {trial_selection_method!r}. " + f"Supported: {', '.join(QuickAdapterRegressorV3._DISTANCE_METHODS)}" + ) + config["trial_selection_method"] = trial_selection_method + elif category == "density": + density_method = cast(DensityMethod, label_method) + distance_metric = self.ft_params.get( + "label_density_metric", + QuickAdapterRegressorV3._get_label_density_metric_default( + density_method + ), + ) + if distance_metric not in QuickAdapterRegressorV3._distance_metrics_set(): + raise ValueError( + f"Invalid label_density_metric {distance_metric!r}. " + f"Supported: {', '.join(QuickAdapterRegressorV3._DISTANCE_METRICS)}" + ) + config["distance_metric"] = distance_metric + + if density_method == QuickAdapterRegressorV3._DENSITY_METHODS[0]: # "knn" + aggregation = cast( + DensityAggregation, + self.ft_params.get( + "label_density_aggregation", + QuickAdapterRegressorV3.LABEL_DENSITY_AGGREGATION_DEFAULT, + ), + ) + if ( + aggregation + not in QuickAdapterRegressorV3._density_aggregations_set() + ): + raise ValueError( + f"Invalid label_density_aggregation {aggregation!r}. " + f"Supported: {', '.join(QuickAdapterRegressorV3._DENSITY_AGGREGATIONS)}" + ) + config["aggregation"] = aggregation + + n_neighbors = self.ft_params.get( + "label_density_n_neighbors", + QuickAdapterRegressorV3.LABEL_DENSITY_N_NEIGHBORS_DEFAULT, + ) + if not isinstance(n_neighbors, int) or n_neighbors < 1: + raise ValueError( + f"Invalid label_density_n_neighbors: must be >= 1, got {n_neighbors!r}" + ) + config["n_neighbors"] = n_neighbors + + aggregation_param = self.ft_params.get( + "label_density_aggregation_param", + QuickAdapterRegressorV3._get_label_density_aggregation_param_default( + aggregation + ), + ) + + if aggregation_param is not None: + if aggregation == "quantile": + QuickAdapterRegressorV3._validate_quantile_q( + aggregation_param, + ctx="label_density_aggregation_param", + ) + elif aggregation == "power_mean": + if not np.isfinite(aggregation_param): + raise ValueError( + f"Invalid label_density_aggregation_param p {aggregation_param!r}: must be finite" + ) + + config["aggregation_param"] = aggregation_param + + return config - param_name, default_metric = composite_metrics[label_metric] - distance_metric = self.ft_params.get(param_name, default_metric) - return (distance_metric, param_name, default_metric) + @staticmethod + def _format_label_method_config(config: dict[str, Any]) -> str: + return ", ".join(f"{k}={v}" for k, v in config.items()) + + _CONFIG_KEY_TO_TUNABLE_SUFFIX: Final[dict[str, str]] = { + "distance_metric": "metric", + } + + @staticmethod + def _log_label_method_config(config: dict[str, Any]) -> None: + category = config.get("category", "") + for key, value in config.items(): + if key in ("category", "method"): + continue + suffix = QuickAdapterRegressorV3._CONFIG_KEY_TO_TUNABLE_SUFFIX.get(key, key) + tunable_name = f"label_{category}_{suffix}" + if isinstance(value, float): + formatted_value = format_number(value) + else: + formatted_value = value + logger.info(f" {tunable_name}: {formatted_value}") @property def _optuna_config(self) -> dict[str, Any]: @@ -383,20 +615,6 @@ class QuickAdapterRegressorV3(BaseRegressionModel): @property def _label_frequency_candles(self) -> int: - """ - Calculate label_frequency_candles. - - Default behavior is 'auto' which equals max(2, 2 * number_of_pairs). - User can override with: - - "auto" string value - - Integer value between 2 and 10000 - - Returns: - int: The calculated label_frequency_candles value - - Raises: - ValueError: If no trading pairs are configured - """ default_label_frequency_candles = max(2, 2 * len(self.pairs)) label_frequency_candles = self.config.get("feature_parameters", {}).get( @@ -410,7 +628,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): label_frequency_candles = default_label_frequency_candles else: logger.warning( - f"Invalid label_frequency_candles {label_frequency_candles!r}: only 'auto' is supported for string values. Using default {default_label_frequency_candles!r}" + f"Invalid label_frequency_candles {label_frequency_candles!r}: only 'auto' is supported for string values, using default {default_label_frequency_candles!r}" ) label_frequency_candles = default_label_frequency_candles elif isinstance(label_frequency_candles, (int, float)): @@ -418,12 +636,12 @@ class QuickAdapterRegressorV3(BaseRegressionModel): label_frequency_candles = int(label_frequency_candles) else: logger.warning( - f"Invalid label_frequency_candles {label_frequency_candles!r}: must be in range [2, 10000]. Using default {default_label_frequency_candles!r}" + f"Invalid label_frequency_candles {label_frequency_candles!r}: must be in range [2, 10000], using default {default_label_frequency_candles!r}" ) label_frequency_candles = default_label_frequency_candles else: logger.warning( - f"Invalid label_frequency_candles {label_frequency_candles!r}: expected int, float, or 'auto'. Using default {default_label_frequency_candles!r}" + f"Invalid label_frequency_candles {label_frequency_candles!r}: expected int, float, or 'auto', using default {default_label_frequency_candles!r}" ) label_frequency_candles = default_label_frequency_candles @@ -545,14 +763,14 @@ class QuickAdapterRegressorV3(BaseRegressionModel): self.pairs: list[str] = self.config.get("exchange", {}).get("pair_whitelist") if not self.pairs: raise ValueError( - "FreqAI model requires StaticPairList method defined in pairlists configuration and 'pair_whitelist' defined in exchange section configuration" + "Invalid configuration: 'pair_whitelist' must be defined in exchange section and StaticPairList must be configured in pairlists" ) if ( not isinstance(self.freqai_info.get("identifier"), str) or not self.freqai_info.get("identifier", "").strip() ): raise ValueError( - "FreqAI model requires 'identifier' defined in the freqai section configuration" + "Invalid freqai configuration: 'identifier' must be a non-empty string" ) self._optuna_hyperopt: Optional[bool] = ( self.freqai_info.get("enabled", False) @@ -663,9 +881,14 @@ class QuickAdapterRegressorV3(BaseRegressionModel): ) logger.info(f" min_resource: {optuna_config.get('min_resource')}") logger.info(f" seed: {optuna_config.get('seed')}") - logger.info( - f" label_metric: {self.ft_params.get('label_metric', QuickAdapterRegressorV3._SCIPY_METRICS[2])}" + + label_method = self.ft_params.get( + "label_method", QuickAdapterRegressorV3.LABEL_METHOD_DEFAULT ) + logger.info(f" label_method: {label_method}") + + label_config = self._resolve_label_method_config(label_method) + self._log_label_method_config(label_config) label_weights = self.ft_params.get("label_weights") if label_weights is not None: @@ -677,121 +900,25 @@ class QuickAdapterRegressorV3(BaseRegressionModel): ) label_p_order_config = self.ft_params.get("label_p_order") - label_metric = self.ft_params.get( - "label_metric", QuickAdapterRegressorV3._SCIPY_METRICS[2] - ) - - label_p_order_is_used = False - label_p_order_reason = None - - if label_metric in { - QuickAdapterRegressorV3._SCIPY_METRICS[5], # "minkowski" - QuickAdapterRegressorV3._CUSTOM_METRICS[7], # "power_mean" - }: - label_p_order_is_used = True - label_p_order_reason = label_metric - else: - distance_metric, param_name, _ = self._get_distance_metric(label_metric) - if ( - param_name - and distance_metric - == QuickAdapterRegressorV3._SCIPY_METRICS[5] # "minkowski" - ): - label_p_order_is_used = True - label_p_order_reason = ( - f"{label_metric} (via {param_name}={distance_metric})" - ) - if label_p_order_config is not None: logger.info( f" label_p_order: {format_number(float(label_p_order_config))}" ) - elif label_p_order_is_used: - if label_metric in { - QuickAdapterRegressorV3._SCIPY_METRICS[5], # "minkowski" - QuickAdapterRegressorV3._CUSTOM_METRICS[7], # "power_mean" + else: + distance_metric = label_config["distance_metric"] + if distance_metric in { + QuickAdapterRegressorV3._DISTANCE_METRICS[1], # "minkowski" + QuickAdapterRegressorV3._DISTANCE_METRICS[15], # "power_mean" }: - label_p_order_default = ( - QuickAdapterRegressorV3._get_label_p_order_default(label_metric) - ) - else: label_p_order_default = ( QuickAdapterRegressorV3._get_label_p_order_default( - QuickAdapterRegressorV3._SCIPY_METRICS[ - 5 - ] # "minkowski" default + distance_metric ) ) - logger.info( - f" label_p_order: {format_number(label_p_order_default)} (default for {label_p_order_reason})" - ) - - _, param_name, default_metric = self._get_distance_metric(label_metric) - if param_name: - config_value = self.ft_params.get(param_name) - if config_value is not None: - logger.info(f" {param_name}: {config_value}") - else: logger.info( - f" {param_name}: {default_metric} (default for {label_metric})" + f" label_p_order: {format_number(label_p_order_default)} (default for {distance_metric})" ) - label_kmeans_selection_config = self.ft_params.get("label_kmeans_selection") - if label_kmeans_selection_config is not None: - logger.info( - f" label_kmeans_selection: {label_kmeans_selection_config}" - ) - elif label_metric in { - QuickAdapterRegressorV3._CUSTOM_METRICS[9], # "kmeans" - QuickAdapterRegressorV3._CUSTOM_METRICS[10], # "kmeans2" - }: - logger.info( - f" label_kmeans_selection: {QuickAdapterRegressorV3._CLUSTER_SELECTION_METHODS[1]} (default for {label_metric})" - ) - - label_kmedoids_selection_config = self.ft_params.get( - "label_kmedoids_selection" - ) - if label_kmedoids_selection_config is not None: - logger.info( - f" label_kmedoids_selection: {label_kmedoids_selection_config}" - ) - elif ( - label_metric == QuickAdapterRegressorV3._CUSTOM_METRICS[11] - ): # "kmedoids" - logger.info( - f" label_kmedoids_selection: {QuickAdapterRegressorV3._CLUSTER_SELECTION_METHODS[1]} (default for {label_metric})" - ) - - label_knn_n_neighbors = self.ft_params.get("label_knn_n_neighbors") - if label_knn_n_neighbors is not None: - logger.info(f" label_knn_n_neighbors: {label_knn_n_neighbors}") - elif label_metric in { - QuickAdapterRegressorV3._CUSTOM_METRICS[12], # "knn_power_mean" - QuickAdapterRegressorV3._CUSTOM_METRICS[13], # "knn_quantile" - QuickAdapterRegressorV3._CUSTOM_METRICS[14], # "knn_min" - QuickAdapterRegressorV3._CUSTOM_METRICS[15], # "knn_max" - }: - logger.info( - f" label_knn_n_neighbors: {QuickAdapterRegressorV3.LABEL_KNN_N_NEIGHBORS_DEFAULT} (default for {label_metric})" - ) - - label_knn_p_order_config = self.ft_params.get("label_knn_p_order") - if label_knn_p_order_config is not None: - logger.info( - f" label_knn_p_order: {format_number(float(label_knn_p_order_config))}" - ) - elif label_metric in { - QuickAdapterRegressorV3._CUSTOM_METRICS[12], # "knn_power_mean" - QuickAdapterRegressorV3._CUSTOM_METRICS[13], # "knn_quantile" - }: - label_knn_p_order_default = ( - QuickAdapterRegressorV3._get_label_knn_p_order_default(label_metric) - ) - logger.info( - f" label_knn_p_order: {format_number(label_knn_p_order_default)} (default for {label_metric})" - ) - logger.info("Predictions Extrema Configuration:") predictions_extrema = self.predictions_extrema logger.info( @@ -897,7 +1024,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): else: raise ValueError( f"Invalid namespace {namespace!r}. " - f"Supported: {', '.join(QuickAdapterRegressorV3._OPTUNA_NAMESPACES[:2])}" # Only hp and train + f"Supported: {', '.join(QuickAdapterRegressorV3._OPTUNA_NAMESPACES[:2])}" # Only "hp" and "train" ) return value @@ -911,7 +1038,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): else: raise ValueError( f"Invalid namespace {namespace!r}. " - f"Supported: {', '.join(QuickAdapterRegressorV3._OPTUNA_NAMESPACES[:2])}" # Only hp and train + f"Supported: {', '.join(QuickAdapterRegressorV3._OPTUNA_NAMESPACES[:2])}" # Only "hp" and "train" ) def get_optuna_values( @@ -922,7 +1049,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): else: raise ValueError( f"Invalid namespace {namespace!r}. " - f"Supported: {QuickAdapterRegressorV3._OPTUNA_NAMESPACES[2]}" # Only label + f"Supported: {QuickAdapterRegressorV3._OPTUNA_NAMESPACES[2]}" # Only "label" ) return values @@ -934,17 +1061,21 @@ class QuickAdapterRegressorV3(BaseRegressionModel): else: raise ValueError( f"Invalid namespace {namespace!r}. " - f"Supported: {QuickAdapterRegressorV3._OPTUNA_NAMESPACES[2]}" # Only label + f"Supported: {QuickAdapterRegressorV3._OPTUNA_NAMESPACES[2]}" # Only "label" ) def init_optuna_label_candle_pool(self) -> None: optuna_label_candle_pool_full = self._optuna_label_candle_pool_full if len(optuna_label_candle_pool_full) == 0: - raise RuntimeError("Failed to initialize optuna label candle pool full") + raise RuntimeError( + "Failed to initialize optuna label candle pool: initial pool is empty" + ) self._optuna_label_candle_pool = optuna_label_candle_pool_full self._optuna_label_shuffle_rng.shuffle(self._optuna_label_candle_pool) if len(self._optuna_label_candle_pool) == 0: - raise RuntimeError("Failed to initialize optuna label candle pool") + raise RuntimeError( + "Failed to initialize optuna label candle pool: pool became empty after shuffle" + ) def set_optuna_label_candle(self, pair: str) -> None: if len(self._optuna_label_candle_pool) == 0: @@ -1138,10 +1269,12 @@ class QuickAdapterRegressorV3(BaseRegressionModel): }: # Only "label" raise ValueError( f"Invalid namespace {namespace!r}. " - f"Supported: {QuickAdapterRegressorV3._OPTUNA_NAMESPACES[2]}" # Only label + f"Supported: {QuickAdapterRegressorV3._OPTUNA_NAMESPACES[2]}" # Only "label" ) if not callable(callback): - raise ValueError("Invalid callback: must be callable") + raise ValueError( + f"Invalid callback {type(callback).__name__!r}: must be callable" + ) self._optuna_label_candles[pair] += 1 if pair not in self._optuna_label_incremented_pairs: self._optuna_label_incremented_pairs.append(pair) @@ -1280,7 +1413,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): hp_rmse if hp_rmse is not None else np.inf ) train_rmse = self.optuna_validate_value( - self.get_optuna_value(pair, self._OPTUNA_NAMESPACES[1]) + self.get_optuna_value(pair, QuickAdapterRegressorV3._OPTUNA_NAMESPACES[1]) ) # "train" dk.data["extra_returns_per_train"]["train_rmse"] = ( train_rmse @@ -1531,7 +1664,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): @staticmethod def skimage_min_max( pred_extrema: pd.Series, - method: str, + method: SkimageThresholdMethod, extrema_selection: ExtremaSelectionMethod, keep_extrema_fraction: float = 1.0, ) -> tuple[float, float]: @@ -1584,153 +1717,328 @@ class QuickAdapterRegressorV3(BaseRegressionModel): return np.nanmedian(values) @staticmethod - def _pairwise_distance_sums( - matrix: NDArray[np.floating], - metric: str, - *, - weights: Optional[NDArray[np.floating]] = None, - p: Optional[float] = None, + def _normalize_weights( + weights: Optional[NDArray[np.floating]], + n_objectives: int, ) -> NDArray[np.floating]: - """Compute sum of pairwise distances per row. - - Args: - matrix: 2D array, shape (n_samples, n_features). - Must contain only finite values (no NaN or inf). - metric: scipy.spatial.distance.pdist metric name. - weights: Optional 1D array, shape (n_features,). - Must be finite and non-negative. - p: Minkowski order, used only when metric == 'minkowski'. - - Returns: - 1D array, shape (n_samples,). Returns [] when n_samples == 0, [0.0] when n_samples == 1. - """ - - if matrix.ndim != 2: - raise ValueError("Invalid matrix: must be 2-dimensional") - if matrix.shape[1] == 0: - raise ValueError("Invalid matrix: must have at least one feature") + if weights is None: + return np.full(n_objectives, 1.0 / n_objectives) - if not np.all(np.isfinite(matrix)): + np_weights = np.asarray(weights, dtype=float) + if np_weights.size != n_objectives: raise ValueError( - "Invalid matrix: must contain only finite values (no NaN or inf)" + "Invalid label_weights: length must match number of objectives" + ) + if not np.all(np.isfinite(np_weights)): + raise ValueError( + f"Invalid label_weights (shape={np_weights.shape}, dtype={np_weights.dtype}): " + f"must contain only finite values" + ) + if np.any(np_weights < 0): + raise ValueError( + f"Invalid label_weights (shape={np_weights.shape}, dtype={np_weights.dtype}): " + f"values must be non-negative" ) - if weights is not None: - if weights.size != matrix.shape[1]: - raise ValueError( - f"Invalid weights: size {weights.size} must match number of features {matrix.shape[1]}" - ) - if not np.all(np.isfinite(weights)) or np.any(weights < 0): - raise ValueError("Invalid weights: must be finite and non-negative") - if metric in QuickAdapterRegressorV3._unsupported_cluster_metrics_set(): - raise ValueError( - f"Invalid weights: not supported for metric {metric!r}" - ) - - matrix = np.asarray(matrix, dtype=np.float64) - if weights is not None: - weights = np.asarray(weights, dtype=np.float64) - - n = matrix.shape[0] - if n == 0: - return np.array([]) - if n == 1: - return np.array([0.0]) - - pdist_kwargs = {} - if weights is not None: - pdist_kwargs["w"] = weights - if ( - metric == QuickAdapterRegressorV3._SCIPY_METRICS[5] # "minkowski" - and p is not None - and np.isfinite(p) - ): - pdist_kwargs["p"] = p - - pairwise_distances_vector = sp.spatial.distance.pdist( - matrix, metric=metric, **pdist_kwargs - ) - - sums = np.zeros(n, dtype=float) - - idx_i, idx_j = np.triu_indices(n, k=1) - np.add.at(sums, idx_i, pairwise_distances_vector) - np.add.at(sums, idx_j, pairwise_distances_vector) + weights_sum = np.nansum(np_weights) + if np.isclose(weights_sum, 0.0): + raise ValueError( + f"Invalid label_weights (shape={np_weights.shape}, sum={weights_sum}): " + f"sum cannot be zero" + ) - return sums + return np_weights / weights_sum @staticmethod - def _topsis_scores( + def _compromise_programming_scores( normalized_matrix: NDArray[np.floating], - metric: str, + distance_metric: str, *, weights: Optional[NDArray[np.floating]] = None, p: Optional[float] = None, ) -> NDArray[np.floating]: - """Compute TOPSIS score S = D+ / (D+ + D-) per row. - - Args: - normalized_matrix: 2D array, shape (n_samples, n_objectives), values in [0, 1]. - Must contain only finite values (no NaN or inf). - metric: scipy.spatial.distance.cdist metric name. - weights: Optional 1D array, shape (n_objectives,). - Must be finite and non-negative. - p: Minkowski order, used only when metric == 'minkowski'. - - Returns: - 1D array, shape (n_samples,), values in [0, 1]. Lower is better. - Returns [] when n_samples == 0, [0.5] when n_samples == 1. - """ - if normalized_matrix.ndim != 2: - raise ValueError("Invalid normalized_matrix: must be 2-dimensional") - n_samples, n_objectives = normalized_matrix.shape - if n_objectives == 0: - raise ValueError( - "Invalid normalized_matrix: must have at least one objective" + + if n_samples == 0: + return np.array([]) + if n_samples == 1: + return np.array([0.0]) + + if weights is None: + weights = np.ones(n_objectives) + + ideal_point = np.ones(n_objectives) + ideal_point_2d = ideal_point.reshape(1, -1) + + if distance_metric in QuickAdapterRegressorV3._scipy_metrics_set(): + cdist_kwargs = QuickAdapterRegressorV3._prepare_distance_kwargs( + distance_metric=distance_metric, + weights=weights, + p=p, + validate_p=True, + check_unsupported_metrics=True, + validation_context="compromise_programming minkowski p", ) + return sp.spatial.distance.cdist( + normalized_matrix, + ideal_point_2d, + metric=distance_metric, + **cdist_kwargs, + ).flatten() - if not np.all(np.isfinite(normalized_matrix)): + if distance_metric in { + QuickAdapterRegressorV3._DISTANCE_METRICS[8], # "hellinger" + QuickAdapterRegressorV3._DISTANCE_METRICS[9], # "shellinger" + }: + np_sqrt_normalized_matrix = np.sqrt(normalized_matrix) + if ( + distance_metric == QuickAdapterRegressorV3._DISTANCE_METRICS[9] + ): # "shellinger" + variances = np.nanvar(np_sqrt_normalized_matrix, axis=0, ddof=1) + if np.any(variances <= 0): + raise ValueError( + "Invalid data for shellinger metric: requires non-zero variance for all objectives" + ) + weights = 1 / variances + return ( + np.sqrt( + np.nansum( + weights + * (np_sqrt_normalized_matrix - np.sqrt(ideal_point)) ** 2, + axis=1, + ) + ) + / QuickAdapterRegressorV3._SQRT_2 + ) + + if distance_metric in { + QuickAdapterRegressorV3._DISTANCE_METRICS[10], # "harmonic_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[11], # "geometric_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[12], # "arithmetic_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[13], # "quadratic_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[14], # "cubic_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[15], # "power_mean" + }: + if ( + distance_metric == QuickAdapterRegressorV3._DISTANCE_METRICS[15] + ): # "power_mean" + power = p if p is not None and np.isfinite(p) else 1.0 + else: + power_map: dict[str, float] = { + QuickAdapterRegressorV3._DISTANCE_METRICS[ + 10 + ]: -1.0, # "harmonic_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[ + 11 + ]: 0.0, # "geometric_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[ + 12 + ]: 1.0, # "arithmetic_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[ + 13 + ]: 2.0, # "quadratic_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[14]: 3.0, # "cubic_mean" + } + power = power_map[distance_metric] + return sp.stats.pmean( + ideal_point, p=power, weights=weights + ) - sp.stats.pmean(normalized_matrix, p=power, weights=weights, axis=1) + + if ( + distance_metric == QuickAdapterRegressorV3._DISTANCE_METRICS[16] + ): # "weighted_sum" + return (ideal_point - normalized_matrix) @ weights + + raise ValueError( + f"Invalid distance_metric {distance_metric!r} for compromise_programming. " + f"Supported: {', '.join(QuickAdapterRegressorV3._DISTANCE_METRICS)}" + ) + + @staticmethod + def _pairwise_distance_sums( + matrix: NDArray[np.floating], + distance_metric: str, + *, + weights: Optional[NDArray[np.floating]] = None, + p: Optional[float] = None, + ) -> NDArray[np.floating]: + if matrix.ndim != 2: raise ValueError( - "Invalid normalized_matrix: must contain only finite values (no NaN or inf)" + f"Invalid matrix (shape={matrix.shape}, ndim={matrix.ndim}): " + f"must be 2-dimensional" + ) + if matrix.shape[1] == 0: + raise ValueError( + f"Invalid matrix (shape={matrix.shape}): must have at least one feature" ) - if weights is not None: - if weights.size != n_objectives: - raise ValueError( - f"Invalid weights: size {weights.size} must match number of objectives {n_objectives}" - ) - if not np.all(np.isfinite(weights)) or np.any(weights < 0): - raise ValueError("Invalid weights: must be finite and non-negative") + if not np.all(np.isfinite(matrix)): + raise ValueError( + "Invalid matrix: must contain only finite values (no NaN or inf)" + ) - normalized_matrix = np.asarray(normalized_matrix, dtype=np.float64) - if weights is not None: - weights = np.asarray(weights, dtype=np.float64) + if ( + weights is not None + and distance_metric + in QuickAdapterRegressorV3._unsupported_cluster_metrics_set() + ): + raise ValueError( + f"Invalid weights: unsupported for distance_metric {distance_metric!r}" + ) + + n = matrix.shape[0] + if n == 0: + return np.array([]) + if n == 1: + return np.array([0.0]) + + pdist_kwargs = QuickAdapterRegressorV3._prepare_distance_kwargs( + distance_metric=distance_metric, + weights=weights, + p=p, + validate_p=True, + check_unsupported_metrics=False, + validation_context="pairwise_distance_sums minkowski p", + ) + + pairwise_distances_vector = sp.spatial.distance.pdist( + matrix, metric=distance_metric, **pdist_kwargs + ) + + sums = np.zeros(n, dtype=float) + + idx_i, idx_j = np.triu_indices(n, k=1) + np.add.at(sums, idx_i, pairwise_distances_vector) + np.add.at(sums, idx_j, pairwise_distances_vector) + + return sums + + @staticmethod + def _topsis_scores( + normalized_matrix: NDArray[np.floating], + distance_metric: str, + *, + weights: Optional[NDArray[np.floating]] = None, + p: Optional[float] = None, + ) -> NDArray[np.floating]: + n_samples, n_objectives = normalized_matrix.shape if n_samples == 0: return np.array([]) if n_samples == 1: return np.array([0.5]) + if weights is None: + weights = np.ones(n_objectives) + ideal_point = np.ones((1, n_objectives)) anti_ideal_point = np.zeros((1, n_objectives)) - cdist_kwargs: dict[str, Any] = {} - if weights is not None: - cdist_kwargs["w"] = weights - if ( - metric == QuickAdapterRegressorV3._SCIPY_METRICS[5] # "minkowski" - and p is not None - and np.isfinite(p) - ): - cdist_kwargs["p"] = p + if distance_metric in QuickAdapterRegressorV3._scipy_metrics_set(): + cdist_kwargs = QuickAdapterRegressorV3._prepare_distance_kwargs( + distance_metric=distance_metric, + weights=weights, + p=p, + validate_p=True, + check_unsupported_metrics=True, + validation_context="topsis minkowski p", + ) - dist_to_ideal = sp.spatial.distance.cdist( - normalized_matrix, ideal_point, metric=metric, **cdist_kwargs - ).flatten() - dist_to_anti_ideal = sp.spatial.distance.cdist( - normalized_matrix, anti_ideal_point, metric=metric, **cdist_kwargs - ).flatten() + dist_to_ideal = sp.spatial.distance.cdist( + normalized_matrix, ideal_point, metric=distance_metric, **cdist_kwargs + ).flatten() + dist_to_anti_ideal = sp.spatial.distance.cdist( + normalized_matrix, + anti_ideal_point, + metric=distance_metric, + **cdist_kwargs, + ).flatten() + elif distance_metric in { + QuickAdapterRegressorV3._DISTANCE_METRICS[8], # "hellinger" + QuickAdapterRegressorV3._DISTANCE_METRICS[9], # "shellinger" + }: + np_sqrt_normalized_matrix = np.sqrt(normalized_matrix) + if ( + distance_metric == QuickAdapterRegressorV3._DISTANCE_METRICS[9] + ): # "shellinger" + variances = np.nanvar(np_sqrt_normalized_matrix, axis=0, ddof=1) + if np.any(variances <= 0): + raise ValueError( + "Invalid data for shellinger metric: requires non-zero variance for all objectives" + ) + weights = 1 / variances + dist_to_ideal = ( + np.sqrt( + np.nansum( + weights + * (np_sqrt_normalized_matrix - np.sqrt(ideal_point)) ** 2, + axis=1, + ) + ) + / QuickAdapterRegressorV3._SQRT_2 + ) + dist_to_anti_ideal = ( + np.sqrt( + np.nansum( + weights + * (np_sqrt_normalized_matrix - np.sqrt(anti_ideal_point)) ** 2, + axis=1, + ) + ) + / QuickAdapterRegressorV3._SQRT_2 + ) + elif distance_metric in { + QuickAdapterRegressorV3._DISTANCE_METRICS[10], # "harmonic_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[11], # "geometric_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[12], # "arithmetic_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[13], # "quadratic_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[14], # "cubic_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[15], # "power_mean" + }: + if ( + distance_metric == QuickAdapterRegressorV3._DISTANCE_METRICS[15] + ): # "power_mean" + power = p if p is not None and np.isfinite(p) else 1.0 + else: + power_map: dict[str, float] = { + QuickAdapterRegressorV3._DISTANCE_METRICS[ + 10 + ]: -1.0, # "harmonic_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[ + 11 + ]: 0.0, # "geometric_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[ + 12 + ]: 1.0, # "arithmetic_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[ + 13 + ]: 2.0, # "quadratic_mean" + QuickAdapterRegressorV3._DISTANCE_METRICS[14]: 3.0, # "cubic_mean" + } + power = power_map[distance_metric] + ideal_pmean = sp.stats.pmean( + ideal_point.flatten(), p=power, weights=weights + ) + anti_ideal_pmean = sp.stats.pmean( + anti_ideal_point.flatten(), p=power, weights=weights + ) + matrix_pmean = sp.stats.pmean( + normalized_matrix, p=power, weights=weights, axis=1 + ) + dist_to_ideal = np.abs(ideal_pmean - matrix_pmean) + dist_to_anti_ideal = np.abs(matrix_pmean - anti_ideal_pmean) + elif ( + distance_metric == QuickAdapterRegressorV3._DISTANCE_METRICS[16] + ): # "weighted_sum" + dist_to_ideal = np.abs((ideal_point - normalized_matrix) @ weights) + dist_to_anti_ideal = np.abs( + (normalized_matrix - anti_ideal_point) @ weights + ) + else: + raise ValueError( + f"Invalid distance_metric {distance_metric!r} for topsis. " + f"Supported: {', '.join(QuickAdapterRegressorV3._DISTANCE_METRICS)}" + ) denominator = dist_to_ideal + dist_to_anti_ideal zero_mask = np.isclose(denominator, 0.0) @@ -1740,114 +2048,313 @@ class QuickAdapterRegressorV3(BaseRegressionModel): return scores + @staticmethod + def _calculate_trial_distance_to_ideal( + normalized_matrix: NDArray[np.floating], + trial_index: int, + ideal_point_2d: NDArray[np.floating], + distance_metric: str, + *, + weights: Optional[NDArray[np.floating]] = None, + p: Optional[float] = None, + ) -> float: + cdist_kwargs = QuickAdapterRegressorV3._prepare_distance_kwargs( + distance_metric=distance_metric, + weights=weights, + p=p, + validate_p=True, + check_unsupported_metrics=False, + validation_context="calculate_trial_distance_to_ideal minkowski p", + ) + + return sp.spatial.distance.cdist( + normalized_matrix[[trial_index]], + ideal_point_2d, + metric=distance_metric, + **cdist_kwargs, + ).item() + def _select_best_trial_from_cluster( self, - selection_method: ClusterSelectionMethod, - best_cluster_indices: NDArray[np.intp], normalized_matrix: NDArray[np.floating], + trial_selection_method: DistanceMethod, + best_cluster_indices: NDArray[np.intp], ideal_point_2d: NDArray[np.floating], - metric: str, - cdist_kwargs: dict[str, Any], - np_weights: Optional[NDArray[np.floating]], + distance_metric: str, *, - known_medoid_index: Optional[int] = None, - known_medoid_distance: Optional[float] = None, + weights: Optional[NDArray[np.floating]] = None, + p: Optional[float] = None, ) -> tuple[int, float]: - """Select one trial from a cluster. - - Args: - selection_method: Cluster selection method ("medoid", "min", "topsis"). - best_cluster_indices: 1D array of trial indices belonging to the cluster. - normalized_matrix: Normalized objective matrix, shape (n_trials, n_objectives). - ideal_point_2d: Ideal objective point, shape (1, n_objectives). - metric: Distance metric used for scoring (scipy.cdist/pdist). - cdist_kwargs: Optional metric parameters for distance scoring (e.g., Minkowski p). - np_weights: Optional objective weights (used for weighted distances and TOPSIS). - known_medoid_index: Optional precomputed cluster medoid index. - known_medoid_distance: Optional precomputed medoid distance to the ideal point. - - Returns: - (trial_index, distance_to_ideal) for the selected trial. - """ - local_cdist_kwargs = dict(cdist_kwargs) - if np_weights is not None: - local_cdist_kwargs["w"] = np_weights - if best_cluster_indices.size == 1: best_trial_index = best_cluster_indices[0] - if known_medoid_distance is not None: - return best_trial_index, known_medoid_distance - best_trial_distance = sp.spatial.distance.cdist( - normalized_matrix[[best_trial_index]], + best_trial_distance = self._calculate_trial_distance_to_ideal( + normalized_matrix, + best_trial_index, ideal_point_2d, - metric=metric, - **local_cdist_kwargs, - ).item() - + distance_metric, + weights=weights, + p=p, + ) return best_trial_index, best_trial_distance - if ( - selection_method - == QuickAdapterRegressorV3._CLUSTER_SELECTION_METHODS[0] # "medoid" - ): - if known_medoid_index is not None and known_medoid_distance is not None: - return known_medoid_index, known_medoid_distance - p = cdist_kwargs.get("p") - best_medoid_position = np.nanargmin( - self._pairwise_distance_sums( - normalized_matrix[best_cluster_indices], - metric, - weights=np_weights, + if trial_selection_method == "topsis": + scores = QuickAdapterRegressorV3._topsis_scores( + normalized_matrix[best_cluster_indices], + distance_metric, + weights=weights, + p=p, + ) + elif trial_selection_method == "compromise_programming": + scores = QuickAdapterRegressorV3._compromise_programming_scores( + normalized_matrix[best_cluster_indices], + distance_metric, + weights=weights, + p=p, + ) + else: + raise ValueError( + f"Invalid trial_selection_method {trial_selection_method!r}. " + f"Supported: {', '.join(QuickAdapterRegressorV3._DISTANCE_METHODS)}" + ) + + min_score_position = np.nanargmin(scores) + best_trial_index = best_cluster_indices[min_score_position] + best_trial_distance = self._calculate_trial_distance_to_ideal( + normalized_matrix, + best_trial_index, + ideal_point_2d, + distance_metric, + weights=weights, + p=p, + ) + return best_trial_index, best_trial_distance + + def _cluster_based_selection( + self, + normalized_matrix: NDArray[np.floating], + cluster_method: ClusterMethod, + *, + distance_metric: str, + selection_method: DistanceMethod, + trial_selection_method: DistanceMethod, + weights: Optional[NDArray[np.floating]] = None, + p: Optional[float] = None, + ) -> NDArray[np.floating]: + n_samples, n_objectives = normalized_matrix.shape + + if n_samples == 0: + return np.array([]) + if n_samples == 1: + return np.array([0.0]) + + ideal_point_2d = np.ones((1, n_objectives)) + + n_clusters = QuickAdapterRegressorV3._get_n_clusters(normalized_matrix) + + if cluster_method in { + QuickAdapterRegressorV3._SELECTION_METHODS[2], # "kmeans" + QuickAdapterRegressorV3._SELECTION_METHODS[3], # kmeans2 + }: + if ( + cluster_method == QuickAdapterRegressorV3._SELECTION_METHODS[2] + ): # "kmeans" + kmeans = sklearn.cluster.KMeans( + n_clusters=n_clusters, random_state=42, n_init=10 + ) + cluster_labels = kmeans.fit_predict(normalized_matrix) + cluster_centers = kmeans.cluster_centers_ + else: # kmeans2 + cluster_centers, cluster_labels = sp.cluster.vq.kmeans2( + normalized_matrix, n_clusters, rng=42, minit="++" + ) + + if selection_method == "compromise_programming": + cluster_center_scores = ( + QuickAdapterRegressorV3._compromise_programming_scores( + cluster_centers, + distance_metric, + p=p, + ) + ) + elif selection_method == "topsis": + cluster_center_scores = QuickAdapterRegressorV3._topsis_scores( + cluster_centers, + distance_metric, + p=p, + ) + else: + raise ValueError( + f"Invalid selection_method {selection_method!r}. " + f"Supported: {', '.join(QuickAdapterRegressorV3._DISTANCE_METHODS)}" + ) + ordered_cluster_indices = np.argsort(cluster_center_scores) + + best_cluster_indices = None + for cluster_index in ordered_cluster_indices: + cluster_indices = np.flatnonzero(cluster_labels == cluster_index) + if cluster_indices.size > 0: + best_cluster_indices = cluster_indices + break + + trial_distances = np.full(n_samples, np.inf) + if best_cluster_indices is not None and best_cluster_indices.size > 0: + best_trial_index, best_trial_distance = ( + self._select_best_trial_from_cluster( + normalized_matrix, + trial_selection_method, + best_cluster_indices, + ideal_point_2d, + distance_metric, + weights=weights, + p=p, + ) + ) + trial_distances[best_trial_index] = best_trial_distance + return trial_distances + + elif ( + cluster_method == QuickAdapterRegressorV3._SELECTION_METHODS[4] + ): # "kmedoids" + kmedoids_kwargs: dict[str, Any] = { + "metric": distance_metric, + "random_state": 42, + "init": "k-medoids++", + "method": "pam", + } + kmedoids = KMedoids(n_clusters=n_clusters, **kmedoids_kwargs) + cluster_labels = kmedoids.fit_predict(normalized_matrix) + medoid_indices = kmedoids.medoid_indices_ + + if selection_method == "compromise_programming": + medoid_scores = QuickAdapterRegressorV3._compromise_programming_scores( + normalized_matrix[medoid_indices], + distance_metric, + p=p, + ) + elif selection_method == "topsis": + medoid_scores = QuickAdapterRegressorV3._topsis_scores( + normalized_matrix[medoid_indices], + distance_metric, p=p, ) + else: + raise ValueError( + f"Invalid selection_method {selection_method!r}. " + f"Supported: {', '.join(QuickAdapterRegressorV3._DISTANCE_METHODS)}" + ) + best_medoid_score_position = np.nanargmin(medoid_scores) + best_medoid_index = medoid_indices[best_medoid_score_position] + cluster_index = cluster_labels[best_medoid_index] + best_cluster_indices = np.flatnonzero(cluster_labels == cluster_index) + + trial_distances = np.full(n_samples, np.inf) + if best_cluster_indices is not None and best_cluster_indices.size > 0: + best_trial_index, best_trial_distance = ( + self._select_best_trial_from_cluster( + normalized_matrix, + trial_selection_method, + best_cluster_indices, + ideal_point_2d, + distance_metric, + weights=weights, + p=p, + ) + ) + trial_distances[best_trial_index] = best_trial_distance + return trial_distances + + else: + raise ValueError( + f"Invalid cluster_method {cluster_method!r}. " + f"Supported: {', '.join(QuickAdapterRegressorV3._CLUSTER_METHODS)}" ) - best_trial_index = best_cluster_indices[best_medoid_position] - best_trial_distance = sp.spatial.distance.cdist( - normalized_matrix[[best_trial_index]], - ideal_point_2d, - metric=metric, - **cdist_kwargs, - ).item() - return best_trial_index, best_trial_distance - if ( - selection_method - == QuickAdapterRegressorV3._CLUSTER_SELECTION_METHODS[1] # "min" - ): - best_cluster_distances = sp.spatial.distance.cdist( - normalized_matrix[best_cluster_indices], - ideal_point_2d, - metric=metric, - **local_cdist_kwargs, - ).flatten() - min_distance_position = np.nanargmin(best_cluster_distances) - best_trial_index = best_cluster_indices[min_distance_position] - return best_trial_index, best_cluster_distances[min_distance_position] + @staticmethod + def _knn_based_selection( + normalized_matrix: NDArray[np.floating], + aggregation: DensityAggregation, + *, + distance_metric: str, + n_neighbors: int, + weights: Optional[NDArray[np.floating]] = None, + p: Optional[float] = None, + aggregation_param: Optional[float] = None, + ) -> NDArray[np.floating]: + n_samples, _ = normalized_matrix.shape + + if n_samples == 0: + return np.array([]) + if n_samples == 1: + return np.array([0.0]) + knn_kwargs: dict[str, Any] = {} if ( - selection_method - == QuickAdapterRegressorV3._CLUSTER_SELECTION_METHODS[2] # "topsis" - ): - topsis_scores = QuickAdapterRegressorV3._topsis_scores( - normalized_matrix[best_cluster_indices], - metric, - weights=np_weights, - p=cdist_kwargs.get("p"), - ) - min_score_position = np.nanargmin(topsis_scores) - best_trial_index = best_cluster_indices[min_score_position] - best_trial_distance = sp.spatial.distance.cdist( - normalized_matrix[[best_trial_index]], - ideal_point_2d, - metric=metric, - **cdist_kwargs, - ).item() - return best_trial_index, best_trial_distance + distance_metric == QuickAdapterRegressorV3._DISTANCE_METRICS[1] + ): # "minkowski" + if p is not None and np.isfinite(p): + knn_kwargs["p"] = QuickAdapterRegressorV3._validate_minkowski_p( + p, + ctx="knn minkowski p", + ) + if weights is not None: + knn_kwargs["metric_params"] = {"w": weights} + else: + if weights is not None and not np.allclose(weights, weights[0]): + raise ValueError( + f"Invalid configuration: weights are only supported for Minkowski distance metric, " + f"but got distance_metric={distance_metric!r}" + ) - raise ValueError( - f"Invalid selection_method {selection_method!r}. " - f"Supported: {', '.join(QuickAdapterRegressorV3._CLUSTER_SELECTION_METHODS)}" - ) + nbrs = sklearn.neighbors.NearestNeighbors( + n_neighbors=min(n_neighbors, n_samples - 1) + 1, + metric=distance_metric, + **knn_kwargs, + ).fit(normalized_matrix) + distances, _ = nbrs.kneighbors(normalized_matrix) + neighbor_distances = distances[:, 1:] + + if neighbor_distances.shape[1] < 1: + return np.full(n_samples, np.inf) + + if ( + aggregation == QuickAdapterRegressorV3._DENSITY_AGGREGATIONS[0] + ): # "power_mean" + power = ( + aggregation_param + if aggregation_param is not None and np.isfinite(aggregation_param) + else QuickAdapterRegressorV3._get_label_density_aggregation_param_default( + aggregation + ) + ) + if power is None: + power = 1.0 + return sp.stats.pmean(neighbor_distances, p=power, axis=1) + elif ( + aggregation == QuickAdapterRegressorV3._DENSITY_AGGREGATIONS[1] + ): # "quantile" + quantile = ( + aggregation_param + if aggregation_param is not None + else QuickAdapterRegressorV3._get_label_density_aggregation_param_default( + aggregation + ) + ) + if quantile is None: + quantile = 0.5 + quantile = QuickAdapterRegressorV3._validate_quantile_q( + quantile, + ctx="knn quantile q", + ) + return np.nanquantile(neighbor_distances, quantile, axis=1) + elif aggregation == QuickAdapterRegressorV3._DENSITY_AGGREGATIONS[2]: # "min" + return np.nanmin(neighbor_distances, axis=1) + elif aggregation == QuickAdapterRegressorV3._DENSITY_AGGREGATIONS[3]: # "max" + return np.nanmax(neighbor_distances, axis=1) + else: + raise ValueError( + f"Invalid aggregation {aggregation!r}. " + f"Supported: {', '.join(QuickAdapterRegressorV3._DENSITY_AGGREGATIONS)}" + ) @staticmethod def _normalize_objective_values( @@ -1855,7 +2362,10 @@ class QuickAdapterRegressorV3(BaseRegressionModel): directions: list[optuna.study.StudyDirection], ) -> NDArray[np.floating]: if objective_values_matrix.ndim != 2: - raise ValueError("Invalid objective_values_matrix: must be 2-dimensional") + raise ValueError( + f"Invalid objective_values_matrix (shape={objective_values_matrix.shape}, " + f"ndim={objective_values_matrix.ndim}): must be 2-dimensional" + ) n_samples, n_objectives = objective_values_matrix.shape if n_samples == 0 or n_objectives == 0: @@ -1946,16 +2456,18 @@ class QuickAdapterRegressorV3(BaseRegressionModel): n_clusters = int(round((np.log2(n_uniques) + np.sqrt(n_uniques)) / 2.0)) return min(max(lower_bound, n_clusters), upper_bound) - def _calculate_distances_to_ideal( + def _calculate_distances( self, normalized_matrix: NDArray[np.floating], - metric: str, - metrics: set[str], + selection_method: SelectionMethod, ) -> NDArray[np.floating]: if normalized_matrix.ndim != 2: - raise ValueError("Invalid normalized_matrix: must be 2-dimensional") - n_objectives = normalized_matrix.shape[1] - n_samples = normalized_matrix.shape[0] + raise ValueError( + f"Invalid normalized_matrix (shape={normalized_matrix.shape}, " + f"ndim={normalized_matrix.ndim}): must be 2-dimensional" + ) + + n_samples, n_objectives = normalized_matrix.shape if n_samples == 0 or n_objectives == 0: raise ValueError( "Invalid normalized_matrix: must have at least one sample and one objective" @@ -1964,347 +2476,131 @@ class QuickAdapterRegressorV3(BaseRegressionModel): raise ValueError( "Invalid normalized_matrix: must contain only finite values (no NaN or inf)" ) + + label_config = self._resolve_label_method_config(selection_method) + method = label_config["method"] + category = label_config["category"] + label_p_order = self.ft_params.get("label_p_order") label_weights = self.ft_params.get("label_weights") - if label_weights is None: - np_weights = np.array([1.0] * n_objectives) - elif isinstance(label_weights, (list, tuple, np.ndarray)): - np_weights = np.array(label_weights, dtype=float) - else: + + if label_weights is not None and not isinstance( + label_weights, (list, tuple, np.ndarray) + ): raise ValueError( f"Invalid label_weights: must be a list, tuple, or array, got {type(label_weights).__name__}" ) - if np_weights.size != n_objectives: - raise ValueError( - "Invalid label_weights: length must match number of objectives" - ) - if not np.all(np.isfinite(np_weights)): - raise ValueError("Invalid label_weights: must contain only finite values") - if np.any(np_weights < 0): - raise ValueError("Invalid label_weights: values must be non-negative") - label_weights_sum = np.nansum(np.abs(np_weights)) - if np.isclose(label_weights_sum, 0.0): - raise ValueError("Invalid label_weights: sum cannot be zero") - np_weights = np_weights / label_weights_sum - - ideal_point = np.ones(n_objectives) - ideal_point_2d = ideal_point.reshape(1, -1) + weights = QuickAdapterRegressorV3._normalize_weights( + np.array(label_weights, dtype=float) if label_weights is not None else None, + n_objectives, + ) - if n_samples == 0: - return np.array([]) if n_samples == 1: - if metric in { - QuickAdapterRegressorV3._CUSTOM_METRICS[16], # "medoid" - QuickAdapterRegressorV3._CUSTOM_METRICS[9], # "kmeans" - QuickAdapterRegressorV3._CUSTOM_METRICS[10], # "kmeans2" - QuickAdapterRegressorV3._CUSTOM_METRICS[11], # "kmedoids" - QuickAdapterRegressorV3._CUSTOM_METRICS[12], # "knn_power_mean" - QuickAdapterRegressorV3._CUSTOM_METRICS[13], # "knn_quantile" - QuickAdapterRegressorV3._CUSTOM_METRICS[14], # "knn_min" - QuickAdapterRegressorV3._CUSTOM_METRICS[15], # "knn_max" + if method in { + QuickAdapterRegressorV3._SELECTION_METHODS[6], # "medoid" + QuickAdapterRegressorV3._SELECTION_METHODS[2], # "kmeans" + QuickAdapterRegressorV3._SELECTION_METHODS[3], # "kmeans2" + QuickAdapterRegressorV3._SELECTION_METHODS[4], # "kmedoids" + QuickAdapterRegressorV3._SELECTION_METHODS[5], # "knn" }: return np.array([0.0]) - if metric in QuickAdapterRegressorV3._scipy_metrics_set(): - cdist_kwargs: dict[str, Any] = {} - if metric not in QuickAdapterRegressorV3._unsupported_cluster_metrics_set(): - cdist_kwargs["w"] = np_weights - if metric == QuickAdapterRegressorV3._SCIPY_METRICS[5]: # "minkowski" - cdist_kwargs["p"] = ( - label_p_order - if label_p_order is not None and np.isfinite(label_p_order) - else self._get_label_p_order_default(metric) - ) - return sp.spatial.distance.cdist( - normalized_matrix, - ideal_point_2d, - metric=metric, - **cdist_kwargs, - ).flatten() - elif metric in { - QuickAdapterRegressorV3._CUSTOM_METRICS[0], # "hellinger" - QuickAdapterRegressorV3._CUSTOM_METRICS[1], # "shellinger" - }: - np_sqrt_normalized_matrix = np.sqrt(normalized_matrix) - if metric == QuickAdapterRegressorV3._CUSTOM_METRICS[1]: # "shellinger" - variances = np.nanvar(np_sqrt_normalized_matrix, axis=0, ddof=1) - if np.any(variances <= 0): - raise ValueError( - "Invalid data for shellinger metric: requires non-zero variance for all objectives" - ) - np_weights = 1 / variances - return ( - np.sqrt( - np.nansum( - np_weights - * (np_sqrt_normalized_matrix - np.sqrt(ideal_point)) ** 2, - axis=1, - ) - ) - / QuickAdapterRegressorV3._SQRT_2 + if category == "distance": + distance_metric = label_config["distance_metric"] + p = QuickAdapterRegressorV3._resolve_p_order( + distance_metric, + label_p_order, + ctx=f"label_p_order for {method}", ) - elif metric in { - QuickAdapterRegressorV3._CUSTOM_METRICS[2], # "harmonic_mean" - QuickAdapterRegressorV3._CUSTOM_METRICS[3], # "geometric_mean" - QuickAdapterRegressorV3._CUSTOM_METRICS[4], # "arithmetic_mean" - QuickAdapterRegressorV3._CUSTOM_METRICS[5], # "quadratic_mean" - QuickAdapterRegressorV3._CUSTOM_METRICS[6], # "cubic_mean" - QuickAdapterRegressorV3._CUSTOM_METRICS[7], # "power_mean" - }: - p = { - QuickAdapterRegressorV3._CUSTOM_METRICS[2]: -1.0, # "harmonic_mean" - QuickAdapterRegressorV3._CUSTOM_METRICS[3]: 0.0, # "geometric_mean" - QuickAdapterRegressorV3._CUSTOM_METRICS[4]: 1.0, # "arithmetic_mean" - QuickAdapterRegressorV3._CUSTOM_METRICS[5]: 2.0, # "quadratic_mean" - QuickAdapterRegressorV3._CUSTOM_METRICS[6]: 3.0, # "cubic_mean" - QuickAdapterRegressorV3._CUSTOM_METRICS[ - 7 - ]: label_p_order # "power_mean" - if label_p_order is not None and np.isfinite(label_p_order) - else self._get_label_p_order_default(metric), - }[metric] - return sp.stats.pmean( - ideal_point, p=p, weights=np_weights - ) - sp.stats.pmean(normalized_matrix, p=p, weights=np_weights, axis=1) - elif metric == QuickAdapterRegressorV3._CUSTOM_METRICS[8]: # "weighted_sum" - return (ideal_point - normalized_matrix) @ np_weights - elif metric == QuickAdapterRegressorV3._CUSTOM_METRICS[16]: # "medoid" - label_medoid_metric, _, _ = self._get_distance_metric(metric) + if ( - label_medoid_metric - in QuickAdapterRegressorV3._unsupported_cluster_metrics_set() - ): - raise ValueError( - f"Invalid label_medoid_metric {label_medoid_metric!r}. " - f"Unsupported: {', '.join(QuickAdapterRegressorV3._UNSUPPORTED_CLUSTER_METRICS)}" + method == QuickAdapterRegressorV3._DISTANCE_METHODS[0] + ): # "compromise_programming" + return QuickAdapterRegressorV3._compromise_programming_scores( + normalized_matrix, + distance_metric, + weights=weights, + p=p, ) - p = None - if ( - label_medoid_metric - == QuickAdapterRegressorV3._SCIPY_METRICS[5] # "minkowski" - ): - p = ( - label_p_order - if label_p_order is not None and np.isfinite(label_p_order) - else self._get_label_p_order_default(label_medoid_metric) + if method == QuickAdapterRegressorV3._DISTANCE_METHODS[1]: # "topsis" + return QuickAdapterRegressorV3._topsis_scores( + normalized_matrix, + distance_metric, + weights=weights, + p=p, ) - return self._pairwise_distance_sums( + + if category == "cluster": + cluster_metric = label_config["distance_metric"] + cluster_selection_method = label_config["selection_method"] + trial_selection_method = label_config["trial_selection_method"] + + QuickAdapterRegressorV3._validate_metric_supported( + cluster_metric, category="cluster" + ) + p = QuickAdapterRegressorV3._resolve_p_order( + cluster_metric, + label_p_order, + ctx=f"label_p_order for {method}", + ) + return self._cluster_based_selection( normalized_matrix, - label_medoid_metric, - weights=np_weights, + method, + distance_metric=cluster_metric, + selection_method=cluster_selection_method, + trial_selection_method=trial_selection_method, + weights=weights, p=p, ) - elif metric in { - QuickAdapterRegressorV3._CUSTOM_METRICS[9], # "kmeans" - QuickAdapterRegressorV3._CUSTOM_METRICS[10], # "kmeans2" - }: - n_clusters = QuickAdapterRegressorV3._get_n_clusters(normalized_matrix) - if metric == QuickAdapterRegressorV3._CUSTOM_METRICS[9]: # "kmeans" - kmeans = sklearn.cluster.KMeans( - n_clusters=n_clusters, random_state=42, n_init=10 - ) - cluster_labels = kmeans.fit_predict(normalized_matrix) - cluster_centers = kmeans.cluster_centers_ - elif metric == QuickAdapterRegressorV3._CUSTOM_METRICS[10]: # "kmeans2" - cluster_centers, cluster_labels = sp.cluster.vq.kmeans2( - normalized_matrix, n_clusters, rng=42, minit="++" - ) - label_kmeans_metric, _, _ = self._get_distance_metric(metric) - if ( - label_kmeans_metric - in QuickAdapterRegressorV3._unsupported_cluster_metrics_set() - ): - raise ValueError( - f"Invalid label_kmeans_metric {label_kmeans_metric!r}. " - f"Unsupported: {', '.join(QuickAdapterRegressorV3._UNSUPPORTED_CLUSTER_METRICS)}" - ) - cdist_kwargs: dict[str, Any] = {} - if ( - label_kmeans_metric - == QuickAdapterRegressorV3._SCIPY_METRICS[5] # "minkowski" - ): - cdist_kwargs["p"] = ( - label_p_order - if label_p_order is not None and np.isfinite(label_p_order) - else self._get_label_p_order_default(label_kmeans_metric) - ) - cluster_center_distances_to_ideal = sp.spatial.distance.cdist( - cluster_centers, - ideal_point_2d, - metric=label_kmeans_metric, - **cdist_kwargs, - ).flatten() - label_kmeans_selection = self.ft_params.get( - "label_kmeans_selection", - QuickAdapterRegressorV3._CLUSTER_SELECTION_METHODS[1], # "min" + + if category == "density": + density_method = cast(DensityMethod, method) + density_metric = label_config["distance_metric"] + QuickAdapterRegressorV3._validate_metric_supported( + density_metric, category="density" ) - ordered_cluster_indices = np.argsort(cluster_center_distances_to_ideal) - best_cluster_indices = None - for cluster_index in ordered_cluster_indices: - cluster_indices = np.flatnonzero(cluster_labels == cluster_index) - if cluster_indices.size > 0: - best_cluster_indices = cluster_indices - break - trial_distances = np.full(n_samples, np.inf) - if best_cluster_indices is not None and best_cluster_indices.size > 0: - best_trial_index, best_trial_distance = ( - self._select_best_trial_from_cluster( - label_kmeans_selection, - best_cluster_indices, - normalized_matrix, - ideal_point_2d, - label_kmeans_metric, - cdist_kwargs, - np_weights, - ) - ) - trial_distances[best_trial_index] = best_trial_distance - return trial_distances - elif metric == QuickAdapterRegressorV3._CUSTOM_METRICS[11]: # "kmedoids" - n_clusters = QuickAdapterRegressorV3._get_n_clusters(normalized_matrix) - label_kmedoids_metric, _, _ = self._get_distance_metric(metric) - if ( - label_kmedoids_metric - in QuickAdapterRegressorV3._unsupported_cluster_metrics_set() - ): - raise ValueError( - f"Invalid label_kmedoids_metric {label_kmedoids_metric!r}. " - f"Unsupported: {', '.join(QuickAdapterRegressorV3._UNSUPPORTED_CLUSTER_METRICS)}" - ) - kmedoids_kwargs: dict[str, Any] = { - "metric": label_kmedoids_metric, - "random_state": 42, - "init": "k-medoids++", - "method": "pam", - } - kmedoids = KMedoids(n_clusters=n_clusters, **kmedoids_kwargs) - cluster_labels = kmedoids.fit_predict(normalized_matrix) - medoid_indices = kmedoids.medoid_indices_ - cdist_kwargs: dict[str, Any] = {} - if ( - label_kmedoids_metric - == QuickAdapterRegressorV3._SCIPY_METRICS[5] # "minkowski" - ): - cdist_kwargs["p"] = ( - label_p_order - if label_p_order is not None and np.isfinite(label_p_order) - else self._get_label_p_order_default(label_kmedoids_metric) - ) - medoid_distances_to_ideal = sp.spatial.distance.cdist( - normalized_matrix[medoid_indices], - ideal_point_2d, - metric=label_kmedoids_metric, - **cdist_kwargs, - ).flatten() - label_kmedoids_selection = self.ft_params.get( - "label_kmedoids_selection", - QuickAdapterRegressorV3._CLUSTER_SELECTION_METHODS[1], # "min" + p = QuickAdapterRegressorV3._resolve_p_order( + density_metric, + label_p_order, + ctx=f"label_p_order for {density_method}", ) - best_medoid_distance_position = np.nanargmin(medoid_distances_to_ideal) - best_medoid_index = medoid_indices[best_medoid_distance_position] - cluster_index = cluster_labels[best_medoid_index] - best_cluster_indices = np.flatnonzero(cluster_labels == cluster_index) - trial_distances = np.full(n_samples, np.inf) - if best_cluster_indices is not None and best_cluster_indices.size > 0: - best_trial_index, best_trial_distance = ( - self._select_best_trial_from_cluster( - label_kmedoids_selection, - best_cluster_indices, - normalized_matrix, - ideal_point_2d, - label_kmedoids_metric, - cdist_kwargs, - np_weights, - known_medoid_index=best_medoid_index, - known_medoid_distance=medoid_distances_to_ideal[ - best_medoid_distance_position - ], + + if density_method == QuickAdapterRegressorV3._DENSITY_METHODS[0]: # "knn" + knn_n_neighbors = int(label_config["n_neighbors"]) + knn_aggregation = cast(DensityAggregation, label_config["aggregation"]) + if ( + knn_aggregation + not in QuickAdapterRegressorV3._density_aggregations_set() + ): + raise ValueError( + f"Invalid aggregation in label_config {knn_aggregation!r}. " + f"Supported: {', '.join(QuickAdapterRegressorV3._DENSITY_AGGREGATIONS)}" ) + knn_aggregation_param = label_config["aggregation_param"] + return QuickAdapterRegressorV3._knn_based_selection( + normalized_matrix, + knn_aggregation, + distance_metric=density_metric, + n_neighbors=knn_n_neighbors, + weights=weights, + p=p, + aggregation_param=knn_aggregation_param, ) - trial_distances[best_trial_index] = best_trial_distance - return trial_distances - elif metric in { - QuickAdapterRegressorV3._CUSTOM_METRICS[12], # "knn_power_mean" - QuickAdapterRegressorV3._CUSTOM_METRICS[13], # "knn_quantile" - QuickAdapterRegressorV3._CUSTOM_METRICS[14], # "knn_min" - QuickAdapterRegressorV3._CUSTOM_METRICS[15], # "knn_max" - }: - label_knn_metric, _, _ = self._get_distance_metric(metric) - knn_kwargs: dict[str, Any] = {} - if ( - label_knn_metric - == QuickAdapterRegressorV3._SCIPY_METRICS[5] # "minkowski" - ): - knn_kwargs["p"] = ( - label_p_order - if label_p_order is not None and np.isfinite(label_p_order) - else self._get_label_p_order_default(label_knn_metric) - ) - knn_kwargs["metric_params"] = {"w": np_weights} - label_knn_p_order = self.ft_params.get("label_knn_p_order") - n_neighbors = ( - min( - int( - self.ft_params.get( - "label_knn_n_neighbors", - QuickAdapterRegressorV3.LABEL_KNN_N_NEIGHBORS_DEFAULT, - ) - ), - n_samples - 1, - ) - + 1 - ) - nbrs = sklearn.neighbors.NearestNeighbors( - n_neighbors=n_neighbors, metric=label_knn_metric, **knn_kwargs - ).fit(normalized_matrix) - distances, _ = nbrs.kneighbors(normalized_matrix) - neighbor_distances = distances[:, 1:] - if neighbor_distances.shape[1] < 1: - return np.full(n_samples, np.inf) - if ( - metric == QuickAdapterRegressorV3._CUSTOM_METRICS[12] - ): # "knn_power_mean" - label_knn_p_order = ( - label_knn_p_order - if label_knn_p_order is not None and np.isfinite(label_knn_p_order) - else self._get_label_knn_p_order_default(metric) - ) - return sp.stats.pmean(neighbor_distances, p=label_knn_p_order, axis=1) - elif ( - metric == QuickAdapterRegressorV3._CUSTOM_METRICS[13] - ): # "knn_quantile" - label_knn_p_order = ( - label_knn_p_order - if label_knn_p_order is not None and np.isfinite(label_knn_p_order) - else self._get_label_knn_p_order_default(metric) - ) - return np.nanquantile(neighbor_distances, label_knn_p_order, axis=1) - elif metric == QuickAdapterRegressorV3._CUSTOM_METRICS[14]: # "knn_min" - return np.nanmin(neighbor_distances, axis=1) - elif metric == QuickAdapterRegressorV3._CUSTOM_METRICS[15]: # "knn_max" - return np.nanmax(neighbor_distances, axis=1) - elif metric == QuickAdapterRegressorV3._CUSTOM_METRICS[17]: # "topsis" - label_topsis_metric, _, _ = self._get_distance_metric(metric) - p = None + if ( - label_topsis_metric - == QuickAdapterRegressorV3._SCIPY_METRICS[5] # "minkowski" - ): - p = ( - label_p_order - if label_p_order is not None and np.isfinite(label_p_order) - else self._get_label_p_order_default(label_topsis_metric) + density_method == QuickAdapterRegressorV3._DENSITY_METHODS[1] + ): # "medoid" + return QuickAdapterRegressorV3._pairwise_distance_sums( + normalized_matrix, + density_metric, + weights=weights, + p=p, ) - return QuickAdapterRegressorV3._topsis_scores( - normalized_matrix, - label_topsis_metric, - weights=np_weights, - p=p, - ) - else: - raise ValueError( - f"Invalid label metric {metric!r}. Supported: {', '.join(metrics)}" - ) + + raise ValueError( + f"Invalid label_method {selection_method!r}. " + f"Supported: {', '.join(QuickAdapterRegressorV3._SELECTION_METHODS)}" + ) def _get_multi_objective_study_best_trial( self, namespace: OptunaNamespace, study: optuna.study.Study @@ -2314,7 +2610,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): }: # Only "label" raise ValueError( f"Invalid namespace {namespace!r}. " - f"Supported: {QuickAdapterRegressorV3._OPTUNA_NAMESPACES[2]}" # Only label + f"Supported: {QuickAdapterRegressorV3._OPTUNA_NAMESPACES[2]}" # Only "label" ) n_objectives = len(study.directions) if n_objectives < 2: @@ -2324,14 +2620,9 @@ class QuickAdapterRegressorV3(BaseRegressionModel): if not QuickAdapterRegressorV3.optuna_study_has_best_trials(study): return None - metrics = QuickAdapterRegressorV3._metrics_set() - label_metric = self.ft_params.get( - "label_metric", QuickAdapterRegressorV3._SCIPY_METRICS[2] - ) # "euclidean" - if label_metric not in metrics: - raise ValueError( - f"Invalid label_metric {label_metric!r}. Supported: {', '.join(metrics)}" - ) + label_method = self.ft_params.get( + "label_method", QuickAdapterRegressorV3.LABEL_METHOD_DEFAULT + ) # "compromise_programming" best_trials = [ trial @@ -2356,8 +2647,9 @@ class QuickAdapterRegressorV3(BaseRegressionModel): objective_values_matrix, study.directions ) - trial_distances = self._calculate_distances_to_ideal( - normalized_matrix, metric=label_metric, metrics=metrics + trial_distances = self._calculate_distances( + normalized_matrix, + selection_method=label_method, ) return best_trials[np.nanargmin(trial_distances)] @@ -2453,18 +2745,15 @@ class QuickAdapterRegressorV3(BaseRegressionModel): "values": self.get_optuna_values(pair, namespace), **self.get_optuna_params(pair, namespace), } - label_metric = self.ft_params.get( - "label_metric", QuickAdapterRegressorV3._SCIPY_METRICS[2] + label_config = self._resolve_label_method_config( + self.ft_params.get("label_method", self.LABEL_METHOD_DEFAULT) + ) + metric_log_msg = ( + f"{QuickAdapterRegressorV3._format_label_method_config(label_config)}" ) - distance_metric, param_name, _ = self._get_distance_metric(label_metric) - if param_name: - metric_log_msg = ( - f" using {label_metric} metric ({distance_metric} distance)" - ) - else: - metric_log_msg = f" using {label_metric} metric" logger.info( - f"[{pair}] Optuna {namespace} {objective_type} objective hyperopt completed{metric_log_msg} ({time_spent:.2f} secs)" + f"[{pair}] Optuna {namespace} {objective_type} objective hyperopt completed" + f" ({metric_log_msg}) ({time_spent:.2f} secs)" ) max_study_results_key_length = ( max(len(str(key)) for key in study_best_results.keys()) diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py index 1b3a467..6e789d0 100644 --- a/quickadapter/user_data/strategies/QuickAdapterV3.py +++ b/quickadapter/user_data/strategies/QuickAdapterV3.py @@ -31,8 +31,6 @@ from Utils import ( DEFAULTS_EXTREMA_SMOOTHING, DEFAULTS_EXTREMA_WEIGHTING, EXTREMA_COLUMN, - HYBRID_AGGREGATIONS, - HYBRID_WEIGHT_SOURCES, MAXIMA_THRESHOLD_COLUMN, MINIMA_THRESHOLD_COLUMN, NORMALIZATION_TYPES, @@ -41,6 +39,8 @@ from Utils import ( SMOOTHING_MODES, STANDARDIZATION_TYPES, TRADE_PRICE_TARGETS, + WEIGHT_AGGREGATIONS, + WEIGHT_SOURCES, WEIGHT_STRATEGIES, alligator, bottom_change_percent, @@ -106,7 +106,7 @@ class QuickAdapterV3(IStrategy): _TRADING_MODES: Final[tuple[TradingMode, ...]] = ("spot", "margin", "futures") def version(self) -> str: - return "3.8.5" + return "3.9.0" timeframe = "5m" @@ -381,7 +381,7 @@ class QuickAdapterV3(IStrategy): if not isinstance(lookback_period_candles, int) or lookback_period_candles < 0: logger.warning( - f"Invalid reversal_confirmation lookback_period_candles {lookback_period_candles!r}: must be >= 0. Using default {QuickAdapterV3.default_reversal_confirmation['lookback_period_candles']!r}" + f"Invalid reversal_confirmation lookback_period_candles {lookback_period_candles!r}: must be >= 0, using default {QuickAdapterV3.default_reversal_confirmation['lookback_period_candles']!r}" ) lookback_period_candles = QuickAdapterV3.default_reversal_confirmation[ "lookback_period_candles" @@ -391,7 +391,7 @@ class QuickAdapterV3(IStrategy): 0.0 < decay_fraction <= 1.0 ): logger.warning( - f"Invalid reversal_confirmation decay_fraction {decay_fraction!r}: must be in range (0, 1]. Using default {QuickAdapterV3.default_reversal_confirmation['decay_fraction']!r}" + f"Invalid reversal_confirmation decay_fraction {decay_fraction!r}: must be in range (0, 1], using default {QuickAdapterV3.default_reversal_confirmation['decay_fraction']!r}" ) decay_fraction = QuickAdapterV3.default_reversal_confirmation[ "decay_fraction" @@ -429,14 +429,14 @@ class QuickAdapterV3(IStrategy): self.pairs: list[str] = self.config.get("exchange", {}).get("pair_whitelist") if not self.pairs: raise ValueError( - "FreqAI strategy requires StaticPairList method defined in pairlists configuration and 'pair_whitelist' defined in exchange section configuration" + "Invalid configuration: FreqAI strategy requires StaticPairList method in pairlists and 'pair_whitelist' in exchange section" ) if ( not isinstance(self.freqai_info.get("identifier"), str) or not self.freqai_info.get("identifier", "").strip() ): raise ValueError( - "FreqAI strategy requires 'identifier' defined in the freqai section configuration" + "Invalid freqai configuration: 'identifier' must be defined in freqai section" ) self.models_full_path = Path( self.config.get("user_data_dir") @@ -805,80 +805,78 @@ class QuickAdapterV3(IStrategy): extrema_weighting: dict[str, Any], ) -> dict[str, Any]: # Strategy - weighting_strategy = str( + strategy = str( extrema_weighting.get("strategy", DEFAULTS_EXTREMA_WEIGHTING["strategy"]) ) - if weighting_strategy not in set(WEIGHT_STRATEGIES): + if strategy not in set(WEIGHT_STRATEGIES): logger.warning( - f"Invalid extrema_weighting strategy {weighting_strategy!r}. Supported: {', '.join(WEIGHT_STRATEGIES)}. Using default {WEIGHT_STRATEGIES[0]!r}" + f"Invalid extrema_weighting strategy {strategy!r}, supported: {', '.join(WEIGHT_STRATEGIES)}, using default {WEIGHT_STRATEGIES[0]!r}" ) - weighting_strategy = WEIGHT_STRATEGIES[0] + strategy = WEIGHT_STRATEGIES[0] # Phase 1: Standardization - weighting_standardization = str( + standardization = str( extrema_weighting.get( "standardization", DEFAULTS_EXTREMA_WEIGHTING["standardization"] ) ) - if weighting_standardization not in set(STANDARDIZATION_TYPES): + if standardization not in set(STANDARDIZATION_TYPES): logger.warning( - f"Invalid extrema_weighting standardization {weighting_standardization!r}. Supported: {', '.join(STANDARDIZATION_TYPES)}. Using default {STANDARDIZATION_TYPES[0]!r}" + f"Invalid extrema_weighting standardization {standardization!r}, supported: {', '.join(STANDARDIZATION_TYPES)}, using default {STANDARDIZATION_TYPES[0]!r}" ) - weighting_standardization = STANDARDIZATION_TYPES[0] + standardization = STANDARDIZATION_TYPES[0] - weighting_robust_quantiles = extrema_weighting.get( + robust_quantiles = extrema_weighting.get( "robust_quantiles", DEFAULTS_EXTREMA_WEIGHTING["robust_quantiles"] ) if ( - not isinstance(weighting_robust_quantiles, (list, tuple)) - or len(weighting_robust_quantiles) != 2 + not isinstance(robust_quantiles, (list, tuple)) + or len(robust_quantiles) != 2 or not all( isinstance(q, (int, float)) and np.isfinite(q) and 0 <= q <= 1 - for q in weighting_robust_quantiles + for q in robust_quantiles ) - or weighting_robust_quantiles[0] >= weighting_robust_quantiles[1] + or robust_quantiles[0] >= robust_quantiles[1] ): logger.warning( - f"Invalid extrema_weighting robust_quantiles {weighting_robust_quantiles!r}: must be (q1, q3) with 0 <= q1 < q3 <= 1. Using default {DEFAULTS_EXTREMA_WEIGHTING['robust_quantiles']!r}" + f"Invalid extrema_weighting robust_quantiles {robust_quantiles!r}: must be (q1, q3) with 0 <= q1 < q3 <= 1, using default {DEFAULTS_EXTREMA_WEIGHTING['robust_quantiles']!r}" ) - weighting_robust_quantiles = DEFAULTS_EXTREMA_WEIGHTING["robust_quantiles"] + robust_quantiles = DEFAULTS_EXTREMA_WEIGHTING["robust_quantiles"] else: - weighting_robust_quantiles = ( - float(weighting_robust_quantiles[0]), - float(weighting_robust_quantiles[1]), + robust_quantiles = ( + float(robust_quantiles[0]), + float(robust_quantiles[1]), ) - weighting_mmad_scaling_factor = extrema_weighting.get( + mmad_scaling_factor = extrema_weighting.get( "mmad_scaling_factor", DEFAULTS_EXTREMA_WEIGHTING["mmad_scaling_factor"] ) if ( - not isinstance(weighting_mmad_scaling_factor, (int, float)) - or not np.isfinite(weighting_mmad_scaling_factor) - or weighting_mmad_scaling_factor <= 0 + not isinstance(mmad_scaling_factor, (int, float)) + or not np.isfinite(mmad_scaling_factor) + or mmad_scaling_factor <= 0 ): logger.warning( - f"Invalid extrema_weighting mmad_scaling_factor {weighting_mmad_scaling_factor!r}: must be a finite number > 0. Using default {DEFAULTS_EXTREMA_WEIGHTING['mmad_scaling_factor']!r}" + f"Invalid extrema_weighting mmad_scaling_factor {mmad_scaling_factor!r}: must be a finite number > 0, using default {DEFAULTS_EXTREMA_WEIGHTING['mmad_scaling_factor']!r}" ) - weighting_mmad_scaling_factor = DEFAULTS_EXTREMA_WEIGHTING[ - "mmad_scaling_factor" - ] + mmad_scaling_factor = DEFAULTS_EXTREMA_WEIGHTING["mmad_scaling_factor"] # Phase 2: Normalization - weighting_normalization = str( + normalization = str( extrema_weighting.get( "normalization", DEFAULTS_EXTREMA_WEIGHTING["normalization"] ) ) - if weighting_normalization not in set(NORMALIZATION_TYPES): + if normalization not in set(NORMALIZATION_TYPES): logger.warning( - f"Invalid extrema_weighting normalization {weighting_normalization!r}. Supported: {', '.join(NORMALIZATION_TYPES)}. Using default {NORMALIZATION_TYPES[0]!r}" + f"Invalid extrema_weighting normalization {normalization!r}, supported: {', '.join(NORMALIZATION_TYPES)}, using default {NORMALIZATION_TYPES[0]!r}" ) - weighting_normalization = NORMALIZATION_TYPES[0] + normalization = NORMALIZATION_TYPES[0] if ( - weighting_strategy != WEIGHT_STRATEGIES[0] # "none" - and weighting_standardization != STANDARDIZATION_TYPES[0] # "none" - and weighting_normalization + strategy != WEIGHT_STRATEGIES[0] # "none" + and standardization != STANDARDIZATION_TYPES[0] # "none" + and normalization in { NORMALIZATION_TYPES[3], # "l1" NORMALIZATION_TYPES[4], # "l2" @@ -887,99 +885,94 @@ class QuickAdapterV3(IStrategy): ): raise ValueError( f"Invalid extrema_weighting configuration: " - f"standardization='{weighting_standardization}' with normalization='{weighting_normalization}' " + f"standardization={standardization!r} with normalization={normalization!r} " "can produce negative weights and flip ternary extrema labels. " - f"Use normalization in {{'{NORMALIZATION_TYPES[0]}','{NORMALIZATION_TYPES[1]}','{NORMALIZATION_TYPES[2]}','{NORMALIZATION_TYPES[5]}'}} " - f"or set standardization='{STANDARDIZATION_TYPES[0]}'." + f"Use normalization in {{{NORMALIZATION_TYPES[0]!r},{NORMALIZATION_TYPES[1]!r},{NORMALIZATION_TYPES[2]!r},{NORMALIZATION_TYPES[5]!r}}} " + f"or set standardization={STANDARDIZATION_TYPES[0]!r}" ) - weighting_minmax_range = extrema_weighting.get( + minmax_range = extrema_weighting.get( "minmax_range", DEFAULTS_EXTREMA_WEIGHTING["minmax_range"] ) if ( - not isinstance(weighting_minmax_range, (list, tuple)) - or len(weighting_minmax_range) != 2 + not isinstance(minmax_range, (list, tuple)) + or len(minmax_range) != 2 or not all( - isinstance(x, (int, float)) and np.isfinite(x) - for x in weighting_minmax_range + isinstance(x, (int, float)) and np.isfinite(x) for x in minmax_range ) - or weighting_minmax_range[0] >= weighting_minmax_range[1] + or minmax_range[0] >= minmax_range[1] ): logger.warning( - f"Invalid extrema_weighting minmax_range {weighting_minmax_range!r}: must be (min, max) with min < max. Using default {DEFAULTS_EXTREMA_WEIGHTING['minmax_range']!r}" + f"Invalid extrema_weighting minmax_range {minmax_range!r}: must be (min, max) with min < max, using default {DEFAULTS_EXTREMA_WEIGHTING['minmax_range']!r}" ) - weighting_minmax_range = DEFAULTS_EXTREMA_WEIGHTING["minmax_range"] + minmax_range = DEFAULTS_EXTREMA_WEIGHTING["minmax_range"] else: - weighting_minmax_range = ( - float(weighting_minmax_range[0]), - float(weighting_minmax_range[1]), + minmax_range = ( + float(minmax_range[0]), + float(minmax_range[1]), ) - weighting_sigmoid_scale = extrema_weighting.get( + sigmoid_scale = extrema_weighting.get( "sigmoid_scale", DEFAULTS_EXTREMA_WEIGHTING["sigmoid_scale"] ) if ( - not isinstance(weighting_sigmoid_scale, (int, float)) - or not np.isfinite(weighting_sigmoid_scale) - or weighting_sigmoid_scale <= 0 + not isinstance(sigmoid_scale, (int, float)) + or not np.isfinite(sigmoid_scale) + or sigmoid_scale <= 0 ): logger.warning( - f"Invalid extrema_weighting sigmoid_scale {weighting_sigmoid_scale!r}: must be a finite number > 0. Using default {DEFAULTS_EXTREMA_WEIGHTING['sigmoid_scale']!r}" + f"Invalid extrema_weighting sigmoid_scale {sigmoid_scale!r}: must be a finite number > 0, using default {DEFAULTS_EXTREMA_WEIGHTING['sigmoid_scale']!r}" ) - weighting_sigmoid_scale = DEFAULTS_EXTREMA_WEIGHTING["sigmoid_scale"] + sigmoid_scale = DEFAULTS_EXTREMA_WEIGHTING["sigmoid_scale"] - weighting_softmax_temperature = extrema_weighting.get( + softmax_temperature = extrema_weighting.get( "softmax_temperature", DEFAULTS_EXTREMA_WEIGHTING["softmax_temperature"] ) if ( - not isinstance(weighting_softmax_temperature, (int, float)) - or not np.isfinite(weighting_softmax_temperature) - or weighting_softmax_temperature <= 0 + not isinstance(softmax_temperature, (int, float)) + or not np.isfinite(softmax_temperature) + or softmax_temperature <= 0 ): logger.warning( - f"Invalid extrema_weighting softmax_temperature {weighting_softmax_temperature!r}: must be a finite number > 0. Using default {DEFAULTS_EXTREMA_WEIGHTING['softmax_temperature']!r}" + f"Invalid extrema_weighting softmax_temperature {softmax_temperature!r}: must be a finite number > 0, using default {DEFAULTS_EXTREMA_WEIGHTING['softmax_temperature']!r}" ) - weighting_softmax_temperature = DEFAULTS_EXTREMA_WEIGHTING[ - "softmax_temperature" - ] + softmax_temperature = DEFAULTS_EXTREMA_WEIGHTING["softmax_temperature"] - weighting_rank_method = str( + rank_method = str( extrema_weighting.get( "rank_method", DEFAULTS_EXTREMA_WEIGHTING["rank_method"] ) ) - if weighting_rank_method not in set(RANK_METHODS): + if rank_method not in set(RANK_METHODS): logger.warning( - f"Invalid extrema_weighting rank_method {weighting_rank_method!r}. Supported: {', '.join(RANK_METHODS)}. Using default {RANK_METHODS[0]!r}" + f"Invalid extrema_weighting rank_method {rank_method!r}, supported: {', '.join(RANK_METHODS)}, using default {RANK_METHODS[0]!r}" ) - weighting_rank_method = RANK_METHODS[0] + rank_method = RANK_METHODS[0] # Phase 3: Post-processing - weighting_gamma = extrema_weighting.get( - "gamma", DEFAULTS_EXTREMA_WEIGHTING["gamma"] - ) + gamma = extrema_weighting.get("gamma", DEFAULTS_EXTREMA_WEIGHTING["gamma"]) if ( - not isinstance(weighting_gamma, (int, float)) - or not np.isfinite(weighting_gamma) - or not (0 < weighting_gamma <= 10.0) + not isinstance(gamma, (int, float)) + or not np.isfinite(gamma) + or not (0 < gamma <= 10.0) ): logger.warning( - f"Invalid extrema_weighting gamma {weighting_gamma!r}: must be in range (0, 10]. Using default {DEFAULTS_EXTREMA_WEIGHTING['gamma']!r}" + f"Invalid extrema_weighting gamma {gamma!r}: must be in range (0, 10], using default {DEFAULTS_EXTREMA_WEIGHTING['gamma']!r}" ) - weighting_gamma = DEFAULTS_EXTREMA_WEIGHTING["gamma"] + gamma = DEFAULTS_EXTREMA_WEIGHTING["gamma"] - weighting_source_weights = extrema_weighting.get( + source_weights = extrema_weighting.get( "source_weights", DEFAULTS_EXTREMA_WEIGHTING["source_weights"] ) - if not isinstance(weighting_source_weights, dict): + if not isinstance(source_weights, dict): logger.warning( - f"Invalid extrema_weighting source_weights {weighting_source_weights!r}: must be a dict of source name to weight. Using default {DEFAULTS_EXTREMA_WEIGHTING['source_weights']!r}" + f"Invalid extrema_weighting source_weights {source_weights!r}: must be a dict of source name to weight, using default {DEFAULTS_EXTREMA_WEIGHTING['source_weights']!r}" ) - weighting_source_weights = DEFAULTS_EXTREMA_WEIGHTING["source_weights"] + source_weights = DEFAULTS_EXTREMA_WEIGHTING["source_weights"] else: sanitized_source_weights: dict[str, float] = {} - for source, weight in weighting_source_weights.items(): - if source not in set(HYBRID_WEIGHT_SOURCES): + for source, weight in source_weights.items(): + if source not in set(WEIGHT_SOURCES): continue if ( not isinstance(weight, (int, float)) @@ -990,65 +983,63 @@ class QuickAdapterV3(IStrategy): sanitized_source_weights[str(source)] = float(weight) if not sanitized_source_weights: logger.warning( - f"Invalid extrema_weighting source_weights {weighting_source_weights!r}: empty after sanitization. Using default {DEFAULTS_EXTREMA_WEIGHTING['source_weights']!r}" + f"Invalid extrema_weighting source_weights {source_weights!r}: empty after sanitization, using default {DEFAULTS_EXTREMA_WEIGHTING['source_weights']!r}" ) - weighting_source_weights = DEFAULTS_EXTREMA_WEIGHTING["source_weights"] + source_weights = DEFAULTS_EXTREMA_WEIGHTING["source_weights"] else: - weighting_source_weights = sanitized_source_weights - weighting_aggregation = str( + source_weights = sanitized_source_weights + aggregation = str( extrema_weighting.get( "aggregation", DEFAULTS_EXTREMA_WEIGHTING["aggregation"], ) ) - if weighting_aggregation not in set(HYBRID_AGGREGATIONS): + if aggregation not in set(WEIGHT_AGGREGATIONS): logger.warning( - f"Invalid extrema_weighting aggregation {weighting_aggregation!r}. Supported: {', '.join(HYBRID_AGGREGATIONS)}. Using default {HYBRID_AGGREGATIONS[0]!r}" + f"Invalid extrema_weighting aggregation {aggregation!r}, supported: {', '.join(WEIGHT_AGGREGATIONS)}, using default {WEIGHT_AGGREGATIONS[0]!r}" ) - weighting_aggregation = DEFAULTS_EXTREMA_WEIGHTING["aggregation"] - weighting_aggregation_normalization = str( + aggregation = DEFAULTS_EXTREMA_WEIGHTING["aggregation"] + aggregation_normalization = str( extrema_weighting.get( "aggregation_normalization", DEFAULTS_EXTREMA_WEIGHTING["aggregation_normalization"], ) ) - if weighting_aggregation_normalization not in set(NORMALIZATION_TYPES): + if aggregation_normalization not in set(NORMALIZATION_TYPES): logger.warning( - f"Invalid extrema_weighting aggregation_normalization {weighting_aggregation_normalization!r}. Supported: {', '.join(NORMALIZATION_TYPES)}. Using default {NORMALIZATION_TYPES[6]!r}" + f"Invalid extrema_weighting aggregation_normalization {aggregation_normalization!r}, supported: {', '.join(NORMALIZATION_TYPES)}, using default {NORMALIZATION_TYPES[6]!r}" ) - weighting_aggregation_normalization = DEFAULTS_EXTREMA_WEIGHTING[ + aggregation_normalization = DEFAULTS_EXTREMA_WEIGHTING[ "aggregation_normalization" ] - if weighting_aggregation == HYBRID_AGGREGATIONS[ - 1 - ] and weighting_normalization in { + if aggregation == WEIGHT_AGGREGATIONS[1] and normalization in { NORMALIZATION_TYPES[0], # "minmax" NORMALIZATION_TYPES[5], # "rank" }: logger.warning( - f"extrema_weighting aggregation='{weighting_aggregation}' with normalization='{weighting_normalization}' " + f"extrema_weighting aggregation='{aggregation}' with normalization='{normalization}' " "can produce zero weights (gmean collapses to 0 when any source has min value). " - f"Consider using normalization='{NORMALIZATION_TYPES[1]}' (sigmoid) or aggregation='{HYBRID_AGGREGATIONS[0]}' (weighted_sum)." + f"Consider using normalization='{NORMALIZATION_TYPES[1]}' (sigmoid) or aggregation='{WEIGHT_AGGREGATIONS[0]}' (weighted_sum)." ) return { - "strategy": weighting_strategy, - "source_weights": weighting_source_weights, - "aggregation": weighting_aggregation, - "aggregation_normalization": weighting_aggregation_normalization, + "strategy": strategy, + "source_weights": source_weights, + "aggregation": aggregation, + "aggregation_normalization": aggregation_normalization, # Phase 1: Standardization - "standardization": weighting_standardization, - "robust_quantiles": weighting_robust_quantiles, - "mmad_scaling_factor": weighting_mmad_scaling_factor, + "standardization": standardization, + "robust_quantiles": robust_quantiles, + "mmad_scaling_factor": mmad_scaling_factor, # Phase 2: Normalization - "normalization": weighting_normalization, - "minmax_range": weighting_minmax_range, - "sigmoid_scale": weighting_sigmoid_scale, - "softmax_temperature": weighting_softmax_temperature, - "rank_method": weighting_rank_method, + "normalization": normalization, + "minmax_range": minmax_range, + "sigmoid_scale": sigmoid_scale, + "softmax_temperature": softmax_temperature, + "rank_method": rank_method, # Phase 3: Post-processing - "gamma": weighting_gamma, + "gamma": gamma, } @staticmethod @@ -1060,7 +1051,7 @@ class QuickAdapterV3(IStrategy): ) if smoothing_method not in set(SMOOTHING_METHODS): logger.warning( - f"Invalid extrema_smoothing method {smoothing_method!r}. Supported: {', '.join(SMOOTHING_METHODS)}. Using default {SMOOTHING_METHODS[0]!r}" + f"Invalid extrema_smoothing method {smoothing_method!r}, supported: {', '.join(SMOOTHING_METHODS)}, using default {SMOOTHING_METHODS[0]!r}" ) smoothing_method = SMOOTHING_METHODS[0] @@ -1078,7 +1069,7 @@ class QuickAdapterV3(IStrategy): or smoothing_window_candles < 3 ): logger.warning( - f"Invalid extrema_smoothing window_candles {smoothing_window_candles!r}: must be an integer >= 3. Using default {DEFAULTS_EXTREMA_SMOOTHING['window_candles']!r}" + f"Invalid extrema_smoothing window_candles {smoothing_window_candles!r}: must be an integer >= 3, using default {DEFAULTS_EXTREMA_SMOOTHING['window_candles']!r}" ) smoothing_window_candles = int(DEFAULTS_EXTREMA_SMOOTHING["window_candles"]) @@ -1091,7 +1082,7 @@ class QuickAdapterV3(IStrategy): or smoothing_beta <= 0 ): logger.warning( - f"Invalid extrema_smoothing beta {smoothing_beta!r}: must be a finite number > 0. Using default {DEFAULTS_EXTREMA_SMOOTHING['beta']!r}" + f"Invalid extrema_smoothing beta {smoothing_beta!r}: must be a finite number > 0, using default {DEFAULTS_EXTREMA_SMOOTHING['beta']!r}" ) smoothing_beta = DEFAULTS_EXTREMA_SMOOTHING["beta"] @@ -1100,7 +1091,7 @@ class QuickAdapterV3(IStrategy): ) if not isinstance(smoothing_polyorder, int) or smoothing_polyorder < 1: logger.warning( - f"Invalid extrema_smoothing polyorder {smoothing_polyorder!r}: must be an integer >= 1. Using default {DEFAULTS_EXTREMA_SMOOTHING['polyorder']!r}" + f"Invalid extrema_smoothing polyorder {smoothing_polyorder!r}: must be an integer >= 1, using default {DEFAULTS_EXTREMA_SMOOTHING['polyorder']!r}" ) smoothing_polyorder = DEFAULTS_EXTREMA_SMOOTHING["polyorder"] @@ -1109,7 +1100,7 @@ class QuickAdapterV3(IStrategy): ) if smoothing_mode not in set(SMOOTHING_MODES): logger.warning( - f"Invalid extrema_smoothing mode {smoothing_mode!r}. Supported: {', '.join(SMOOTHING_MODES)}. Using default {SMOOTHING_MODES[0]!r}" + f"Invalid extrema_smoothing mode {smoothing_mode!r}, supported: {', '.join(SMOOTHING_MODES)}, using default {SMOOTHING_MODES[0]!r}" ) smoothing_mode = SMOOTHING_MODES[0] @@ -1122,7 +1113,7 @@ class QuickAdapterV3(IStrategy): or not np.isfinite(smoothing_sigma) ): logger.warning( - f"Invalid extrema_smoothing sigma {smoothing_sigma!r}: must be a finite number > 0. Using default {DEFAULTS_EXTREMA_SMOOTHING['sigma']!r}" + f"Invalid extrema_smoothing sigma {smoothing_sigma!r}: must be a finite number > 0, using default {DEFAULTS_EXTREMA_SMOOTHING['sigma']!r}" ) smoothing_sigma = DEFAULTS_EXTREMA_SMOOTHING["sigma"] @@ -1150,7 +1141,9 @@ class QuickAdapterV3(IStrategy): try: return pattern.format(**duration) except (KeyError, ValueError) as e: - raise ValueError(f"Invalid pattern {pattern!r}: {e!r}") + raise ValueError( + f"Invalid pattern {pattern!r}: failed to format with {e!r}" + ) def set_freqai_targets( self, dataframe: DataFrame, metadata: dict[str, Any], **kwargs @@ -1573,7 +1566,7 @@ class QuickAdapterV3(IStrategy): callback: Callable[[], None], ) -> None: if not callable(callback): - raise ValueError("Invalid callback: must be callable") + raise ValueError(f"Invalid callback {callback!r}: must be callable") timestamp = int(current_time.timestamp()) candle_duration_secs = max(1, int(self._candle_duration_secs)) candle_start_secs = (timestamp // candle_duration_secs) * candle_duration_secs @@ -1976,7 +1969,7 @@ class QuickAdapterV3(IStrategy): candle_threshold = base_price * (1 - current_deviation) else: raise ValueError( - f"Invalid side {side!r}. Supported: {', '.join(QuickAdapterV3._TRADE_DIRECTIONS)}" + f"Invalid side {side!r}, supported: {', '.join(QuickAdapterV3._TRADE_DIRECTIONS)}" ) self._candle_threshold_cache[cache_key] = candle_threshold return self._candle_threshold_cache[cache_key] diff --git a/quickadapter/user_data/strategies/Utils.py b/quickadapter/user_data/strategies/Utils.py index e38a667..28b41e9 100644 --- a/quickadapter/user_data/strategies/Utils.py +++ b/quickadapter/user_data/strategies/Utils.py @@ -55,7 +55,7 @@ WEIGHT_STRATEGIES: Final[tuple[WeightStrategy, ...]] = ( "hybrid", ) -HybridWeightSource = Literal[ +WeightSource = Literal[ "amplitude", "amplitude_threshold_ratio", "volume_rate", @@ -63,7 +63,7 @@ HybridWeightSource = Literal[ "efficiency_ratio", "volume_weighted_efficiency_ratio", ] -HYBRID_WEIGHT_SOURCES: Final[tuple[HybridWeightSource, ...]] = ( +WEIGHT_SOURCES: Final[tuple[WeightSource, ...]] = ( "amplitude", "amplitude_threshold_ratio", "volume_rate", @@ -72,8 +72,8 @@ HYBRID_WEIGHT_SOURCES: Final[tuple[HybridWeightSource, ...]] = ( "volume_weighted_efficiency_ratio", ) -HybridAggregation = Literal["weighted_sum", "geometric_mean"] -HYBRID_AGGREGATIONS: Final[tuple[HybridAggregation, ...]] = ( +WeightAggregation = Literal["weighted_sum", "geometric_mean"] +WEIGHT_AGGREGATIONS: Final[tuple[WeightAggregation, ...]] = ( "weighted_sum", "geometric_mean", ) @@ -111,6 +111,11 @@ RANK_METHODS: Final[tuple[RankMethod, ...]] = ( ) SmoothingKernel = Literal["gaussian", "kaiser", "triang"] +SMOOTHING_KERNELS: Final[tuple[SmoothingKernel, ...]] = ( + "gaussian", + "kaiser", + "triang", +) SmoothingMethod = Union[ SmoothingKernel, Literal["smm", "sma", "savgol", "gaussian_filter1d"] ] @@ -154,8 +159,8 @@ DEFAULTS_EXTREMA_SMOOTHING: Final[dict[str, Any]] = { DEFAULTS_EXTREMA_WEIGHTING: Final[dict[str, Any]] = { "strategy": WEIGHT_STRATEGIES[0], # "none" - "source_weights": {s: 1.0 for s in HYBRID_WEIGHT_SOURCES}, - "aggregation": HYBRID_AGGREGATIONS[0], # "weighted_sum" + "source_weights": {s: 1.0 for s in WEIGHT_SOURCES}, + "aggregation": WEIGHT_AGGREGATIONS[0], # "weighted_sum" "aggregation_normalization": NORMALIZATION_TYPES[6], # "none" # Phase 1: Standardization "standardization": STANDARDIZATION_TYPES[0], # "none" @@ -248,7 +253,7 @@ def _calculate_coeffs( else: raise ValueError( f"Invalid window type {win_type!r}. " - f"Supported: {', '.join(SMOOTHING_METHODS[:3])}" + f"Supported: {', '.join(SMOOTHING_KERNELS)}" ) return coeffs / np.sum(coeffs) @@ -279,7 +284,7 @@ def zero_phase_filter( def smooth_extrema( series: pd.Series, method: SmoothingMethod = DEFAULTS_EXTREMA_SMOOTHING["method"], - window: int = DEFAULTS_EXTREMA_SMOOTHING["window_candles"], + window_candles: int = DEFAULTS_EXTREMA_SMOOTHING["window_candles"], beta: float = DEFAULTS_EXTREMA_SMOOTHING["beta"], polyorder: int = DEFAULTS_EXTREMA_SMOOTHING["polyorder"], mode: SmoothingMode = DEFAULTS_EXTREMA_SMOOTHING["mode"], @@ -288,14 +293,15 @@ def smooth_extrema( n = len(series) if n == 0: return series - if window < 3: - window = 3 - if n < window: + + if window_candles < 3: + window_candles = 3 + if n < window_candles: return series if beta <= 0 or not np.isfinite(beta): beta = 1.0 - odd_window = get_odd_window(window) + odd_window = get_odd_window(window_candles) std = get_gaussian_std(odd_window) if method == SMOOTHING_METHODS[0]: # "gaussian" @@ -673,7 +679,7 @@ def _build_weights_array( if len(indices) != weights.size: raise ValueError( - f"Invalid indices/weights: length mismatch ({len(indices)} indices but {weights.size} weights)" + f"Invalid indices/weights: length mismatch, got {len(indices)} indices but {weights.size} weights" ) weights_array = np.full(n_extrema, default_weight, dtype=float) @@ -698,7 +704,7 @@ def calculate_hybrid_extrema_weights( efficiency_ratios: list[float], volume_weighted_efficiency_ratios: list[float], source_weights: dict[str, float], - aggregation: HybridAggregation = DEFAULTS_EXTREMA_WEIGHTING["aggregation"], + aggregation: WeightAggregation = DEFAULTS_EXTREMA_WEIGHTING["aggregation"], aggregation_normalization: NormalizationType = DEFAULTS_EXTREMA_WEIGHTING[ "aggregation_normalization" ], @@ -726,7 +732,7 @@ def calculate_hybrid_extrema_weights( if not isinstance(source_weights, dict): source_weights = {} - weights_array_by_source: dict[HybridWeightSource, NDArray[np.floating]] = { + weights_array_by_source: dict[WeightSource, NDArray[np.floating]] = { "amplitude": np.asarray(amplitudes, dtype=float), "amplitude_threshold_ratio": np.asarray( amplitude_threshold_ratios, dtype=float @@ -739,9 +745,9 @@ def calculate_hybrid_extrema_weights( ), } - enabled_sources: list[HybridWeightSource] = [] + enabled_sources: list[WeightSource] = [] source_weights_list: list[float] = [] - for source in HYBRID_WEIGHT_SOURCES: + for source in WEIGHT_SOURCES: source_weight = source_weights.get(source) if source_weight is None: continue @@ -755,12 +761,12 @@ def calculate_hybrid_extrema_weights( source_weights_list.append(float(source_weight)) if len(enabled_sources) == 0: - enabled_sources = list(HYBRID_WEIGHT_SOURCES) + enabled_sources = list(WEIGHT_SOURCES) source_weights_list = [1.0 for _ in enabled_sources] if any(weights_array_by_source[s].size != n for s in enabled_sources): raise ValueError( - f"Invalid hybrid weights: length mismatch ({n} indices but inconsistent weights lengths)" + f"Invalid hybrid weights: length mismatch, got {n} indices but inconsistent weights lengths" ) source_weights_array: NDArray[np.floating] = np.asarray( @@ -788,13 +794,13 @@ def calculate_hybrid_extrema_weights( ) normalized_source_weights_array.append(normalized_source_weights) - if aggregation == HYBRID_AGGREGATIONS[0]: # "weighted_sum" + if aggregation == WEIGHT_AGGREGATIONS[0]: # "weighted_sum" combined_source_weights_array: NDArray[np.floating] = np.average( np.vstack(normalized_source_weights_array), axis=0, weights=source_weights_array, ) - elif aggregation == HYBRID_AGGREGATIONS[1]: # "geometric_mean" + elif aggregation == WEIGHT_AGGREGATIONS[1]: # "geometric_mean" combined_source_weights_array: NDArray[np.floating] = gmean( np.vstack([np.abs(values) for values in normalized_source_weights_array]), axis=0, @@ -803,7 +809,7 @@ def calculate_hybrid_extrema_weights( else: raise ValueError( f"Invalid hybrid aggregation method {aggregation!r}. " - f"Supported: {', '.join(HYBRID_AGGREGATIONS)}" + f"Supported: {', '.join(WEIGHT_AGGREGATIONS)}" ) if aggregation_normalization != NORMALIZATION_TYPES[6]: # "none" @@ -879,7 +885,7 @@ def compute_extrema_weights( volume_weighted_efficiency_ratios: list[float], source_weights: dict[str, float], strategy: WeightStrategy = DEFAULTS_EXTREMA_WEIGHTING["strategy"], - aggregation: HybridAggregation = DEFAULTS_EXTREMA_WEIGHTING["aggregation"], + aggregation: WeightAggregation = DEFAULTS_EXTREMA_WEIGHTING["aggregation"], aggregation_normalization: NormalizationType = DEFAULTS_EXTREMA_WEIGHTING[ "aggregation_normalization" ], @@ -1017,7 +1023,7 @@ def get_weighted_extrema( volume_weighted_efficiency_ratios: list[float], source_weights: dict[str, float], strategy: WeightStrategy = DEFAULTS_EXTREMA_WEIGHTING["strategy"], - aggregation: HybridAggregation = DEFAULTS_EXTREMA_WEIGHTING["aggregation"], + aggregation: WeightAggregation = DEFAULTS_EXTREMA_WEIGHTING["aggregation"], aggregation_normalization: NormalizationType = DEFAULTS_EXTREMA_WEIGHTING[ "aggregation_normalization" ], @@ -1073,7 +1079,7 @@ def get_weighted_extrema( def get_callable_sha256(fn: Callable[..., Any]) -> str: if not callable(fn): - raise ValueError("Invalid fn: must be callable") + raise ValueError(f"Invalid fn {type(fn).__name__!r}: must be callable") code = getattr(fn, "__code__", None) if code is None and isinstance(fn, functools.partial): fn = fn.func @@ -1085,7 +1091,9 @@ def get_callable_sha256(fn: Callable[..., Any]) -> str: if code is None and hasattr(fn, "__call__"): code = getattr(fn.__call__, "__code__", None) if code is None: - raise ValueError("Invalid fn: unable to retrieve code object") + raise ValueError( + f"Invalid fn: unable to retrieve code object, got {type(fn).__name__!r}" + ) return hashlib.sha256(code.co_code).hexdigest() @@ -2168,7 +2176,7 @@ def get_optuna_study_model_parameters( 0.0 <= space_fraction <= 1.0 ): raise ValueError( - f"Invalid space_fraction {space_fraction!r}: must be in range [0, 1]" + f"Invalid space_fraction: must be in range [0, 1], got {space_fraction!r}" ) def _build_ranges( @@ -2576,8 +2584,8 @@ def get_min_max_label_period_candles( ) -> tuple[int, int, int]: if min_label_period_candles > max_label_period_candles: raise ValueError( - f"Invalid label_period_candles range: min ({min_label_period_candles}) " - f"must be <= max ({max_label_period_candles})" + f"Invalid label_period_candles range: min must be <= max, " + f"got min={min_label_period_candles!r}, max={max_label_period_candles!r}" ) capped_period_candles = max(1, floor_to_step(max_period_candles, candles_step)) @@ -2716,9 +2724,15 @@ def validate_range( if not isinstance(default_min, (int, float)) or not isinstance( default_max, (int, float) ): - raise ValueError(f"Invalid {name}: defaults must be numeric") + raise ValueError( + f"Invalid {name}: defaults must be numeric, " + f"got min={type(default_min).__name__!r}, max={type(default_max).__name__!r}" + ) if default_min > default_max or (not allow_equal and default_min == default_max): - raise ValueError(f"Invalid {name}: defaults ordering must have min < max") + raise ValueError( + f"Invalid {name}: defaults ordering must have min < max, " + f"got min={default_min!r}, max={default_max!r}" + ) def _validate_component( value: float | int | None, name: str, default_value: float | int @@ -2737,7 +2751,7 @@ def validate_range( or (non_negative and value < 0) ): logger.warning( - f"Invalid {name} {value!r}: must be {constraint_str}. Using default {default_value!r}" + f"Invalid {name} {value!r}: must be {constraint_str}, using default {default_value!r}" ) return default_value return value @@ -2752,7 +2766,9 @@ def validate_range( ) if not ordering_ok: logger.warning( - f"Invalid {name} ordering ({min_name}={sanitized_min!r}, {max_name}={sanitized_max!r}), must have {min_name} < {max_name}, using defaults ({default_min!r}, {default_max!r})" + f"Invalid {name} ordering: must have {min_name} < {max_name}, " + f"got {min_name}={sanitized_min!r}, {max_name}={sanitized_max!r}, " + f"using defaults {default_min!r}, {default_max!r}" ) sanitized_min, sanitized_max = default_min, default_max -- 2.43.0