From 2068ed26b3092ce3ca5ed8f56a272ede8e961450 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Thu, 12 Feb 2026 15:17:10 +0100 Subject: [PATCH] feat(ReforceXY): tune reward sensitivity and extend training period - Increase pnl_amplification_sensitivity from 0.5 to 2.0 for stronger reward signal differentiation - Extend train_period_days from 60 to 120 for more training data --- ReforceXY/reward_space_analysis/README.md | 2 +- ReforceXY/reward_space_analysis/reward_space_analysis.py | 2 +- ReforceXY/user_data/config-template.json | 2 +- ReforceXY/user_data/freqaimodels/ReforceXY.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md index ce75865..1f5956c 100644 --- a/ReforceXY/reward_space_analysis/README.md +++ b/ReforceXY/reward_space_analysis/README.md @@ -244,7 +244,7 @@ The exit factor is computed as: | `profit_aim` | 0.03 | Profit target threshold | | `risk_reward_ratio` | 2.0 | Risk/reward multiplier | | `win_reward_factor` | 2.0 | Profit target bonus factor | -| `pnl_amplification_sensitivity` | 0.5 | PnL amplification sensitivity | +| `pnl_amplification_sensitivity` | 2.0 | PnL amplification sensitivity | **Note:** In ReforceXY, `risk_reward_ratio` maps to `rr`. diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py index bef1074..4cfa3e2 100644 --- a/ReforceXY/reward_space_analysis/reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py @@ -151,7 +151,7 @@ DEFAULT_MODEL_REWARD_PARAMETERS: RewardParams = { "efficiency_center": 0.5, # Profit factor defaults "win_reward_factor": 2.0, - "pnl_amplification_sensitivity": 0.5, + "pnl_amplification_sensitivity": 2.0, # Invariant / safety defaults "check_invariants": True, "exit_factor_threshold": 1000.0, diff --git a/ReforceXY/user_data/config-template.json b/ReforceXY/user_data/config-template.json index 5ca9d04..545645e 100644 --- a/ReforceXY/user_data/config-template.json +++ b/ReforceXY/user_data/config-template.json @@ -111,7 +111,7 @@ "conv_width": 1, "purge_old_models": 2, "expiration_hours": 48, - "train_period_days": 60, + "train_period_days": 120, // "live_retrain_hours": 0.5, "backtest_period_days": 2, "write_metrics_to_disk": false, diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index 174a78e..ed05752 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -172,7 +172,7 @@ class ReforceXY(BaseReinforcementLearningModel): DEFAULT_EXIT_LINEAR_SLOPE: Final[float] = 1.0 DEFAULT_EXIT_HALF_LIFE: Final[float] = 0.5 - DEFAULT_PNL_AMPLIFICATION_SENSITIVITY: Final[float] = 0.5 + DEFAULT_PNL_AMPLIFICATION_SENSITIVITY: Final[float] = 2.0 DEFAULT_WIN_REWARD_FACTOR: Final[float] = 2.0 DEFAULT_EFFICIENCY_WEIGHT: Final[float] = 1.0 DEFAULT_EFFICIENCY_CENTER: Final[float] = 0.5 -- 2.53.0