Piment Noir Git Repositories - freqai-strategies.git/commitdiff
feat(ReforceXY): tune reward sensitivity and extend training period
author Jérôme Benoit <jerome.benoit@piment-noir.org>
Thu, 12 Feb 2026 14:17:10 +0000 (15:17 +0100)
committer Jérôme Benoit <jerome.benoit@piment-noir.org>
Thu, 12 Feb 2026 14:17:10 +0000 (15:17 +0100)
- Increase pnl_amplification_sensitivity from 0.5 to 2.0 for stronger
  reward signal differentiation
- Extend train_period_days from 60 to 120 for more training data

ReforceXY/reward_space_analysis/README.md
ReforceXY/reward_space_analysis/reward_space_analysis.py
ReforceXY/user_data/config-template.json
ReforceXY/user_data/freqaimodels/ReforceXY.py

index ce75865c914d643a2d4b9ca75ca2a0a3c32dc4bd..1f5956c25a00fdce2a488774f805f242cb4b60a3 100644 (file)
@@ -244,7 +244,7 @@ The exit factor is computed as:
 | `profit_aim`                    | 0.03    | Profit target threshold       |
 | `risk_reward_ratio`             | 2.0     | Risk/reward multiplier        |
 | `win_reward_factor`             | 2.0     | Profit target bonus factor    |
-| `pnl_amplification_sensitivity` | 0.5     | PnL amplification sensitivity |
+| `pnl_amplification_sensitivity` | 2.0     | PnL amplification sensitivity |
 
 **Note:** In ReforceXY, `risk_reward_ratio` maps to `rr`.
 
index bef107468116fdc5ed099e5ddb6e820f2088ac56..4cfa3e23e92dafc72d9a2235ea27feae9ec53b22 100644 (file)
@@ -151,7 +151,7 @@ DEFAULT_MODEL_REWARD_PARAMETERS: RewardParams = {
     "efficiency_center": 0.5,
     # Profit factor defaults
     "win_reward_factor": 2.0,
-    "pnl_amplification_sensitivity": 0.5,
+    "pnl_amplification_sensitivity": 2.0,
     # Invariant / safety defaults
     "check_invariants": True,
     "exit_factor_threshold": 1000.0,
index 5ca9d048e57ded13edd00888ad47bc3e03bef1b4..545645e106d62b04c87df029d68017690504407f 100644 (file)
     "conv_width": 1,
     "purge_old_models": 2,
     "expiration_hours": 48,
-    "train_period_days": 60,
+    "train_period_days": 120,
     // "live_retrain_hours": 0.5,
     "backtest_period_days": 2,
     "write_metrics_to_disk": false,
index 174a78e1694d63635afe61d9464563f42db924c3..ed05752db7bbc00ce3b1a7aabdf27b943793f9c8 100644 (file)
@@ -172,7 +172,7 @@ class ReforceXY(BaseReinforcementLearningModel):
     DEFAULT_EXIT_LINEAR_SLOPE: Final[float] = 1.0
     DEFAULT_EXIT_HALF_LIFE: Final[float] = 0.5
 
-    DEFAULT_PNL_AMPLIFICATION_SENSITIVITY: Final[float] = 0.5
+    DEFAULT_PNL_AMPLIFICATION_SENSITIVITY: Final[float] = 2.0
     DEFAULT_WIN_REWARD_FACTOR: Final[float] = 2.0
     DEFAULT_EFFICIENCY_WEIGHT: Final[float] = 1.0
     DEFAULT_EFFICIENCY_CENTER: Final[float] = 0.5