From 069e60cb56b69e8295a3711541551e7fe3f25b0d Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Tue, 30 Dec 2025 19:20:32 +0100 Subject: [PATCH] fix(ReforceXY): reduce PBRS defaults to prevent reward exploitation Disable hold potential by default and reduce additive ratios to prevent the agent from exploiting shaping rewards with many short losing trades. Changes: - hold_potential_enabled: true -> false (disabled by default) - hold_potential_ratio: 0.03125 -> 0.001 (reduced when enabled) - entry_additive_ratio: 0.125 -> 0.0625 (halved) - exit_additive_ratio: 0.125 -> 0.0625 (halved) These conservative defaults encourage the agent to focus on actual PnL rather than gaming intermediate shaping rewards. --- ReforceXY/reward_space_analysis/README.md | 8 ++++---- ReforceXY/reward_space_analysis/reward_space_analysis.py | 8 ++++---- ReforceXY/reward_space_analysis/tests/constants.py | 8 ++++---- ReforceXY/user_data/freqaimodels/ReforceXY.py | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md index e8d625c..82aa3b6 100644 --- a/ReforceXY/reward_space_analysis/README.md +++ b/ReforceXY/reward_space_analysis/README.md @@ -317,7 +317,7 @@ where `kernel_function` depends on `exit_attenuation_mode`. See [Exit Attenuatio | `potential_gamma` | 0.95 | Discount factor γ for potential Φ | | `exit_potential_mode` | canonical | Potential release mode | | `exit_potential_decay` | 0.5 | Decay for progressive_release | -| `hold_potential_enabled` | true | Enable hold potential Φ | +| `hold_potential_enabled` | false | Enable hold potential Φ | | `entry_fee_rate` | 0.0 | Entry fee rate (`price · (1 + fee)`) | | `exit_fee_rate` | 0.0 | Exit fee rate (`price / (1 + fee)`) | @@ -334,7 +334,7 @@ across samples) and does not apply any drift correction in post-processing. | Parameter | Default | Description | | ----------------------------------- | ------- | -------------------- | -| `hold_potential_ratio` | 0.03125 | Hold potential ratio | +| `hold_potential_ratio` | 0.001 | Hold potential ratio | | `hold_potential_gain` | 1.0 | Gain multiplier | | `hold_potential_transform_pnl` | tanh | PnL transform | | `hold_potential_transform_duration` | tanh | Duration transform | @@ -367,7 +367,7 @@ losses compared to symmetric treatment. | Parameter | Default | Description | | ----------------------------------- | ------- | --------------------- | | `entry_additive_enabled` | false | Enable entry additive | -| `entry_additive_ratio` | 0.125 | Ratio | +| `entry_additive_ratio` | 0.0625 | Ratio | | `entry_additive_gain` | 1.0 | Gain | | `entry_additive_transform_pnl` | tanh | PnL transform | | `entry_additive_transform_duration` | tanh | Duration transform | @@ -377,7 +377,7 @@ losses compared to symmetric treatment. | Parameter | Default | Description | | ---------------------------------- | ------- | -------------------- | | `exit_additive_enabled` | false | Enable exit additive | -| `exit_additive_ratio` | 0.125 | Ratio | +| `exit_additive_ratio` | 0.0625 | Ratio | | `exit_additive_gain` | 1.0 | Gain | | `exit_additive_transform_pnl` | tanh | PnL transform | | `exit_additive_transform_duration` | tanh | Duration transform | diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py index fcb2539..7a4e537 100644 --- a/ReforceXY/reward_space_analysis/reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py @@ -163,14 +163,14 @@ DEFAULT_MODEL_REWARD_PARAMETERS: RewardParams = { "exit_potential_mode": "canonical", "exit_potential_decay": 0.5, # Hold potential (PBRS function Φ) - "hold_potential_enabled": True, - "hold_potential_ratio": 0.03125, + "hold_potential_enabled": False, + "hold_potential_ratio": 0.001, "hold_potential_gain": 1.0, "hold_potential_transform_pnl": "tanh", "hold_potential_transform_duration": "tanh", # Entry additive (non-PBRS additive term) "entry_additive_enabled": False, - "entry_additive_ratio": 0.125, + "entry_additive_ratio": 0.0625, "entry_additive_gain": 1.0, "entry_additive_transform_pnl": "tanh", "entry_additive_transform_duration": "tanh", @@ -178,7 +178,7 @@ DEFAULT_MODEL_REWARD_PARAMETERS: RewardParams = { "exit_fee_rate": 0.0, # Exit additive (non-PBRS additive term) "exit_additive_enabled": False, - "exit_additive_ratio": 0.125, + "exit_additive_ratio": 0.0625, "exit_additive_gain": 1.0, "exit_additive_transform_pnl": "tanh", "exit_additive_transform_duration": "tanh", diff --git a/ReforceXY/reward_space_analysis/tests/constants.py b/ReforceXY/reward_space_analysis/tests/constants.py index 887aafe..b5a71ca 100644 --- a/ReforceXY/reward_space_analysis/tests/constants.py +++ b/ReforceXY/reward_space_analysis/tests/constants.py @@ -281,11 +281,11 @@ class TestParameters: MAX_TRADE_DURATION_HETEROSCEDASTICITY: Max trade duration used for heteroscedasticity tests (10) # Common additive parameters - ADDITIVE_RATIO_DEFAULT: Default additive ratio (0.125) + ADDITIVE_RATIO_DEFAULT: Default additive ratio (0.0625) ADDITIVE_GAIN_DEFAULT: Default additive gain (1.0) # PBRS hold potential parameters - HOLD_POTENTIAL_RATIO_DEFAULT: Default hold potential ratio (0.03125) + HOLD_POTENTIAL_RATIO_DEFAULT: Default hold potential ratio (0.001) """ BASE_FACTOR: float = 90.0 @@ -311,11 +311,11 @@ class TestParameters: MAX_TRADE_DURATION_HETEROSCEDASTICITY: int = 10 # Additive parameters - ADDITIVE_RATIO_DEFAULT: float = 0.125 + ADDITIVE_RATIO_DEFAULT: float = 0.0625 ADDITIVE_GAIN_DEFAULT: float = 1.0 # PBRS hold potential parameters - HOLD_POTENTIAL_RATIO_DEFAULT: float = 0.03125 + HOLD_POTENTIAL_RATIO_DEFAULT: float = 0.001 @dataclass(frozen=True) diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index c3022d8..f8ec4f3 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -158,13 +158,13 @@ class ReforceXY(BaseReinforcementLearningModel): DEFAULT_EXIT_POTENTIAL_DECAY: Final[float] = 0.5 DEFAULT_ENTRY_ADDITIVE_ENABLED: Final[bool] = False - DEFAULT_ENTRY_ADDITIVE_RATIO: Final[float] = 0.125 + DEFAULT_ENTRY_ADDITIVE_RATIO: Final[float] = 0.0625 DEFAULT_ENTRY_ADDITIVE_GAIN: Final[float] = 1.0 - DEFAULT_HOLD_POTENTIAL_ENABLED: Final[bool] = True - DEFAULT_HOLD_POTENTIAL_RATIO: Final[float] = 0.03125 + DEFAULT_HOLD_POTENTIAL_ENABLED: Final[bool] = False + DEFAULT_HOLD_POTENTIAL_RATIO: Final[float] = 0.001 DEFAULT_HOLD_POTENTIAL_GAIN: Final[float] = 1.0 DEFAULT_EXIT_ADDITIVE_ENABLED: Final[bool] = False - DEFAULT_EXIT_ADDITIVE_RATIO: Final[float] = 0.125 + DEFAULT_EXIT_ADDITIVE_RATIO: Final[float] = 0.0625 DEFAULT_EXIT_ADDITIVE_GAIN: Final[float] = 1.0 DEFAULT_EXIT_PLATEAU: Final[bool] = True -- 2.43.0