From 069e60cb56b69e8295a3711541551e7fe3f25b0d Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@piment-noir.org>
Date: Tue, 30 Dec 2025 19:20:32 +0100
Subject: [PATCH] fix(ReforceXY): reduce PBRS defaults to prevent reward
 exploitation

Disable hold potential by default and lower the hold potential and
additive ratios to prevent the agent from exploiting shaping rewards
with many short losing trades.

Changes:
- hold_potential_enabled: true -> false (disabled by default)
- hold_potential_ratio: 0.03125 -> 0.001 (reduced when enabled)
- entry_additive_ratio: 0.125 -> 0.0625 (halved)
- exit_additive_ratio: 0.125 -> 0.0625 (halved)

These conservative defaults encourage the agent to focus on actual PnL
rather than gaming intermediate shaping rewards.
---
 ReforceXY/reward_space_analysis/README.md                | 8 ++++----
 ReforceXY/reward_space_analysis/reward_space_analysis.py | 8 ++++----
 ReforceXY/reward_space_analysis/tests/constants.py       | 8 ++++----
 ReforceXY/user_data/freqaimodels/ReforceXY.py            | 8 ++++----
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md
index e8d625c..82aa3b6 100644
--- a/ReforceXY/reward_space_analysis/README.md
+++ b/ReforceXY/reward_space_analysis/README.md
@@ -317,7 +317,7 @@ where `kernel_function` depends on `exit_attenuation_mode`. See [Exit Attenuatio
 | `potential_gamma`        | 0.95      | Discount factor γ for potential Φ    |
 | `exit_potential_mode`    | canonical | Potential release mode               |
 | `exit_potential_decay`   | 0.5       | Decay for progressive_release        |
-| `hold_potential_enabled` | true      | Enable hold potential Φ              |
+| `hold_potential_enabled` | false     | Enable hold potential Φ              |
 | `entry_fee_rate`         | 0.0       | Entry fee rate (`price · (1 + fee)`) |
 | `exit_fee_rate`          | 0.0       | Exit fee rate (`price / (1 + fee)`)  |
 
@@ -334,7 +334,7 @@ across samples) and does not apply any drift correction in post-processing.
 
 | Parameter                           | Default | Description          |
 | ----------------------------------- | ------- | -------------------- |
-| `hold_potential_ratio`              | 0.03125 | Hold potential ratio |
+| `hold_potential_ratio`              | 0.001   | Hold potential ratio |
 | `hold_potential_gain`               | 1.0     | Gain multiplier      |
 | `hold_potential_transform_pnl`      | tanh    | PnL transform        |
 | `hold_potential_transform_duration` | tanh    | Duration transform   |
@@ -367,7 +367,7 @@ losses compared to symmetric treatment.
 | Parameter                           | Default | Description           |
 | ----------------------------------- | ------- | --------------------- |
 | `entry_additive_enabled`            | false   | Enable entry additive |
-| `entry_additive_ratio`              | 0.125   | Ratio                 |
+| `entry_additive_ratio`              | 0.0625  | Ratio                 |
 | `entry_additive_gain`               | 1.0     | Gain                  |
 | `entry_additive_transform_pnl`      | tanh    | PnL transform         |
 | `entry_additive_transform_duration` | tanh    | Duration transform    |
@@ -377,7 +377,7 @@ losses compared to symmetric treatment.
 | Parameter                          | Default | Description          |
 | ---------------------------------- | ------- | -------------------- |
 | `exit_additive_enabled`            | false   | Enable exit additive |
-| `exit_additive_ratio`              | 0.125   | Ratio                |
+| `exit_additive_ratio`              | 0.0625  | Ratio                |
 | `exit_additive_gain`               | 1.0     | Gain                 |
 | `exit_additive_transform_pnl`      | tanh    | PnL transform        |
 | `exit_additive_transform_duration` | tanh    | Duration transform   |
diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py
index fcb2539..7a4e537 100644
--- a/ReforceXY/reward_space_analysis/reward_space_analysis.py
+++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py
@@ -163,14 +163,14 @@ DEFAULT_MODEL_REWARD_PARAMETERS: RewardParams = {
     "exit_potential_mode": "canonical",
     "exit_potential_decay": 0.5,
     # Hold potential (PBRS function Φ)
-    "hold_potential_enabled": True,
-    "hold_potential_ratio": 0.03125,
+    "hold_potential_enabled": False,
+    "hold_potential_ratio": 0.001,
     "hold_potential_gain": 1.0,
     "hold_potential_transform_pnl": "tanh",
     "hold_potential_transform_duration": "tanh",
     # Entry additive (non-PBRS additive term)
     "entry_additive_enabled": False,
-    "entry_additive_ratio": 0.125,
+    "entry_additive_ratio": 0.0625,
     "entry_additive_gain": 1.0,
     "entry_additive_transform_pnl": "tanh",
     "entry_additive_transform_duration": "tanh",
@@ -178,7 +178,7 @@ DEFAULT_MODEL_REWARD_PARAMETERS: RewardParams = {
     "exit_fee_rate": 0.0,
     # Exit additive (non-PBRS additive term)
     "exit_additive_enabled": False,
-    "exit_additive_ratio": 0.125,
+    "exit_additive_ratio": 0.0625,
     "exit_additive_gain": 1.0,
     "exit_additive_transform_pnl": "tanh",
     "exit_additive_transform_duration": "tanh",
diff --git a/ReforceXY/reward_space_analysis/tests/constants.py b/ReforceXY/reward_space_analysis/tests/constants.py
index 887aafe..b5a71ca 100644
--- a/ReforceXY/reward_space_analysis/tests/constants.py
+++ b/ReforceXY/reward_space_analysis/tests/constants.py
@@ -281,11 +281,11 @@ class TestParameters:
         MAX_TRADE_DURATION_HETEROSCEDASTICITY: Max trade duration used for heteroscedasticity tests (10)
 
         # Common additive parameters
-        ADDITIVE_RATIO_DEFAULT: Default additive ratio (0.125)
+        ADDITIVE_RATIO_DEFAULT: Default additive ratio (0.0625)
         ADDITIVE_GAIN_DEFAULT: Default additive gain (1.0)
 
         # PBRS hold potential parameters
-        HOLD_POTENTIAL_RATIO_DEFAULT: Default hold potential ratio (0.03125)
+        HOLD_POTENTIAL_RATIO_DEFAULT: Default hold potential ratio (0.001)
     """
 
     BASE_FACTOR: float = 90.0
@@ -311,11 +311,11 @@ class TestParameters:
     MAX_TRADE_DURATION_HETEROSCEDASTICITY: int = 10
 
     # Additive parameters
-    ADDITIVE_RATIO_DEFAULT: float = 0.125
+    ADDITIVE_RATIO_DEFAULT: float = 0.0625
     ADDITIVE_GAIN_DEFAULT: float = 1.0
 
     # PBRS hold potential parameters
-    HOLD_POTENTIAL_RATIO_DEFAULT: float = 0.03125
+    HOLD_POTENTIAL_RATIO_DEFAULT: float = 0.001
 
 
 @dataclass(frozen=True)
diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py
index c3022d8..f8ec4f3 100644
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -158,13 +158,13 @@ class ReforceXY(BaseReinforcementLearningModel):
 
     DEFAULT_EXIT_POTENTIAL_DECAY: Final[float] = 0.5
     DEFAULT_ENTRY_ADDITIVE_ENABLED: Final[bool] = False
-    DEFAULT_ENTRY_ADDITIVE_RATIO: Final[float] = 0.125
+    DEFAULT_ENTRY_ADDITIVE_RATIO: Final[float] = 0.0625
     DEFAULT_ENTRY_ADDITIVE_GAIN: Final[float] = 1.0
-    DEFAULT_HOLD_POTENTIAL_ENABLED: Final[bool] = True
-    DEFAULT_HOLD_POTENTIAL_RATIO: Final[float] = 0.03125
+    DEFAULT_HOLD_POTENTIAL_ENABLED: Final[bool] = False
+    DEFAULT_HOLD_POTENTIAL_RATIO: Final[float] = 0.001
     DEFAULT_HOLD_POTENTIAL_GAIN: Final[float] = 1.0
     DEFAULT_EXIT_ADDITIVE_ENABLED: Final[bool] = False
-    DEFAULT_EXIT_ADDITIVE_RATIO: Final[float] = 0.125
+    DEFAULT_EXIT_ADDITIVE_RATIO: Final[float] = 0.0625
     DEFAULT_EXIT_ADDITIVE_GAIN: Final[float] = 1.0
 
     DEFAULT_EXIT_PLATEAU: Final[bool] = True
-- 
2.43.0