From: Jérôme Benoit <jerome.benoit@piment-noir.org>
Date: Mon, 22 Dec 2025 12:37:22 +0000 (+0100)
Subject: fix(ReforceXY): fix off-by-one in PBRS next state duration computation
X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=af0263848fcb3f0d4d52e14720d68556aa6bd7b4;p=freqai-strategies.git

fix(ReforceXY): fix off-by-one in PBRS next state duration computation

Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
---

diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md
index 3b00794..06230e2 100644
--- a/ReforceXY/reward_space_analysis/README.md
+++ b/ReforceXY/reward_space_analysis/README.md
@@ -320,6 +320,8 @@ where `kernel_function` depends on `exit_attenuation_mode`. See [Exit Attenuatio
 | `exit_potential_mode`    | canonical | Potential release mode            |
 | `exit_potential_decay`   | 0.5       | Decay for progressive_release     |
 | `hold_potential_enabled` | true      | Enable hold potential Î¦           |
+| `entry_fee_rate`         | 0.0       | Entry fee rate                    |
+| `exit_fee_rate`          | 0.0       | Exit fee rate                     |
 
 PBRS invariance holds when: `exit_potential_mode=canonical`.
 
diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py
index e4e8d03..ee9720f 100644
--- a/ReforceXY/reward_space_analysis/reward_space_analysis.py
+++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py
@@ -153,6 +153,8 @@ DEFAULT_MODEL_REWARD_PARAMETERS: RewardParams = {
     "entry_additive_gain": 1.0,
     "entry_additive_transform_pnl": "tanh",
     "entry_additive_transform_duration": "tanh",
+    "entry_fee_rate": 0.0,
+    "exit_fee_rate": 0.0,
     # Exit additive (non-PBRS additive term)
     "exit_additive_enabled": False,
     "exit_additive_scale": 1.0,
@@ -196,6 +198,8 @@ DEFAULT_MODEL_REWARD_PARAMETERS_HELP: Dict[str, str] = {
     "entry_additive_gain": "Entry additive gain",
     "entry_additive_transform_pnl": "Entry PnL transform",
     "entry_additive_transform_duration": "Entry duration transform",
+    "entry_fee_rate": "Entry fee rate",
+    "exit_fee_rate": "Exit fee rate",
     "exit_additive_enabled": "Enable exit additive",
     "exit_additive_scale": "Exit additive scale",
     "exit_additive_gain": "Exit additive gain",
@@ -233,6 +237,8 @@ _PARAMETER_BOUNDS: Dict[str, Dict[str, float]] = {
     "hold_potential_gain": {"min": 0.0},
     "entry_additive_scale": {"min": 0.0},
     "entry_additive_gain": {"min": 0.0},
+    "entry_fee_rate": {"min": 0.0, "max": 0.1},
+    "exit_fee_rate": {"min": 0.0, "max": 0.1},
     "exit_additive_scale": {"min": 0.0},
     "exit_additive_gain": {"min": 0.0},
 }
@@ -1174,11 +1180,16 @@ def calculate_reward(
     is_neutral = context.position == Positions.Neutral and context.action == Actions.Neutral
 
     if is_entry:
-        next_pnl = current_pnl
         next_duration_ratio = 0.0
+        if context.action == Actions.Long_enter:
+            next_pnl = _compute_entry_unrealized_pnl_estimate(Positions.Long, params)
+        elif context.action == Actions.Short_enter:
+            next_pnl = _compute_entry_unrealized_pnl_estimate(Positions.Short, params)
+        else:
+            next_pnl = current_pnl
     elif is_hold:
         next_duration_ratio = _compute_duration_ratio(
-            context.trade_duration + 1, max_trade_duration_candles
+            context.trade_duration, max_trade_duration_candles
         )
         # Optionally simulate unrealized PnL during holds to feed Î¦(s)
         if _get_bool_param(params, "unrealized_pnl", False):
@@ -1468,10 +1479,16 @@ def simulate_samples(
                 position = Positions.Long
                 trade_duration = 0
                 idle_duration = 0
+                pnl = _compute_entry_unrealized_pnl_estimate(Positions.Long, params)
+                max_unrealized_profit = pnl
+                min_unrealized_profit = pnl
             elif action == Actions.Short_enter and short_allowed:
                 position = Positions.Short
                 trade_duration = 0
                 idle_duration = 0
+                pnl = _compute_entry_unrealized_pnl_estimate(Positions.Short, params)
+                max_unrealized_profit = pnl
+                min_unrealized_profit = pnl
         else:
             idle_duration = 0
             if action == Actions.Neutral:
@@ -2773,6 +2790,66 @@ def _get_potential_gamma(params: RewardParams) -> float:
 # === PBRS IMPLEMENTATION ===
 
 
+def _compute_entry_unrealized_pnl_estimate(next_position: Positions, params: RewardParams) -> float:
+    """Estimate immediate unrealized PnL after entry fees.
+
+    For Long entry:
+        current_price = open * (1 - exit_fee_rate)
+        last_trade_price = open * (1 + entry_fee_rate)
+        pnl = (current_price - last_trade_price) / last_trade_price
+
+    For Short entry:
+        current_price = open * (1 + entry_fee_rate)
+        last_trade_price = open * (1 - exit_fee_rate)
+        pnl = (last_trade_price - current_price) / last_trade_price
+    """
+
+    entry_fee_rate = _get_float_param(
+        params,
+        "entry_fee_rate",
+        DEFAULT_MODEL_REWARD_PARAMETERS.get("entry_fee_rate", 0.0),
+    )
+    exit_fee_rate = _get_float_param(
+        params,
+        "exit_fee_rate",
+        DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_fee_rate", 0.0),
+    )
+
+    if not np.isfinite(entry_fee_rate):
+        entry_fee_rate = 0.0
+    if not np.isfinite(exit_fee_rate):
+        exit_fee_rate = 0.0
+
+    entry_fee_bounds = _PARAMETER_BOUNDS.get("entry_fee_rate", {"min": 0.0, "max": 1.0})
+    exit_fee_bounds = _PARAMETER_BOUNDS.get("exit_fee_rate", {"min": 0.0, "max": 1.0})
+
+    entry_fee_min = float(entry_fee_bounds.get("min", 0.0))
+    entry_fee_max = float(entry_fee_bounds.get("max", 1.0))
+    exit_fee_min = float(exit_fee_bounds.get("min", 0.0))
+    exit_fee_max = float(exit_fee_bounds.get("max", 1.0))
+
+    entry_fee_rate = float(np.clip(entry_fee_rate, entry_fee_min, entry_fee_max))
+    exit_fee_rate = float(np.clip(exit_fee_rate, exit_fee_min, exit_fee_max))
+
+    current_open = 1.0
+    next_pnl = 0.0
+
+    if next_position == Positions.Long:
+        current_price = current_open * (1.0 - exit_fee_rate)
+        last_trade_price = current_open * (1.0 + entry_fee_rate)
+        if last_trade_price != 0.0 and np.isfinite(last_trade_price):
+            next_pnl = (current_price - last_trade_price) / last_trade_price
+    elif next_position == Positions.Short:
+        current_price = current_open * (1.0 + entry_fee_rate)
+        last_trade_price = current_open * (1.0 - exit_fee_rate)
+        if last_trade_price != 0.0 and np.isfinite(last_trade_price):
+            next_pnl = (last_trade_price - current_price) / last_trade_price
+
+    if not np.isfinite(next_pnl):
+        return 0.0
+    return float(next_pnl)
+
+
 def _compute_hold_potential(
     pnl: float,
     pnl_target: float,
diff --git a/ReforceXY/reward_space_analysis/tests/.docstring_template.md b/ReforceXY/reward_space_analysis/tests/.docstring_template.md
index 0551814..2c8e705 100644
--- a/ReforceXY/reward_space_analysis/tests/.docstring_template.md
+++ b/ReforceXY/reward_space_analysis/tests/.docstring_template.md
@@ -199,13 +199,12 @@ def test_property_holds_for_all_inputs(self):
 ### Regression Test
 
 ```python
-def test_bug_fix_issue_123_no_regression(self):
+def test_issue_123_regression_is_prevented(self):
     """Regression test for Issue #123: [brief description].
 
-    Ensures fix for [bug description] remains effective.
-    Bug manifested when [conditions]; this test reproduces those conditions.
+    Validates behavior for [conditions] and asserts the expected outcome.
 
-    **Fixed in:** PR #456 (commit abc1234)
+    **Tracking:** Issue #123
     """
 ```
 
diff --git a/ReforceXY/reward_space_analysis/tests/README.md b/ReforceXY/reward_space_analysis/tests/README.md
index c3d1597..a3605a5 100644
--- a/ReforceXY/reward_space_analysis/tests/README.md
+++ b/ReforceXY/reward_space_analysis/tests/README.md
@@ -194,7 +194,7 @@ Columns:
 | statistics-constant-dist-widened-ci-113a     | statistics  | Non-strict: widened CI with warning                                                 | statistics/test_statistics.py:533         | Test docstring labels "Invariant 113 (non-strict)"                                                                          |
 | statistics-constant-dist-strict-omit-113b    | statistics  | Strict: omit metrics (no widened CI)                                                | statistics/test_statistics.py:565         | Test docstring labels "Invariant 113 (strict)"                                                                              |
 | statistics-fallback-diagnostics-115          | statistics  | Fallback diagnostics constant distribution (qq_r2=1.0 etc.)                         | statistics/test_statistics.py:190         | Docstring line                                                                                                              |
-| robustness-exit-pnl-only-117                 | robustness  | Only exit actions have non-zero PnL                                                 | robustness/test_robustness.py:126         | Newly assigned ID (previously unnumbered)                                                                                   |
+| robustness-exit-pnl-only-117                 | robustness  | Only exit actions have non-zero PnL                                                 | robustness/test_robustness.py:126         | Newly assigned ID                                                                                                           |
 | pbrs-absence-shift-placeholder-118           | pbrs        | Placeholder shift line present (absence displayed)                                  | pbrs/test_pbrs.py:979                     | Ensures placeholder appears when shaping shift absent                                                                       |
 | components-pbrs-breakdown-fields-119         | components  | PBRS breakdown fields finite and mathematically aligned                             | components/test_reward_components.py:454  | Tests base_reward, pbrs_delta, invariance_correction fields and their alignment                                             |
 | integration-pbrs-metrics-section-120         | integration | PBRS Metrics section present in report with tracing metrics                         | integration/test_report_formatting.py:156 | Verifies PBRS Metrics (Tracing) subsection rendering in statistical_analysis.md                                             |
diff --git a/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py b/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py
index ef7b356..fb00325 100644
--- a/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py
+++ b/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py
@@ -13,12 +13,16 @@ from reward_space_analysis import (
     DEFAULT_IDLE_DURATION_MULTIPLIER,
     DEFAULT_MODEL_REWARD_PARAMETERS,
     PBRS_INVARIANCE_TOL,
+    Actions,
+    Positions,
     _compute_entry_additive,
+    _compute_entry_unrealized_pnl_estimate,
     _compute_exit_additive,
     _compute_exit_potential,
     _compute_hold_potential,
     _get_float_param,
     apply_potential_shaping,
+    calculate_reward,
     get_max_idle_duration_candles,
     simulate_samples,
     validate_reward_parameters,
@@ -485,6 +489,153 @@ class TestPBRS(RewardSpaceTestBase):
             "Unexpected total difference under gamma NaN fallback",
         )
 
+    def test_calculate_reward_entry_next_pnl_fee_aware(self):
+        """calculate_reward() entry PBRS uses fee-aware next_pnl estimate."""
+        params = self.base_params(
+            exit_potential_mode="non_canonical",
+            hold_potential_enabled=True,
+            entry_additive_enabled=False,
+            exit_additive_enabled=False,
+            potential_gamma=0.9,
+            entry_fee_rate=0.001,
+            exit_fee_rate=0.001,
+        )
+
+        for action in (Actions.Long_enter, Actions.Short_enter):
+            ctx = self.make_ctx(
+                position=Positions.Neutral, action=action, pnl=0.0, trade_duration=0
+            )
+            breakdown = calculate_reward(
+                ctx,
+                params,
+                base_factor=PARAMS.BASE_FACTOR,
+                profit_aim=PARAMS.PROFIT_AIM,
+                risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
+                short_allowed=True,
+                action_masking=True,
+                prev_potential=0.0,
+            )
+            self.assertTrue(np.isfinite(breakdown.next_potential))
+            # With any nonzero fees, immediate unrealized pnl should be negative.
+            self.assertLess(
+                breakdown.next_potential,
+                0.0,
+                f"Expected negative next_potential on entry for action={action}",
+            )
+
+    def test_fee_rates_are_clamped_to_parameter_bounds(self):
+        """Fee clamping uses _PARAMETER_BOUNDS (max 0.1)."""
+        cases = [
+            ("entry_fee_rate", self.base_params(entry_fee_rate=999.0, exit_fee_rate=0.0)),
+            ("exit_fee_rate", self.base_params(entry_fee_rate=0.0, exit_fee_rate=999.0)),
+        ]
+
+        for key, params in cases:
+            pnl_clamped = _compute_entry_unrealized_pnl_estimate(Positions.Long, params)
+            pnl_expected = _compute_entry_unrealized_pnl_estimate(
+                Positions.Long,
+                {**params, key: 0.1},
+            )
+            self.assertAlmostEqualFloat(
+                pnl_clamped,
+                pnl_expected,
+                tolerance=TOLERANCE.IDENTITY_STRICT,
+                msg=f"Expected {key} values above max to clamp to 0.1",
+            )
+
+    def test_simulate_samples_initializes_pnl_on_entry(self):
+        """simulate_samples() sets in-position pnl to entry fee estimate."""
+        params = self.base_params(
+            exit_potential_mode="non_canonical",
+            hold_potential_enabled=True,
+            entry_additive_enabled=False,
+            exit_additive_enabled=False,
+            entry_fee_rate=0.001,
+            exit_fee_rate=0.001,
+        )
+
+        df = simulate_samples(
+            num_samples=50,
+            seed=1,
+            params=params,
+            base_factor=PARAMS.BASE_FACTOR,
+            profit_aim=PARAMS.PROFIT_AIM,
+            risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
+            max_duration_ratio=1.0,
+            trading_mode="futures",
+            pnl_base_std=0.0,
+            pnl_duration_vol_scale=0.0,
+        )
+
+        enter_rows = df[df["action"] == float(Actions.Long_enter.value)]
+        self.assertGreater(len(enter_rows), 0, "Expected at least one Long_enter in sample")
+
+        enter_pos = df.reset_index(drop=True)
+        enter_mask = enter_pos["action"].to_numpy() == float(Actions.Long_enter.value)
+        enter_positions = np.flatnonzero(enter_mask)
+        first_enter_pos = int(enter_positions[0])
+        next_pos = first_enter_pos + 1
+
+        self.assertLess(next_pos, len(enter_pos), "Sample must include post-entry step")
+        self.assertEqual(
+            float(enter_pos.iloc[next_pos]["position"]),
+            float(Positions.Long.value),
+            "Expected Long position immediately after Long_enter",
+        )
+
+        expected_pnl = _compute_entry_unrealized_pnl_estimate(Positions.Long, params)
+        post_entry_pnl = float(enter_pos.iloc[next_pos]["pnl"])
+        self.assertAlmostEqualFloat(
+            post_entry_pnl,
+            expected_pnl,
+            tolerance=TOLERANCE.IDENTITY_STRICT,
+            msg="Expected pnl after entry to match entry fee estimate",
+        )
+
+    def test_calculate_reward_hold_uses_current_duration_ratio(self):
+        """calculate_reward() hold next_duration_ratio uses trade_duration."""
+        params = self.base_params(
+            exit_potential_mode="non_canonical",
+            hold_potential_enabled=True,
+            entry_additive_enabled=False,
+            exit_additive_enabled=False,
+            potential_gamma=0.9,
+        )
+
+        trade_duration = 5
+        max_trade_duration_candles = 10
+
+        ctx = self.make_ctx(
+            position=Positions.Long,
+            action=Actions.Neutral,
+            pnl=0.01,
+            trade_duration=trade_duration,
+        )
+
+        breakdown = calculate_reward(
+            ctx,
+            {**params, "max_trade_duration_candles": max_trade_duration_candles},
+            base_factor=PARAMS.BASE_FACTOR,
+            profit_aim=PARAMS.PROFIT_AIM,
+            risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
+            short_allowed=True,
+            action_masking=True,
+            prev_potential=0.0,
+        )
+
+        expected_next_potential = _compute_hold_potential(
+            pnl=ctx.pnl,
+            pnl_target=PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO,
+            duration_ratio=(trade_duration / max_trade_duration_candles),
+            params=params,
+        )
+        self.assertAlmostEqualFloat(
+            breakdown.next_potential,
+            expected_next_potential,
+            tolerance=TOLERANCE.IDENTITY_RELAXED,
+            msg="Hold next_potential mismatch (duration ratio mismatch)",
+        )
+
     # ---------------- Validation parameter batch & relaxed aggregation ---------------- #
 
     def test_validate_reward_parameters_batch_and_relaxed_aggregation(self):
diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py
index 3086750..c69f63d 100644
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -1793,6 +1793,29 @@ class MyRLEnv(Base5ActionRLEnv):
             return Positions.Neutral
         return self._position
 
+    def _get_entry_unrealized_profit(self, next_position: Positions) -> float:
+        current_open = self.prices.iloc[self._current_tick].open
+        if not isinstance(current_open, (int, float, np.floating)) or not np.isfinite(
+            current_open
+        ):
+            return 0.0
+
+        next_pnl = 0.0
+        if next_position == Positions.Long:
+            current_price = self.add_exit_fee(current_open)
+            last_trade_price = self.add_entry_fee(current_open)
+            if not np.isclose(last_trade_price, 0.0) and np.isfinite(last_trade_price):
+                next_pnl = (current_price - last_trade_price) / last_trade_price
+        elif next_position == Positions.Short:
+            current_price = self.add_entry_fee(current_open)
+            last_trade_price = self.add_exit_fee(current_open)
+            if not np.isclose(last_trade_price, 0.0) and np.isfinite(last_trade_price):
+                next_pnl = (last_trade_price - current_price) / last_trade_price
+
+        if not np.isfinite(next_pnl):
+            return 0.0
+        return float(next_pnl)
+
     def _get_next_transition_state(
         self,
         action: int,
@@ -1801,24 +1824,28 @@ class MyRLEnv(Base5ActionRLEnv):
     ) -> Tuple[Positions, int, float]:
         """Compute next transition state tuple."""
         next_position = self._get_next_position(action)
+
         # Entry
         if self._position == Positions.Neutral and next_position in (
             Positions.Long,
             Positions.Short,
         ):
-            return next_position, 0, pnl
+            return next_position, 0, self._get_entry_unrealized_profit(next_position)
+
         # Exit
         if (
             self._position in (Positions.Long, Positions.Short)
             and next_position == Positions.Neutral
         ):
             return next_position, 0, 0.0
+
         # Hold
         if self._position in (Positions.Long, Positions.Short) and next_position in (
             Positions.Long,
             Positions.Short,
         ):
-            return next_position, int(trade_duration) + 1, pnl
+            return next_position, int(trade_duration), pnl
+
         # Neutral self-loop
         return next_position, 0, 0.0