From: Jérôme Benoit <jerome.benoit@piment-noir.org>
Date: Mon, 22 Dec 2025 18:47:37 +0000 (+0100)
Subject: refactor(ReforceXY): add invariant checks to efficiency coefficient computation
X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=654fc01f17f7deff259e37c9c1a47b495eb0342d;p=freqai-strategies.git

refactor(ReforceXY): add invariant checks to efficiency coefficient computation

Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
---

diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py
index cdeb3c9..4e6273f 100644
--- a/ReforceXY/reward_space_analysis/reward_space_analysis.py
+++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py
@@ -1031,6 +1031,22 @@ def _compute_efficiency_coefficient(
                     efficiency_center - efficiency_ratio
                 )
 
+    if not np.isfinite(efficiency_coefficient):
+        efficiency_coefficient = 0.0
+
+    if efficiency_coefficient < 0.0:
+        if _get_bool_param(
+            params,
+            "check_invariants",
+            bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("check_invariants", True)),
+        ):
+            warnings.warn(
+                f"efficiency_coefficient={efficiency_coefficient:.6f} < 0; clamping to 0.0",
+                RewardDiagnosticsWarning,
+                stacklevel=2,
+            )
+        efficiency_coefficient = 0.0
+
     return efficiency_coefficient
 
 
diff --git a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py
index a7b9b4b..f139952 100644
--- a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py
+++ b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py
@@ -199,7 +199,7 @@ class TestRewardComponents(RewardSpaceTestBase):
         """Test efficiency zero policy produces expected PnL coefficient.
 
         Verifies:
-        - efficiency_weight = 0 → pnl_coefficient ≈ 1.0
+        - efficiency_weight = 0 -> pnl_coefficient ~= 1.0
         - Coefficient is finite and positive
         """
         ctx = self.make_ctx(
@@ -210,7 +210,7 @@ class TestRewardComponents(RewardSpaceTestBase):
             position=Positions.Long,
             action=Actions.Long_exit,
         )
-        params = self.base_params()
+        params = self.base_params(efficiency_weight=0.0)
         pnl_target = PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO
         pnl_target_coefficient = _compute_pnl_target_coefficient(
             params, ctx.pnl, pnl_target, PARAMS.RISK_REWARD_RATIO
@@ -220,6 +220,49 @@ class TestRewardComponents(RewardSpaceTestBase):
         self.assertFinite(pnl_coefficient, name="pnl_coefficient")
         self.assertAlmostEqualFloat(pnl_coefficient, 1.0, tolerance=TOLERANCE.GENERIC_EQ)
 
+    def test_exit_reward_never_positive_for_loss_due_to_efficiency(self):
+        """Exit reward should not become positive for a loss trade.
+
+        This guards against a configuration where the efficiency coefficient becomes
+        negative (e.g., extreme efficiency_weight/efficiency_center), which would
+        otherwise flip the sign of pnl * exit_factor.
+        """
+        params = self.base_params(
+            efficiency_weight=2.0,  # extreme weight/center pair (see docstring) meant to drive the coefficient negative
+            efficiency_center=0.0,
+            exit_attenuation_mode="linear",
+            exit_plateau=False,
+            exit_linear_slope=0.0,  # NOTE(review): presumably makes linear attenuation a no-op; confirm against _get_exit_factor
+            hold_potential_enabled=False,  # *_enabled flags are switched off; presumably to isolate the exit component
+            entry_additive_enabled=False,
+            exit_additive_enabled=False,
+        )
+        params.pop("base_factor", None)  # base_factor is passed explicitly to calculate_reward() below
+
+        context = self.make_ctx(
+            pnl=-0.01,  # losing trade: the case under test
+            trade_duration=10,
+            idle_duration=0,
+            max_unrealized_profit=0.0,
+            min_unrealized_profit=-0.05,
+            position=Positions.Long,
+            action=Actions.Long_exit,
+        )
+        breakdown = calculate_reward(
+            context,
+            params,
+            base_factor=1.0,
+            profit_aim=0.03,
+            risk_reward_ratio=1.0,
+            short_allowed=True,
+            action_masking=True,
+        )
+        self.assertLessEqual(
+            breakdown.exit_component,  # only the exit term of the breakdown is asserted on
+            0.0,
+            "Exit component must not be positive when pnl < 0",
+        )
+
     def test_max_idle_duration_candles_logic(self):
         """Test max idle duration candles parameter affects penalty magnitude.
 
diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py
index 83e0de3..dbde6c6 100644
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -2565,9 +2565,14 @@ class MyRLEnv(Base5ActionRLEnv):
                     "_get_exit_factor produced non-finite factor; resetting to 0.0"
                 )
                 return 0.0
+            if efficiency_coefficient < 0.0:
+                logger.debug(
+                    "_compute_efficiency_coefficient produced negative coefficient %.5f",
+                    efficiency_coefficient,
+                )
             if exit_factor < 0.0 and pnl >= 0.0:
                 logger.debug(
-                    "_get_exit_factor negative with positive pnl (exit_factor=%.5f, pnl=%.5f); clamping to 0.0",
+                    "_get_exit_factor produced negative factor with positive pnl (exit_factor=%.5f, pnl=%.5f); clamping to 0.0",
                     exit_factor,
                     pnl,
                 )