refactor(ReforceXY): add invariant checks to efficiency coefficient computation

author Jérôme Benoit <jerome.benoit@piment-noir.org>

Mon, 22 Dec 2025 18:47:37 +0000 (19:47 +0100)

committer Jérôme Benoit <jerome.benoit@piment-noir.org>

Mon, 22 Dec 2025 18:47:37 +0000 (19:47 +0100)
author Jérôme Benoit <jerome.benoit@piment-noir.org>
Mon, 22 Dec 2025 18:47:37 +0000 (19:47 +0100)
committer Jérôme Benoit <jerome.benoit@piment-noir.org>
Mon, 22 Dec 2025 18:47:37 +0000 (19:47 +0100)
diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py

index cdeb3c9c44108ae3943b0964f6908addaadebfc9..4e6273f0f56e8d19ade7a6d51a5b26949cff42f2 100644 (file)
--- a/ReforceXY/reward_space_analysis/reward_space_analysis.py
+++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py
@@ -1031,6 +1031,22 @@ def _compute_efficiency_coefficient(
                      efficiency_center - efficiency_ratio
                  )
  
+    if not np.isfinite(efficiency_coefficient):
+        efficiency_coefficient = 0.0
+
+    if efficiency_coefficient < 0.0:
+        if _get_bool_param(
+            params,
+            "check_invariants",
+            bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("check_invariants", True)),
+        ):
+            warnings.warn(
+                f"efficiency_coefficient={efficiency_coefficient:.6f} < 0; clamping to 0.0",
+                RewardDiagnosticsWarning,
+                stacklevel=2,
+            )
+        efficiency_coefficient = 0.0
+
      return efficiency_coefficient
  
  
diff --git a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py

index a7b9b4bd5b3fffc0b254b950bb029b4893fe9e06..f1399520b944b0981f9dedc2472ad3ccb9c50f81 100644 (file)
--- a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py
+++ b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py
@@ -199,7 +199,7 @@ class TestRewardComponents(RewardSpaceTestBase):
          """Test efficiency zero policy produces expected PnL coefficient.
  
          Verifies:
-        - efficiency_weight = 0 → pnl_coefficient ≈ 1.0
+        - efficiency_weight = 0 -> pnl_coefficient ~= 1.0
          - Coefficient is finite and positive
          """
          ctx = self.make_ctx(
@@ -210,7 +210,7 @@ class TestRewardComponents(RewardSpaceTestBase):
              position=Positions.Long,
              action=Actions.Long_exit,
          )
-        params = self.base_params()
+        params = self.base_params(efficiency_weight=0.0)
          pnl_target = PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO
          pnl_target_coefficient = _compute_pnl_target_coefficient(
              params, ctx.pnl, pnl_target, PARAMS.RISK_REWARD_RATIO
@@ -220,6 +220,49 @@ class TestRewardComponents(RewardSpaceTestBase):
          self.assertFinite(pnl_coefficient, name="pnl_coefficient")
          self.assertAlmostEqualFloat(pnl_coefficient, 1.0, tolerance=TOLERANCE.GENERIC_EQ)
  
+    def test_exit_reward_never_positive_for_loss_due_to_efficiency(self):
+        """Exit reward should not become positive for a loss trade.
+
+        This guards against a configuration where the efficiency coefficient becomes
+        negative (e.g., extreme efficiency_weight/efficiency_center), which would
+        otherwise flip the sign of pnl * exit_factor.
+        """
+        params = self.base_params(
+            efficiency_weight=2.0,
+            efficiency_center=0.0,
+            exit_attenuation_mode="linear",
+            exit_plateau=False,
+            exit_linear_slope=0.0,
+            hold_potential_enabled=False,
+            entry_additive_enabled=False,
+            exit_additive_enabled=False,
+        )
+        params.pop("base_factor", None)
+
+        context = self.make_ctx(
+            pnl=-0.01,
+            trade_duration=10,
+            idle_duration=0,
+            max_unrealized_profit=0.0,
+            min_unrealized_profit=-0.05,
+            position=Positions.Long,
+            action=Actions.Long_exit,
+        )
+        breakdown = calculate_reward(
+            context,
+            params,
+            base_factor=1.0,
+            profit_aim=0.03,
+            risk_reward_ratio=1.0,
+            short_allowed=True,
+            action_masking=True,
+        )
+        self.assertLessEqual(
+            breakdown.exit_component,
+            0.0,
+            "Exit component must not be positive when pnl < 0",
+        )
+
      def test_max_idle_duration_candles_logic(self):
          """Test max idle duration candles parameter affects penalty magnitude.
  
diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py

index 83e0de30af43c405c61301bb16f37137bdf023e6..dbde6c63e607128e35c698dcc00d54a2f2b97dee 100644 (file)
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -2565,9 +2565,14 @@ class MyRLEnv(Base5ActionRLEnv):
                      "_get_exit_factor produced non-finite factor; resetting to 0.0"
                  )
                  return 0.0
+            if efficiency_coefficient < 0.0:
+                logger.debug(
+                    "_compute_efficiency_coefficient produced negative coefficient %.5f",
+                    efficiency_coefficient,
+                )
              if exit_factor < 0.0 and pnl >= 0.0:
                  logger.debug(
-                    "_get_exit_factor negative with positive pnl (exit_factor=%.5f, pnl=%.5f); clamping to 0.0",
+                    "_get_exit_factor produced negative factor with positive pnl (exit_factor=%.5f, pnl=%.5f); clamping to 0.0",
                      exit_factor,
                      pnl,
                  )
author	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Mon, 22 Dec 2025 18:47:37 +0000 (19:47 +0100)
committer	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Mon, 22 Dec 2025 18:47:37 +0000 (19:47 +0100)
ReforceXY/reward_space_analysis/reward_space_analysis.py		patch \| blob \| blame \| history
ReforceXY/reward_space_analysis/tests/components/test_reward_components.py		patch \| blob \| blame \| history
ReforceXY/user_data/freqaimodels/ReforceXY.py		patch \| blob \| blame \| history