From: Jérôme Benoit Date: Mon, 22 Dec 2025 18:47:37 +0000 (+0100) Subject: refactor(ReforceXY): add invariant checks to efficiency coefficient computation X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=654fc01f17f7deff259e37c9c1a47b495eb0342d;p=freqai-strategies.git refactor(ReforceXY): add invariant checks to efficiency coefficient computation Signed-off-by: Jérôme Benoit --- diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py index cdeb3c9..4e6273f 100644 --- a/ReforceXY/reward_space_analysis/reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py @@ -1031,6 +1031,22 @@ def _compute_efficiency_coefficient( efficiency_center - efficiency_ratio ) + if not np.isfinite(efficiency_coefficient): + efficiency_coefficient = 0.0 + + if efficiency_coefficient < 0.0: + if _get_bool_param( + params, + "check_invariants", + bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("check_invariants", True)), + ): + warnings.warn( + f"efficiency_coefficient={efficiency_coefficient:.6f} < 0; clamping to 0.0", + RewardDiagnosticsWarning, + stacklevel=2, + ) + efficiency_coefficient = 0.0 + return efficiency_coefficient diff --git a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py index a7b9b4b..f139952 100644 --- a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py +++ b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py @@ -199,7 +199,7 @@ class TestRewardComponents(RewardSpaceTestBase): """Test efficiency zero policy produces expected PnL coefficient. Verifies: - - efficiency_weight = 0 → pnl_coefficient ≈ 1.0 + - efficiency_weight = 0 -> pnl_coefficient ~= 1.0 - Coefficient is finite and positive """ ctx = self.make_ctx( @@ -210,7 +210,7 @@ class TestRewardComponents(RewardSpaceTestBase): position=Positions.Long, action=Actions.Long_exit, ) - params = self.base_params() + params = self.base_params(efficiency_weight=0.0) pnl_target = PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO pnl_target_coefficient = _compute_pnl_target_coefficient( params, ctx.pnl, pnl_target, PARAMS.RISK_REWARD_RATIO @@ -220,6 +220,49 @@ class TestRewardComponents(RewardSpaceTestBase): self.assertFinite(pnl_coefficient, name="pnl_coefficient") self.assertAlmostEqualFloat(pnl_coefficient, 1.0, tolerance=TOLERANCE.GENERIC_EQ) + def test_exit_reward_never_positive_for_loss_due_to_efficiency(self): + """Exit reward should not become positive for a loss trade. + + This guards against a configuration where the efficiency coefficient becomes + negative (e.g., extreme efficiency_weight/efficiency_center), which would + otherwise flip the sign of pnl * exit_factor. + """ + params = self.base_params( + efficiency_weight=2.0, + efficiency_center=0.0, + exit_attenuation_mode="linear", + exit_plateau=False, + exit_linear_slope=0.0, + hold_potential_enabled=False, + entry_additive_enabled=False, + exit_additive_enabled=False, + ) + params.pop("base_factor", None) + + context = self.make_ctx( + pnl=-0.01, + trade_duration=10, + idle_duration=0, + max_unrealized_profit=0.0, + min_unrealized_profit=-0.05, + position=Positions.Long, + action=Actions.Long_exit, + ) + breakdown = calculate_reward( + context, + params, + base_factor=1.0, + profit_aim=0.03, + risk_reward_ratio=1.0, + short_allowed=True, + action_masking=True, + ) + self.assertLessEqual( + breakdown.exit_component, + 0.0, + "Exit component must not be positive when pnl < 0", + ) + def test_max_idle_duration_candles_logic(self): """Test max idle duration candles parameter affects penalty magnitude. diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index 83e0de3..dbde6c6 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -2565,9 +2565,14 @@ class MyRLEnv(Base5ActionRLEnv): "_get_exit_factor produced non-finite factor; resetting to 0.0" ) return 0.0 + if efficiency_coefficient < 0.0: + logger.debug( + "_compute_efficiency_coefficient produced negative coefficient %.5f", + efficiency_coefficient, + ) if exit_factor < 0.0 and pnl >= 0.0: logger.debug( - "_get_exit_factor negative with positive pnl (exit_factor=%.5f, pnl=%.5f); clamping to 0.0", + "_get_exit_factor produced negative factor with positive pnl (exit_factor=%.5f, pnl=%.5f); clamping to 0.0", exit_factor, pnl, )