From: Jérôme Benoit Date: Thu, 12 Feb 2026 23:10:08 +0000 (+0100) Subject: fix(ReforceXY): add context-aware guard for efficiency coefficient division X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=80d9ccc9fc2052acbdc32ebfe87c030064e167bb;p=freqai-strategies.git fix(ReforceXY): add context-aware guard for efficiency coefficient division Prevent division explosion in _compute_efficiency_coefficient() when max_unrealized_profit ≈ min_unrealized_profit by requiring a minimum meaningful range based on pnl_target. Also adds validation warnings for potential_gamma=0 and pnl_target<=0 edge cases. --- diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py index bef1074..2c61c2a 100644 --- a/ReforceXY/reward_space_analysis/reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py @@ -99,6 +99,8 @@ INTERNAL_GUARDS: dict[str, float] = { "sim_extreme_pnl_threshold": 0.2, "histogram_epsilon": 1e-10, "distribution_identity_epsilon": 1e-12, + "efficiency_min_range_epsilon": 1e-6, + "efficiency_min_range_fraction": 0.01, } # PBRS constants @@ -943,7 +945,7 @@ def _get_exit_factor( pnl_target, risk_reward_ratio, ) - * _compute_efficiency_coefficient(params, context, pnl) + * _compute_efficiency_coefficient(params, context, pnl, pnl_target) ) if _get_bool_param( @@ -1013,6 +1015,7 @@ def _compute_efficiency_coefficient( params: RewardParams, context: RewardContext, pnl: float, + pnl_target: float, ) -> float: """ Compute exit efficiency coefficient based on PnL position relative to unrealized extremes. @@ -1027,6 +1030,7 @@ def _compute_efficiency_coefficient( - efficiency_center: Target efficiency ratio (0.0-1.0) context: Trade context with unrealized profit/loss extremes pnl: Realized profit/loss + pnl_target: Target profit threshold for context-aware range validation Returns: float: Coefficient ≥ 0.0 (typically 0.5-1.5 range) @@ -1038,7 +1042,11 @@ def _compute_efficiency_coefficient( max_pnl = max(context.max_unrealized_profit, pnl) min_pnl = min(context.min_unrealized_profit, pnl) range_pnl = max_pnl - min_pnl - if np.isfinite(range_pnl) and not np.isclose(range_pnl, 0.0): + # Guard against division explosion when max_pnl ≈ min_pnl + eps = float(INTERNAL_GUARDS.get("efficiency_min_range_epsilon", 1e-6)) + frac = float(INTERNAL_GUARDS.get("efficiency_min_range_fraction", 0.01)) + min_meaningful_range = max(eps, frac * pnl_target) + if np.isfinite(range_pnl) and range_pnl >= min_meaningful_range: efficiency_ratio = (pnl - min_pnl) / range_pnl # For profits (pnl > 0): high ratio = good exit → higher coefficient → amplify gain # For losses (pnl < 0): high ratio = good exit → LOWER coefficient → attenuate penalty diff --git a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py index 3004f75..c87bfb8 100644 --- a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py +++ b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py @@ -33,6 +33,8 @@ from ..helpers import ( ) from ..test_base import RewardSpaceTestBase +_DEFAULT_PNL_TARGET = PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO + pytestmark = pytest.mark.components @@ -301,7 +303,9 @@ class TestRewardComponents(RewardSpaceTestBase): action=Actions.Long_exit, ) - coefficient = _compute_efficiency_coefficient(params, ctx, ctx.current_pnl) + coefficient = _compute_efficiency_coefficient( + params, ctx, ctx.current_pnl, _DEFAULT_PNL_TARGET + ) self.assertFinite(coefficient, name="efficiency_coefficient") self.assertAlmostEqualFloat(coefficient, 1.0, tolerance=TOLERANCE.GENERIC_EQ) @@ -339,7 +343,9 @@ class TestRewardComponents(RewardSpaceTestBase): position=Positions.Long, action=Actions.Long_exit, ) - coefficient = _compute_efficiency_coefficient(params, ctx, ctx.current_pnl) + coefficient = _compute_efficiency_coefficient( + params, ctx, ctx.current_pnl, _DEFAULT_PNL_TARGET + ) self.assertFinite(coefficient, name=f"efficiency_coefficient[pnl={pnl}]") coefficients.append(coefficient) @@ -420,7 +426,9 @@ class TestRewardComponents(RewardSpaceTestBase): position=Positions.Long, action=Actions.Long_exit, ) - coefficient = _compute_efficiency_coefficient(params, ctx, ctx.current_pnl) + coefficient = _compute_efficiency_coefficient( + params, ctx, ctx.current_pnl, _DEFAULT_PNL_TARGET + ) self.assertFinite(coefficient, name=f"efficiency_coefficient[pnl={pnl}]") coefficients.append(coefficient) # Simplified reward calculation (ignoring other factors for this test) diff --git a/ReforceXY/reward_space_analysis/tests/helpers/assertions.py b/ReforceXY/reward_space_analysis/tests/helpers/assertions.py index 25c1b49..a362ba1 100644 --- a/ReforceXY/reward_space_analysis/tests/helpers/assertions.py +++ b/ReforceXY/reward_space_analysis/tests/helpers/assertions.py @@ -668,7 +668,9 @@ def assert_exit_mode_mathematical_validation( pnl_target_coefficient = _compute_pnl_target_coefficient( params, context.current_pnl, pnl_target, risk_reward_ratio ) - efficiency_coefficient = _compute_efficiency_coefficient(params, context, context.current_pnl) + efficiency_coefficient = _compute_efficiency_coefficient( + params, context, context.current_pnl, pnl_target + ) observed_exit_factor = _get_exit_factor( base_factor, diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index 174a78e..4af4079 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -186,6 +186,9 @@ class ReforceXY(BaseReinforcementLearningModel): DEFAULT_CHECK_INVARIANTS: Final[bool] = True DEFAULT_EXIT_FACTOR_THRESHOLD: Final[float] = 1_000.0 + DEFAULT_EFFICIENCY_MIN_RANGE_EPSILON: Final[float] = 1e-6 + DEFAULT_EFFICIENCY_MIN_RANGE_FRACTION: Final[float] = 0.01 + _MODEL_TYPES: Final[Tuple[ModelType, ...]] = ( "PPO", "RecurrentPPO", @@ -1940,6 +1943,12 @@ class MyRLEnv(Base5ActionRLEnv): self._potential_gamma = float( model_reward_parameters.get("potential_gamma", 0.95) ) + if np.isclose(self._potential_gamma, 0.0): + logger.warning( + "PBRS [%s]: potential_gamma=0 detected; PBRS delta will be -Φ(s) " + "instead of γΦ(s')-Φ(s). This may cause unexpected reward behavior.", + self.id, + ) # === EXIT POTENTIAL MODE === # exit_potential_mode options: @@ -2083,6 +2092,16 @@ class MyRLEnv(Base5ActionRLEnv): # === PNL TARGET === self._pnl_target = float(self.profit_aim * self.rr) + if self._pnl_target <= 0: + logger.warning( + "PBRS [%s]: pnl_target=%.6f must be > 0 (profit_aim=%.4f, rr=%.4f); " + "defaulting to 0.01", + self.id, + self._pnl_target, + self.profit_aim, + self.rr, + ) + self._pnl_target = 0.01 def _get_next_position(self, action: int) -> Positions: if action == Actions.Long_enter.value and self._position == Positions.Neutral: @@ -3028,7 +3047,12 @@ class MyRLEnv(Base5ActionRLEnv): max_pnl = max(self.get_max_unrealized_profit(), pnl) min_pnl = min(self.get_min_unrealized_profit(), pnl) range_pnl = max_pnl - min_pnl - if np.isfinite(range_pnl) and not np.isclose(range_pnl, 0.0): + # Guard against division explosion when max_pnl ≈ min_pnl + min_meaningful_range = max( + ReforceXY.DEFAULT_EFFICIENCY_MIN_RANGE_EPSILON, + ReforceXY.DEFAULT_EFFICIENCY_MIN_RANGE_FRACTION * self._pnl_target, + ) + if np.isfinite(range_pnl) and range_pnl >= min_meaningful_range: efficiency_ratio = (pnl - min_pnl) / range_pnl if pnl > 0.0: efficiency_coefficient = 1.0 + efficiency_weight * (