efficiency_center - efficiency_ratio
)
+ if not np.isfinite(efficiency_coefficient):
+ efficiency_coefficient = 0.0
+
+ if efficiency_coefficient < 0.0:
+ if _get_bool_param(
+ params,
+ "check_invariants",
+ bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("check_invariants", True)),
+ ):
+ warnings.warn(
+ f"efficiency_coefficient={efficiency_coefficient:.6f} < 0; clamping to 0.0",
+ RewardDiagnosticsWarning,
+ stacklevel=2,
+ )
+ efficiency_coefficient = 0.0
+
return efficiency_coefficient
"""Test efficiency zero policy produces expected PnL coefficient.
Verifies:
- - efficiency_weight = 0 → pnl_coefficient ≈ 1.0
+ - efficiency_weight = 0 -> pnl_coefficient ~= 1.0
- Coefficient is finite and positive
"""
ctx = self.make_ctx(
position=Positions.Long,
action=Actions.Long_exit,
)
- params = self.base_params()
+ params = self.base_params(efficiency_weight=0.0)
pnl_target = PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO
pnl_target_coefficient = _compute_pnl_target_coefficient(
params, ctx.pnl, pnl_target, PARAMS.RISK_REWARD_RATIO
self.assertFinite(pnl_coefficient, name="pnl_coefficient")
self.assertAlmostEqualFloat(pnl_coefficient, 1.0, tolerance=TOLERANCE.GENERIC_EQ)
+ def test_exit_reward_never_positive_for_loss_due_to_efficiency(self):
+ """Exit reward should not become positive for a loss trade.
+
+ This guards against a configuration where the efficiency coefficient becomes
+ negative (e.g., extreme efficiency_weight/efficiency_center), which would
+ otherwise flip the sign of pnl * exit_factor.
+ """
+ params = self.base_params(
+ efficiency_weight=2.0,  # weight/center pair intended to hit the negative-coefficient scenario from the docstring
+ efficiency_center=0.0,
+ exit_attenuation_mode="linear",
+ exit_plateau=False,
+ exit_linear_slope=0.0,
+ hold_potential_enabled=False,  # NOTE(review): presumably disables shaping terms so only the exit component matters - confirm
+ entry_additive_enabled=False,
+ exit_additive_enabled=False,
+ )
+ params.pop("base_factor", None)  # base_factor is passed explicitly to calculate_reward below
+
+ context = self.make_ctx(
+ pnl=-0.01,  # losing trade: negative pnl
+ trade_duration=10,
+ idle_duration=0,
+ max_unrealized_profit=0.0,
+ min_unrealized_profit=-0.05,
+ position=Positions.Long,
+ action=Actions.Long_exit,
+ )
+ breakdown = calculate_reward(
+ context,
+ params,
+ base_factor=1.0,
+ profit_aim=0.03,
+ risk_reward_ratio=1.0,
+ short_allowed=True,
+ action_masking=True,
+ )
+ self.assertLessEqual(  # invariant under test: exit component is <= 0 for a loss
+ breakdown.exit_component,
+ 0.0,
+ "Exit component must not be positive when pnl < 0",
+ )
+
def test_max_idle_duration_candles_logic(self):
"""Test max idle duration candles parameter affects penalty magnitude.
"_get_exit_factor produced non-finite factor; resetting to 0.0"
)
return 0.0
+ if efficiency_coefficient < 0.0:
+ logger.debug(
+ "_compute_efficiency_coefficient produced negative coefficient %.5f",
+ efficiency_coefficient,
+ )
if exit_factor < 0.0 and pnl >= 0.0:
logger.debug(
- "_get_exit_factor negative with positive pnl (exit_factor=%.5f, pnl=%.5f); clamping to 0.0",
+ "_get_exit_factor produced negative factor with positive pnl (exit_factor=%.5f, pnl=%.5f); clamping to 0.0",
exit_factor,
pnl,
)