"sim_extreme_pnl_threshold": 0.2,
"histogram_epsilon": 1e-10,
"distribution_identity_epsilon": 1e-12,
+ "efficiency_min_range_epsilon": 1e-6,
+ "efficiency_min_range_fraction": 0.01,
}
# PBRS constants
pnl_target,
risk_reward_ratio,
)
- * _compute_efficiency_coefficient(params, context, pnl)
+ * _compute_efficiency_coefficient(params, context, pnl, pnl_target)
)
if _get_bool_param(
params: RewardParams,
context: RewardContext,
pnl: float,
+ pnl_target: float,
) -> float:
"""
Compute exit efficiency coefficient based on PnL position relative to unrealized extremes.
- efficiency_center: Target efficiency ratio (0.0-1.0)
context: Trade context with unrealized profit/loss extremes
pnl: Realized profit/loss
+ pnl_target: Target profit threshold for context-aware range validation
Returns:
float: Coefficient ≥ 0.0 (typically 0.5-1.5 range)
max_pnl = max(context.max_unrealized_profit, pnl)
min_pnl = min(context.min_unrealized_profit, pnl)
range_pnl = max_pnl - min_pnl
- if np.isfinite(range_pnl) and not np.isclose(range_pnl, 0.0):
+ # Skip the ratio when the PnL range is below max(eps, frac * pnl_target): dividing by a near-zero range would blow it up
+ eps = float(INTERNAL_GUARDS.get("efficiency_min_range_epsilon", 1e-6))
+ frac = float(INTERNAL_GUARDS.get("efficiency_min_range_fraction", 0.01))
+ min_meaningful_range = max(eps, frac * pnl_target)
+ if np.isfinite(range_pnl) and range_pnl >= min_meaningful_range:
efficiency_ratio = (pnl - min_pnl) / range_pnl
# For profits (pnl > 0): high ratio = good exit → higher coefficient → amplify gain
# For losses (pnl < 0): high ratio = good exit → LOWER coefficient → attenuate penalty
)
from ..test_base import RewardSpaceTestBase
+_DEFAULT_PNL_TARGET = PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO
+
pytestmark = pytest.mark.components
action=Actions.Long_exit,
)
- coefficient = _compute_efficiency_coefficient(params, ctx, ctx.current_pnl)
+ coefficient = _compute_efficiency_coefficient(
+ params, ctx, ctx.current_pnl, _DEFAULT_PNL_TARGET
+ )
self.assertFinite(coefficient, name="efficiency_coefficient")
self.assertAlmostEqualFloat(coefficient, 1.0, tolerance=TOLERANCE.GENERIC_EQ)
position=Positions.Long,
action=Actions.Long_exit,
)
- coefficient = _compute_efficiency_coefficient(params, ctx, ctx.current_pnl)
+ coefficient = _compute_efficiency_coefficient(
+ params, ctx, ctx.current_pnl, _DEFAULT_PNL_TARGET
+ )
self.assertFinite(coefficient, name=f"efficiency_coefficient[pnl={pnl}]")
coefficients.append(coefficient)
position=Positions.Long,
action=Actions.Long_exit,
)
- coefficient = _compute_efficiency_coefficient(params, ctx, ctx.current_pnl)
+ coefficient = _compute_efficiency_coefficient(
+ params, ctx, ctx.current_pnl, _DEFAULT_PNL_TARGET
+ )
self.assertFinite(coefficient, name=f"efficiency_coefficient[pnl={pnl}]")
coefficients.append(coefficient)
# Simplified reward calculation (ignoring other factors for this test)
pnl_target_coefficient = _compute_pnl_target_coefficient(
params, context.current_pnl, pnl_target, risk_reward_ratio
)
- efficiency_coefficient = _compute_efficiency_coefficient(params, context, context.current_pnl)
+ efficiency_coefficient = _compute_efficiency_coefficient(
+ params, context, context.current_pnl, pnl_target
+ )
observed_exit_factor = _get_exit_factor(
base_factor,
DEFAULT_CHECK_INVARIANTS: Final[bool] = True
DEFAULT_EXIT_FACTOR_THRESHOLD: Final[float] = 1_000.0
+ DEFAULT_EFFICIENCY_MIN_RANGE_EPSILON: Final[float] = 1e-6
+ DEFAULT_EFFICIENCY_MIN_RANGE_FRACTION: Final[float] = 0.01
+
_MODEL_TYPES: Final[Tuple[ModelType, ...]] = (
"PPO",
"RecurrentPPO",
self._potential_gamma = float(
model_reward_parameters.get("potential_gamma", 0.95)
)
+ if np.isclose(self._potential_gamma, 0.0):
+ logger.warning(
+ "PBRS [%s]: potential_gamma=0 detected; PBRS delta will be -Φ(s) "
+ "instead of γΦ(s')-Φ(s). This may cause unexpected reward behavior.",
+ self.id,
+ )
# === EXIT POTENTIAL MODE ===
# exit_potential_mode options:
# === PNL TARGET ===
self._pnl_target = float(self.profit_aim * self.rr)
+ if self._pnl_target <= 0:
+ logger.warning(
+ "PBRS [%s]: pnl_target=%.6f must be > 0 (profit_aim=%.4f, rr=%.4f); "
+ "defaulting to 0.01",
+ self.id,
+ self._pnl_target,
+ self.profit_aim,
+ self.rr,
+ )
+ self._pnl_target = 0.01
def _get_next_position(self, action: int) -> Positions:
if action == Actions.Long_enter.value and self._position == Positions.Neutral:
max_pnl = max(self.get_max_unrealized_profit(), pnl)
min_pnl = min(self.get_min_unrealized_profit(), pnl)
range_pnl = max_pnl - min_pnl
- if np.isfinite(range_pnl) and not np.isclose(range_pnl, 0.0):
+ # Skip the ratio when the PnL range is below max(epsilon, fraction * pnl_target): dividing by a near-zero range would blow it up
+ min_meaningful_range = max(
+ ReforceXY.DEFAULT_EFFICIENCY_MIN_RANGE_EPSILON,
+ ReforceXY.DEFAULT_EFFICIENCY_MIN_RANGE_FRACTION * self._pnl_target,
+ )
+ if np.isfinite(range_pnl) and range_pnl >= min_meaningful_range:
efficiency_ratio = (pnl - min_pnl) / range_pnl
if pnl > 0.0:
efficiency_coefficient = 1.0 + efficiency_weight * (