| `exit_potential_mode` | canonical | Potential release mode |
| `exit_potential_decay` | 0.5 | Decay for progressive_release |
| `hold_potential_enabled` | true | Enable hold potential Φ |
+| `entry_fee_rate` | 0.0 | Entry fee rate |
+| `exit_fee_rate` | 0.0 | Exit fee rate |
PBRS invariance holds when: `exit_potential_mode=canonical`.
"entry_additive_gain": 1.0,
"entry_additive_transform_pnl": "tanh",
"entry_additive_transform_duration": "tanh",
+ "entry_fee_rate": 0.0,
+ "exit_fee_rate": 0.0,
# Exit additive (non-PBRS additive term)
"exit_additive_enabled": False,
"exit_additive_scale": 1.0,
"entry_additive_gain": "Entry additive gain",
"entry_additive_transform_pnl": "Entry PnL transform",
"entry_additive_transform_duration": "Entry duration transform",
+ "entry_fee_rate": "Entry fee rate",
+ "exit_fee_rate": "Exit fee rate",
"exit_additive_enabled": "Enable exit additive",
"exit_additive_scale": "Exit additive scale",
"exit_additive_gain": "Exit additive gain",
"hold_potential_gain": {"min": 0.0},
"entry_additive_scale": {"min": 0.0},
"entry_additive_gain": {"min": 0.0},
+ "entry_fee_rate": {"min": 0.0, "max": 0.1},
+ "exit_fee_rate": {"min": 0.0, "max": 0.1},
"exit_additive_scale": {"min": 0.0},
"exit_additive_gain": {"min": 0.0},
}
is_neutral = context.position == Positions.Neutral and context.action == Actions.Neutral
if is_entry:
- next_pnl = current_pnl
next_duration_ratio = 0.0
+ if context.action == Actions.Long_enter:
+ next_pnl = _compute_entry_unrealized_pnl_estimate(Positions.Long, params)
+ elif context.action == Actions.Short_enter:
+ next_pnl = _compute_entry_unrealized_pnl_estimate(Positions.Short, params)
+ else:
+ next_pnl = current_pnl
elif is_hold:
next_duration_ratio = _compute_duration_ratio(
- context.trade_duration + 1, max_trade_duration_candles
+ context.trade_duration, max_trade_duration_candles
)
# Optionally simulate unrealized PnL during holds to feed Φ(s)
if _get_bool_param(params, "unrealized_pnl", False):
position = Positions.Long
trade_duration = 0
idle_duration = 0
+ pnl = _compute_entry_unrealized_pnl_estimate(Positions.Long, params)
+ max_unrealized_profit = pnl
+ min_unrealized_profit = pnl
elif action == Actions.Short_enter and short_allowed:
position = Positions.Short
trade_duration = 0
idle_duration = 0
+ pnl = _compute_entry_unrealized_pnl_estimate(Positions.Short, params)
+ max_unrealized_profit = pnl
+ min_unrealized_profit = pnl
else:
idle_duration = 0
if action == Actions.Neutral:
# === PBRS IMPLEMENTATION ===
+def _compute_entry_unrealized_pnl_estimate(next_position: Positions, params: RewardParams) -> float:
+ """Estimate immediate unrealized PnL after entry fees.
+
+ For Long entry:
+ current_price = open * (1 - exit_fee_rate)
+ last_trade_price = open * (1 + entry_fee_rate)
+ pnl = (current_price - last_trade_price) / last_trade_price
+
+ For Short entry:
+ current_price = open * (1 + entry_fee_rate)
+ last_trade_price = open * (1 - exit_fee_rate)
+ pnl = (last_trade_price - current_price) / last_trade_price
+ """
+
+ entry_fee_rate = _get_float_param(
+ params,
+ "entry_fee_rate",
+ DEFAULT_MODEL_REWARD_PARAMETERS.get("entry_fee_rate", 0.0),
+ )
+ exit_fee_rate = _get_float_param(
+ params,
+ "exit_fee_rate",
+ DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_fee_rate", 0.0),
+ )
+
+ if not np.isfinite(entry_fee_rate):
+ entry_fee_rate = 0.0
+ if not np.isfinite(exit_fee_rate):
+ exit_fee_rate = 0.0
+
+ entry_fee_bounds = _PARAMETER_BOUNDS.get("entry_fee_rate", {"min": 0.0, "max": 1.0})
+ exit_fee_bounds = _PARAMETER_BOUNDS.get("exit_fee_rate", {"min": 0.0, "max": 1.0})
+
+ entry_fee_min = float(entry_fee_bounds.get("min", 0.0))
+ entry_fee_max = float(entry_fee_bounds.get("max", 1.0))
+ exit_fee_min = float(exit_fee_bounds.get("min", 0.0))
+ exit_fee_max = float(exit_fee_bounds.get("max", 1.0))
+
+ entry_fee_rate = float(np.clip(entry_fee_rate, entry_fee_min, entry_fee_max))
+ exit_fee_rate = float(np.clip(exit_fee_rate, exit_fee_min, exit_fee_max))
+
+ current_open = 1.0
+ next_pnl = 0.0
+
+ if next_position == Positions.Long:
+ current_price = current_open * (1.0 - exit_fee_rate)
+ last_trade_price = current_open * (1.0 + entry_fee_rate)
+ if last_trade_price != 0.0 and np.isfinite(last_trade_price):
+ next_pnl = (current_price - last_trade_price) / last_trade_price
+ elif next_position == Positions.Short:
+ current_price = current_open * (1.0 + entry_fee_rate)
+ last_trade_price = current_open * (1.0 - exit_fee_rate)
+ if last_trade_price != 0.0 and np.isfinite(last_trade_price):
+ next_pnl = (last_trade_price - current_price) / last_trade_price
+
+ if not np.isfinite(next_pnl):
+ return 0.0
+ return float(next_pnl)
+
+
def _compute_hold_potential(
pnl: float,
pnl_target: float,
### Regression Test
```python
-def test_bug_fix_issue_123_no_regression(self):
+def test_issue_123_regression_is_prevented(self):
"""Regression test for Issue #123: [brief description].
- Ensures fix for [bug description] remains effective.
- Bug manifested when [conditions]; this test reproduces those conditions.
+ Validates behavior for [conditions] and asserts the expected outcome.
- **Fixed in:** PR #456 (commit abc1234)
+ **Tracking:** Issue #123
"""
```
| statistics-constant-dist-widened-ci-113a | statistics | Non-strict: widened CI with warning | statistics/test_statistics.py:533 | Test docstring labels "Invariant 113 (non-strict)" |
| statistics-constant-dist-strict-omit-113b | statistics | Strict: omit metrics (no widened CI) | statistics/test_statistics.py:565 | Test docstring labels "Invariant 113 (strict)" |
| statistics-fallback-diagnostics-115 | statistics | Fallback diagnostics constant distribution (qq_r2=1.0 etc.) | statistics/test_statistics.py:190 | Docstring line |
-| robustness-exit-pnl-only-117 | robustness | Only exit actions have non-zero PnL | robustness/test_robustness.py:126 | Newly assigned ID (previously unnumbered) |
+| robustness-exit-pnl-only-117 | robustness | Only exit actions have non-zero PnL | robustness/test_robustness.py:126 | Newly assigned ID |
| pbrs-absence-shift-placeholder-118 | pbrs | Placeholder shift line present (absence displayed) | pbrs/test_pbrs.py:979 | Ensures placeholder appears when shaping shift absent |
| components-pbrs-breakdown-fields-119 | components | PBRS breakdown fields finite and mathematically aligned | components/test_reward_components.py:454 | Tests base_reward, pbrs_delta, invariance_correction fields and their alignment |
| integration-pbrs-metrics-section-120 | integration | PBRS Metrics section present in report with tracing metrics | integration/test_report_formatting.py:156 | Verifies PBRS Metrics (Tracing) subsection rendering in statistical_analysis.md |
DEFAULT_IDLE_DURATION_MULTIPLIER,
DEFAULT_MODEL_REWARD_PARAMETERS,
PBRS_INVARIANCE_TOL,
+ Actions,
+ Positions,
_compute_entry_additive,
+ _compute_entry_unrealized_pnl_estimate,
_compute_exit_additive,
_compute_exit_potential,
_compute_hold_potential,
_get_float_param,
apply_potential_shaping,
+ calculate_reward,
get_max_idle_duration_candles,
simulate_samples,
validate_reward_parameters,
"Unexpected total difference under gamma NaN fallback",
)
+ def test_calculate_reward_entry_next_pnl_fee_aware(self):
+ """calculate_reward() entry PBRS uses fee-aware next_pnl estimate."""
+ params = self.base_params(
+ exit_potential_mode="non_canonical",
+ hold_potential_enabled=True,
+ entry_additive_enabled=False,
+ exit_additive_enabled=False,
+ potential_gamma=0.9,
+ entry_fee_rate=0.001,
+ exit_fee_rate=0.001,
+ )
+
+ for action in (Actions.Long_enter, Actions.Short_enter):
+ ctx = self.make_ctx(
+ position=Positions.Neutral, action=action, pnl=0.0, trade_duration=0
+ )
+ breakdown = calculate_reward(
+ ctx,
+ params,
+ base_factor=PARAMS.BASE_FACTOR,
+ profit_aim=PARAMS.PROFIT_AIM,
+ risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
+ short_allowed=True,
+ action_masking=True,
+ prev_potential=0.0,
+ )
+ self.assertTrue(np.isfinite(breakdown.next_potential))
+ # With any nonzero fees, immediate unrealized pnl should be negative.
+ self.assertLess(
+ breakdown.next_potential,
+ 0.0,
+ f"Expected negative next_potential on entry for action={action}",
+ )
+
+ def test_fee_rates_are_clamped_to_parameter_bounds(self):
+ """Fee clamping uses _PARAMETER_BOUNDS (max 0.1)."""
+ cases = [
+ ("entry_fee_rate", self.base_params(entry_fee_rate=999.0, exit_fee_rate=0.0)),
+ ("exit_fee_rate", self.base_params(entry_fee_rate=0.0, exit_fee_rate=999.0)),
+ ]
+
+ for key, params in cases:
+ pnl_clamped = _compute_entry_unrealized_pnl_estimate(Positions.Long, params)
+ pnl_expected = _compute_entry_unrealized_pnl_estimate(
+ Positions.Long,
+ {**params, key: 0.1},
+ )
+ self.assertAlmostEqualFloat(
+ pnl_clamped,
+ pnl_expected,
+ tolerance=TOLERANCE.IDENTITY_STRICT,
+ msg=f"Expected {key} values above max to clamp to 0.1",
+ )
+
+ def test_simulate_samples_initializes_pnl_on_entry(self):
+ """simulate_samples() sets in-position pnl to entry fee estimate."""
+ params = self.base_params(
+ exit_potential_mode="non_canonical",
+ hold_potential_enabled=True,
+ entry_additive_enabled=False,
+ exit_additive_enabled=False,
+ entry_fee_rate=0.001,
+ exit_fee_rate=0.001,
+ )
+
+ df = simulate_samples(
+ num_samples=50,
+ seed=1,
+ params=params,
+ base_factor=PARAMS.BASE_FACTOR,
+ profit_aim=PARAMS.PROFIT_AIM,
+ risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
+ max_duration_ratio=1.0,
+ trading_mode="futures",
+ pnl_base_std=0.0,
+ pnl_duration_vol_scale=0.0,
+ )
+
+ enter_rows = df[df["action"] == float(Actions.Long_enter.value)]
+ self.assertGreater(len(enter_rows), 0, "Expected at least one Long_enter in sample")
+
+ enter_pos = df.reset_index(drop=True)
+ enter_mask = enter_pos["action"].to_numpy() == float(Actions.Long_enter.value)
+ enter_positions = np.flatnonzero(enter_mask)
+ first_enter_pos = int(enter_positions[0])
+ next_pos = first_enter_pos + 1
+
+ self.assertLess(next_pos, len(enter_pos), "Sample must include post-entry step")
+ self.assertEqual(
+ float(enter_pos.iloc[next_pos]["position"]),
+ float(Positions.Long.value),
+ "Expected Long position immediately after Long_enter",
+ )
+
+ expected_pnl = _compute_entry_unrealized_pnl_estimate(Positions.Long, params)
+ post_entry_pnl = float(enter_pos.iloc[next_pos]["pnl"])
+ self.assertAlmostEqualFloat(
+ post_entry_pnl,
+ expected_pnl,
+ tolerance=TOLERANCE.IDENTITY_STRICT,
+ msg="Expected pnl after entry to match entry fee estimate",
+ )
+
+ def test_calculate_reward_hold_uses_current_duration_ratio(self):
+ """calculate_reward() hold next_duration_ratio uses trade_duration."""
+ params = self.base_params(
+ exit_potential_mode="non_canonical",
+ hold_potential_enabled=True,
+ entry_additive_enabled=False,
+ exit_additive_enabled=False,
+ potential_gamma=0.9,
+ )
+
+ trade_duration = 5
+ max_trade_duration_candles = 10
+
+ ctx = self.make_ctx(
+ position=Positions.Long,
+ action=Actions.Neutral,
+ pnl=0.01,
+ trade_duration=trade_duration,
+ )
+
+ breakdown = calculate_reward(
+ ctx,
+ {**params, "max_trade_duration_candles": max_trade_duration_candles},
+ base_factor=PARAMS.BASE_FACTOR,
+ profit_aim=PARAMS.PROFIT_AIM,
+ risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
+ short_allowed=True,
+ action_masking=True,
+ prev_potential=0.0,
+ )
+
+ expected_next_potential = _compute_hold_potential(
+ pnl=ctx.pnl,
+ pnl_target=PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO,
+ duration_ratio=(trade_duration / max_trade_duration_candles),
+ params=params,
+ )
+ self.assertAlmostEqualFloat(
+ breakdown.next_potential,
+ expected_next_potential,
+ tolerance=TOLERANCE.IDENTITY_RELAXED,
+ msg="Hold next_potential mismatch (duration ratio mismatch)",
+ )
+
# ---------------- Validation parameter batch & relaxed aggregation ---------------- #
def test_validate_reward_parameters_batch_and_relaxed_aggregation(self):
return Positions.Neutral
return self._position
+ def _get_entry_unrealized_profit(self, next_position: Positions) -> float:
+ current_open = self.prices.iloc[self._current_tick].open
+ if not isinstance(current_open, (int, float, np.floating)) or not np.isfinite(
+ current_open
+ ):
+ return 0.0
+
+ next_pnl = 0.0
+ if next_position == Positions.Long:
+ current_price = self.add_exit_fee(current_open)
+ last_trade_price = self.add_entry_fee(current_open)
+ if not np.isclose(last_trade_price, 0.0) and np.isfinite(last_trade_price):
+ next_pnl = (current_price - last_trade_price) / last_trade_price
+ elif next_position == Positions.Short:
+ current_price = self.add_entry_fee(current_open)
+ last_trade_price = self.add_exit_fee(current_open)
+ if not np.isclose(last_trade_price, 0.0) and np.isfinite(last_trade_price):
+ next_pnl = (last_trade_price - current_price) / last_trade_price
+
+ if not np.isfinite(next_pnl):
+ return 0.0
+ return float(next_pnl)
+
def _get_next_transition_state(
self,
action: int,
) -> Tuple[Positions, int, float]:
"""Compute next transition state tuple."""
next_position = self._get_next_position(action)
+
# Entry
if self._position == Positions.Neutral and next_position in (
Positions.Long,
Positions.Short,
):
- return next_position, 0, pnl
+ return next_position, 0, self._get_entry_unrealized_profit(next_position)
+
# Exit
if (
self._position in (Positions.Long, Positions.Short)
and next_position == Positions.Neutral
):
return next_position, 0, 0.0
+
# Hold
if self._position in (Positions.Long, Positions.Short) and next_position in (
Positions.Long,
Positions.Short,
):
- return next_position, int(trade_duration) + 1, pnl
+ return next_position, int(trade_duration), pnl
+
# Neutral self-loop
return next_position, 0, 0.0