From: Jérôme Benoit Date: Mon, 22 Dec 2025 12:37:22 +0000 (+0100) Subject: fix(ReforceXY): fix off-by-one in PBRS next state duration computation X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=af0263848fcb3f0d4d52e14720d68556aa6bd7b4;p=freqai-strategies.git fix(ReforceXY): fix off-by-one in PBRS next state duration computation Signed-off-by: Jérôme Benoit --- diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md index 3b00794..06230e2 100644 --- a/ReforceXY/reward_space_analysis/README.md +++ b/ReforceXY/reward_space_analysis/README.md @@ -320,6 +320,8 @@ where `kernel_function` depends on `exit_attenuation_mode`. See [Exit Attenuatio | `exit_potential_mode` | canonical | Potential release mode | | `exit_potential_decay` | 0.5 | Decay for progressive_release | | `hold_potential_enabled` | true | Enable hold potential Φ | +| `entry_fee_rate` | 0.0 | Entry fee rate | +| `exit_fee_rate` | 0.0 | Exit fee rate | PBRS invariance holds when: `exit_potential_mode=canonical`. diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py index e4e8d03..ee9720f 100644 --- a/ReforceXY/reward_space_analysis/reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py @@ -153,6 +153,8 @@ DEFAULT_MODEL_REWARD_PARAMETERS: RewardParams = { "entry_additive_gain": 1.0, "entry_additive_transform_pnl": "tanh", "entry_additive_transform_duration": "tanh", + "entry_fee_rate": 0.0, + "exit_fee_rate": 0.0, # Exit additive (non-PBRS additive term) "exit_additive_enabled": False, "exit_additive_scale": 1.0, @@ -196,6 +198,8 @@ DEFAULT_MODEL_REWARD_PARAMETERS_HELP: Dict[str, str] = { "entry_additive_gain": "Entry additive gain", "entry_additive_transform_pnl": "Entry PnL transform", "entry_additive_transform_duration": "Entry duration transform", + "entry_fee_rate": "Entry fee rate", + "exit_fee_rate": "Exit fee rate", "exit_additive_enabled": "Enable exit additive", "exit_additive_scale": "Exit additive scale", "exit_additive_gain": "Exit additive gain", @@ -233,6 +237,8 @@ _PARAMETER_BOUNDS: Dict[str, Dict[str, float]] = { "hold_potential_gain": {"min": 0.0}, "entry_additive_scale": {"min": 0.0}, "entry_additive_gain": {"min": 0.0}, + "entry_fee_rate": {"min": 0.0, "max": 0.1}, + "exit_fee_rate": {"min": 0.0, "max": 0.1}, "exit_additive_scale": {"min": 0.0}, "exit_additive_gain": {"min": 0.0}, } @@ -1174,11 +1180,16 @@ def calculate_reward( is_neutral = context.position == Positions.Neutral and context.action == Actions.Neutral if is_entry: - next_pnl = current_pnl next_duration_ratio = 0.0 + if context.action == Actions.Long_enter: + next_pnl = _compute_entry_unrealized_pnl_estimate(Positions.Long, params) + elif context.action == Actions.Short_enter: + next_pnl = _compute_entry_unrealized_pnl_estimate(Positions.Short, params) + else: + next_pnl = current_pnl elif is_hold: next_duration_ratio = _compute_duration_ratio( - context.trade_duration + 1, max_trade_duration_candles + context.trade_duration, max_trade_duration_candles ) # Optionally simulate unrealized PnL during holds to feed Φ(s) if _get_bool_param(params, "unrealized_pnl", False): @@ -1468,10 +1479,16 @@ def simulate_samples( position = Positions.Long trade_duration = 0 idle_duration = 0 + pnl = _compute_entry_unrealized_pnl_estimate(Positions.Long, params) + max_unrealized_profit = pnl + min_unrealized_profit = pnl elif action == Actions.Short_enter and short_allowed: position = Positions.Short trade_duration = 0 idle_duration = 0 + pnl = _compute_entry_unrealized_pnl_estimate(Positions.Short, params) + max_unrealized_profit = pnl + min_unrealized_profit = pnl else: idle_duration = 0 if action == Actions.Neutral: @@ -2773,6 +2790,66 @@ def _get_potential_gamma(params: RewardParams) -> float: # === PBRS IMPLEMENTATION === +def _compute_entry_unrealized_pnl_estimate(next_position: Positions, params: RewardParams) -> float: + """Estimate immediate unrealized PnL after entry fees. + + For Long entry: + current_price = open * (1 - exit_fee_rate) + last_trade_price = open * (1 + entry_fee_rate) + pnl = (current_price - last_trade_price) / last_trade_price + + For Short entry: + current_price = open * (1 + entry_fee_rate) + last_trade_price = open * (1 - exit_fee_rate) + pnl = (last_trade_price - current_price) / last_trade_price + """ + + entry_fee_rate = _get_float_param( + params, + "entry_fee_rate", + DEFAULT_MODEL_REWARD_PARAMETERS.get("entry_fee_rate", 0.0), + ) + exit_fee_rate = _get_float_param( + params, + "exit_fee_rate", + DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_fee_rate", 0.0), + ) + + if not np.isfinite(entry_fee_rate): + entry_fee_rate = 0.0 + if not np.isfinite(exit_fee_rate): + exit_fee_rate = 0.0 + + entry_fee_bounds = _PARAMETER_BOUNDS.get("entry_fee_rate", {"min": 0.0, "max": 1.0}) + exit_fee_bounds = _PARAMETER_BOUNDS.get("exit_fee_rate", {"min": 0.0, "max": 1.0}) + + entry_fee_min = float(entry_fee_bounds.get("min", 0.0)) + entry_fee_max = float(entry_fee_bounds.get("max", 1.0)) + exit_fee_min = float(exit_fee_bounds.get("min", 0.0)) + exit_fee_max = float(exit_fee_bounds.get("max", 1.0)) + + entry_fee_rate = float(np.clip(entry_fee_rate, entry_fee_min, entry_fee_max)) + exit_fee_rate = float(np.clip(exit_fee_rate, exit_fee_min, exit_fee_max)) + + current_open = 1.0 + next_pnl = 0.0 + + if next_position == Positions.Long: + current_price = current_open * (1.0 - exit_fee_rate) + last_trade_price = current_open * (1.0 + entry_fee_rate) + if last_trade_price != 0.0 and np.isfinite(last_trade_price): + next_pnl = (current_price - last_trade_price) / last_trade_price + elif next_position == Positions.Short: + current_price = current_open * (1.0 + entry_fee_rate) + last_trade_price = current_open * (1.0 - exit_fee_rate) + if last_trade_price != 0.0 and np.isfinite(last_trade_price): + next_pnl = (last_trade_price - current_price) / last_trade_price + + if not np.isfinite(next_pnl): + return 0.0 + return float(next_pnl) + + def _compute_hold_potential( pnl: float, pnl_target: float, diff --git a/ReforceXY/reward_space_analysis/tests/.docstring_template.md b/ReforceXY/reward_space_analysis/tests/.docstring_template.md index 0551814..2c8e705 100644 --- a/ReforceXY/reward_space_analysis/tests/.docstring_template.md +++ b/ReforceXY/reward_space_analysis/tests/.docstring_template.md @@ -199,13 +199,12 @@ def test_property_holds_for_all_inputs(self): ### Regression Test ```python -def test_bug_fix_issue_123_no_regression(self): +def test_issue_123_regression_is_prevented(self): """Regression test for Issue #123: [brief description]. - Ensures fix for [bug description] remains effective. - Bug manifested when [conditions]; this test reproduces those conditions. + Validates behavior for [conditions] and asserts the expected outcome. - **Fixed in:** PR #456 (commit abc1234) + **Tracking:** Issue #123 """ ``` diff --git a/ReforceXY/reward_space_analysis/tests/README.md b/ReforceXY/reward_space_analysis/tests/README.md index c3d1597..a3605a5 100644 --- a/ReforceXY/reward_space_analysis/tests/README.md +++ b/ReforceXY/reward_space_analysis/tests/README.md @@ -194,7 +194,7 @@ Columns: | statistics-constant-dist-widened-ci-113a | statistics | Non-strict: widened CI with warning | statistics/test_statistics.py:533 | Test docstring labels "Invariant 113 (non-strict)" | | statistics-constant-dist-strict-omit-113b | statistics | Strict: omit metrics (no widened CI) | statistics/test_statistics.py:565 | Test docstring labels "Invariant 113 (strict)" | | statistics-fallback-diagnostics-115 | statistics | Fallback diagnostics constant distribution (qq_r2=1.0 etc.) | statistics/test_statistics.py:190 | Docstring line | -| robustness-exit-pnl-only-117 | robustness | Only exit actions have non-zero PnL | robustness/test_robustness.py:126 | Newly assigned ID (previously unnumbered) | +| robustness-exit-pnl-only-117 | robustness | Only exit actions have non-zero PnL | robustness/test_robustness.py:126 | Newly assigned ID | | pbrs-absence-shift-placeholder-118 | pbrs | Placeholder shift line present (absence displayed) | pbrs/test_pbrs.py:979 | Ensures placeholder appears when shaping shift absent | | components-pbrs-breakdown-fields-119 | components | PBRS breakdown fields finite and mathematically aligned | components/test_reward_components.py:454 | Tests base_reward, pbrs_delta, invariance_correction fields and their alignment | | integration-pbrs-metrics-section-120 | integration | PBRS Metrics section present in report with tracing metrics | integration/test_report_formatting.py:156 | Verifies PBRS Metrics (Tracing) subsection rendering in statistical_analysis.md | diff --git a/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py b/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py index ef7b356..fb00325 100644 --- a/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py +++ b/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py @@ -13,12 +13,16 @@ from reward_space_analysis import ( DEFAULT_IDLE_DURATION_MULTIPLIER, DEFAULT_MODEL_REWARD_PARAMETERS, PBRS_INVARIANCE_TOL, + Actions, + Positions, _compute_entry_additive, + _compute_entry_unrealized_pnl_estimate, _compute_exit_additive, _compute_exit_potential, _compute_hold_potential, _get_float_param, apply_potential_shaping, + calculate_reward, get_max_idle_duration_candles, simulate_samples, validate_reward_parameters, @@ -485,6 +489,153 @@ class TestPBRS(RewardSpaceTestBase): "Unexpected total difference under gamma NaN fallback", ) + def test_calculate_reward_entry_next_pnl_fee_aware(self): + """calculate_reward() entry PBRS uses fee-aware next_pnl estimate.""" + params = self.base_params( + exit_potential_mode="non_canonical", + hold_potential_enabled=True, + entry_additive_enabled=False, + exit_additive_enabled=False, + potential_gamma=0.9, + entry_fee_rate=0.001, + exit_fee_rate=0.001, + ) + + for action in (Actions.Long_enter, Actions.Short_enter): + ctx = self.make_ctx( + position=Positions.Neutral, action=action, pnl=0.0, trade_duration=0 + ) + breakdown = calculate_reward( + ctx, + params, + base_factor=PARAMS.BASE_FACTOR, + profit_aim=PARAMS.PROFIT_AIM, + risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, + short_allowed=True, + action_masking=True, + prev_potential=0.0, + ) + self.assertTrue(np.isfinite(breakdown.next_potential)) + # With any nonzero fees, immediate unrealized pnl should be negative. + self.assertLess( + breakdown.next_potential, + 0.0, + f"Expected negative next_potential on entry for action={action}", + ) + + def test_fee_rates_are_clamped_to_parameter_bounds(self): + """Fee clamping uses _PARAMETER_BOUNDS (max 0.1).""" + cases = [ + ("entry_fee_rate", self.base_params(entry_fee_rate=999.0, exit_fee_rate=0.0)), + ("exit_fee_rate", self.base_params(entry_fee_rate=0.0, exit_fee_rate=999.0)), + ] + + for key, params in cases: + pnl_clamped = _compute_entry_unrealized_pnl_estimate(Positions.Long, params) + pnl_expected = _compute_entry_unrealized_pnl_estimate( + Positions.Long, + {**params, key: 0.1}, + ) + self.assertAlmostEqualFloat( + pnl_clamped, + pnl_expected, + tolerance=TOLERANCE.IDENTITY_STRICT, + msg=f"Expected {key} values above max to clamp to 0.1", + ) + + def test_simulate_samples_initializes_pnl_on_entry(self): + """simulate_samples() sets in-position pnl to entry fee estimate.""" + params = self.base_params( + exit_potential_mode="non_canonical", + hold_potential_enabled=True, + entry_additive_enabled=False, + exit_additive_enabled=False, + entry_fee_rate=0.001, + exit_fee_rate=0.001, + ) + + df = simulate_samples( + num_samples=50, + seed=1, + params=params, + base_factor=PARAMS.BASE_FACTOR, + profit_aim=PARAMS.PROFIT_AIM, + risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, + max_duration_ratio=1.0, + trading_mode="futures", + pnl_base_std=0.0, + pnl_duration_vol_scale=0.0, + ) + + enter_rows = df[df["action"] == float(Actions.Long_enter.value)] + self.assertGreater(len(enter_rows), 0, "Expected at least one Long_enter in sample") + + enter_pos = df.reset_index(drop=True) + enter_mask = enter_pos["action"].to_numpy() == float(Actions.Long_enter.value) + enter_positions = np.flatnonzero(enter_mask) + first_enter_pos = int(enter_positions[0]) + next_pos = first_enter_pos + 1 + + self.assertLess(next_pos, len(enter_pos), "Sample must include post-entry step") + self.assertEqual( + float(enter_pos.iloc[next_pos]["position"]), + float(Positions.Long.value), + "Expected Long position immediately after Long_enter", + ) + + expected_pnl = _compute_entry_unrealized_pnl_estimate(Positions.Long, params) + post_entry_pnl = float(enter_pos.iloc[next_pos]["pnl"]) + self.assertAlmostEqualFloat( + post_entry_pnl, + expected_pnl, + tolerance=TOLERANCE.IDENTITY_STRICT, + msg="Expected pnl after entry to match entry fee estimate", + ) + + def test_calculate_reward_hold_uses_current_duration_ratio(self): + """calculate_reward() hold next_duration_ratio uses trade_duration.""" + params = self.base_params( + exit_potential_mode="non_canonical", + hold_potential_enabled=True, + entry_additive_enabled=False, + exit_additive_enabled=False, + potential_gamma=0.9, + ) + + trade_duration = 5 + max_trade_duration_candles = 10 + + ctx = self.make_ctx( + position=Positions.Long, + action=Actions.Neutral, + pnl=0.01, + trade_duration=trade_duration, + ) + + breakdown = calculate_reward( + ctx, + {**params, "max_trade_duration_candles": max_trade_duration_candles}, + base_factor=PARAMS.BASE_FACTOR, + profit_aim=PARAMS.PROFIT_AIM, + risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, + short_allowed=True, + action_masking=True, + prev_potential=0.0, + ) + + expected_next_potential = _compute_hold_potential( + pnl=ctx.pnl, + pnl_target=PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO, + duration_ratio=(trade_duration / max_trade_duration_candles), + params=params, + ) + self.assertAlmostEqualFloat( + breakdown.next_potential, + expected_next_potential, + tolerance=TOLERANCE.IDENTITY_RELAXED, + msg="Hold next_potential mismatch (duration ratio mismatch)", + ) + # ---------------- Validation parameter batch & relaxed aggregation ---------------- # def test_validate_reward_parameters_batch_and_relaxed_aggregation(self): diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index 3086750..c69f63d 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -1793,6 +1793,29 @@ class MyRLEnv(Base5ActionRLEnv): return Positions.Neutral return self._position + def _get_entry_unrealized_profit(self, next_position: Positions) -> float: + current_open = self.prices.iloc[self._current_tick].open + if not isinstance(current_open, (int, float, np.floating)) or not np.isfinite( + current_open + ): + return 0.0 + + next_pnl = 0.0 + if next_position == Positions.Long: + current_price = self.add_exit_fee(current_open) + last_trade_price = self.add_entry_fee(current_open) + if not np.isclose(last_trade_price, 0.0) and np.isfinite(last_trade_price): + next_pnl = (current_price - last_trade_price) / last_trade_price + elif next_position == Positions.Short: + current_price = self.add_entry_fee(current_open) + last_trade_price = self.add_exit_fee(current_open) + if not np.isclose(last_trade_price, 0.0) and np.isfinite(last_trade_price): + next_pnl = (last_trade_price - current_price) / last_trade_price + + if not np.isfinite(next_pnl): + return 0.0 + return float(next_pnl) + def _get_next_transition_state( self, action: int, @@ -1801,24 +1824,28 @@ class MyRLEnv(Base5ActionRLEnv): ) -> Tuple[Positions, int, float]: """Compute next transition state tuple.""" next_position = self._get_next_position(action) + # Entry if self._position == Positions.Neutral and next_position in ( Positions.Long, Positions.Short, ): - return next_position, 0, pnl + return next_position, 0, self._get_entry_unrealized_profit(next_position) + # Exit if ( self._position in (Positions.Long, Positions.Short) and next_position == Positions.Neutral ): return next_position, 0, 0.0 + # Hold if self._position in (Positions.Long, Positions.Short) and next_position in ( Positions.Long, Positions.Short, ): - return next_position, int(trade_duration) + 1, pnl + return next_position, int(trade_duration), pnl + # Neutral self-loop return next_position, 0, 0.0