]> Piment Noir Git Repositories - freqai-strategies.git/commitdiff
fix(ReforceXY): make the data generation duration-aware
authorJérôme Benoit <jerome.benoit@piment-noir.org>
Tue, 23 Dec 2025 17:02:49 +0000 (18:02 +0100)
committerJérôme Benoit <jerome.benoit@piment-noir.org>
Tue, 23 Dec 2025 17:02:49 +0000 (18:02 +0100)
Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
ReforceXY/reward_space_analysis/reward_space_analysis.py
ReforceXY/reward_space_analysis/tests/api/test_api_helpers.py
ReforceXY/reward_space_analysis/tests/components/test_reward_components.py
ReforceXY/reward_space_analysis/tests/helpers/assertions.py
ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py
ReforceXY/user_data/freqaimodels/ReforceXY.py

index 4f34e1cd14835e111f400634771e5b98b7f4f912..21ea4ea121aafd15b8e4a432130899088b0789e1 100644 (file)
@@ -62,9 +62,16 @@ class Positions(Enum):
 
 # Mathematical constants pre-computed for performance
 _LOG_2 = math.log(2.0)
+
 DEFAULT_IDLE_DURATION_MULTIPLIER = 4
 
-# Tolerance for PBRS invariance classification (canonical if |Σ shaping| < PBRS_INVARIANCE_TOL)
+# Tolerance for PBRS invariance classification.
+#
+# When `reward_invariance_correction` is available (reward_shaping - reward_pbrs_delta),
+# canonical PBRS should satisfy max|correction| < PBRS_INVARIANCE_TOL.
+#
+# When that diagnostic column is not available (e.g., reporting from partial datasets),
+# we fall back to the weaker heuristic |Σ shaping| < PBRS_INVARIANCE_TOL.
 PBRS_INVARIANCE_TOL: float = 1e-6
 # Default discount factor γ for potential-based reward shaping
 POTENTIAL_GAMMA_DEFAULT: float = 0.95
@@ -1195,7 +1202,7 @@ def calculate_reward(
 
     pnl_target = float(profit_aim * risk_reward_ratio)
 
-    idle_factor = factor * pnl_target / 4.0
+    idle_factor = factor * (profit_aim / 4.0)
     hold_factor = idle_factor
 
     max_trade_duration_candles = _get_int_param(
@@ -1380,26 +1387,118 @@ def calculate_reward(
     return breakdown
 
 
+def _duration_hazard_probability(
+    *,
+    duration_ratio: float,
+    base_probability: float,
+    overtime_multiplier: float,
+    max_probability: float,
+) -> float:
+    """Compute a bounded hazard probability keyed on a duration ratio.
+
+    Behavior:
+    - duration_ratio <= 1 -> returns base_probability
+    - duration_ratio > 1 -> increases linearly with overtime
+
+    Notes
+    -----
+    This is used for:
+    - exit probability when holding past max trade duration
+    - entry probability when idling past max idle duration
+    """
+
+    if not np.isfinite(duration_ratio):
+        return float(np.clip(base_probability, 0.0, max_probability))
+
+    overtime = max(0.0, float(duration_ratio - 1.0))
+    probability = base_probability * (1.0 + overtime_multiplier * overtime)
+    return float(np.clip(probability, 0.0, max_probability))
+
+
+_SAMPLE_ENTRY_PROBABILITY_MARGIN = 0.4
+_SAMPLE_ENTRY_PROBABILITY_SPOT = 0.3
+_SAMPLE_DURATION_HAZARD_OVERTIME_MULTIPLIER = 4.0
+_SAMPLE_DURATION_HAZARD_MAX_PROBABILITY = 0.9
+_SAMPLE_EXIT_PROBABILITY_MIN = 0.002
+_SAMPLE_EXIT_PROBABILITY_MAX = 0.2
+
+
+def _sampling_probabilities(
+    position: Positions,
+    *,
+    short_allowed: bool,
+    trade_duration: int,
+    max_trade_duration_candles: int,
+    idle_duration: int,
+    max_idle_duration_candles: int,
+) -> tuple[float, float, float]:
+    if position == Positions.Neutral:
+        base_entry_prob = (
+            _SAMPLE_ENTRY_PROBABILITY_MARGIN if short_allowed else _SAMPLE_ENTRY_PROBABILITY_SPOT
+        )
+        idle_ratio = idle_duration / max(1, int(max_idle_duration_candles))
+        entry_prob = _duration_hazard_probability(
+            duration_ratio=idle_ratio,
+            base_probability=base_entry_prob,
+            overtime_multiplier=_SAMPLE_DURATION_HAZARD_OVERTIME_MULTIPLIER,
+            max_probability=_SAMPLE_DURATION_HAZARD_MAX_PROBABILITY,
+        )
+        neutral_prob = max(0.0, 1.0 - entry_prob)
+        return float(entry_prob), float("nan"), float(neutral_prob)
+
+    duration_ratio = _compute_duration_ratio(trade_duration, max_trade_duration_candles)
+
+    base_exit_prob = 1.0 / max(1, int(max_trade_duration_candles))
+    base_exit_prob = float(
+        np.clip(base_exit_prob, _SAMPLE_EXIT_PROBABILITY_MIN, _SAMPLE_EXIT_PROBABILITY_MAX)
+    )
+
+    exit_prob = _duration_hazard_probability(
+        duration_ratio=duration_ratio,
+        base_probability=base_exit_prob,
+        overtime_multiplier=_SAMPLE_DURATION_HAZARD_OVERTIME_MULTIPLIER,
+        max_probability=_SAMPLE_DURATION_HAZARD_MAX_PROBABILITY,
+    )
+    return float("nan"), float(exit_prob), float("nan")
+
+
 def _sample_action(
     position: Positions,
     rng: random.Random,
     *,
     short_allowed: bool,
-) -> Actions:
+    trade_duration: int,
+    max_trade_duration_candles: int,
+    idle_duration: int,
+    max_idle_duration_candles: int,
+) -> tuple[Actions, float, float, float]:
+    entry_prob, exit_prob, neutral_prob = _sampling_probabilities(
+        position,
+        short_allowed=short_allowed,
+        trade_duration=trade_duration,
+        max_trade_duration_candles=max_trade_duration_candles,
+        idle_duration=idle_duration,
+        max_idle_duration_candles=max_idle_duration_candles,
+    )
+
     if position == Positions.Neutral:
         if short_allowed:
             choices = [Actions.Neutral, Actions.Long_enter, Actions.Short_enter]
-            weights = [0.6, 0.2, 0.2]
+            weights = [neutral_prob, entry_prob * 0.5, entry_prob * 0.5]
         else:
             choices = [Actions.Neutral, Actions.Long_enter]
-            weights = [0.7, 0.3]
-    elif position == Positions.Long:
+            weights = [neutral_prob, entry_prob]
+        action = rng.choices(choices, weights=weights, k=1)[0]
+        return action, entry_prob, exit_prob, neutral_prob
+
+    if position == Positions.Long:
         choices = [Actions.Neutral, Actions.Long_exit]
-        weights = [0.55, 0.45]
     else:  # Positions.Short
         choices = [Actions.Neutral, Actions.Short_exit]
-        weights = [0.55, 0.45]
-    return rng.choices(choices, weights=weights, k=1)[0]
+
+    weights = [1.0 - exit_prob, exit_prob]
+    action = rng.choices(choices, weights=weights, k=1)[0]
+    return action, entry_prob, exit_prob, neutral_prob
 
 
 def parse_overrides(overrides: Iterable[str]) -> RewardParams:
@@ -1531,7 +1630,15 @@ def simulate_samples(
             max_unrealized_profit = 0.0
             min_unrealized_profit = 0.0
 
-        action = _sample_action(position, rng, short_allowed=short_allowed)
+        action, sample_entry_prob, sample_exit_prob, sample_neutral_prob = _sample_action(
+            position,
+            rng,
+            short_allowed=short_allowed,
+            trade_duration=trade_duration,
+            max_trade_duration_candles=max_trade_duration_candles,
+            idle_duration=idle_duration,
+            max_idle_duration_candles=max_idle_duration_candles,
+        )
 
         context = RewardContext(
             pnl=pnl,
@@ -1567,6 +1674,10 @@ def simulate_samples(
                 "idle_ratio": idle_ratio,
                 "position": float(context.position.value),
                 "action": int(context.action.value),
+                # Sampling diagnostics
+                "sample_entry_prob": sample_entry_prob,
+                "sample_exit_prob": sample_exit_prob,
+                "sample_neutral_prob": sample_neutral_prob,
                 "reward": breakdown.total,
                 "reward_invalid": breakdown.invalid_penalty,
                 "reward_idle": breakdown.idle_penalty,
@@ -3887,10 +3998,23 @@ def write_complete_statistical_analysis(
                 exit_additive_enabled_raw,
             )
 
-            # True invariance requires canonical mode AND no effective additives.
+            # True PBRS invariance classification:
+            # - Canonical requires canonical mode AND no effective additives.
+            # - When `reward_invariance_correction` is present, we use it as the primary
+            #   diagnostic (reward_shaping - reward_pbrs_delta).
+            # - Otherwise, we fall back to the weaker heuristic |Σ shaping| ≈ 0.
             is_theoretically_invariant = exit_potential_mode == "canonical" and not (
                 entry_additive_effective or exit_additive_effective
             )
+
+            has_inv_correction = "reward_invariance_correction" in df.columns
+            max_abs_inv_correction: float | None
+            if has_inv_correction:
+                max_abs_inv_correction = float(df["reward_invariance_correction"].abs().max())
+                correction_near_zero = max_abs_inv_correction < PBRS_INVARIANCE_TOL
+            else:
+                max_abs_inv_correction = None
+                correction_near_zero = None
             shaping_near_zero = abs(total_shaping) < PBRS_INVARIANCE_TOL
 
             suppression_note = ""
@@ -3903,18 +4027,33 @@ def write_complete_statistical_analysis(
 
             # Prepare invariance summary markdown block
             if is_theoretically_invariant:
-                if shaping_near_zero:
+                if correction_near_zero is True:
                     invariance_status = "✅ Canonical"
                     invariance_note = (
-                        "Theoretical invariance preserved (canonical mode, no additives, Σ≈0)."
+                        "Theoretical invariance preserved (canonical mode, no additives, max|correction|≈0)."
                         + suppression_note
                     )
-                else:
+                elif correction_near_zero is False:
                     invariance_status = "⚠️ Canonical (with warning)"
                     invariance_note = (
-                        f"Canonical mode but unexpected shaping sum = {total_shaping:.6f}."
-                        + suppression_note
+                        "Canonical mode but invariance correction is non-zero"
+                        f" (max|correction|={max_abs_inv_correction:.6e})." + suppression_note
                     )
+                else:
+                    # Fallback: without invariance correction, use Σ shaping as a heuristic.
+                    if shaping_near_zero:
+                        invariance_status = "✅ Canonical"
+                        invariance_note = (
+                            "Theoretical invariance preserved (canonical mode, no additives, Σ≈0)."
+                            + suppression_note
+                        )
+                    else:
+                        invariance_status = "⚠️ Canonical (with warning)"
+                        invariance_note = (
+                            "Canonical mode but Σ shaping is non-zero"
+                            f" (Σ={total_shaping:.6f}; correction column unavailable)."
+                            + suppression_note
+                        )
             else:
                 invariance_status = "❌ Non-canonical"
                 reasons = []
@@ -4156,17 +4295,24 @@ def write_complete_statistical_analysis(
         else:
             f.write("6. **Distribution Shift** - Not performed (no real episodes provided)\n")
         if "reward_shaping" in df.columns:
-            _total_shaping = df["reward_shaping"].sum()
-            _canonical = abs(_total_shaping) < PBRS_INVARIANCE_TOL
-            f.write(
-                "7. **PBRS Invariance** - "
-                + (
+            _total_shaping = float(df["reward_shaping"].sum())
+            if "reward_invariance_correction" in df.columns:
+                _max_abs_corr = float(df["reward_invariance_correction"].abs().max())
+                _canonical = _max_abs_corr < PBRS_INVARIANCE_TOL
+                _pbrs_summary = (
+                    "Canonical (max|correction| ≈ 0)"
+                    if _canonical
+                    else f"Canonical (with warning; max|correction|={_max_abs_corr:.6e})"
+                )
+            else:
+                _canonical = abs(_total_shaping) < PBRS_INVARIANCE_TOL
+                _pbrs_summary = (
                     "Canonical (Σ shaping ≈ 0)"
                     if _canonical
-                    else f"Non-canonical (Σ shaping = {_total_shaping:.6f})"
+                    else f"Canonical (with warning; Σ shaping={_total_shaping:.6f})"
                 )
-                + "\n"
-            )
+
+            f.write("7. **PBRS Invariance** - " + _pbrs_summary + "\n")
         f.write("\n")
         f.write("**Generated Files:**\n")
         f.write("- `reward_samples.csv` - Raw synthetic samples\n")
index caf89860e36cde5654b7ca104dedbbf5cd854e49..d0f17bcb0a795898c57efe5af3dbaa1450f6858b 100644 (file)
@@ -2,6 +2,7 @@
 """Tests for public API and helper functions."""
 
 import math
+import random
 import tempfile
 import unittest
 from pathlib import Path
@@ -19,6 +20,7 @@ from reward_space_analysis import (
     _get_float_param,
     _get_int_param,
     _get_str_param,
+    _sample_action,
     build_argument_parser,
     calculate_reward,
     parse_overrides,
@@ -35,6 +37,47 @@ pytestmark = pytest.mark.api
 class TestAPIAndHelpers(RewardSpaceTestBase):
     """Public API + helper utility tests."""
 
+    def test_sample_action_idle_hazard_increases_entry_rate(self):
+        """_sample_action() increases entry probability past idle cap.
+
+        This guards the synthetic simulator against unrealistically long neutral streaks.
+        The test is statistical but deterministic via fixed RNG seeds.
+        """
+
+        max_idle_duration_candles = 20
+        max_trade_duration_candles = 100
+
+        def sample_entry_rate(*, idle_duration: int, short_allowed: bool) -> float:
+            rng = random.Random(SEEDS.REPRODUCIBILITY)
+            draws = 2000
+            entries = 0
+            for _ in range(draws):
+                action = _sample_action(
+                    Positions.Neutral,
+                    rng,
+                    short_allowed=short_allowed,
+                    trade_duration=0,
+                    max_trade_duration_candles=max_trade_duration_candles,
+                    idle_duration=idle_duration,
+                    max_idle_duration_candles=max_idle_duration_candles,
+                )
+                if action in (Actions.Long_enter, Actions.Short_enter):
+                    entries += 1
+            return entries / draws
+
+        low_idle_rate = sample_entry_rate(idle_duration=0, short_allowed=True)
+        high_idle_rate = sample_entry_rate(idle_duration=60, short_allowed=True)
+
+        self.assertGreater(
+            high_idle_rate,
+            low_idle_rate,
+            "Entry rate should increase after exceeding max idle duration",
+        )
+
+        low_idle_rate_spot = sample_entry_rate(idle_duration=0, short_allowed=False)
+        high_idle_rate_spot = sample_entry_rate(idle_duration=60, short_allowed=False)
+        self.assertGreater(high_idle_rate_spot, low_idle_rate_spot)
+
     def test_parse_overrides(self):
         """Test parse overrides."""
         overrides = ["alpha=1.5", "mode=linear", "limit=42"]
@@ -117,6 +160,9 @@ class TestAPIAndHelpers(RewardSpaceTestBase):
             "idle_duration",
             "position",
             "action",
+            "sample_entry_prob",
+            "sample_exit_prob",
+            "sample_neutral_prob",
             "reward",
             "reward_invalid",
             "reward_idle",
@@ -125,6 +171,30 @@ class TestAPIAndHelpers(RewardSpaceTestBase):
         ]:
             self.assertIn(col, df_margin.columns)
 
+    def test_simulate_samples_sampling_probabilities_are_bounded(self):
+        """simulate_samples() exposes bounded sampling probabilities."""
+
+        df = simulate_samples(
+            params=self.base_params(max_trade_duration_candles=40),
+            num_samples=200,
+            seed=SEEDS.SMOKE_TEST,
+            base_factor=PARAMS.BASE_FACTOR,
+            profit_aim=PARAMS.PROFIT_AIM,
+            risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
+            max_duration_ratio=1.5,
+            trading_mode="margin",
+            pnl_base_std=PARAMS.PNL_STD,
+            pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE,
+        )
+
+        for col in ["sample_entry_prob", "sample_exit_prob", "sample_neutral_prob"]:
+            self.assertIn(col, df.columns)
+
+        values = (
+            df[["sample_entry_prob", "sample_exit_prob", "sample_neutral_prob"]].stack().dropna()
+        )
+        self.assertTrue(((values >= 0.0) & (values <= 0.9)).all())
+
     def test_to_bool(self):
         """Test _to_bool with various inputs."""
         df1 = simulate_samples(
index f1399520b944b0981f9dedc2472ad3ccb9c50f81..18a793050ec8756a6636126725546e759d0fdc3d 100644 (file)
@@ -515,11 +515,11 @@ class TestRewardComponents(RewardSpaceTestBase):
         idle_penalty_scale = _get_float_param(params, "idle_penalty_scale", 0.5)
         idle_penalty_power = _get_float_param(params, "idle_penalty_power", 1.025)
         factor = _get_float_param(params, "base_factor", float(base_factor))
-        idle_factor = factor * (profit_aim * risk_reward_ratio) / 4.0
+        idle_factor = factor * (profit_aim / 4.0)
         observed_ratio = abs(br_mid.idle_penalty) / (idle_factor * idle_penalty_scale)
         if observed_ratio > 0:
-            implied_D = 120 / observed_ratio ** (1 / idle_penalty_power)
-            self.assertAlmostEqualFloat(implied_D, 400.0, tolerance=20.0)
+            implied_max_idle_duration_candles = 120 / observed_ratio ** (1 / idle_penalty_power)
+            self.assertAlmostEqualFloat(implied_max_idle_duration_candles, 400.0, tolerance=20.0)
 
     # Owns invariant: components-pbrs-breakdown-fields-119
     def test_pbrs_breakdown_fields_finite_and_aligned(self):
index 7eff926390bdc39fe3468da2b537aeac0ec84916..3a124fdb61f1c8f7f8476e189c852bb47debfb86 100644 (file)
@@ -1194,18 +1194,22 @@ def assert_pbrs_invariance_report_classification(
 
 
 def assert_pbrs_canonical_sum_within_tolerance(test_case, total_shaping: float, tolerance: float):
-    """Validate cumulative PBRS shaping satisfies canonical bound.
+    """Validate cumulative shaping is small.
 
-    For canonical PBRS, the cumulative reward shaping across a trajectory
-    must be near zero (within tolerance). This is a core PBRS invariant.
+    In canonical PBRS, the per-step shaping corresponds to a telescoping term.
+    Over a full, closed episode it may cancel, but across many partial trajectories
+    or with resets/discounting it does not need to be exactly zero.
+
+    This helper remains as a *diagnostic* check for constructed test cases that
+    intentionally enforce small cumulative shaping.
 
     Args:
-        test_case: Test case instance with assertion methods
-        total_shaping: Total cumulative reward shaping value
-        tolerance: Maximum allowed absolute deviation from zero
+        test_case: Test case instance with assertion methods.
+        total_shaping: Total cumulative shaping value.
+        tolerance: Maximum allowed absolute deviation from zero.
 
     Example:
-        assert_pbrs_canonical_sum_within_tolerance(self, 5e-10, 1e-09)
+        assert_pbrs_canonical_sum_within_tolerance(self, 5e-10, 1e-9)
     """
     test_case.assertLess(abs(total_shaping), tolerance)
 
@@ -1213,20 +1217,18 @@ def assert_pbrs_canonical_sum_within_tolerance(test_case, total_shaping: float,
 def assert_non_canonical_shaping_exceeds(
     test_case, total_shaping: float, tolerance_multiple: float
 ):
-    """Validate non-canonical PBRS shaping exceeds threshold.
+    """Validate non-trivial shaping magnitude.
 
-    For non-canonical PBRS (e.g., with additives), the cumulative shaping
-    should exceed a scaled tolerance threshold, indicating violation of
-    the canonical PBRS invariant.
+    In non-canonical PBRS modes or when additives are effective, the shaping
+    trajectory is expected to deviate from the pure telescoping term more often.
 
-    Args:
-        test_case: Test case instance with assertion methods
-        total_shaping: Total cumulative reward shaping value
-        tolerance_multiple: Threshold value (typically scaled tolerance)
+    Note: cumulative shaping being large is not a strict correctness proof; it is
+    a useful smoke-signal for test fixtures that intentionally construct such cases.
 
-    Example:
-        # Expect shaping to exceed 10x tolerance for non-canonical case
-        assert_non_canonical_shaping_exceeds(self, 0.05, 1e-08)
+    Args:
+        test_case: Test case instance with assertion methods.
+        total_shaping: Total cumulative shaping value.
+        tolerance_multiple: Threshold value for the given test fixture.
     """
     test_case.assertGreater(abs(total_shaping), tolerance_multiple)
 
index ce47a5eea1dd6ca520a031e78c177316edbaf37a..101906f68e9c32283fb959fd2d44ae4a769625a0 100644 (file)
@@ -1175,6 +1175,10 @@ class TestPBRS(RewardSpaceTestBase):
             PBRS_INVARIANCE_TOL,
             f"Total shaping {total_shaping} exceeds invariance tolerance",
         )
+        inv_corr_vals = [1.0e-7, -1.0e-7, 2.0e-7]
+        max_abs_corr = float(np.max(np.abs(inv_corr_vals)))
+        self.assertLess(max_abs_corr, PBRS_INVARIANCE_TOL)
+
         n = len(small_vals)
         df = pd.DataFrame(
             {
@@ -1190,6 +1194,7 @@ class TestPBRS(RewardSpaceTestBase):
                 "reward_shaping": small_vals,
                 "reward_entry_additive": [0.0] * n,
                 "reward_exit_additive": [0.0] * n,
+                "reward_invariance_correction": inv_corr_vals,
                 "reward_invalid": np.zeros(n),
                 "duration_ratio": np.random.uniform(0.2, 1.0, n),
                 "idle_ratio": np.zeros(n),
@@ -1225,6 +1230,7 @@ class TestPBRS(RewardSpaceTestBase):
             self.assertAlmostEqual(
                 abs(total_shaping), val_abs, places=TOLERANCE.DECIMAL_PLACES_STRICT
             )
+        self.assertIn("max|correction|≈0", content)
 
     # Non-owning smoke; ownership: robustness/test_robustness.py:35 (robustness-decomposition-integrity-101)
     @pytest.mark.smoke
@@ -1239,6 +1245,10 @@ class TestPBRS(RewardSpaceTestBase):
         small_vals = [1.0e-7, -2.0e-7, 3.0e-7]  # sum = 2.0e-7 < tolerance
         total_shaping = float(sum(small_vals))
         self.assertLess(abs(total_shaping), PBRS_INVARIANCE_TOL)
+        inv_corr_vals = [1.0e-7, -1.0e-7, 2.0e-7]
+        max_abs_corr = float(np.max(np.abs(inv_corr_vals)))
+        self.assertLess(max_abs_corr, PBRS_INVARIANCE_TOL)
+
         n = len(small_vals)
         df = pd.DataFrame(
             {
@@ -1254,6 +1264,7 @@ class TestPBRS(RewardSpaceTestBase):
                 "reward_shaping": small_vals,
                 "reward_entry_additive": [0.0] * n,
                 "reward_exit_additive": [0.0] * n,
+                "reward_invariance_correction": inv_corr_vals,
                 "reward_invalid": np.zeros(n),
                 "duration_ratio": np.random.uniform(0.2, 1.0, n),
                 "idle_ratio": np.zeros(n),
@@ -1288,11 +1299,16 @@ class TestPBRS(RewardSpaceTestBase):
         self.assertIn("| Exit Additive Effective | False |", content)
 
     def test_pbrs_canonical_warning_report(self):
-        """Canonical mode + no additives but |Σ shaping| > tolerance -> warning classification."""
+        """Canonical mode + no additives but max|invariance_correction| > tolerance -> warning."""
 
-        shaping_vals = [1.2e-4, 1.3e-4, 8.0e-5, -2.0e-5, 1.4e-4]  # sum = 4.5e-4 (> tol)
-        total_shaping = sum(shaping_vals)
+        shaping_vals = [1.2e-4, 1.3e-4, 8.0e-5, -2.0e-5, 1.4e-4]  # Σ not near 0
+        total_shaping = float(sum(shaping_vals))
         self.assertGreater(abs(total_shaping), PBRS_INVARIANCE_TOL)
+
+        inv_corr_vals = [1.0e-4, -2.0e-4, 1.5e-4, -1.2e-4, 7.0e-5]
+        max_abs_corr = float(np.max(np.abs(inv_corr_vals)))
+        self.assertGreater(max_abs_corr, PBRS_INVARIANCE_TOL)
+
         n = len(shaping_vals)
         df = pd.DataFrame(
             {
@@ -1308,6 +1324,7 @@ class TestPBRS(RewardSpaceTestBase):
                 "reward_shaping": shaping_vals,
                 "reward_entry_additive": [0.0] * n,
                 "reward_exit_additive": [0.0] * n,
+                "reward_invariance_correction": inv_corr_vals,
                 "reward_invalid": np.zeros(n),
                 "duration_ratio": np.random.uniform(0.2, 1.2, n),
                 "idle_ratio": np.zeros(n),
@@ -1335,8 +1352,8 @@ class TestPBRS(RewardSpaceTestBase):
         assert_pbrs_invariance_report_classification(
             self, content, "Canonical (with warning)", expect_additives=False
         )
-        expected_sum_fragment = f"{total_shaping:.6f}"
-        self.assertIn(expected_sum_fragment, content)
+        expected_corr_fragment = f"{max_abs_corr:.6e}"
+        self.assertIn(expected_corr_fragment, content)
 
     # Non-owning smoke; ownership: robustness/test_robustness.py:35 (robustness-decomposition-integrity-101)
     @pytest.mark.smoke
index 2b681402e7782e71f71a546ac2cdf7fab1c4cc6e..7ba323d3b2e5c06f5c4aefc4f79f9a628b75dcf2 100644 (file)
@@ -2795,7 +2795,7 @@ class MyRLEnv(Base5ActionRLEnv):
         base_factor = float(
             model_reward_parameters.get("base_factor", ReforceXY.DEFAULT_BASE_FACTOR)
         )
-        idle_factor = base_factor * self._pnl_target / 4.0
+        idle_factor = base_factor * (self.profit_aim / 4.0)
         hold_factor = idle_factor
 
         # 2. Idle penalty