# Mathematical constants pre-computed for performance
_LOG_2 = math.log(2.0)
+
DEFAULT_IDLE_DURATION_MULTIPLIER = 4
-# Tolerance for PBRS invariance classification (canonical if |Σ shaping| < PBRS_INVARIANCE_TOL)
+# Tolerance for PBRS invariance classification.
+#
+# When `reward_invariance_correction` is available (reward_shaping - reward_pbrs_delta),
+# canonical PBRS should satisfy max|correction| < PBRS_INVARIANCE_TOL.
+#
+# When that diagnostic column is not available (e.g., reporting from partial datasets),
+# we fall back to the weaker heuristic |Σ shaping| < PBRS_INVARIANCE_TOL.
PBRS_INVARIANCE_TOL: float = 1e-6
# Default discount factor γ for potential-based reward shaping
POTENTIAL_GAMMA_DEFAULT: float = 0.95
pnl_target = float(profit_aim * risk_reward_ratio)
- idle_factor = factor * pnl_target / 4.0
+ idle_factor = factor * (profit_aim / 4.0)
hold_factor = idle_factor
max_trade_duration_candles = _get_int_param(
return breakdown
+def _duration_hazard_probability(
+    *,
+    duration_ratio: float,
+    base_probability: float,
+    overtime_multiplier: float,
+    max_probability: float,
+) -> float:
+    """Compute a bounded hazard probability keyed on a duration ratio.
+
+    Behavior:
+    - duration_ratio <= 1 -> returns base_probability
+    - duration_ratio > 1 -> increases linearly with overtime
+    - duration_ratio NaN/inf -> falls back to base_probability (clipped)
+
+    All results are clipped to [0.0, max_probability].
+
+    Notes
+    -----
+    This is used for:
+    - exit probability when holding past max trade duration
+    - entry probability when idling past max idle duration
+    """
+
+    # Non-finite ratios (e.g. from a degenerate duration cap) degrade
+    # gracefully to the clipped base rate instead of propagating NaN weights.
+    if not np.isfinite(duration_ratio):
+        return float(np.clip(base_probability, 0.0, max_probability))
+
+    # Only the fraction of the budget exceeded ("overtime") raises the hazard.
+    overtime = max(0.0, float(duration_ratio - 1.0))
+    probability = base_probability * (1.0 + overtime_multiplier * overtime)
+    return float(np.clip(probability, 0.0, max_probability))
+
+
+# Sampling heuristics for the synthetic episode generator.
+# Base per-step entry probability when shorting is allowed (margin/futures).
+_SAMPLE_ENTRY_PROBABILITY_MARGIN = 0.4
+# Base per-step entry probability in long-only (spot) mode.
+_SAMPLE_ENTRY_PROBABILITY_SPOT = 0.3
+# Linear hazard growth per unit of overtime (duration_ratio - 1).
+_SAMPLE_DURATION_HAZARD_OVERTIME_MULTIPLIER = 4.0
+# Hard cap applied to any hazard probability.
+_SAMPLE_DURATION_HAZARD_MAX_PROBABILITY = 0.9
+# Clipping bounds for the base exit probability (~1 / max_trade_duration).
+_SAMPLE_EXIT_PROBABILITY_MIN = 0.002
+_SAMPLE_EXIT_PROBABILITY_MAX = 0.2
+
+
+def _sampling_probabilities(
+    position: Positions,
+    *,
+    short_allowed: bool,
+    trade_duration: int,
+    max_trade_duration_candles: int,
+    idle_duration: int,
+    max_idle_duration_candles: int,
+) -> tuple[float, float, float]:
+    """Return (entry_prob, exit_prob, neutral_prob) for the current state.
+
+    Probabilities that do not apply to the current position are NaN so
+    downstream diagnostics can filter them with dropna():
+    - Neutral position -> (entry_prob, NaN, neutral_prob)
+    - Long/Short position -> (NaN, exit_prob, NaN)
+    """
+    if position == Positions.Neutral:
+        base_entry_prob = (
+            _SAMPLE_ENTRY_PROBABILITY_MARGIN if short_allowed else _SAMPLE_ENTRY_PROBABILITY_SPOT
+        )
+        # max(1, ...) guards against a zero/negative idle duration cap.
+        idle_ratio = idle_duration / max(1, int(max_idle_duration_candles))
+        entry_prob = _duration_hazard_probability(
+            duration_ratio=idle_ratio,
+            base_probability=base_entry_prob,
+            overtime_multiplier=_SAMPLE_DURATION_HAZARD_OVERTIME_MULTIPLIER,
+            max_probability=_SAMPLE_DURATION_HAZARD_MAX_PROBABILITY,
+        )
+        neutral_prob = max(0.0, 1.0 - entry_prob)
+        return float(entry_prob), float("nan"), float(neutral_prob)
+
+    duration_ratio = _compute_duration_ratio(trade_duration, max_trade_duration_candles)
+
+    # Base hazard scales inversely with the duration cap, clipped to sane bounds.
+    base_exit_prob = 1.0 / max(1, int(max_trade_duration_candles))
+    base_exit_prob = float(
+        np.clip(base_exit_prob, _SAMPLE_EXIT_PROBABILITY_MIN, _SAMPLE_EXIT_PROBABILITY_MAX)
+    )
+
+    exit_prob = _duration_hazard_probability(
+        duration_ratio=duration_ratio,
+        base_probability=base_exit_prob,
+        overtime_multiplier=_SAMPLE_DURATION_HAZARD_OVERTIME_MULTIPLIER,
+        max_probability=_SAMPLE_DURATION_HAZARD_MAX_PROBABILITY,
+    )
+    return float("nan"), float(exit_prob), float("nan")
+
+
def _sample_action(
    position: Positions,
    rng: random.Random,
    *,
    short_allowed: bool,
-) -> Actions:
+    trade_duration: int,
+    max_trade_duration_candles: int,
+    idle_duration: int,
+    max_idle_duration_candles: int,
+) -> tuple[Actions, float, float, float]:
+    """Sample one synthetic action from the duration-aware hazard model.
+
+    Returns (action, entry_prob, exit_prob, neutral_prob). Probabilities not
+    applicable to the current position are NaN (see _sampling_probabilities).
+    """
+    entry_prob, exit_prob, neutral_prob = _sampling_probabilities(
+        position,
+        short_allowed=short_allowed,
+        trade_duration=trade_duration,
+        max_trade_duration_candles=max_trade_duration_candles,
+        idle_duration=idle_duration,
+        max_idle_duration_candles=max_idle_duration_candles,
+    )
+
    if position == Positions.Neutral:
        if short_allowed:
            choices = [Actions.Neutral, Actions.Long_enter, Actions.Short_enter]
-            weights = [0.6, 0.2, 0.2]
+            # Entry mass is split evenly between long and short entries.
+            weights = [neutral_prob, entry_prob * 0.5, entry_prob * 0.5]
        else:
            choices = [Actions.Neutral, Actions.Long_enter]
-            weights = [0.7, 0.3]
-    elif position == Positions.Long:
+            weights = [neutral_prob, entry_prob]
+        action = rng.choices(choices, weights=weights, k=1)[0]
+        return action, entry_prob, exit_prob, neutral_prob
+
+    if position == Positions.Long:
        choices = [Actions.Neutral, Actions.Long_exit]
-        weights = [0.55, 0.45]
    else:  # Positions.Short
        choices = [Actions.Neutral, Actions.Short_exit]
-        weights = [0.55, 0.45]
-    return rng.choices(choices, weights=weights, k=1)[0]
+
+    # In-position states only choose between holding and exiting.
+    weights = [1.0 - exit_prob, exit_prob]
+    action = rng.choices(choices, weights=weights, k=1)[0]
+    return action, entry_prob, exit_prob, neutral_prob
def parse_overrides(overrides: Iterable[str]) -> RewardParams:
max_unrealized_profit = 0.0
min_unrealized_profit = 0.0
- action = _sample_action(position, rng, short_allowed=short_allowed)
+ action, sample_entry_prob, sample_exit_prob, sample_neutral_prob = _sample_action(
+ position,
+ rng,
+ short_allowed=short_allowed,
+ trade_duration=trade_duration,
+ max_trade_duration_candles=max_trade_duration_candles,
+ idle_duration=idle_duration,
+ max_idle_duration_candles=max_idle_duration_candles,
+ )
context = RewardContext(
pnl=pnl,
"idle_ratio": idle_ratio,
"position": float(context.position.value),
"action": int(context.action.value),
+ # Sampling diagnostics
+ "sample_entry_prob": sample_entry_prob,
+ "sample_exit_prob": sample_exit_prob,
+ "sample_neutral_prob": sample_neutral_prob,
"reward": breakdown.total,
"reward_invalid": breakdown.invalid_penalty,
"reward_idle": breakdown.idle_penalty,
exit_additive_enabled_raw,
)
- # True invariance requires canonical mode AND no effective additives.
+ # True PBRS invariance classification:
+ # - Canonical requires canonical mode AND no effective additives.
+ # - When `reward_invariance_correction` is present, we use it as the primary
+ # diagnostic (reward_shaping - reward_pbrs_delta).
+ # - Otherwise, we fall back to the weaker heuristic |Σ shaping| ≈ 0.
is_theoretically_invariant = exit_potential_mode == "canonical" and not (
entry_additive_effective or exit_additive_effective
)
+
+ has_inv_correction = "reward_invariance_correction" in df.columns
+ max_abs_inv_correction: float | None
+ if has_inv_correction:
+ max_abs_inv_correction = float(df["reward_invariance_correction"].abs().max())
+ correction_near_zero = max_abs_inv_correction < PBRS_INVARIANCE_TOL
+ else:
+ max_abs_inv_correction = None
+ correction_near_zero = None
shaping_near_zero = abs(total_shaping) < PBRS_INVARIANCE_TOL
suppression_note = ""
# Prepare invariance summary markdown block
if is_theoretically_invariant:
- if shaping_near_zero:
+ if correction_near_zero is True:
invariance_status = "✅ Canonical"
invariance_note = (
- "Theoretical invariance preserved (canonical mode, no additives, Σ≈0)."
+ "Theoretical invariance preserved (canonical mode, no additives, max|correction|≈0)."
+ suppression_note
)
- else:
+ elif correction_near_zero is False:
invariance_status = "⚠️ Canonical (with warning)"
invariance_note = (
- f"Canonical mode but unexpected shaping sum = {total_shaping:.6f}."
- + suppression_note
+ "Canonical mode but invariance correction is non-zero"
+ f" (max|correction|={max_abs_inv_correction:.6e})." + suppression_note
)
+ else:
+ # Fallback: without invariance correction, use Σ shaping as a heuristic.
+ if shaping_near_zero:
+ invariance_status = "✅ Canonical"
+ invariance_note = (
+ "Theoretical invariance preserved (canonical mode, no additives, Σ≈0)."
+ + suppression_note
+ )
+ else:
+ invariance_status = "⚠️ Canonical (with warning)"
+ invariance_note = (
+ "Canonical mode but Σ shaping is non-zero"
+ f" (Σ={total_shaping:.6f}; correction column unavailable)."
+ + suppression_note
+ )
else:
invariance_status = "❌ Non-canonical"
reasons = []
else:
f.write("6. **Distribution Shift** - Not performed (no real episodes provided)\n")
if "reward_shaping" in df.columns:
- _total_shaping = df["reward_shaping"].sum()
- _canonical = abs(_total_shaping) < PBRS_INVARIANCE_TOL
- f.write(
- "7. **PBRS Invariance** - "
- + (
+ _total_shaping = float(df["reward_shaping"].sum())
+ if "reward_invariance_correction" in df.columns:
+ _max_abs_corr = float(df["reward_invariance_correction"].abs().max())
+ _canonical = _max_abs_corr < PBRS_INVARIANCE_TOL
+ _pbrs_summary = (
+ "Canonical (max|correction| ≈ 0)"
+ if _canonical
+ else f"Canonical (with warning; max|correction|={_max_abs_corr:.6e})"
+ )
+ else:
+ _canonical = abs(_total_shaping) < PBRS_INVARIANCE_TOL
+ _pbrs_summary = (
"Canonical (Σ shaping ≈ 0)"
if _canonical
- else f"Non-canonical (Σ shaping = {_total_shaping:.6f})"
+ else f"Canonical (with warning; Σ shaping={_total_shaping:.6f})"
)
- + "\n"
- )
+
+ f.write("7. **PBRS Invariance** - " + _pbrs_summary + "\n")
f.write("\n")
f.write("**Generated Files:**\n")
f.write("- `reward_samples.csv` - Raw synthetic samples\n")
"""Tests for public API and helper functions."""
import math
+import random
import tempfile
import unittest
from pathlib import Path
_get_float_param,
_get_int_param,
_get_str_param,
+ _sample_action,
build_argument_parser,
calculate_reward,
parse_overrides,
class TestAPIAndHelpers(RewardSpaceTestBase):
"""Public API + helper utility tests."""
+    def test_sample_action_idle_hazard_increases_entry_rate(self):
+        """_sample_action() increases entry probability past idle cap.
+
+        This guards the synthetic simulator against unrealistically long neutral streaks.
+        The test is statistical but deterministic via fixed RNG seeds.
+        """
+
+        max_idle_duration_candles = 20
+        max_trade_duration_candles = 100
+
+        def sample_entry_rate(*, idle_duration: int, short_allowed: bool) -> float:
+            rng = random.Random(SEEDS.REPRODUCIBILITY)
+            draws = 2000
+            entries = 0
+            for _ in range(draws):
+                # _sample_action returns (action, entry_prob, exit_prob,
+                # neutral_prob): unpack the action instead of comparing the
+                # whole tuple (which would never match an Actions member and
+                # would leave the entry count stuck at zero).
+                action, _entry_prob, _exit_prob, _neutral_prob = _sample_action(
+                    Positions.Neutral,
+                    rng,
+                    short_allowed=short_allowed,
+                    trade_duration=0,
+                    max_trade_duration_candles=max_trade_duration_candles,
+                    idle_duration=idle_duration,
+                    max_idle_duration_candles=max_idle_duration_candles,
+                )
+                if action in (Actions.Long_enter, Actions.Short_enter):
+                    entries += 1
+            return entries / draws
+
+        low_idle_rate = sample_entry_rate(idle_duration=0, short_allowed=True)
+        high_idle_rate = sample_entry_rate(idle_duration=60, short_allowed=True)
+
+        self.assertGreater(
+            high_idle_rate,
+            low_idle_rate,
+            "Entry rate should increase after exceeding max idle duration",
+        )
+
+        low_idle_rate_spot = sample_entry_rate(idle_duration=0, short_allowed=False)
+        high_idle_rate_spot = sample_entry_rate(idle_duration=60, short_allowed=False)
+        self.assertGreater(high_idle_rate_spot, low_idle_rate_spot)
+
def test_parse_overrides(self):
"""Test parse overrides."""
overrides = ["alpha=1.5", "mode=linear", "limit=42"]
"idle_duration",
"position",
"action",
+ "sample_entry_prob",
+ "sample_exit_prob",
+ "sample_neutral_prob",
"reward",
"reward_invalid",
"reward_idle",
]:
self.assertIn(col, df_margin.columns)
+    def test_simulate_samples_sampling_probabilities_are_bounded(self):
+        """simulate_samples() exposes bounded sampling probabilities."""
+
+        df = simulate_samples(
+            params=self.base_params(max_trade_duration_candles=40),
+            num_samples=200,
+            seed=SEEDS.SMOKE_TEST,
+            base_factor=PARAMS.BASE_FACTOR,
+            profit_aim=PARAMS.PROFIT_AIM,
+            risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
+            max_duration_ratio=1.5,
+            trading_mode="margin",
+            pnl_base_std=PARAMS.PNL_STD,
+            pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE,
+        )
+
+        for col in ["sample_entry_prob", "sample_exit_prob", "sample_neutral_prob"]:
+            self.assertIn(col, df.columns)
+
+        # NaN marks "not applicable" probabilities; dropna() keeps the rest.
+        values = (
+            df[["sample_entry_prob", "sample_exit_prob", "sample_neutral_prob"]].stack().dropna()
+        )
+        # 0.9 mirrors _SAMPLE_DURATION_HAZARD_MAX_PROBABILITY (the hazard cap);
+        # update both together if the cap ever changes.
+        self.assertTrue(((values >= 0.0) & (values <= 0.9)).all())
+
def test_to_bool(self):
"""Test _to_bool with various inputs."""
df1 = simulate_samples(
def assert_pbrs_canonical_sum_within_tolerance(test_case, total_shaping: float, tolerance: float):
-    """Validate cumulative PBRS shaping satisfies canonical bound.
+    """Validate cumulative shaping is small.

-    For canonical PBRS, the cumulative reward shaping across a trajectory
-    must be near zero (within tolerance). This is a core PBRS invariant.
+    In canonical PBRS, the per-step shaping corresponds to a telescoping term.
+    Over a full, closed episode it may cancel, but across many partial trajectories
+    or with resets/discounting it does not need to be exactly zero.
+
+    This helper remains as a *diagnostic* check for constructed test cases that
+    intentionally enforce small cumulative shaping.
+
+    The bound is strict: the assertion requires |total_shaping| < tolerance.

    Args:
-        test_case: Test case instance with assertion methods
-        total_shaping: Total cumulative reward shaping value
-        tolerance: Maximum allowed absolute deviation from zero
+        test_case: Test case instance with assertion methods.
+        total_shaping: Total cumulative shaping value.
+        tolerance: Maximum allowed absolute deviation from zero.

    Example:
-        assert_pbrs_canonical_sum_within_tolerance(self, 5e-10, 1e-09)
+        assert_pbrs_canonical_sum_within_tolerance(self, 5e-10, 1e-9)
    """
    test_case.assertLess(abs(total_shaping), tolerance)
def assert_non_canonical_shaping_exceeds(
    test_case, total_shaping: float, tolerance_multiple: float
):
-    """Validate non-canonical PBRS shaping exceeds threshold.
+    """Validate non-trivial shaping magnitude.

-    For non-canonical PBRS (e.g., with additives), the cumulative shaping
-    should exceed a scaled tolerance threshold, indicating violation of
-    the canonical PBRS invariant.
+    In non-canonical PBRS modes or when additives are effective, the shaping
+    trajectory is expected to deviate from the pure telescoping term more often.

-    Args:
-        test_case: Test case instance with assertion methods
-        total_shaping: Total cumulative reward shaping value
-        tolerance_multiple: Threshold value (typically scaled tolerance)
+    Note: cumulative shaping being large is not a strict correctness proof; it is
+    a useful smoke-signal for test fixtures that intentionally construct such cases.
+
+    The bound is strict: the assertion requires |total_shaping| > tolerance_multiple.

-    Example:
-        # Expect shaping to exceed 10x tolerance for non-canonical case
-        assert_non_canonical_shaping_exceeds(self, 0.05, 1e-08)
+    Args:
+        test_case: Test case instance with assertion methods.
+        total_shaping: Total cumulative shaping value.
+        tolerance_multiple: Threshold value for the given test fixture.
    """
    test_case.assertGreater(abs(total_shaping), tolerance_multiple)
PBRS_INVARIANCE_TOL,
f"Total shaping {total_shaping} exceeds invariance tolerance",
)
+ inv_corr_vals = [1.0e-7, -1.0e-7, 2.0e-7]
+ max_abs_corr = float(np.max(np.abs(inv_corr_vals)))
+ self.assertLess(max_abs_corr, PBRS_INVARIANCE_TOL)
+
n = len(small_vals)
df = pd.DataFrame(
{
"reward_shaping": small_vals,
"reward_entry_additive": [0.0] * n,
"reward_exit_additive": [0.0] * n,
+ "reward_invariance_correction": inv_corr_vals,
"reward_invalid": np.zeros(n),
"duration_ratio": np.random.uniform(0.2, 1.0, n),
"idle_ratio": np.zeros(n),
self.assertAlmostEqual(
abs(total_shaping), val_abs, places=TOLERANCE.DECIMAL_PLACES_STRICT
)
+ self.assertIn("max|correction|≈0", content)
# Non-owning smoke; ownership: robustness/test_robustness.py:35 (robustness-decomposition-integrity-101)
@pytest.mark.smoke
small_vals = [1.0e-7, -2.0e-7, 3.0e-7] # sum = 2.0e-7 < tolerance
total_shaping = float(sum(small_vals))
self.assertLess(abs(total_shaping), PBRS_INVARIANCE_TOL)
+ inv_corr_vals = [1.0e-7, -1.0e-7, 2.0e-7]
+ max_abs_corr = float(np.max(np.abs(inv_corr_vals)))
+ self.assertLess(max_abs_corr, PBRS_INVARIANCE_TOL)
+
n = len(small_vals)
df = pd.DataFrame(
{
"reward_shaping": small_vals,
"reward_entry_additive": [0.0] * n,
"reward_exit_additive": [0.0] * n,
+ "reward_invariance_correction": inv_corr_vals,
"reward_invalid": np.zeros(n),
"duration_ratio": np.random.uniform(0.2, 1.0, n),
"idle_ratio": np.zeros(n),
self.assertIn("| Exit Additive Effective | False |", content)
def test_pbrs_canonical_warning_report(self):
- """Canonical mode + no additives but |Σ shaping| > tolerance -> warning classification."""
+ """Canonical mode + no additives but max|invariance_correction| > tolerance -> warning."""
- shaping_vals = [1.2e-4, 1.3e-4, 8.0e-5, -2.0e-5, 1.4e-4] # sum = 4.5e-4 (> tol)
- total_shaping = sum(shaping_vals)
+ shaping_vals = [1.2e-4, 1.3e-4, 8.0e-5, -2.0e-5, 1.4e-4] # Σ not near 0
+ total_shaping = float(sum(shaping_vals))
self.assertGreater(abs(total_shaping), PBRS_INVARIANCE_TOL)
+
+ inv_corr_vals = [1.0e-4, -2.0e-4, 1.5e-4, -1.2e-4, 7.0e-5]
+ max_abs_corr = float(np.max(np.abs(inv_corr_vals)))
+ self.assertGreater(max_abs_corr, PBRS_INVARIANCE_TOL)
+
n = len(shaping_vals)
df = pd.DataFrame(
{
"reward_shaping": shaping_vals,
"reward_entry_additive": [0.0] * n,
"reward_exit_additive": [0.0] * n,
+ "reward_invariance_correction": inv_corr_vals,
"reward_invalid": np.zeros(n),
"duration_ratio": np.random.uniform(0.2, 1.2, n),
"idle_ratio": np.zeros(n),
assert_pbrs_invariance_report_classification(
self, content, "Canonical (with warning)", expect_additives=False
)
- expected_sum_fragment = f"{total_shaping:.6f}"
- self.assertIn(expected_sum_fragment, content)
+ expected_corr_fragment = f"{max_abs_corr:.6e}"
+ self.assertIn(expected_corr_fragment, content)
# Non-owning smoke; ownership: robustness/test_robustness.py:35 (robustness-decomposition-integrity-101)
@pytest.mark.smoke