test(ReforceXY): use standardized test parameters and tolerances

author Jérôme Benoit <jerome.benoit@piment-noir.org>

Thu, 25 Dec 2025 11:05:37 +0000 (12:05 +0100)

committer Jérôme Benoit <jerome.benoit@piment-noir.org>

Thu, 25 Dec 2025 11:05:37 +0000 (12:05 +0100)
author Jérôme Benoit <jerome.benoit@piment-noir.org>
Thu, 25 Dec 2025 11:05:37 +0000 (12:05 +0100)
committer Jérôme Benoit <jerome.benoit@piment-noir.org>
Thu, 25 Dec 2025 11:05:37 +0000 (12:05 +0100)
diff --git a/ReforceXY/reward_space_analysis/tests/api/test_api_helpers.py b/ReforceXY/reward_space_analysis/tests/api/test_api_helpers.py

index e25e005017b0da98b32d6ab14445782b32e6a5de..496a974ee4f372ffd911150a08e4909a8d7e56bb 100644 (file)
--- a/ReforceXY/reward_space_analysis/tests/api/test_api_helpers.py
+++ b/ReforceXY/reward_space_analysis/tests/api/test_api_helpers.py
@@ -44,7 +44,7 @@ class TestAPIAndHelpers(RewardSpaceTestBase):
          """
  
          max_idle_duration_candles = 20
-        max_trade_duration_candles = 100
+        max_trade_duration_candles = PARAMS.TRADE_DURATION_MEDIUM
  
          def sample_entry_rate(*, idle_duration: int, short_allowed: bool) -> float:
              rng = random.Random(SEEDS.REPRODUCIBILITY)
@@ -310,19 +310,20 @@ class TestPrivateFunctions(RewardSpaceTestBase):
      def test_exit_reward_calculation(self):
          """Test exit reward calculation with various scenarios."""
          scenarios = [
-            (Positions.Long, Actions.Long_exit, 0.05, "Profitable long exit"),
+            (Positions.Long, Actions.Long_exit, PARAMS.PNL_MEDIUM, "Profitable long exit"),
              (Positions.Short, Actions.Short_exit, -0.03, "Profitable short exit"),
-            (Positions.Long, Actions.Long_exit, -0.02, "Losing long exit"),
-            (Positions.Short, Actions.Short_exit, 0.02, "Losing short exit"),
+            (Positions.Long, Actions.Long_exit, -PARAMS.PNL_SMALL, "Losing long exit"),
+            (Positions.Short, Actions.Short_exit, PARAMS.PNL_SMALL, "Losing short exit"),
          ]
+        unrealized_pad = PARAMS.PNL_SMALL / 2
          for position, action, pnl, description in scenarios:
              with self.subTest(description=description):
                  context = self.make_ctx(
                      pnl=pnl,
-                    trade_duration=50,
+                    trade_duration=PARAMS.TRADE_DURATION_SHORT,
                      idle_duration=0,
-                    max_unrealized_profit=max(pnl + 0.01, 0.01),
-                    min_unrealized_profit=min(pnl - 0.01, -0.01),
+                    max_unrealized_profit=max(pnl + unrealized_pad, unrealized_pad),
+                    min_unrealized_profit=min(pnl - unrealized_pad, -unrealized_pad),
                      position=position,
                      action=action,
                  )
@@ -370,15 +371,15 @@ class TestPrivateFunctions(RewardSpaceTestBase):
          self.assertIn("check_invariants", params)
          self.assertIn("exit_factor_threshold", params)
          context = self.make_ctx(
-            pnl=0.05,
-            trade_duration=300,
+            pnl=PARAMS.PNL_MEDIUM,
+            trade_duration=SCENARIOS.DURATION_LONG,
              idle_duration=0,
-            max_unrealized_profit=0.06,
+            max_unrealized_profit=PARAMS.PROFIT_AIM,
              min_unrealized_profit=0.0,
              position=Positions.Long,
              action=Actions.Long_exit,
          )
-        breakdown = calculate_reward_with_defaults(context, params, base_factor=10000000.0)
+        breakdown = calculate_reward_with_defaults(context, params, base_factor=10_000_000.0)
          self.assertFinite(breakdown.exit_component, name="exit_component")
  
  
diff --git a/ReforceXY/reward_space_analysis/tests/cli/test_cli_params_and_csv.py b/ReforceXY/reward_space_analysis/tests/cli/test_cli_params_and_csv.py

index d7099aec561e882f28f0413f57c93c80797f7d86..d89ba8d9c985545cbc59f0856620bd898793a1f1 100644 (file)
--- a/ReforceXY/reward_space_analysis/tests/cli/test_cli_params_and_csv.py
+++ b/ReforceXY/reward_space_analysis/tests/cli/test_cli_params_and_csv.py
@@ -10,7 +10,7 @@ from pathlib import Path
  import pandas as pd
  import pytest
  
-from ..constants import SEEDS
+from ..constants import SEEDS, TOLERANCE
  from ..test_base import RewardSpaceTestBase
  
  # Pytest marker for taxonomy classification
@@ -283,6 +283,33 @@ class TestParamsPropagation(RewardSpaceTestBase):
                  f"Column {col} contains infinite values",
              )
  
+        # Verify mathematical alignment (CSV-level invariants)
+        # By construction in `calculate_reward()`: reward_shaping = pbrs_delta + invariance_correction
+        shaping_residual = (
+            df["reward_shaping"] - (df["reward_pbrs_delta"] + df["reward_invariance_correction"])
+        ).abs()
+        self.assertLessEqual(
+            float(shaping_residual.max()),
+            TOLERANCE.GENERIC_EQ,
+            "Expected reward_shaping == reward_pbrs_delta + reward_invariance_correction",
+        )
+
+        # Total reward should decompose into base + shaping + additives
+        reward_residual = (
+            df["reward"]
+            - (
+                df["reward_base"]
+                + df["reward_shaping"]
+                + df["reward_entry_additive"]
+                + df["reward_exit_additive"]
+            )
+        ).abs()
+        self.assertLessEqual(
+            float(reward_residual.max()),
+            TOLERANCE.GENERIC_EQ,
+            "Expected reward == reward_base + reward_shaping + additives",
+        )
+
  
  if __name__ == "__main__":
      unittest.main()
diff --git a/ReforceXY/reward_space_analysis/tests/constants.py b/ReforceXY/reward_space_analysis/tests/constants.py

index 3a62952cca61d59155c7f68140c5fbde099d039e..86282050a561402c2e5455a83f7020a127ddfd4f 100644 (file)
--- a/ReforceXY/reward_space_analysis/tests/constants.py
+++ b/ReforceXY/reward_space_analysis/tests/constants.py
@@ -239,6 +239,9 @@ class TestParameters:
          TRADE_DURATION_MEDIUM: Medium trade duration in steps (100)
          TRADE_DURATION_LONG: Long trade duration in steps (200)
  
+        # Simulation configuration
+        MAX_TRADE_DURATION_HETEROSCEDASTICITY: Max trade duration used for heteroscedasticity tests (10)
+
          # Common additive parameters
          ADDITIVE_RATIO_DEFAULT: Default additive ratio (0.4)
          ADDITIVE_GAIN_DEFAULT: Default additive gain (1.0)
@@ -261,6 +264,9 @@ class TestParameters:
      TRADE_DURATION_MEDIUM: int = 100
      TRADE_DURATION_LONG: int = 200
  
+    # Simulation configuration
+    MAX_TRADE_DURATION_HETEROSCEDASTICITY: int = 10
+
      # Additive parameters
      ADDITIVE_RATIO_DEFAULT: float = 0.4
      ADDITIVE_GAIN_DEFAULT: float = 1.0
diff --git a/ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py b/ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py

index 884311ed39e202b8f6f2094775176cda66a84498..c54dd31d3a4f06443d7f1f3f7187299dae1e08a2 100644 (file)
--- a/ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py
+++ b/ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py
@@ -108,11 +108,6 @@ class TestReportFormatting(RewardSpaceTestBase):
          val = float(m.group(1)) if m else None
          if val is not None:
              self.assertLess(val, TOLERANCE.NEGLIGIBLE + TOLERANCE.IDENTITY_STRICT)
-        self.assertNotIn(
-            str(TOLERANCE.GENERIC_EQ),
-            content,
-            "Tolerance constant value should appear, not raw literal",
-        )
  
      def test_distribution_shift_section_present_with_real_episodes(self):
          """Distribution Shift section renders metrics table when real episodes provided."""
diff --git a/ReforceXY/reward_space_analysis/tests/robustness/test_robustness.py b/ReforceXY/reward_space_analysis/tests/robustness/test_robustness.py

index f7834f385ed7fda062154e5ba9e0472fc7beb5ec..abf53db81239320c51b78f9dbd1b22bebbb33ee3 100644 (file)
--- a/ReforceXY/reward_space_analysis/tests/robustness/test_robustness.py
+++ b/ReforceXY/reward_space_analysis/tests/robustness/test_robustness.py
@@ -797,19 +797,37 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase):
  
      # Owns invariant: robustness-near-zero-half-life-105
      def test_robustness_105_half_life_near_zero_fallback(self):
-        """Invariant 105: Near-zero exit_half_life warns and returns factor≈base_factor (no attenuation)."""
+        """Invariant 105: Near-zero exit_half_life yields no attenuation (factor≈base).
+
+        This invariant is specifically about the *time attenuation kernel*:
+        `exit_attenuation_mode="half_life"` should return a time coefficient of 1.0 when
+        `exit_half_life` is close to zero.
+
+        To isolate the time coefficient, we choose inputs that keep the other
+        multiplicative coefficients at 1.0 (pnl_target and efficiency).
+        """
+
          base_factor = 60.0
-        pnl = 0.02
          pnl_target = PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO_HIGH
+        pnl = 0.5 * pnl_target
          test_context = self.make_ctx(
-            pnl=pnl, trade_duration=50, max_unrealized_profit=0.03, min_unrealized_profit=0.0
+            pnl=pnl,
+            trade_duration=50,
+            max_unrealized_profit=pnl,
+            min_unrealized_profit=0.0,
          )
          duration_ratio = 0.7
+
          near_zero_values = [1e-15, 1e-12, 5e-14]
          for hl in near_zero_values:
-            params = self.base_params(exit_attenuation_mode="half_life", exit_half_life=hl)
+            params = self.base_params(
+                exit_attenuation_mode="half_life",
+                exit_half_life=hl,
+                efficiency_weight=0.0,
+                win_reward_factor=0.0,
+            )
              with assert_diagnostic_warning(["exit_half_life", "close to 0"]):
-                _ = _get_exit_factor(
+                f0 = _get_exit_factor(
                      base_factor,
                      pnl,
                      pnl_target,
@@ -827,14 +845,25 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase):
                      params,
                      PARAMS.RISK_REWARD_RATIO_HIGH,
                  )
-            # Note: The expected value calculation needs adjustment since _get_exit_factor now computes
-            # pnl_target_coefficient and efficiency_coefficient internally
-            # For now, we just check that fdr is finite and reasonable
+
              self.assertFinite(fdr, name="fdr")
-            self.assertGreaterEqual(
+            self.assertAlmostEqualFloat(
+                fdr,
+                base_factor,
+                tolerance=TOLERANCE.IDENTITY_STRICT,
+                msg=f"Expected no time attenuation for near-zero half-life hl={hl} (fdr={fdr})",
+            )
+            self.assertAlmostEqualFloat(
+                f0,
+                base_factor,
+                tolerance=TOLERANCE.IDENTITY_STRICT,
+                msg=f"Expected factor==base at dr=0 for hl={hl} (f0={f0})",
+            )
+            self.assertAlmostEqualFloat(
                  fdr,
-                0.0,
-                msg=f"Near-zero half-life should give non-negative factor hl={hl} fdr={fdr}",
+                f0,
+                tolerance=TOLERANCE.IDENTITY_STRICT,
+                msg=f"Expected dr-insensitive factor under half-life near zero hl={hl} (f0={f0}, fdr={fdr})",
              )
  
  
diff --git a/ReforceXY/reward_space_analysis/tests/statistics/test_statistics.py b/ReforceXY/reward_space_analysis/tests/statistics/test_statistics.py

index 41e9899d3b7d4a3280c95d0de293c97fab3d01bd..4222ed4f78da0a98e4abf885a0b1e93eca76ebcf 100644 (file)
--- a/ReforceXY/reward_space_analysis/tests/statistics/test_statistics.py
+++ b/ReforceXY/reward_space_analysis/tests/statistics/test_statistics.py
@@ -170,8 +170,8 @@ class TestStatistics(RewardSpaceTestBase):
          if len(df) > 30:
              idle_data = df[df["idle_duration"] > 0]
              if len(idle_data) > 10:
-                idle_dur = idle_data["idle_duration"].to_numpy()
-                idle_rew = idle_data["reward_idle"].to_numpy()
+                idle_dur = np.asarray(idle_data["idle_duration"], dtype=float)
+                idle_rew = np.asarray(idle_data["reward_idle"], dtype=float)
                  self.assertTrue(
                      len(idle_dur) == len(idle_rew),
                      "Idle duration and reward arrays should have same length",
@@ -425,7 +425,9 @@ class TestStatistics(RewardSpaceTestBase):
          """PnL variance increases with trade duration (heteroscedasticity)."""
  
          df = simulate_samples(
-            params=self.base_params(max_trade_duration_candles=100),
+            params=self.base_params(
+                max_trade_duration_candles=PARAMS.MAX_TRADE_DURATION_HETEROSCEDASTICITY
+            ),
              num_samples=SCENARIOS.SAMPLE_SIZE_LARGE + 200,
              seed=SEEDS.HETEROSCEDASTICITY,
              base_factor=PARAMS.BASE_FACTOR,
@@ -436,7 +438,14 @@ class TestStatistics(RewardSpaceTestBase):
              pnl_base_std=PARAMS.PNL_STD,
              pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE,
          )
-        exit_data = df[df["reward_exit"] != 0].copy()
+        # Use the action code rather than `reward_exit != 0`.
+        # `reward_exit` can be zero for break-even exits, but the exit action still
+        # contributes to the heteroscedasticity structure.
+        exit_action_codes = (
+            float(reward_space_analysis.Actions.Long_exit.value),
+            float(reward_space_analysis.Actions.Short_exit.value),
+        )
+        exit_data = df[df["action"].isin(exit_action_codes)].copy()
          if len(exit_data) < SCENARIOS.SAMPLE_SIZE_TINY:
              self.skipTest("Insufficient exit actions for heteroscedasticity test")
          exit_data["duration_bin"] = pd.cut(
diff --git a/ReforceXY/reward_space_analysis/tests/transforms/test_transforms.py b/ReforceXY/reward_space_analysis/tests/transforms/test_transforms.py

index 5609642484c5fbf318eb299fd5a4359f215b477b..4f8fc09d2c3425945ae98291e1df722c41270db7 100644 (file)
--- a/ReforceXY/reward_space_analysis/tests/transforms/test_transforms.py
+++ b/ReforceXY/reward_space_analysis/tests/transforms/test_transforms.py
@@ -49,7 +49,7 @@ class TestTransforms(RewardSpaceTestBase):
                      self.assertAlmostEqualFloat(
                          result,
                          expected_value,
-                        tolerance=1e-10,
+                        tolerance=TOLERANCE.GENERIC_EQ,
                          msg=f"{transform_name}({test_val}) should equal {expected_value}",
                      )
author	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Thu, 25 Dec 2025 11:05:37 +0000 (12:05 +0100)
committer	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Thu, 25 Dec 2025 11:05:37 +0000 (12:05 +0100)
ReforceXY/reward_space_analysis/tests/api/test_api_helpers.py		patch | blob | blame | history
ReforceXY/reward_space_analysis/tests/cli/test_cli_params_and_csv.py		patch | blob | blame | history
ReforceXY/reward_space_analysis/tests/constants.py		patch | blob | blame | history
ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py		patch | blob | blame | history
ReforceXY/reward_space_analysis/tests/robustness/test_robustness.py		patch | blob | blame | history
ReforceXY/reward_space_analysis/tests/statistics/test_statistics.py		patch | blob | blame | history
ReforceXY/reward_space_analysis/tests/transforms/test_transforms.py		patch | blob | blame | history