]
markers = [
"components: component-level reward computations",
+ "transforms: mathematical transform functions",
"robustness: stress and edge-case behavior",
"api: public API surface and helpers",
"cli: command-line interface behaviors",
ATTENUATION_MODES: Tuple[str, ...] = ("sqrt", "linear", "power", "half_life")
ATTENUATION_MODES_WITH_LEGACY: Tuple[str, ...] = ATTENUATION_MODES + ("legacy",)
-# Centralized internal numeric guards & behavior toggles (single source of truth for internal tunables)
+# Centralized internal numeric guards & behavior toggles
INTERNAL_GUARDS: dict[str, float] = {
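+    # degenerate_ci_epsilon is (presumably) the widening applied to degenerate
+    # (constant-sample) bootstrap CIs so their width stays positive; the bootstrap
+    # tests bound the resulting width by STAT_TOL.CI_WIDTH_EPSILON.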
"degenerate_ci_epsilon": 1e-9,
"distribution_constant_fallback_moment": 0.0,
sanitized = dict(params)
adjustments: Dict[str, Dict[str, Any]] = {}
- # Normalize boolean-like parameters explicitly to avoid inconsistent types
+ # Boolean parameter coercion
_bool_keys = [
"check_invariants",
"hold_potential_enabled",
adjusted = original_numeric
reason_parts: List[str] = []
- # Record numeric coercion if type changed (e.g., from str/bool/None)
+ # Track type coercion
if not isinstance(original_val, (int, float)):
adjustments.setdefault(
key,
"validation_mode": "strict" if strict else "relaxed",
},
)
- # Update sanitized to numeric before clamping
sanitized[key] = original_numeric
# Bounds enforcement
max_unrealized_profit = 0.0
min_unrealized_profit = 0.0
else:
- # Unrealized profits should bracket the final PnL
- # Max represents peak profit during trade, min represents lowest point
+ # Unrealized profit bounds
span = abs(rng.gauss(0.0, 0.015))
- # Ensure max >= pnl >= min by construction
+ # max >= pnl >= min by construction
max_unrealized_profit = pnl + abs(rng.gauss(0.0, span))
min_unrealized_profit = pnl - abs(rng.gauss(0.0, span))
)
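+        # Canonical PBRS drift correction (cf. invariant 106 tests): subtract the
+        # mean residual shaping so "reward_shaping" sums to ~0 over invariant samples.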
drift = total_shaping / max(1, n_invariant)
df.loc[:, "reward_shaping"] = df["reward_shaping"] - drift
- # Attach resolved reward params for downstream consumers (e.g., report derivations)
df.attrs["reward_params"] = dict(params)
except Exception:
# Graceful fallback (no invariance enforcement on failure)
)
from ..helpers import (
+ RewardScenarioConfig,
+ ThresholdTestConfig,
+ ValidationConfig,
assert_component_sum_integrity,
assert_exit_factor_plateau_behavior,
assert_hold_penalty_threshold_behavior,
def test_hold_penalty_basic_calculation(self):
"""Test hold penalty calculation when trade_duration exceeds max_duration.
- Tests:
- - Hold penalty is negative when duration exceeds threshold
- - Component sum integrity maintained
-
- Expected behavior:
- - trade_duration > max_duration → hold_penalty < 0
- - Total reward equals sum of active components
+ Verifies:
+ - trade_duration > max_duration → hold_penalty < 0
+ - Total reward equals sum of active components
"""
context = self.make_ctx(
pnl=0.01,
action_masking=True,
)
self.assertLess(breakdown.hold_penalty, 0, "Hold penalty should be negative")
- assert_component_sum_integrity(
- self,
- breakdown,
- self.TOL_IDENTITY_RELAXED,
+ config = ValidationConfig(
+ tolerance_strict=self.TOL_IDENTITY_STRICT,
+ tolerance_relaxed=self.TOL_IDENTITY_RELAXED,
exclude_components=["idle_penalty", "exit_component", "invalid_penalty"],
component_description="hold + shaping/additives",
)
+ assert_component_sum_integrity(self, breakdown, config)
def test_hold_penalty_threshold_behavior(self):
"""Test hold penalty activation at max_duration threshold.
- Tests:
- - No penalty before max_duration
- - Penalty activation at and after max_duration
-
- Expected behavior:
- - duration < max_duration → hold_penalty = 0
- - duration >= max_duration → hold_penalty <= 0
+ Verifies:
+ - duration < max_duration → hold_penalty = 0
+ - duration >= max_duration → hold_penalty <= 0
"""
max_duration = 128
threshold_test_cases = [
action=Actions.Neutral,
)
+ config = ThresholdTestConfig(
+ max_duration=max_duration,
+ test_cases=threshold_test_cases,
+ tolerance=self.TOL_IDENTITY_RELAXED,
+ )
assert_hold_penalty_threshold_behavior(
self,
- threshold_test_cases,
- max_duration,
context_factory,
self.DEFAULT_PARAMS,
self.TEST_BASE_FACTOR,
self.TEST_PROFIT_TARGET,
1.0,
- self.TOL_IDENTITY_RELAXED,
+ config,
)
def test_hold_penalty_progressive_scaling(self):
"""Test hold penalty scales progressively with increasing duration.
- Tests:
- - Penalty magnitude increases monotonically with duration
- - Progressive scaling beyond max_duration threshold
-
- Expected behavior:
- - For d1 < d2 < d3: penalty(d1) >= penalty(d2) >= penalty(d3)
- - Penalties become more negative with longer durations
+ Verifies:
+ - For d1 < d2 < d3: penalty(d1) >= penalty(d2) >= penalty(d3)
+ - Progressive scaling beyond max_duration threshold
"""
+ from ..constants import SCENARIOS
+
params = self.base_params(max_trade_duration_candles=100)
- durations = [150, 200, 300]
+ durations = list(SCENARIOS.DURATION_SCENARIOS)
penalties = []
for duration in durations:
context = self.make_ctx(
def test_idle_penalty_calculation(self):
"""Test idle penalty calculation for neutral idle state.
- Tests:
- - Idle penalty is negative for idle duration > 0
- - Component sum integrity maintained
-
- Expected behavior:
- - idle_duration > 0 → idle_penalty < 0
- - Total reward equals sum of active components
+ Verifies:
+ - idle_duration > 0 → idle_penalty < 0
+ - Component sum integrity maintained
"""
context = self.make_ctx(
pnl=0.0,
def validate_idle_penalty(test_case, breakdown, description, tolerance):
test_case.assertLess(breakdown.idle_penalty, 0, "Idle penalty should be negative")
- assert_component_sum_integrity(
- test_case,
- breakdown,
- tolerance,
+ config = ValidationConfig(
+ tolerance_strict=test_case.TOL_IDENTITY_STRICT,
+ tolerance_relaxed=tolerance,
exclude_components=["hold_penalty", "exit_component", "invalid_penalty"],
component_description="idle + shaping/additives",
)
+ assert_component_sum_integrity(test_case, breakdown, config)
scenarios = [(context, self.DEFAULT_PARAMS, "idle_penalty_basic")]
+ config = RewardScenarioConfig(
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=1.0,
+ tolerance_relaxed=self.TOL_IDENTITY_RELAXED,
+ )
assert_reward_calculation_scenarios(
self,
scenarios,
- self.TEST_BASE_FACTOR,
- self.TEST_PROFIT_TARGET,
- 1.0,
+ config,
validate_idle_penalty,
- self.TOL_IDENTITY_RELAXED,
)
def test_efficiency_zero_policy(self):
"""Test efficiency zero policy produces expected PnL factor.
- Tests:
- - PnL factor calculation with efficiency weight = 0
- - Finite and positive factor values
-
- Expected behavior:
- - efficiency_weight = 0 → pnl_factor ≈ 1.0
- - Factor is finite and well-defined
+ Verifies:
+ - efficiency_weight = 0 → pnl_factor ≈ 1.0
+ - Factor is finite and positive
"""
ctx = self.make_ctx(
pnl=0.0,
def test_max_idle_duration_candles_logic(self):
"""Test max idle duration candles parameter affects penalty magnitude.
- Tests:
- - Smaller max_idle_duration → larger penalty magnitude
- - Larger max_idle_duration → smaller penalty magnitude
- - Both penalties are negative
-
- Expected behavior:
- - penalty(max=50) < penalty(max=200) < 0
+ Verifies:
+ - penalty(max=50) < penalty(max=200) < 0
+ - Smaller max → larger penalty magnitude
"""
params_small = self.base_params(max_idle_duration_candles=50)
params_large = self.base_params(max_idle_duration_candles=200)
Non-owning smoke test; ownership: robustness/test_robustness.py:35
- Tests:
- - Exit factor finiteness for linear and power modes
- - Plateau behavior with grace period
-
- Expected behavior:
- - All exit factors are finite and positive
- - Plateau mode attenuates after grace period
+ Verifies:
+ - Exit factors are finite and positive (linear, power modes)
+ - Plateau mode attenuates after grace period
"""
modes_to_test = ["linear", "power"]
for mode in modes_to_test:
def test_idle_penalty_zero_when_profit_target_zero(self):
"""Test idle penalty is zero when profit_target is zero.
- Tests:
- - profit_target = 0 → idle_penalty = 0
- - Total reward is zero in this configuration
-
- Expected behavior:
- - profit_target = 0 → idle_factor = 0 → idle_penalty = 0
- - No other components active for neutral idle state
+ Verifies:
+ - profit_target = 0 → idle_penalty = 0
+ - Total reward is zero in this configuration
"""
context = self.make_ctx(
pnl=0.0,
)
scenarios = [(context, self.DEFAULT_PARAMS, "profit_target_zero")]
+ config = RewardScenarioConfig(
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=0.0,
+ risk_reward_ratio=self.TEST_RR,
+ tolerance_relaxed=self.TOL_IDENTITY_RELAXED,
+ )
assert_reward_calculation_scenarios(
self,
scenarios,
- self.TEST_BASE_FACTOR,
- 0.0, # profit_target=0
- self.TEST_RR,
+ config,
validate_zero_penalty,
- self.TOL_IDENTITY_RELAXED,
)
def test_win_reward_factor_saturation(self):
"""Test PnL amplification factor saturates at asymptotic limit.
- Tests:
- - Amplification ratio increases monotonically with PnL
- - Saturation approaches (1 + win_reward_factor)
- - Mathematical formula validation
-
- Expected behavior:
- - As PnL → ∞: amplification → (1 + win_reward_factor)
- - Monotonic increase: ratio(PnL1) <= ratio(PnL2) for PnL1 < PnL2
- - Observed matches theoretical tanh-based formula
+ Verifies:
+ - Amplification ratio increases monotonically with PnL
+ - Saturation approaches (1 + win_reward_factor)
+ - Observed matches theoretical saturation behavior
"""
win_reward_factor = 3.0
beta = 0.5
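+        # Assumed amplification law behind this test (tanh-based, per the original
+        # docstring): amp(pnl) ≈ 1 + win_reward_factor * tanh(beta * pnl_ratio), so
+        # amp is monotone in pnl and saturates at 1 + win_reward_factor.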
def test_idle_penalty_fallback_and_proportionality(self):
"""Test idle penalty fallback and proportional scaling behavior.
- Tests:
- - Fallback to max_trade_duration when max_idle_duration is None
- - Proportional scaling with idle duration (2:1 ratio validation)
- - Mathematical validation of penalty formula
-
- Expected behavior:
- - max_idle_duration = None → use max_trade_duration as fallback
- - penalty(duration=40) ≈ 2 × penalty(duration=20)
- - Formula: penalty ∝ (duration/max)^power × scale
+ Verifies:
+ - max_idle_duration = None → use max_trade_duration as fallback
+ - penalty(duration=40) ≈ 2 × penalty(duration=20)
+ - Proportional scaling with idle duration
"""
params = self.base_params(max_idle_duration_candles=None, max_trade_duration_candles=100)
base_factor = 90.0
profit_target = self.TEST_PROFIT_TARGET
risk_reward_ratio = 1.0
- # Generate test contexts using helper
base_context_kwargs = {
"pnl": 0.0,
"trade_duration": 0,
self.make_ctx, idle_scenarios, base_context_kwargs
)
- # Calculate all rewards
results = []
for context, description in contexts_and_descriptions:
breakdown = calculate_reward(
)
results.append((breakdown, context.idle_duration, description))
- # Validate proportional scaling
br_a, br_b, br_mid = [r[0] for r in results]
self.assertLess(br_a.idle_penalty, 0.0)
self.assertLess(br_b.idle_penalty, 0.0)
self.assertLess(br_mid.idle_penalty, 0.0)
- # Check 2:1 ratio between 40 and 20 idle duration
ratio = br_b.idle_penalty / br_a.idle_penalty if br_a.idle_penalty != 0 else None
self.assertIsNotNone(ratio)
if ratio is not None:
self.assertAlmostEqualFloat(abs(ratio), 2.0, tolerance=0.2)
- # Mathematical validation for mid-duration case
idle_penalty_scale = _get_float_param(params, "idle_penalty_scale", 0.5)
idle_penalty_power = _get_float_param(params, "idle_penalty_power", 1.025)
factor = _get_float_param(params, "base_factor", float(base_factor))
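+        # Expected proportionality (from the documented formula): idle_penalty ∝
+        # -(idle_duration / max_idle) ** idle_penalty_power * idle_penalty_scale,
+        # scaled by the idle factor derived from `factor` and the profit target.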
plateau and attenuation functions.
Attributes:
- EPS_SMALL: Small epsilon for tight continuity checks (1e-08)
- EPS_LARGE: Larger epsilon for coarser continuity tests (5e-05)
+ EPS_SMALL: Small epsilon for tight continuity checks (1e-06)
+ EPS_LARGE: Larger epsilon for coarser continuity tests (1e-05)
"""
- EPS_SMALL: float = 1e-08
- EPS_LARGE: float = 5e-05
+ EPS_SMALL: float = 1e-06
+ EPS_LARGE: float = 1e-05
@dataclass(frozen=True)
ratio bounds and power mode constraints.
Attributes:
- SCALING_RATIO_MIN: Minimum expected scaling ratio for continuity (1.5)
- SCALING_RATIO_MAX: Maximum expected scaling ratio for continuity (3.5)
+ SCALING_RATIO_MIN: Minimum expected scaling ratio for continuity (5.0)
+ SCALING_RATIO_MAX: Maximum expected scaling ratio for continuity (15.0)
MIN_POWER_TAU: Minimum valid tau value for power mode (1e-15)
"""
- SCALING_RATIO_MIN: float = 1.5
- SCALING_RATIO_MAX: float = 3.5
+ SCALING_RATIO_MIN: float = 5.0
+ SCALING_RATIO_MAX: float = 15.0
MIN_POWER_TAU: float = 1e-15
EPS_BASE: float = 1e-10
+@dataclass(frozen=True)
+class TestScenarios:
+ """Test scenario parameters and sample sizes.
+
+ Standard values for test scenarios to ensure consistency across the test
+ suite and avoid magic numbers in test implementations.
+
+ Attributes:
+ DURATION_SHORT: Short duration scenario (150)
+ DURATION_MEDIUM: Medium duration scenario (200)
+ DURATION_LONG: Long duration scenario (300)
+ DURATION_SCENARIOS: Standard duration test sequence
+ SAMPLE_SIZE_SMALL: Small sample size for quick tests (100)
+ SAMPLE_SIZE_MEDIUM: Medium sample size for standard tests (400)
+ SAMPLE_SIZE_LARGE: Large sample size for statistical power (800)
+ DEFAULT_SAMPLE_SIZE: Default for most tests (400)
+ PBRS_SIMULATION_STEPS: Number of steps for PBRS simulation tests (500)
+ NULL_HYPOTHESIS_SAMPLE_SIZE: Sample size for null hypothesis tests (400)
+ BOOTSTRAP_MINIMAL_ITERATIONS: Minimal bootstrap iterations for quick tests (25)
+ BOOTSTRAP_STANDARD_ITERATIONS: Standard bootstrap iterations (100)
+ HETEROSCEDASTICITY_MIN_EXITS: Minimum exits for heteroscedasticity validation (50)
+ CORRELATION_TEST_MIN_SIZE: Minimum sample size for correlation tests (200)
+ MONTE_CARLO_ITERATIONS: Monte Carlo simulation iterations (160)
+ """
+
+ DURATION_SHORT: int = 150
+ DURATION_MEDIUM: int = 200
+ DURATION_LONG: int = 300
+ DURATION_SCENARIOS: tuple[int, ...] = (150, 200, 300)
+
+ SAMPLE_SIZE_SMALL: int = 100
+ SAMPLE_SIZE_MEDIUM: int = 400
+ SAMPLE_SIZE_LARGE: int = 800
+ DEFAULT_SAMPLE_SIZE: int = 400
+
+ # Specialized test scenario sizes
+ PBRS_SIMULATION_STEPS: int = 500
+ NULL_HYPOTHESIS_SAMPLE_SIZE: int = 400
+ BOOTSTRAP_MINIMAL_ITERATIONS: int = 25
+ BOOTSTRAP_STANDARD_ITERATIONS: int = 100
+ HETEROSCEDASTICITY_MIN_EXITS: int = 50
+ CORRELATION_TEST_MIN_SIZE: int = 200
+ MONTE_CARLO_ITERATIONS: int = 160
+
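+# Illustrative usage (mirroring the call sites in this change):
+#   from ..constants import SCENARIOS
+#   durations = list(SCENARIOS.DURATION_SCENARIOS)
+#   df = simulate_samples(..., num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE, ...)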
+
+@dataclass(frozen=True)
+class StatisticalTolerances:
+ """Tolerances for statistical metrics and distribution tests.
+
+ These tolerances are used for statistical hypothesis testing, distribution
+ comparison metrics, and other statistical validation operations.
+
+ Attributes:
+ DISTRIBUTION_SHIFT: Tolerance for distribution shift metrics (5e-4)
+ KS_STATISTIC_IDENTITY: KS statistic threshold for identical distributions (5e-3)
+ CORRELATION_SIGNIFICANCE: Minimum correlation for significance (0.1)
+        VARIANCE_RATIO_THRESHOLD: Minimum long-to-short duration PnL variance ratio for heteroscedasticity (0.8)
+        CI_WIDTH_EPSILON: Maximum CI width after degenerate-distribution widening (3e-9)
+ """
+
+ DISTRIBUTION_SHIFT: float = 5e-4
+ KS_STATISTIC_IDENTITY: float = 5e-3
+ CORRELATION_SIGNIFICANCE: float = 0.1
+ VARIANCE_RATIO_THRESHOLD: float = 0.8
+ CI_WIDTH_EPSILON: float = 3e-9
+
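+# Illustrative usage (mirroring the statistics tests in this change):
+#   from ..constants import STAT_TOL
+#   self.assertLess(abs(ks_stat), STAT_TOL.KS_STATISTIC_IDENTITY)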
+
# Global singleton instances for easy import
TOLERANCE: Final[ToleranceConfig] = ToleranceConfig()
CONTINUITY: Final[ContinuityConfig] = ContinuityConfig()
STATISTICAL: Final[StatisticalConfig] = StatisticalConfig()
SEEDS: Final[TestSeeds] = TestSeeds()
PARAMS: Final[TestParameters] = TestParameters()
+SCENARIOS: Final[TestScenarios] = TestScenarios()
+STAT_TOL: Final[StatisticalTolerances] = StatisticalTolerances()
__all__ = [
"StatisticalConfig",
"TestSeeds",
"TestParameters",
+ "TestScenarios",
+ "StatisticalTolerances",
"TOLERANCE",
"CONTINUITY",
"EXIT_FACTOR",
"STATISTICAL",
"SEEDS",
"PARAMS",
+ "SCENARIOS",
+ "STAT_TOL",
]
calculate_reward,
)
+from .configs import RewardScenarioConfig, ThresholdTestConfig, ValidationConfig
+
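+# The `configs` module is not shown in this diff; from the call sites below it is
+# assumed to expose frozen dataclasses with at least these fields:
+#   ValidationConfig(tolerance_strict, tolerance_relaxed, exclude_components, component_description)
+#   RewardScenarioConfig(base_factor, profit_target, risk_reward_ratio, tolerance_relaxed,
+#                        short_allowed=True, action_masking=True)
+#   ThresholdTestConfig(max_duration, test_cases, tolerance)
+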
def safe_float(value: Any, default: float = 0.0) -> float:
"""Coerce value to float safely for test parameter handling.
def assert_component_sum_integrity(
test_case,
breakdown,
- tolerance_relaxed,
- exclude_components=None,
- component_description="components",
+ config: ValidationConfig,
):
"""Assert that reward component sum matches total within tolerance.
Validates the mathematical integrity of reward component decomposition by
ensuring the sum of individual components equals the reported total.
+ Uses ValidationConfig to simplify parameter passing.
Args:
test_case: Test case instance with assertion methods
breakdown: Reward breakdown object with component attributes
- tolerance_relaxed: Numerical tolerance for sum validation
- exclude_components: List of component names to exclude from sum (default: None)
- component_description: Human-readable description for error messages
+ config: ValidationConfig with tolerance and exclusion settings
Components checked (if not excluded):
- hold_penalty
- exit_additive
Example:
- assert_component_sum_integrity(
- self, breakdown, 1e-09,
+ config = ValidationConfig(
+ tolerance_strict=1e-12,
+ tolerance_relaxed=1e-09,
exclude_components=["reward_shaping"],
component_description="core components"
)
+ assert_component_sum_integrity(self, breakdown, config)
"""
- if exclude_components is None:
- exclude_components = []
+ exclude_components = config.exclude_components or []
component_sum = 0.0
if "hold_penalty" not in exclude_components:
component_sum += breakdown.hold_penalty
test_case.assertAlmostEqual(
breakdown.total,
component_sum,
- delta=tolerance_relaxed,
- msg=f"Total should equal sum of {component_description}",
+ delta=config.tolerance_relaxed,
+ msg=f"Total should equal sum of {config.component_description}",
)
def assert_reward_calculation_scenarios(
test_case,
scenarios: List[Tuple[Any, Dict[str, Any], str]],
- base_factor: float,
- profit_target: float,
- risk_reward_ratio: float,
+ config: RewardScenarioConfig,
validation_fn,
- tolerance_relaxed: float,
):
"""Execute and validate multiple reward calculation scenarios.
Runs a batch of reward calculations with different contexts and parameters,
- applying a custom validation function to each result. Reduces test boilerplate
- for scenario-based testing.
+ applying a custom validation function to each result. Uses RewardScenarioConfig
+ to simplify parameter passing and improve maintainability.
Args:
test_case: Test case instance with assertion methods
scenarios: List of (context, params, description) tuples defining test cases
- base_factor: Base scaling factor for reward calculations
- profit_target: Target profit threshold
- risk_reward_ratio: Risk/reward ratio for position sizing
+ config: RewardScenarioConfig with all calculation parameters
validation_fn: Callback function (test_case, breakdown, description, tolerance) -> None
- tolerance_relaxed: Numerical tolerance passed to validation function
Example:
+ config = RewardScenarioConfig(
+ base_factor=90.0,
+ profit_target=0.06,
+ risk_reward_ratio=1.0,
+ tolerance_relaxed=1e-09
+ )
scenarios = [
(idle_context, {}, "idle scenario"),
(exit_context, {"exit_additive": 5.0}, "profitable exit"),
]
assert_reward_calculation_scenarios(
- self, scenarios, 90.0, 0.06, 1.0, my_validation_fn, 1e-09
+ self, scenarios, config, my_validation_fn
)
"""
for context, params, description in scenarios:
breakdown = calculate_reward(
context,
params,
- base_factor=base_factor,
- profit_target=profit_target,
- risk_reward_ratio=risk_reward_ratio,
- short_allowed=True,
- action_masking=True,
+ base_factor=config.base_factor,
+ profit_target=config.profit_target,
+ risk_reward_ratio=config.risk_reward_ratio,
+ short_allowed=config.short_allowed,
+ action_masking=config.action_masking,
)
- validation_fn(test_case, breakdown, description, tolerance_relaxed)
+ validation_fn(test_case, breakdown, description, config.tolerance_relaxed)
def assert_parameter_sensitivity_behavior(
parameter_variations: List[Dict[str, Any]],
base_context,
base_params: Dict[str, Any],
- base_factor: float,
- profit_target: float,
- risk_reward_ratio: float,
component_name: str,
expected_trend: str,
- tolerance_relaxed: float,
+ config: RewardScenarioConfig,
):
"""Validate that a component responds predictably to parameter changes.
Tests component sensitivity by applying parameter variations and verifying
the component value follows the expected trend (increasing, decreasing, or constant).
+ Uses RewardScenarioConfig to simplify parameter passing.
Args:
test_case: Test case instance with assertion methods
parameter_variations: List of parameter dicts to merge with base_params
base_context: Context object for reward calculation
base_params: Base parameter dictionary
- base_factor: Base scaling factor
- profit_target: Target profit threshold
- risk_reward_ratio: Risk/reward ratio
component_name: Name of component to track (e.g., "exit_component")
expected_trend: Expected trend: "increasing", "decreasing", or "constant"
- tolerance_relaxed: Numerical tolerance for trend validation
+ config: RewardScenarioConfig with calculation parameters
Example:
+ config = RewardScenarioConfig(
+ base_factor=90.0,
+ profit_target=0.06,
+ risk_reward_ratio=1.0,
+ tolerance_relaxed=1e-09
+ )
variations = [
{"exit_additive": 0.0},
{"exit_additive": 5.0},
{"exit_additive": 10.0},
]
assert_parameter_sensitivity_behavior(
- self, variations, ctx, params, 90.0, 0.06, 1.0,
- "exit_component", "increasing", 1e-09
+ self, variations, ctx, params, "exit_component", "increasing", config
)
"""
from reward_space_analysis import calculate_reward
breakdown = calculate_reward(
base_context,
params,
- base_factor=base_factor,
- profit_target=profit_target,
- risk_reward_ratio=risk_reward_ratio,
- short_allowed=True,
- action_masking=True,
+ base_factor=config.base_factor,
+ profit_target=config.profit_target,
+ risk_reward_ratio=config.risk_reward_ratio,
+ short_allowed=config.short_allowed,
+ action_masking=config.action_masking,
)
component_value = getattr(breakdown, component_name)
results.append(component_value)
for i in range(1, len(results)):
test_case.assertGreaterEqual(
results[i],
- results[i - 1] - tolerance_relaxed,
+ results[i - 1] - config.tolerance_relaxed,
f"{component_name} should increase with parameter variations",
)
elif expected_trend == "decreasing":
for i in range(1, len(results)):
test_case.assertLessEqual(
results[i],
- results[i - 1] + tolerance_relaxed,
+ results[i - 1] + config.tolerance_relaxed,
f"{component_name} should decrease with parameter variations",
)
elif expected_trend == "constant":
test_case.assertAlmostEqual(
result,
baseline,
- delta=tolerance_relaxed,
+ delta=config.tolerance_relaxed,
msg=f"{component_name} should remain constant with parameter variations",
)
parameter_test_cases: List[Tuple[float, float, str]],
context_factory_fn,
base_params: Dict[str, Any],
- base_factor: float,
- tolerance_relaxed: float,
+ config: RewardScenarioConfig,
):
"""Validate reward behavior across multiple parameter combinations.
Tests reward calculation with various profit_target and risk_reward_ratio
combinations, ensuring consistent behavior including edge cases like
- zero profit_target.
+ zero profit_target. Uses RewardScenarioConfig to simplify parameter passing.
Args:
test_case: Test case instance with assertion methods
parameter_test_cases: List of (profit_target, risk_reward_ratio, description) tuples
context_factory_fn: Factory function for creating context objects
base_params: Base parameter dictionary
- base_factor: Base scaling factor
- tolerance_relaxed: Numerical tolerance for assertions
+ config: RewardScenarioConfig with base calculation parameters
Example:
+ config = RewardScenarioConfig(
+ base_factor=90.0,
+ profit_target=0.06,
+ risk_reward_ratio=1.0,
+ tolerance_relaxed=1e-09
+ )
test_cases = [
(0.0, 1.0, "zero profit target"),
(0.06, 1.0, "standard parameters"),
(0.06, 2.0, "high risk/reward ratio"),
]
assert_multi_parameter_sensitivity(
- self, test_cases, make_context, params, 90.0, 1e-09
+ self, test_cases, make_context, params, config
)
"""
for profit_target, risk_reward_ratio, description in parameter_test_cases:
breakdown = calculate_reward(
idle_context,
base_params,
- base_factor=base_factor,
+ base_factor=config.base_factor,
profit_target=profit_target,
risk_reward_ratio=risk_reward_ratio,
- short_allowed=True,
- action_masking=True,
+ short_allowed=config.short_allowed,
+ action_masking=config.action_masking,
)
if profit_target == 0.0:
test_case.assertEqual(breakdown.idle_penalty, 0.0)
exit_breakdown = calculate_reward(
exit_context,
base_params,
- base_factor=base_factor,
+ base_factor=config.base_factor,
profit_target=profit_target,
risk_reward_ratio=risk_reward_ratio,
- short_allowed=True,
- action_masking=True,
+ short_allowed=config.short_allowed,
+ action_masking=config.action_masking,
)
test_case.assertNotEqual(exit_breakdown.exit_component, 0.0)
def assert_hold_penalty_threshold_behavior(
test_case,
- duration_test_cases: Sequence[Tuple[int, str]],
- max_duration: int,
context_factory_fn,
params: Dict[str, Any],
base_factor: float,
profit_target: float,
risk_reward_ratio: float,
- tolerance_relaxed: float,
+ config: ThresholdTestConfig,
):
"""Validate hold penalty activation at max_duration threshold.
Tests that hold penalty is zero before max_duration, then becomes
- negative (penalty) at and after the threshold. Critical for verifying
- threshold-based penalty logic.
+ negative (penalty) at and after the threshold. Uses ThresholdTestConfig
+ to simplify parameter passing.
Args:
test_case: Test case instance with assertion methods
- duration_test_cases: List of (trade_duration, description) tuples to test
- max_duration: Maximum duration threshold for penalty activation
context_factory_fn: Factory function for creating context objects
params: Parameter dictionary
base_factor: Base scaling factor
profit_target: Target profit threshold
risk_reward_ratio: Risk/reward ratio
- tolerance_relaxed: Numerical tolerance for assertions
+ config: ThresholdTestConfig with threshold settings
Example:
- test_cases = [
- (50, "below threshold"),
- (100, "at threshold"),
- (150, "above threshold"),
- ]
+ config = ThresholdTestConfig(
+ max_duration=100,
+ test_cases=[
+ (50, "below threshold"),
+ (100, "at threshold"),
+ (150, "above threshold"),
+ ],
+ tolerance=1e-09
+ )
assert_hold_penalty_threshold_behavior(
- self, test_cases, 100, make_context, params, 90.0, 0.06, 1.0, 1e-09
+ self, make_context, params, 90.0, 0.06, 1.0, config
)
"""
- for trade_duration, description in duration_test_cases:
+ for trade_duration, description in config.test_cases:
with test_case.subTest(duration=trade_duration, desc=description):
context = context_factory_fn(trade_duration=trade_duration)
breakdown = calculate_reward(
short_allowed=True,
action_masking=True,
)
- duration_ratio = trade_duration / max_duration
+ duration_ratio = trade_duration / config.max_duration
if duration_ratio < 1.0:
test_case.assertEqual(breakdown.hold_penalty, 0.0)
elif duration_ratio == 1.0:
from reward_space_analysis import PBRS_INVARIANCE_TOL, write_complete_statistical_analysis
+from ..constants import SCENARIOS
from ..test_base import RewardSpaceTestBase
real_df=real_df,
adjust_method="none",
strict_diagnostics=False,
- bootstrap_resamples=200, # keep test fast
+ bootstrap_resamples=SCENARIOS.BOOTSTRAP_STANDARD_ITERATIONS, # keep test fast
skip_partial_dependence=kwargs.get("skip_partial_dependence", False),
skip_feature_analysis=kwargs.get("skip_feature_analysis", False),
)
# ---------------- Potential transform mechanics ---------------- #
def test_pbrs_progressive_release_decay_clamped(self):
- """progressive_release decay>1 clamps -> Φ'=0 & Δ=-Φ_prev."""
+ """Verifies progressive_release mode with decay>1 clamps potential to zero."""
params = self.DEFAULT_PARAMS.copy()
params.update(
{
)
def test_pbrs_spike_cancel_invariance(self):
- """spike_cancel terminal shaping ≈0 (Φ' inversion yields cancellation)."""
+ """Verifies spike_cancel mode produces near-zero terminal shaping."""
params = self.DEFAULT_PARAMS.copy()
params.update(
{
def test_canonical_invariance_flag_and_sum(self):
"""Canonical mode + no additives -> invariant flags True and Σ shaping ≈ 0."""
+ from ..constants import SCENARIOS
+
params = self.base_params(
exit_potential_mode="canonical",
entry_additive_enabled=False,
)
df = simulate_samples(
params={**params, "max_trade_duration_candles": 100},
- num_samples=400,
+ num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE,
seed=self.SEED,
base_factor=self.TEST_BASE_FACTOR,
profit_target=self.TEST_PROFIT_TARGET,
def test_non_canonical_flag_false_and_sum_nonzero(self):
"""Non-canonical mode -> invariant flags False and Σ shaping significantly non-zero."""
+ from ..constants import SCENARIOS
+
params = self.base_params(
exit_potential_mode="progressive_release",
exit_potential_decay=0.25,
)
df = simulate_samples(
params={**params, "max_trade_duration_candles": 100},
- num_samples=400,
+ num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE,
seed=self.SEED,
base_factor=self.TEST_BASE_FACTOR,
profit_target=self.TEST_PROFIT_TARGET,
# ---------------- Additives and canonical path mechanics ---------------- #
def test_additive_components_disabled_return_zero(self):
- """Entry/exit additives return zero when disabled."""
+ """Verifies entry/exit additives return zero when disabled."""
params_entry = {"entry_additive_enabled": False, "entry_additive_scale": 1.0}
val_entry = _compute_entry_additive(0.5, 0.3, params_entry)
self.assertEqual(float(val_entry), 0.0)
self.assertEqual(float(val_exit), 0.0)
def test_exit_potential_canonical(self):
- """Canonical exit resets potential; additives auto-disabled."""
+ """Verifies canonical exit resets potential and auto-disables additives."""
params = self.base_params(
exit_potential_mode="canonical",
hold_potential_enabled=True,
self.assertTrue(np.isfinite(total))
def test_pbrs_invariance_internal_flag_set(self):
- """Canonical path sets _pbrs_invariance_applied once; second call idempotent."""
+ """Verifies canonical path sets _pbrs_invariance_applied flag (idempotent)."""
params = self.base_params(
exit_potential_mode="canonical",
hold_potential_enabled=True,
)
def test_progressive_release_negative_decay_clamped(self):
- """Negative decay clamps: next potential equals last potential (no release)."""
+ """Verifies negative decay clamping: next potential equals last potential."""
params = self.base_params(
exit_potential_mode="progressive_release",
exit_potential_decay=-0.75,
self.assertPlacesEqual(total, shaping, places=12)
def test_potential_gamma_nan_fallback(self):
- """potential_gamma=NaN falls back to default value (indirect comparison)."""
+ """Verifies potential_gamma=NaN fallback to default value."""
base_params_dict = self.base_params()
default_gamma = base_params_dict.get("potential_gamma", 0.95)
params_nan = self.base_params(potential_gamma=np.nan, hold_potential_enabled=True)
def test_validate_reward_parameters_batch_and_relaxed_aggregation(self):
"""Batch validate strict failures + relaxed multi-reason aggregation via helpers."""
- # Build strict failure cases
strict_failures = [
build_validation_case({"potential_gamma": -0.2}, strict=True, expect_error=True),
build_validation_case({"hold_potential_scale": -5.0}, strict=True, expect_error=True),
]
- # Success default (strict) case
success_case = build_validation_case({}, strict=True, expect_error=False)
- # Relaxed multi-reason aggregation case
relaxed_case = build_validation_case(
{
"potential_gamma": "not-a-number",
"derived_default",
],
)
- # Execute batch (strict successes + failures + relaxed case)
execute_validation_batch(
self,
[success_case] + strict_failures + [relaxed_case],
validate_reward_parameters,
)
- # Explicit aggregation assertions for relaxed case using helper
params_relaxed = DEFAULT_MODEL_REWARD_PARAMETERS.copy()
params_relaxed.update(
{
# Owns invariant: pbrs-canonical-drift-correction-106
def test_pbrs_106_canonical_drift_correction_zero_sum(self):
"""Invariant 106: canonical mode enforces near zero-sum shaping (drift correction)."""
+ from ..constants import SCENARIOS
+
params = self.base_params(
exit_potential_mode="canonical",
hold_potential_enabled=True,
)
df = simulate_samples(
params={**params, "max_trade_duration_candles": 140},
- num_samples=500,
+            num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE,  # 400 (reduced from the original 500)
seed=913,
base_factor=self.TEST_BASE_FACTOR,
profit_target=self.TEST_PROFIT_TARGET,
# Owns invariant (comparison path): pbrs-canonical-drift-correction-106
def test_pbrs_106_canonical_drift_correction_uniform_offset(self):
"""Canonical drift correction reduces Σ shaping below tolerance vs non-canonical."""
+ from ..constants import SCENARIOS
+
params_can = self.base_params(
exit_potential_mode="canonical",
hold_potential_enabled=True,
)
df_can = simulate_samples(
params={**params_can, "max_trade_duration_candles": 120},
- num_samples=400,
+ num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE,
seed=777,
base_factor=self.TEST_BASE_FACTOR,
profit_target=self.TEST_PROFIT_TARGET,
)
df_non = simulate_samples(
params={**params_non, "max_trade_duration_candles": 120},
- num_samples=400,
+ num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE,
seed=777,
base_factor=self.TEST_BASE_FACTOR,
profit_target=self.TEST_PROFIT_TARGET,
def test_report_cumulative_invariance_aggregation(self):
"""Canonical telescoping term: small per-step mean drift, bounded increments."""
+ from ..constants import SCENARIOS
+
params = self.base_params(
hold_potential_enabled=True,
entry_additive_enabled=False,
telescoping_sum = 0.0
max_abs_step = 0.0
steps = 0
- for _ in range(500):
+ for _ in range(SCENARIOS.PBRS_SIMULATION_STEPS):
is_exit = rng.uniform() < 0.1
current_pnl = float(rng.normal(0, 0.05))
current_dur = float(rng.uniform(0, 1))
def test_report_explicit_non_invariance_progressive_release(self):
"""progressive_release cumulative shaping non-zero (release leak)."""
+ from ..constants import SCENARIOS
+
params = self.base_params(
hold_potential_enabled=True,
entry_additive_enabled=False,
rng = np.random.default_rng(321)
last_potential = 0.0
shaping_sum = 0.0
- for _ in range(160):
+ for _ in range(SCENARIOS.MONTE_CARLO_ITERATIONS):
is_exit = rng.uniform() < 0.15
next_pnl = 0.0 if is_exit else float(rng.normal(0, 0.07))
next_dur = 0.0 if is_exit else float(rng.uniform(0, 1))
from reward_space_analysis import PBRS_INVARIANCE_TOL
+ from ..constants import SCENARIOS
+
small_vals = [1.0e-7, -2.0e-7, 3.0e-7] # sum = 2.0e-7 < tolerance
total_shaping = float(sum(small_vals))
self.assertLess(
seed=self.SEED,
skip_feature_analysis=True,
skip_partial_dependence=True,
- bootstrap_resamples=25,
+ bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS,
)
report_path = out_dir / "statistical_analysis.md"
self.assertTrue(report_path.exists(), "Report file missing for canonical near-zero test")
from reward_space_analysis import PBRS_INVARIANCE_TOL
+ from ..constants import SCENARIOS
+
shaping_vals = [1.2e-4, 1.3e-4, 8.0e-5, -2.0e-5, 1.4e-4] # sum = 4.5e-4 (> tol)
total_shaping = sum(shaping_vals)
self.assertGreater(abs(total_shaping), PBRS_INVARIANCE_TOL)
seed=self.SEED,
skip_feature_analysis=True,
skip_partial_dependence=True,
- bootstrap_resamples=50,
+ bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS * 2,
)
report_path = out_dir / "statistical_analysis.md"
self.assertTrue(report_path.exists(), "Report file missing for canonical warning test")
"""Full report: Non-canonical classification aggregates mode + additives reasons."""
import pandas as pd
+ from ..constants import SCENARIOS
+
shaping_vals = [0.02, -0.005, 0.007]
entry_add_vals = [0.003, 0.0, 0.004]
exit_add_vals = [0.001, 0.002, 0.0]
seed=self.SEED,
skip_feature_analysis=True,
skip_partial_dependence=True,
- bootstrap_resamples=25,
+ bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS,
)
report_path = out_dir / "statistical_analysis.md"
self.assertTrue(
from reward_space_analysis import PBRS_INVARIANCE_TOL
+ from ..constants import SCENARIOS
+
shaping_vals = [0.002, -0.0005, 0.0012]
total_shaping = sum(shaping_vals)
self.assertGreater(abs(total_shaping), PBRS_INVARIANCE_TOL)
seed=self.SEED,
skip_feature_analysis=True,
skip_partial_dependence=True,
- bootstrap_resamples=25,
+ bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS,
)
report_path = out_dir / "statistical_analysis.md"
self.assertTrue(
out_dir = self.output_path / "pbrs_absence_and_shift_placeholder"
import reward_space_analysis as rsa
+ from ..constants import SCENARIOS
+
original_compute_summary_stats = rsa._compute_summary_stats
def _minimal_summary_stats(_df):
seed=self.SEED,
skip_feature_analysis=True,
skip_partial_dependence=True,
- bootstrap_resamples=10,
+ bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS // 2,
)
finally:
rsa._compute_summary_stats = original_compute_summary_stats
validate_reward_parameters,
)
-from ..helpers import run_strict_validation_failure_cases
+from ..helpers import (
+ assert_exit_factor_invariant_suite,
+ run_relaxed_validation_adjustment_cases,
+ run_strict_validation_failure_cases,
+)
class _PyTestAdapter(unittest.TestCase):
run_strict_validation_failure_cases(adapter, failure_params, validate_reward_parameters)
-from ..helpers import run_relaxed_validation_adjustment_cases
-
-
@pytest.mark.robustness
def test_validate_reward_parameters_relaxed_adjustment_batch():
"""Batch relaxed validation adjustment scenarios using shared helper."""
assert penalty == 0.0
-from ..helpers import assert_exit_factor_invariant_suite
-
-
@pytest.mark.robustness
def test_exit_factor_invariant_suite_grouped():
"""Grouped exit factor invariant scenarios using shared helper."""
import math
import unittest
-import warnings
import numpy as np
import pytest
Actions,
Positions,
RewardContext,
- RewardDiagnosticsWarning,
_get_exit_factor,
calculate_reward,
simulate_samples,
)
from ..helpers import (
+ assert_diagnostic_warning,
assert_exit_factor_attenuation_modes,
assert_exit_mode_mathematical_validation,
assert_single_active_component_with_additives,
+ capture_warnings,
)
from ..test_base import RewardSpaceTestBase
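+# Assumed helper semantics (replacing the inline warnings.catch_warnings blocks removed
+# below): capture_warnings() records warnings with simplefilter("always");
+# assert_diagnostic_warning(substrings) asserts that a RewardDiagnosticsWarning whose
+# message contains the given substrings is emitted inside the block.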
position=Positions.Long,
action=Actions.Long_exit,
)
- with warnings.catch_warnings(record=True) as caught:
- warnings.simplefilter("always")
+ with capture_warnings() as caught:
baseline = calculate_reward(
context,
params,
pnl = 0.05
pnl_factor = 1.0
duration_ratio = 0.8
- with warnings.catch_warnings(record=True) as caught:
- warnings.simplefilter("always", RewardDiagnosticsWarning)
+ with assert_diagnostic_warning(["Unknown exit_attenuation_mode"]):
f_unknown = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params)
linear_params = self.base_params(exit_attenuation_mode="linear", exit_plateau=False)
f_linear = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, linear_params)
tolerance=self.TOL_IDENTITY_RELAXED,
msg=f"Fallback linear mismatch unknown={f_unknown} linear={f_linear}",
)
- diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)]
- self.assertTrue(
- diag_warnings, "No RewardDiagnosticsWarning emitted for unknown mode fallback"
- )
- self.assertTrue(
- any("Unknown exit_attenuation_mode" in str(w.message) for w in diag_warnings),
- "Fallback warning message content mismatch",
- )
# Owns invariant: robustness-negative-grace-clamp-103
def test_robustness_103_negative_plateau_grace_clamped(self):
pnl = 0.03
pnl_factor = 1.0
duration_ratio = 0.5
- with warnings.catch_warnings(record=True) as caught:
- warnings.simplefilter("always", RewardDiagnosticsWarning)
+ with assert_diagnostic_warning(["exit_plateau_grace < 0"]):
f_neg = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params)
# Reference with grace=0.0 (since negative should clamp)
ref_params = self.base_params(
tolerance=self.TOL_IDENTITY_RELAXED,
msg=f"Negative grace clamp mismatch f_neg={f_neg} f_ref={f_ref}",
)
- diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)]
- self.assertTrue(diag_warnings, "No RewardDiagnosticsWarning for negative grace")
- self.assertTrue(
- any("exit_plateau_grace < 0" in str(w.message) for w in diag_warnings),
- "Warning content missing for negative grace clamp",
- )
# Owns invariant: robustness-invalid-power-tau-104
def test_robustness_104_invalid_power_tau_fallback_alpha_one(self):
params = self.base_params(
exit_attenuation_mode="power", exit_power_tau=tau, exit_plateau=False
)
- with warnings.catch_warnings(record=True) as caught:
- warnings.simplefilter("always", RewardDiagnosticsWarning)
+ with assert_diagnostic_warning(["exit_power_tau"]):
f0 = _get_exit_factor(base_factor, pnl, pnl_factor, 0.0, params)
f1 = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params)
- diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)]
- self.assertTrue(diag_warnings, f"No RewardDiagnosticsWarning for invalid tau={tau}")
- self.assertTrue(any("exit_power_tau" in str(w.message) for w in diag_warnings))
ratio = f1 / max(f0, self.TOL_NUMERIC_GUARD)
self.assertAlmostEqual(
ratio,
near_zero_values = [1e-15, 1e-12, 5e-14]
for hl in near_zero_values:
params = self.base_params(exit_attenuation_mode="half_life", exit_half_life=hl)
- with warnings.catch_warnings(record=True) as caught:
- warnings.simplefilter("always", RewardDiagnosticsWarning)
+ with assert_diagnostic_warning(["exit_half_life", "close to 0"]):
_ = _get_exit_factor(base_factor, pnl, pnl_factor, 0.0, params)
fdr = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params)
- diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)]
- self.assertTrue(
- diag_warnings, f"No RewardDiagnosticsWarning for near-zero half-life hl={hl}"
- )
- self.assertTrue(
- any(
- "exit_half_life" in str(w.message) and "close to 0" in str(w.message)
- for w in diag_warnings
- )
- )
self.assertAlmostEqualFloat(
fdr,
1.0 * pnl_factor, # Kernel returns 1.0 then * pnl_factor
"""Statistical tests, distribution metrics, and bootstrap validation."""
import unittest
-import warnings
import numpy as np
import pandas as pd
statistical_hypothesis_tests,
)
+from ..helpers import assert_diagnostic_warning
from ..test_base import RewardSpaceTestBase
pytestmark = pytest.mark.statistics
float(metrics[p_key]), 1.0, places=12, msg=f"Expected 1.0 for {p_key}"
)
- def _make_idle_variance_df(self, n: int = 100) -> pd.DataFrame:
- """Synthetic dataframe focusing on idle_duration ↔ reward_idle correlation."""
- self.seed_all(self.SEED)
- idle_duration = np.random.exponential(10, n)
- reward_idle = -0.01 * idle_duration + np.random.normal(0, 0.001, n)
- return pd.DataFrame(
- {
- "idle_duration": idle_duration,
- "reward_idle": reward_idle,
- "position": np.random.choice([0.0, 0.5, 1.0], n),
- "reward": np.random.normal(0, 1, n),
- "pnl": np.random.normal(0, self.TEST_PNL_STD, n),
- "trade_duration": np.random.exponential(20, n),
- }
- )
-
def test_statistics_distribution_shift_metrics(self):
"""KL/JS/Wasserstein metrics."""
df1 = self._make_idle_variance_df(100)
f"Metric {name} expected ≈ 0 on identical distributions (got {val})",
)
elif name.endswith("_ks_statistic"):
+ from ..constants import STAT_TOL
+
self.assertLess(
abs(val),
- 0.005,
+ STAT_TOL.KS_STATISTIC_IDENTITY,
f"KS statistic should be near 0 on identical distributions (got {val})",
)
def test_stats_variance_vs_duration_spearman_sign(self):
"""trade_duration up => pnl variance up (rank corr >0)."""
+ from ..constants import SCENARIOS, STAT_TOL
+
rng = np.random.default_rng(99)
n = 250
- trade_duration = np.linspace(1, 300, n)
+ trade_duration = np.linspace(1, SCENARIOS.DURATION_LONG, n)
pnl = rng.normal(0, 1 + trade_duration / 400.0, n)
ranks_dur = pd.Series(trade_duration).rank().to_numpy()
ranks_var = pd.Series(np.abs(pnl)).rank().to_numpy()
rho = np.corrcoef(ranks_dur, ranks_var)[0, 1]
self.assertFinite(rho, name="spearman_rho")
- self.assertGreater(rho, 0.1)
+ self.assertGreater(rho, STAT_TOL.CORRELATION_SIGNIFICANCE)
def test_stats_scaling_invariance_distribution_metrics(self):
"""Equal scaling keeps KL/JS ≈0."""
- df1 = self._shift_scale_df(400)
+ from ..constants import SCENARIOS, STAT_TOL
+
+ df1 = self._shift_scale_df(SCENARIOS.DEFAULT_SAMPLE_SIZE)
scale = 3.5
df2 = df1.copy()
df2["pnl"] *= scale
if k.endswith("_kl_divergence") or k.endswith("_js_distance"):
self.assertLess(
abs(v),
- 0.0005,
+ STAT_TOL.DISTRIBUTION_SHIFT,
f"Expected near-zero divergence after equal scaling (k={k}, v={v})",
)
def test_stats_bh_correction_null_false_positive_rate(self):
"""Null: low BH discovery rate."""
+ from ..constants import SCENARIOS
+
rng = np.random.default_rng(1234)
- n = 400
+ n = SCENARIOS.NULL_HYPOTHESIS_SAMPLE_SIZE
df = pd.DataFrame(
{
"pnl": rng.normal(0, 1, n),
def test_stats_heteroscedasticity_pnl_validation(self):
"""PnL variance increases with trade duration (heteroscedasticity)."""
+ from ..constants import SCENARIOS
+
df = simulate_samples(
params=self.base_params(max_trade_duration_candles=100),
- num_samples=1000,
+            num_samples=SCENARIOS.SAMPLE_SIZE_LARGE + 200,  # 1000
seed=self.SEED_HETEROSCEDASTICITY,
base_factor=self.TEST_BASE_FACTOR,
profit_target=self.TEST_PROFIT_TARGET,
pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
)
exit_data = df[df["reward_exit"] != 0].copy()
- if len(exit_data) < 50:
+ if len(exit_data) < SCENARIOS.HETEROSCEDASTICITY_MIN_EXITS:
self.skipTest("Insufficient exit actions for heteroscedasticity test")
exit_data["duration_bin"] = pd.cut(
exit_data["duration_ratio"], bins=4, labels=["Q1", "Q2", "Q3", "Q4"]
)
variance_by_bin = exit_data.groupby("duration_bin")["pnl"].var().dropna()
if "Q1" in variance_by_bin.index and "Q4" in variance_by_bin.index:
+ from ..constants import STAT_TOL
+
self.assertGreater(
variance_by_bin["Q4"],
- variance_by_bin["Q1"] * 0.8,
+ variance_by_bin["Q1"] * STAT_TOL.VARIANCE_RATIO_THRESHOLD,
"PnL heteroscedasticity: variance should increase with duration",
)
def test_stats_benjamini_hochberg_adjustment(self):
"""BH adjustment adds p_value_adj & significant_adj with valid bounds."""
+ from ..constants import SCENARIOS
+
df = simulate_samples(
params=self.base_params(max_trade_duration_candles=100),
- num_samples=600,
+            num_samples=SCENARIOS.SAMPLE_SIZE_LARGE - 200,  # 600
seed=self.SEED_HETEROSCEDASTICITY,
base_factor=self.TEST_BASE_FACTOR,
profit_target=self.TEST_PROFIT_TARGET,
def test_stats_bootstrap_shrinkage_with_sample_size(self):
"""Bootstrap CI half-width decreases with larger sample (~1/sqrt(n) heuristic)."""
- small = self._shift_scale_df(80)
- large = self._shift_scale_df(800)
+ from ..constants import SCENARIOS
+
+ small = self._shift_scale_df(SCENARIOS.SAMPLE_SIZE_SMALL - 20)
+ large = self._shift_scale_df(SCENARIOS.SAMPLE_SIZE_LARGE)
res_small = bootstrap_confidence_intervals(small, ["reward"], n_bootstrap=400)
res_large = bootstrap_confidence_intervals(large, ["reward"], n_bootstrap=400)
_, lo_s, hi_s = list(res_small.values())[0]
"""Invariant 113 (non-strict): constant distribution CI widened with warning (positive epsilon width)."""
df = self._const_df(80)
- with warnings.catch_warnings(record=True) as caught:
- warnings.simplefilter("always", RewardDiagnosticsWarning)
+ with assert_diagnostic_warning(
+ ["degenerate", "bootstrap", "CI"],
+ warning_category=RewardDiagnosticsWarning,
+ strict_mode=False,
+ ):
res = bootstrap_confidence_intervals(
df,
["reward", "pnl"],
confidence_level=0.95,
strict_diagnostics=False,
)
- diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)]
- self.assertTrue(
- diag_warnings,
- "Expected RewardDiagnosticsWarning for degenerate bootstrap CI widening",
- )
for _metric, (mean, lo, hi) in res.items():
self.assertLess(
lo,
)
width = hi - lo
self.assertGreater(width, 0.0)
- self.assertLessEqual(width, 3e-09, "Width should be small epsilon range (<=3e-9)")
+ from ..constants import STAT_TOL
+
+ self.assertLessEqual(
+ width, STAT_TOL.CI_WIDTH_EPSILON, "Width should be small epsilon range"
+ )
# Mean should be centered (approx) within widened bounds
self.assertGreaterEqual(mean, lo)
self.assertLessEqual(mean, hi)
apply_potential_shaping,
)
+from .constants import (
+ CONTINUITY,
+ EXIT_FACTOR,
+ PBRS,
+ TOLERANCE,
+)
+
# Global constants
PBRS_INTEGRATION_PARAMS = [
"potential_gamma",
"""Clean up temporary files."""
shutil.rmtree(self.temp_dir, ignore_errors=True)
- PBRS_TERMINAL_TOL = 1e-12
- PBRS_MAX_ABS_SHAPING = 5.0
+ # ===============================================
+ # Constants imported from tests.constants module
+ # ===============================================
+
+ # Tolerance constants
+ TOL_IDENTITY_STRICT = TOLERANCE.IDENTITY_STRICT
+ TOL_IDENTITY_RELAXED = TOLERANCE.IDENTITY_RELAXED
+ TOL_GENERIC_EQ = TOLERANCE.GENERIC_EQ
+ TOL_NUMERIC_GUARD = TOLERANCE.NUMERIC_GUARD
+ TOL_NEGLIGIBLE = TOLERANCE.NEGLIGIBLE
+ TOL_RELATIVE = TOLERANCE.RELATIVE
+ TOL_DISTRIB_SHAPE = TOLERANCE.DISTRIB_SHAPE
+
+ # PBRS constants
+ PBRS_TERMINAL_TOL = PBRS.TERMINAL_TOL
+ PBRS_MAX_ABS_SHAPING = PBRS.MAX_ABS_SHAPING
+
+ # Continuity constants
+ CONTINUITY_EPS_SMALL = CONTINUITY.EPS_SMALL
+ CONTINUITY_EPS_LARGE = CONTINUITY.EPS_LARGE
+
+ # Exit factor constants
+ MIN_EXIT_POWER_TAU = EXIT_FACTOR.MIN_POWER_TAU
+
+ # Test-specific constants (not in constants.py)
PBRS_TERMINAL_PROB = 0.08
PBRS_SWEEP_ITER = 120
- EPS_BASE = 1e-12
- TOL_NUMERIC_GUARD = EPS_BASE
- TOL_IDENTITY_STRICT = EPS_BASE
- TOL_IDENTITY_RELAXED = 1e-09
- TOL_GENERIC_EQ = 1e-06
- TOL_NEGLIGIBLE = 1e-08
- MIN_EXIT_POWER_TAU = 1e-06
- TOL_DISTRIB_SHAPE = 0.05
+ EPS_BASE = TOLERANCE.IDENTITY_STRICT # Alias for backward compatibility
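+    # sqrt(ln 2): theoretical upper bound of the Jensen-Shannon distance (natural-log
+    # base), attained for fully disjoint distributions.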
JS_DISTANCE_UPPER_BOUND = math.sqrt(math.log(2.0))
- TOL_RELATIVE = 1e-09
- CONTINUITY_EPS_SMALL = 0.0001
- CONTINUITY_EPS_LARGE = 0.001
def make_ctx(
self,
"idle_duration": rng.exponential(10, n),
}
)
+
+ def _make_idle_variance_df(self, n: int = 100) -> pd.DataFrame:
+ """Synthetic dataframe focusing on idle_duration ↔ reward_idle correlation."""
+ self.seed_all(self.SEED)
+ idle_duration = np.random.exponential(10, n)
+ reward_idle = -0.01 * idle_duration + np.random.normal(0, 0.001, n)
+ return pd.DataFrame(
+ {
+ "idle_duration": idle_duration,
+ "reward_idle": reward_idle,
+ "position": np.random.choice([0.0, 0.5, 1.0], n),
+ "reward": np.random.normal(0, 1, n),
+ "pnl": np.random.normal(0, self.TEST_PNL_STD, n),
+ "trade_duration": np.random.exponential(20, n),
+ }
+ )