Piment Noir Git Repositories - freqai-strategies.git/commitdiff
refactor(reforcexy): cleanup
author Jérôme Benoit <jerome.benoit@piment-noir.org>
Thu, 13 Nov 2025 23:45:45 +0000 (00:45 +0100)
committer Jérôme Benoit <jerome.benoit@piment-noir.org>
Thu, 13 Nov 2025 23:45:45 +0000 (00:45 +0100)
Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
ReforceXY/reward_space_analysis/pyproject.toml
ReforceXY/reward_space_analysis/reward_space_analysis.py
ReforceXY/reward_space_analysis/tests/components/test_reward_components.py
ReforceXY/reward_space_analysis/tests/constants.py
ReforceXY/reward_space_analysis/tests/helpers/assertions.py
ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py
ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py
ReforceXY/reward_space_analysis/tests/robustness/test_branch_coverage.py
ReforceXY/reward_space_analysis/tests/robustness/test_robustness.py
ReforceXY/reward_space_analysis/tests/statistics/test_statistics.py
ReforceXY/reward_space_analysis/tests/test_base.py

index 45f00bd3ada8fc3a2ed217b8be3e8bde6e3e305b..52d0b1dd83cc5bbc42550c845b2ebf3d7b4b6c42 100644 (file)
@@ -42,6 +42,7 @@ python_functions = [
 ]
 markers = [
     "components: component-level reward computations",
+    "transforms: mathematical transform functions",
     "robustness: stress and edge-case behavior",
     "api: public API surface and helpers",
     "cli: command-line interface behaviors",
index f656a63049efe4869b552f4eb8fae81732ae2464..f60b2f4d88525fc4d4445a2c84c791370d559de3 100644 (file)
@@ -69,7 +69,7 @@ POTENTIAL_GAMMA_DEFAULT: float = 0.95
 ATTENUATION_MODES: Tuple[str, ...] = ("sqrt", "linear", "power", "half_life")
 ATTENUATION_MODES_WITH_LEGACY: Tuple[str, ...] = ATTENUATION_MODES + ("legacy",)
 
-# Centralized internal numeric guards & behavior toggles (single source of truth for internal tunables)
+# Centralized internal numeric guards & behavior toggles
 INTERNAL_GUARDS: dict[str, float] = {
     "degenerate_ci_epsilon": 1e-9,
     "distribution_constant_fallback_moment": 0.0,
@@ -419,7 +419,7 @@ def validate_reward_parameters(
     sanitized = dict(params)
     adjustments: Dict[str, Dict[str, Any]] = {}
 
-    # Normalize boolean-like parameters explicitly to avoid inconsistent types
+    # Boolean parameter coercion
     _bool_keys = [
         "check_invariants",
         "hold_potential_enabled",
@@ -480,7 +480,7 @@ def validate_reward_parameters(
         adjusted = original_numeric
         reason_parts: List[str] = []
 
-        # Record numeric coercion if type changed (e.g., from str/bool/None)
+        # Track type coercion
         if not isinstance(original_val, (int, float)):
             adjustments.setdefault(
                 key,
@@ -491,7 +491,6 @@ def validate_reward_parameters(
                     "validation_mode": "strict" if strict else "relaxed",
                 },
             )
-            # Update sanitized to numeric before clamping
             sanitized[key] = original_numeric
 
         # Bounds enforcement
@@ -1245,10 +1244,9 @@ def simulate_samples(
             max_unrealized_profit = 0.0
             min_unrealized_profit = 0.0
         else:
-            # Unrealized profits should bracket the final PnL
-            # Max represents peak profit during trade, min represents lowest point
+            # Unrealized profit bounds
             span = abs(rng.gauss(0.0, 0.015))
-            # Ensure max >= pnl >= min by construction
+            # max >= pnl >= min by construction
             max_unrealized_profit = pnl + abs(rng.gauss(0.0, span))
             min_unrealized_profit = pnl - abs(rng.gauss(0.0, span))
 
@@ -1336,7 +1334,6 @@ def simulate_samples(
                     )
                     drift = total_shaping / max(1, n_invariant)
                     df.loc[:, "reward_shaping"] = df["reward_shaping"] - drift
-        # Attach resolved reward params for downstream consumers (e.g., report derivations)
         df.attrs["reward_params"] = dict(params)
     except Exception:
         # Graceful fallback (no invariance enforcement on failure)
index 4fe74686bc824535f513e2476eb58b3530e37179..c2e5cc5bab2f4996dde90fe66e84f499c32fff3a 100644 (file)
@@ -17,6 +17,9 @@ from reward_space_analysis import (
 )
 
 from ..helpers import (
+    RewardScenarioConfig,
+    ThresholdTestConfig,
+    ValidationConfig,
     assert_component_sum_integrity,
     assert_exit_factor_plateau_behavior,
     assert_hold_penalty_threshold_behavior,
@@ -45,13 +48,9 @@ class TestRewardComponents(RewardSpaceTestBase):
     def test_hold_penalty_basic_calculation(self):
         """Test hold penalty calculation when trade_duration exceeds max_duration.
 
-        Tests:
-            - Hold penalty is negative when duration exceeds threshold
-            - Component sum integrity maintained
-
-        Expected behavior:
-            - trade_duration > max_duration → hold_penalty < 0
-            - Total reward equals sum of active components
+        Verifies:
+        - trade_duration > max_duration → hold_penalty < 0
+        - Total reward equals sum of active components
         """
         context = self.make_ctx(
             pnl=0.01,
@@ -72,24 +71,20 @@ class TestRewardComponents(RewardSpaceTestBase):
             action_masking=True,
         )
         self.assertLess(breakdown.hold_penalty, 0, "Hold penalty should be negative")
-        assert_component_sum_integrity(
-            self,
-            breakdown,
-            self.TOL_IDENTITY_RELAXED,
+        config = ValidationConfig(
+            tolerance_strict=self.TOL_IDENTITY_STRICT,
+            tolerance_relaxed=self.TOL_IDENTITY_RELAXED,
             exclude_components=["idle_penalty", "exit_component", "invalid_penalty"],
             component_description="hold + shaping/additives",
         )
+        assert_component_sum_integrity(self, breakdown, config)
 
     def test_hold_penalty_threshold_behavior(self):
         """Test hold penalty activation at max_duration threshold.
 
-        Tests:
-            - No penalty before max_duration
-            - Penalty activation at and after max_duration
-
-        Expected behavior:
-            - duration < max_duration → hold_penalty = 0
-            - duration >= max_duration → hold_penalty <= 0
+        Verifies:
+        - duration < max_duration → hold_penalty = 0
+        - duration >= max_duration → hold_penalty <= 0
         """
         max_duration = 128
         threshold_test_cases = [
@@ -108,31 +103,32 @@ class TestRewardComponents(RewardSpaceTestBase):
                 action=Actions.Neutral,
             )
 
+        config = ThresholdTestConfig(
+            max_duration=max_duration,
+            test_cases=threshold_test_cases,
+            tolerance=self.TOL_IDENTITY_RELAXED,
+        )
         assert_hold_penalty_threshold_behavior(
             self,
-            threshold_test_cases,
-            max_duration,
             context_factory,
             self.DEFAULT_PARAMS,
             self.TEST_BASE_FACTOR,
             self.TEST_PROFIT_TARGET,
             1.0,
-            self.TOL_IDENTITY_RELAXED,
+            config,
         )
 
     def test_hold_penalty_progressive_scaling(self):
         """Test hold penalty scales progressively with increasing duration.
 
-        Tests:
-            - Penalty magnitude increases monotonically with duration
-            - Progressive scaling beyond max_duration threshold
-
-        Expected behavior:
-            - For d1 < d2 < d3: penalty(d1) >= penalty(d2) >= penalty(d3)
-            - Penalties become more negative with longer durations
+        Verifies:
+        - For d1 < d2 < d3: penalty(d1) >= penalty(d2) >= penalty(d3)
+        - Progressive scaling beyond max_duration threshold
         """
+        from ..constants import SCENARIOS
+
         params = self.base_params(max_trade_duration_candles=100)
-        durations = [150, 200, 300]
+        durations = list(SCENARIOS.DURATION_SCENARIOS)
         penalties = []
         for duration in durations:
             context = self.make_ctx(
@@ -158,13 +154,9 @@ class TestRewardComponents(RewardSpaceTestBase):
     def test_idle_penalty_calculation(self):
         """Test idle penalty calculation for neutral idle state.
 
-        Tests:
-            - Idle penalty is negative for idle duration > 0
-            - Component sum integrity maintained
-
-        Expected behavior:
-            - idle_duration > 0 → idle_penalty < 0
-            - Total reward equals sum of active components
+        Verifies:
+        - idle_duration > 0 → idle_penalty < 0
+        - Component sum integrity maintained
         """
         context = self.make_ctx(
             pnl=0.0,
@@ -178,35 +170,34 @@ class TestRewardComponents(RewardSpaceTestBase):
 
         def validate_idle_penalty(test_case, breakdown, description, tolerance):
             test_case.assertLess(breakdown.idle_penalty, 0, "Idle penalty should be negative")
-            assert_component_sum_integrity(
-                test_case,
-                breakdown,
-                tolerance,
+            config = ValidationConfig(
+                tolerance_strict=test_case.TOL_IDENTITY_STRICT,
+                tolerance_relaxed=tolerance,
                 exclude_components=["hold_penalty", "exit_component", "invalid_penalty"],
                 component_description="idle + shaping/additives",
             )
+            assert_component_sum_integrity(test_case, breakdown, config)
 
         scenarios = [(context, self.DEFAULT_PARAMS, "idle_penalty_basic")]
+        config = RewardScenarioConfig(
+            base_factor=self.TEST_BASE_FACTOR,
+            profit_target=self.TEST_PROFIT_TARGET,
+            risk_reward_ratio=1.0,
+            tolerance_relaxed=self.TOL_IDENTITY_RELAXED,
+        )
         assert_reward_calculation_scenarios(
             self,
             scenarios,
-            self.TEST_BASE_FACTOR,
-            self.TEST_PROFIT_TARGET,
-            1.0,
+            config,
             validate_idle_penalty,
-            self.TOL_IDENTITY_RELAXED,
         )
 
     def test_efficiency_zero_policy(self):
         """Test efficiency zero policy produces expected PnL factor.
 
-        Tests:
-            - PnL factor calculation with efficiency weight = 0
-            - Finite and positive factor values
-
-        Expected behavior:
-            - efficiency_weight = 0 → pnl_factor ≈ 1.0
-            - Factor is finite and well-defined
+        Verifies:
+        - efficiency_weight = 0 → pnl_factor ≈ 1.0
+        - Factor is finite and positive
         """
         ctx = self.make_ctx(
             pnl=0.0,
@@ -225,13 +216,9 @@ class TestRewardComponents(RewardSpaceTestBase):
     def test_max_idle_duration_candles_logic(self):
         """Test max idle duration candles parameter affects penalty magnitude.
 
-        Tests:
-            - Smaller max_idle_duration → larger penalty magnitude
-            - Larger max_idle_duration → smaller penalty magnitude
-            - Both penalties are negative
-
-        Expected behavior:
-            - penalty(max=50) < penalty(max=200) < 0
+        Verifies:
+        - penalty(max=50) < penalty(max=200) < 0
+        - Smaller max → larger penalty magnitude
         """
         params_small = self.base_params(max_idle_duration_candles=50)
         params_large = self.base_params(max_idle_duration_candles=200)
@@ -271,13 +258,9 @@ class TestRewardComponents(RewardSpaceTestBase):
 
         Non-owning smoke test; ownership: robustness/test_robustness.py:35
 
-        Tests:
-            - Exit factor finiteness for linear and power modes
-            - Plateau behavior with grace period
-
-        Expected behavior:
-            - All exit factors are finite and positive
-            - Plateau mode attenuates after grace period
+        Verifies:
+        - Exit factors are finite and positive (linear, power modes)
+        - Plateau mode attenuates after grace period
         """
         modes_to_test = ["linear", "power"]
         for mode in modes_to_test:
@@ -307,13 +290,9 @@ class TestRewardComponents(RewardSpaceTestBase):
     def test_idle_penalty_zero_when_profit_target_zero(self):
         """Test idle penalty is zero when profit_target is zero.
 
-        Tests:
-            - profit_target = 0 → idle_penalty = 0
-            - Total reward is zero in this configuration
-
-        Expected behavior:
-            - profit_target = 0 → idle_factor = 0 → idle_penalty = 0
-            - No other components active for neutral idle state
+        Verifies:
+        - profit_target = 0 → idle_penalty = 0
+        - Total reward is zero in this configuration
         """
         context = self.make_ctx(
             pnl=0.0,
@@ -332,28 +311,26 @@ class TestRewardComponents(RewardSpaceTestBase):
             )
 
         scenarios = [(context, self.DEFAULT_PARAMS, "profit_target_zero")]
+        config = RewardScenarioConfig(
+            base_factor=self.TEST_BASE_FACTOR,
+            profit_target=0.0,
+            risk_reward_ratio=self.TEST_RR,
+            tolerance_relaxed=self.TOL_IDENTITY_RELAXED,
+        )
         assert_reward_calculation_scenarios(
             self,
             scenarios,
-            self.TEST_BASE_FACTOR,
-            0.0,  # profit_target=0
-            self.TEST_RR,
+            config,
             validate_zero_penalty,
-            self.TOL_IDENTITY_RELAXED,
         )
 
     def test_win_reward_factor_saturation(self):
         """Test PnL amplification factor saturates at asymptotic limit.
 
-        Tests:
-            - Amplification ratio increases monotonically with PnL
-            - Saturation approaches (1 + win_reward_factor)
-            - Mathematical formula validation
-
-        Expected behavior:
-            - As PnL → ∞: amplification → (1 + win_reward_factor)
-            - Monotonic increase: ratio(PnL1) <= ratio(PnL2) for PnL1 < PnL2
-            - Observed matches theoretical tanh-based formula
+        Verifies:
+        - Amplification ratio increases monotonically with PnL
+        - Saturation approaches (1 + win_reward_factor)
+        - Observed matches theoretical saturation behavior
         """
         win_reward_factor = 3.0
         beta = 0.5
@@ -421,22 +398,16 @@ class TestRewardComponents(RewardSpaceTestBase):
     def test_idle_penalty_fallback_and_proportionality(self):
         """Test idle penalty fallback and proportional scaling behavior.
 
-        Tests:
-            - Fallback to max_trade_duration when max_idle_duration is None
-            - Proportional scaling with idle duration (2:1 ratio validation)
-            - Mathematical validation of penalty formula
-
-        Expected behavior:
-            - max_idle_duration = None → use max_trade_duration as fallback
-            - penalty(duration=40) ≈ 2 × penalty(duration=20)
-            - Formula: penalty ∝ (duration/max)^power × scale
+        Verifies:
+        - max_idle_duration = None → use max_trade_duration as fallback
+        - penalty(duration=40) ≈ 2 × penalty(duration=20)
+        - Proportional scaling with idle duration
         """
         params = self.base_params(max_idle_duration_candles=None, max_trade_duration_candles=100)
         base_factor = 90.0
         profit_target = self.TEST_PROFIT_TARGET
         risk_reward_ratio = 1.0
 
-        # Generate test contexts using helper
         base_context_kwargs = {
             "pnl": 0.0,
             "trade_duration": 0,
@@ -448,7 +419,6 @@ class TestRewardComponents(RewardSpaceTestBase):
             self.make_ctx, idle_scenarios, base_context_kwargs
         )
 
-        # Calculate all rewards
         results = []
         for context, description in contexts_and_descriptions:
             breakdown = calculate_reward(
@@ -462,19 +432,16 @@ class TestRewardComponents(RewardSpaceTestBase):
             )
             results.append((breakdown, context.idle_duration, description))
 
-        # Validate proportional scaling
         br_a, br_b, br_mid = [r[0] for r in results]
         self.assertLess(br_a.idle_penalty, 0.0)
         self.assertLess(br_b.idle_penalty, 0.0)
         self.assertLess(br_mid.idle_penalty, 0.0)
 
-        # Check 2:1 ratio between 40 and 20 idle duration
         ratio = br_b.idle_penalty / br_a.idle_penalty if br_a.idle_penalty != 0 else None
         self.assertIsNotNone(ratio)
         if ratio is not None:
             self.assertAlmostEqualFloat(abs(ratio), 2.0, tolerance=0.2)
 
-        # Mathematical validation for mid-duration case
         idle_penalty_scale = _get_float_param(params, "idle_penalty_scale", 0.5)
         idle_penalty_power = _get_float_param(params, "idle_penalty_power", 1.025)
         factor = _get_float_param(params, "base_factor", float(base_factor))
index 19e5edee259aab64b245569230d7e4c9fb9adfa2..ddfe8ace88213834450f2bda90f83cdf14bd2060 100644 (file)
@@ -46,12 +46,12 @@ class ContinuityConfig:
     plateau and attenuation functions.
 
     Attributes:
-        EPS_SMALL: Small epsilon for tight continuity checks (1e-08)
-        EPS_LARGE: Larger epsilon for coarser continuity tests (5e-05)
+        EPS_SMALL: Small epsilon for tight continuity checks (1e-06)
+        EPS_LARGE: Larger epsilon for coarser continuity tests (1e-05)
     """
 
-    EPS_SMALL: float = 1e-08
-    EPS_LARGE: float = 5e-05
+    EPS_SMALL: float = 1e-06
+    EPS_LARGE: float = 1e-05
 
 
 @dataclass(frozen=True)
@@ -62,13 +62,13 @@ class ExitFactorConfig:
     ratio bounds and power mode constraints.
 
     Attributes:
-        SCALING_RATIO_MIN: Minimum expected scaling ratio for continuity (1.5)
-        SCALING_RATIO_MAX: Maximum expected scaling ratio for continuity (3.5)
+        SCALING_RATIO_MIN: Minimum expected scaling ratio for continuity (5.0)
+        SCALING_RATIO_MAX: Maximum expected scaling ratio for continuity (15.0)
         MIN_POWER_TAU: Minimum valid tau value for power mode (1e-15)
     """
 
-    SCALING_RATIO_MIN: float = 1.5
-    SCALING_RATIO_MAX: float = 3.5
+    SCALING_RATIO_MIN: float = 5.0
+    SCALING_RATIO_MAX: float = 15.0
     MIN_POWER_TAU: float = 1e-15
 
 
@@ -156,6 +156,73 @@ class TestParameters:
     EPS_BASE: float = 1e-10
 
 
+@dataclass(frozen=True)
+class TestScenarios:
+    """Test scenario parameters and sample sizes.
+
+    Standard values for test scenarios to ensure consistency across the test
+    suite and avoid magic numbers in test implementations.
+
+    Attributes:
+        DURATION_SHORT: Short duration scenario (150)
+        DURATION_MEDIUM: Medium duration scenario (200)
+        DURATION_LONG: Long duration scenario (300)
+        DURATION_SCENARIOS: Standard duration test sequence
+        SAMPLE_SIZE_SMALL: Small sample size for quick tests (100)
+        SAMPLE_SIZE_MEDIUM: Medium sample size for standard tests (400)
+        SAMPLE_SIZE_LARGE: Large sample size for statistical power (800)
+        DEFAULT_SAMPLE_SIZE: Default for most tests (400)
+        PBRS_SIMULATION_STEPS: Number of steps for PBRS simulation tests (500)
+        NULL_HYPOTHESIS_SAMPLE_SIZE: Sample size for null hypothesis tests (400)
+        BOOTSTRAP_MINIMAL_ITERATIONS: Minimal bootstrap iterations for quick tests (25)
+        BOOTSTRAP_STANDARD_ITERATIONS: Standard bootstrap iterations (100)
+        HETEROSCEDASTICITY_MIN_EXITS: Minimum exits for heteroscedasticity validation (50)
+        CORRELATION_TEST_MIN_SIZE: Minimum sample size for correlation tests (200)
+        MONTE_CARLO_ITERATIONS: Monte Carlo simulation iterations (160)
+    """
+
+    DURATION_SHORT: int = 150
+    DURATION_MEDIUM: int = 200
+    DURATION_LONG: int = 300
+    DURATION_SCENARIOS: tuple[int, ...] = (150, 200, 300)
+
+    SAMPLE_SIZE_SMALL: int = 100
+    SAMPLE_SIZE_MEDIUM: int = 400
+    SAMPLE_SIZE_LARGE: int = 800
+    DEFAULT_SAMPLE_SIZE: int = 400
+
+    # Specialized test scenario sizes
+    PBRS_SIMULATION_STEPS: int = 500
+    NULL_HYPOTHESIS_SAMPLE_SIZE: int = 400
+    BOOTSTRAP_MINIMAL_ITERATIONS: int = 25
+    BOOTSTRAP_STANDARD_ITERATIONS: int = 100
+    HETEROSCEDASTICITY_MIN_EXITS: int = 50
+    CORRELATION_TEST_MIN_SIZE: int = 200
+    MONTE_CARLO_ITERATIONS: int = 160
+
+
+@dataclass(frozen=True)
+class StatisticalTolerances:
+    """Tolerances for statistical metrics and distribution tests.
+
+    These tolerances are used for statistical hypothesis testing, distribution
+    comparison metrics, and other statistical validation operations.
+
+    Attributes:
+        DISTRIBUTION_SHIFT: Tolerance for distribution shift metrics (5e-4)
+        KS_STATISTIC_IDENTITY: KS statistic threshold for identical distributions (5e-3)
+        CORRELATION_SIGNIFICANCE: Minimum correlation for significance (0.1)
+        VARIANCE_RATIO_THRESHOLD: Minimum variance ratio for heteroscedasticity (0.8)
+        CI_WIDTH_EPSILON: Minimum CI width for degenerate distributions (3e-9)
+    """
+
+    DISTRIBUTION_SHIFT: float = 5e-4
+    KS_STATISTIC_IDENTITY: float = 5e-3
+    CORRELATION_SIGNIFICANCE: float = 0.1
+    VARIANCE_RATIO_THRESHOLD: float = 0.8
+    CI_WIDTH_EPSILON: float = 3e-9
+
+
 # Global singleton instances for easy import
 TOLERANCE: Final[ToleranceConfig] = ToleranceConfig()
 CONTINUITY: Final[ContinuityConfig] = ContinuityConfig()
@@ -164,6 +231,8 @@ PBRS: Final[PBRSConfig] = PBRSConfig()
 STATISTICAL: Final[StatisticalConfig] = StatisticalConfig()
 SEEDS: Final[TestSeeds] = TestSeeds()
 PARAMS: Final[TestParameters] = TestParameters()
+SCENARIOS: Final[TestScenarios] = TestScenarios()
+STAT_TOL: Final[StatisticalTolerances] = StatisticalTolerances()
 
 
 __all__ = [
@@ -174,6 +243,8 @@ __all__ = [
     "StatisticalConfig",
     "TestSeeds",
     "TestParameters",
+    "TestScenarios",
+    "StatisticalTolerances",
     "TOLERANCE",
     "CONTINUITY",
     "EXIT_FACTOR",
@@ -181,4 +252,6 @@ __all__ = [
     "STATISTICAL",
     "SEEDS",
     "PARAMS",
+    "SCENARIOS",
+    "STAT_TOL",
 ]
index 558192653bd98b57facc32ef986c0a086eaab531..11c8a3b2d13c3e4785d1c41fd069d9b8bb8a2c6a 100644 (file)
@@ -12,6 +12,8 @@ from reward_space_analysis import (
     calculate_reward,
 )
 
+from .configs import RewardScenarioConfig, ThresholdTestConfig, ValidationConfig
+
 
 def safe_float(value: Any, default: float = 0.0) -> float:
     """Coerce value to float safely for test parameter handling.
@@ -178,21 +180,18 @@ def assert_trend(
 def assert_component_sum_integrity(
     test_case,
     breakdown,
-    tolerance_relaxed,
-    exclude_components=None,
-    component_description="components",
+    config: ValidationConfig,
 ):
     """Assert that reward component sum matches total within tolerance.
 
     Validates the mathematical integrity of reward component decomposition by
     ensuring the sum of individual components equals the reported total.
+    Uses ValidationConfig to simplify parameter passing.
 
     Args:
         test_case: Test case instance with assertion methods
         breakdown: Reward breakdown object with component attributes
-        tolerance_relaxed: Numerical tolerance for sum validation
-        exclude_components: List of component names to exclude from sum (default: None)
-        component_description: Human-readable description for error messages
+        config: ValidationConfig with tolerance and exclusion settings
 
     Components checked (if not excluded):
         - hold_penalty
@@ -204,14 +203,15 @@ def assert_component_sum_integrity(
         - exit_additive
 
     Example:
-        assert_component_sum_integrity(
-            self, breakdown, 1e-09,
+        config = ValidationConfig(
+            tolerance_strict=1e-12,
+            tolerance_relaxed=1e-09,
             exclude_components=["reward_shaping"],
             component_description="core components"
         )
+        assert_component_sum_integrity(self, breakdown, config)
     """
-    if exclude_components is None:
-        exclude_components = []
+    exclude_components = config.exclude_components or []
     component_sum = 0.0
     if "hold_penalty" not in exclude_components:
         component_sum += breakdown.hold_penalty
@@ -230,8 +230,8 @@ def assert_component_sum_integrity(
     test_case.assertAlmostEqual(
         breakdown.total,
         component_sum,
-        delta=tolerance_relaxed,
-        msg=f"Total should equal sum of {component_description}",
+        delta=config.tolerance_relaxed,
+        msg=f"Total should equal sum of {config.component_description}",
     )
 
 
@@ -347,34 +347,34 @@ def assert_single_active_component_with_additives(
 def assert_reward_calculation_scenarios(
     test_case,
     scenarios: List[Tuple[Any, Dict[str, Any], str]],
-    base_factor: float,
-    profit_target: float,
-    risk_reward_ratio: float,
+    config: RewardScenarioConfig,
     validation_fn,
-    tolerance_relaxed: float,
 ):
     """Execute and validate multiple reward calculation scenarios.
 
     Runs a batch of reward calculations with different contexts and parameters,
-    applying a custom validation function to each result. Reduces test boilerplate
-    for scenario-based testing.
+    applying a custom validation function to each result. Uses RewardScenarioConfig
+    to simplify parameter passing and improve maintainability.
 
     Args:
         test_case: Test case instance with assertion methods
         scenarios: List of (context, params, description) tuples defining test cases
-        base_factor: Base scaling factor for reward calculations
-        profit_target: Target profit threshold
-        risk_reward_ratio: Risk/reward ratio for position sizing
+        config: RewardScenarioConfig with all calculation parameters
         validation_fn: Callback function (test_case, breakdown, description, tolerance) -> None
-        tolerance_relaxed: Numerical tolerance passed to validation function
 
     Example:
+        config = RewardScenarioConfig(
+            base_factor=90.0,
+            profit_target=0.06,
+            risk_reward_ratio=1.0,
+            tolerance_relaxed=1e-09
+        )
         scenarios = [
             (idle_context, {}, "idle scenario"),
             (exit_context, {"exit_additive": 5.0}, "profitable exit"),
         ]
         assert_reward_calculation_scenarios(
-            self, scenarios, 90.0, 0.06, 1.0, my_validation_fn, 1e-09
+            self, scenarios, config, my_validation_fn
         )
     """
     for context, params, description in scenarios:
@@ -382,13 +382,13 @@ def assert_reward_calculation_scenarios(
             breakdown = calculate_reward(
                 context,
                 params,
-                base_factor=base_factor,
-                profit_target=profit_target,
-                risk_reward_ratio=risk_reward_ratio,
-                short_allowed=True,
-                action_masking=True,
+                base_factor=config.base_factor,
+                profit_target=config.profit_target,
+                risk_reward_ratio=config.risk_reward_ratio,
+                short_allowed=config.short_allowed,
+                action_masking=config.action_masking,
             )
-            validation_fn(test_case, breakdown, description, tolerance_relaxed)
+            validation_fn(test_case, breakdown, description, config.tolerance_relaxed)
 
 
 def assert_parameter_sensitivity_behavior(
@@ -396,39 +396,39 @@ def assert_parameter_sensitivity_behavior(
     parameter_variations: List[Dict[str, Any]],
     base_context,
     base_params: Dict[str, Any],
-    base_factor: float,
-    profit_target: float,
-    risk_reward_ratio: float,
     component_name: str,
     expected_trend: str,
-    tolerance_relaxed: float,
+    config: RewardScenarioConfig,
 ):
     """Validate that a component responds predictably to parameter changes.
 
     Tests component sensitivity by applying parameter variations and verifying
     the component value follows the expected trend (increasing, decreasing, or constant).
+    Uses RewardScenarioConfig to simplify parameter passing.
 
     Args:
         test_case: Test case instance with assertion methods
         parameter_variations: List of parameter dicts to merge with base_params
         base_context: Context object for reward calculation
         base_params: Base parameter dictionary
-        base_factor: Base scaling factor
-        profit_target: Target profit threshold
-        risk_reward_ratio: Risk/reward ratio
         component_name: Name of component to track (e.g., "exit_component")
         expected_trend: Expected trend: "increasing", "decreasing", or "constant"
-        tolerance_relaxed: Numerical tolerance for trend validation
+        config: RewardScenarioConfig with calculation parameters
 
     Example:
+        config = RewardScenarioConfig(
+            base_factor=90.0,
+            profit_target=0.06,
+            risk_reward_ratio=1.0,
+            tolerance_relaxed=1e-09
+        )
         variations = [
             {"exit_additive": 0.0},
             {"exit_additive": 5.0},
             {"exit_additive": 10.0},
         ]
         assert_parameter_sensitivity_behavior(
-            self, variations, ctx, params, 90.0, 0.06, 1.0,
-            "exit_component", "increasing", 1e-09
+            self, variations, ctx, params, "exit_component", "increasing", config
         )
     """
     from reward_space_analysis import calculate_reward
@@ -440,11 +440,11 @@ def assert_parameter_sensitivity_behavior(
         breakdown = calculate_reward(
             base_context,
             params,
-            base_factor=base_factor,
-            profit_target=profit_target,
-            risk_reward_ratio=risk_reward_ratio,
-            short_allowed=True,
-            action_masking=True,
+            base_factor=config.base_factor,
+            profit_target=config.profit_target,
+            risk_reward_ratio=config.risk_reward_ratio,
+            short_allowed=config.short_allowed,
+            action_masking=config.action_masking,
         )
         component_value = getattr(breakdown, component_name)
         results.append(component_value)
@@ -452,14 +452,14 @@ def assert_parameter_sensitivity_behavior(
         for i in range(1, len(results)):
             test_case.assertGreaterEqual(
                 results[i],
-                results[i - 1] - tolerance_relaxed,
+                results[i - 1] - config.tolerance_relaxed,
                 f"{component_name} should increase with parameter variations",
             )
     elif expected_trend == "decreasing":
         for i in range(1, len(results)):
             test_case.assertLessEqual(
                 results[i],
-                results[i - 1] + tolerance_relaxed,
+                results[i - 1] + config.tolerance_relaxed,
                 f"{component_name} should decrease with parameter variations",
             )
     elif expected_trend == "constant":
@@ -468,7 +468,7 @@ def assert_parameter_sensitivity_behavior(
             test_case.assertAlmostEqual(
                 result,
                 baseline,
-                delta=tolerance_relaxed,
+                delta=config.tolerance_relaxed,
                 msg=f"{component_name} should remain constant with parameter variations",
             )
 
@@ -684,31 +684,35 @@ def assert_multi_parameter_sensitivity(
     parameter_test_cases: List[Tuple[float, float, str]],
     context_factory_fn,
     base_params: Dict[str, Any],
-    base_factor: float,
-    tolerance_relaxed: float,
+    config: RewardScenarioConfig,
 ):
     """Validate reward behavior across multiple parameter combinations.
 
     Tests reward calculation with various profit_target and risk_reward_ratio
     combinations, ensuring consistent behavior including edge cases like
-    zero profit_target.
+    zero profit_target. Uses RewardScenarioConfig to simplify parameter passing.
 
     Args:
         test_case: Test case instance with assertion methods
         parameter_test_cases: List of (profit_target, risk_reward_ratio, description) tuples
         context_factory_fn: Factory function for creating context objects
         base_params: Base parameter dictionary
-        base_factor: Base scaling factor
-        tolerance_relaxed: Numerical tolerance for assertions
+        config: RewardScenarioConfig with base calculation parameters
 
     Example:
+        config = RewardScenarioConfig(
+            base_factor=90.0,
+            profit_target=0.06,
+            risk_reward_ratio=1.0,
+            tolerance_relaxed=1e-09
+        )
         test_cases = [
             (0.0, 1.0, "zero profit target"),
             (0.06, 1.0, "standard parameters"),
             (0.06, 2.0, "high risk/reward ratio"),
         ]
         assert_multi_parameter_sensitivity(
-            self, test_cases, make_context, params, 90.0, 1e-09
+            self, test_cases, make_context, params, config
         )
     """
     for profit_target, risk_reward_ratio, description in parameter_test_cases:
@@ -719,11 +723,11 @@ def assert_multi_parameter_sensitivity(
             breakdown = calculate_reward(
                 idle_context,
                 base_params,
-                base_factor=base_factor,
+                base_factor=config.base_factor,
                 profit_target=profit_target,
                 risk_reward_ratio=risk_reward_ratio,
-                short_allowed=True,
-                action_masking=True,
+                short_allowed=config.short_allowed,
+                action_masking=config.action_masking,
             )
             if profit_target == 0.0:
                 test_case.assertEqual(breakdown.idle_penalty, 0.0)
@@ -735,54 +739,54 @@ def assert_multi_parameter_sensitivity(
                 exit_breakdown = calculate_reward(
                     exit_context,
                     base_params,
-                    base_factor=base_factor,
+                    base_factor=config.base_factor,
                     profit_target=profit_target,
                     risk_reward_ratio=risk_reward_ratio,
-                    short_allowed=True,
-                    action_masking=True,
+                    short_allowed=config.short_allowed,
+                    action_masking=config.action_masking,
                 )
                 test_case.assertNotEqual(exit_breakdown.exit_component, 0.0)
 
 
 def assert_hold_penalty_threshold_behavior(
     test_case,
-    duration_test_cases: Sequence[Tuple[int, str]],
-    max_duration: int,
     context_factory_fn,
     params: Dict[str, Any],
     base_factor: float,
     profit_target: float,
     risk_reward_ratio: float,
-    tolerance_relaxed: float,
+    config: ThresholdTestConfig,
 ):
     """Validate hold penalty activation at max_duration threshold.
 
     Tests that hold penalty is zero before max_duration, then becomes
-    negative (penalty) at and after the threshold. Critical for verifying
-    threshold-based penalty logic.
+    negative (penalty) at and after the threshold. Uses ThresholdTestConfig
+    to simplify parameter passing.
 
     Args:
         test_case: Test case instance with assertion methods
-        duration_test_cases: List of (trade_duration, description) tuples to test
-        max_duration: Maximum duration threshold for penalty activation
         context_factory_fn: Factory function for creating context objects
         params: Parameter dictionary
         base_factor: Base scaling factor
         profit_target: Target profit threshold
         risk_reward_ratio: Risk/reward ratio
-        tolerance_relaxed: Numerical tolerance for assertions
+        config: ThresholdTestConfig with threshold settings
 
     Example:
-        test_cases = [
-            (50, "below threshold"),
-            (100, "at threshold"),
-            (150, "above threshold"),
-        ]
+        config = ThresholdTestConfig(
+            max_duration=100,
+            test_cases=[
+                (50, "below threshold"),
+                (100, "at threshold"),
+                (150, "above threshold"),
+            ],
+            tolerance=1e-09
+        )
         assert_hold_penalty_threshold_behavior(
-            self, test_cases, 100, make_context, params, 90.0, 0.06, 1.0, 1e-09
+            self, make_context, params, 90.0, 0.06, 1.0, config
         )
     """
-    for trade_duration, description in duration_test_cases:
+    for trade_duration, description in config.test_cases:
         with test_case.subTest(duration=trade_duration, desc=description):
             context = context_factory_fn(trade_duration=trade_duration)
             breakdown = calculate_reward(
@@ -794,7 +798,7 @@ def assert_hold_penalty_threshold_behavior(
                 short_allowed=True,
                 action_masking=True,
             )
-            duration_ratio = trade_duration / max_duration
+            duration_ratio = trade_duration / config.max_duration
             if duration_ratio < 1.0:
                 test_case.assertEqual(breakdown.hold_penalty, 0.0)
             elif duration_ratio == 1.0:
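The ValidationConfig, RewardScenarioConfig, and ThresholdTestConfig dataclasses consumed above are imported from tests/helpers/configs.py, which is not part of this commitdiff. A minimal sketch consistent with the call sites in the hunks above (field names inferred from usage, defaults assumed):

# Hypothetical sketch of tests/helpers/configs.py; fields inferred from the call sites above.
from dataclasses import dataclass
from typing import List, Optional, Sequence, Tuple


@dataclass(frozen=True)
class ValidationConfig:
    """Tolerances and exclusions for component-sum integrity checks."""

    tolerance_strict: float
    tolerance_relaxed: float
    exclude_components: Optional[List[str]] = None
    component_description: str = "components"


@dataclass(frozen=True)
class RewardScenarioConfig:
    """Shared calculate_reward() arguments for scenario batches."""

    base_factor: float
    profit_target: float
    risk_reward_ratio: float
    tolerance_relaxed: float
    short_allowed: bool = True
    action_masking: bool = True


@dataclass(frozen=True)
class ThresholdTestConfig:
    """Duration cases and tolerance for hold-penalty threshold tests."""

    max_duration: int
    test_cases: Sequence[Tuple[int, str]]
    tolerance: float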
index f28efb5e9944e22a3c7f5452afd1196fd0bdc52a..782710ec322a926548f9fa80f3b62cc15974780c 100644 (file)
@@ -12,6 +12,7 @@ import pandas as pd
 
 from reward_space_analysis import PBRS_INVARIANCE_TOL, write_complete_statistical_analysis
 
+from ..constants import SCENARIOS
 from ..test_base import RewardSpaceTestBase
 
 
@@ -74,7 +75,7 @@ class TestReportFormatting(RewardSpaceTestBase):
             real_df=real_df,
             adjust_method="none",
             strict_diagnostics=False,
-            bootstrap_resamples=200,  # keep test fast
+            bootstrap_resamples=SCENARIOS.BOOTSTRAP_STANDARD_ITERATIONS,  # keep test fast
             skip_partial_dependence=kwargs.get("skip_partial_dependence", False),
             skip_feature_analysis=kwargs.get("skip_feature_analysis", False),
         )
index b73faa1394db8280d8323a2d171e6f1c86cef414..268791e8f34b593559725adb2b08c0d53c765ca9 100644 (file)
@@ -41,7 +41,7 @@ class TestPBRS(RewardSpaceTestBase):
     # ---------------- Potential transform mechanics ---------------- #
 
     def test_pbrs_progressive_release_decay_clamped(self):
-        """progressive_release decay>1 clamps -> Φ'=0 & Δ=-Φ_prev."""
+        """Verifies progressive_release mode with decay>1 clamps potential to zero."""
         params = self.DEFAULT_PARAMS.copy()
         params.update(
             {
@@ -73,7 +73,7 @@ class TestPBRS(RewardSpaceTestBase):
         )
 
     def test_pbrs_spike_cancel_invariance(self):
-        """spike_cancel terminal shaping ≈0 (Φ' inversion yields cancellation)."""
+        """Verifies spike_cancel mode produces near-zero terminal shaping."""
         params = self.DEFAULT_PARAMS.copy()
         params.update(
             {
@@ -113,6 +113,8 @@ class TestPBRS(RewardSpaceTestBase):
 
     def test_canonical_invariance_flag_and_sum(self):
         """Canonical mode + no additives -> invariant flags True and Σ shaping ≈ 0."""
+        from ..constants import SCENARIOS
+
         params = self.base_params(
             exit_potential_mode="canonical",
             entry_additive_enabled=False,
@@ -121,7 +123,7 @@ class TestPBRS(RewardSpaceTestBase):
         )
         df = simulate_samples(
             params={**params, "max_trade_duration_candles": 100},
-            num_samples=400,
+            num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE,
             seed=self.SEED,
             base_factor=self.TEST_BASE_FACTOR,
             profit_target=self.TEST_PROFIT_TARGET,
@@ -138,6 +140,8 @@ class TestPBRS(RewardSpaceTestBase):
 
     def test_non_canonical_flag_false_and_sum_nonzero(self):
         """Non-canonical mode -> invariant flags False and Σ shaping significantly non-zero."""
+        from ..constants import SCENARIOS
+
         params = self.base_params(
             exit_potential_mode="progressive_release",
             exit_potential_decay=0.25,
@@ -147,7 +151,7 @@ class TestPBRS(RewardSpaceTestBase):
         )
         df = simulate_samples(
             params={**params, "max_trade_duration_candles": 100},
-            num_samples=400,
+            num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE,
             seed=self.SEED,
             base_factor=self.TEST_BASE_FACTOR,
             profit_target=self.TEST_PROFIT_TARGET,
@@ -165,7 +169,7 @@ class TestPBRS(RewardSpaceTestBase):
     # ---------------- Additives and canonical path mechanics ---------------- #
 
     def test_additive_components_disabled_return_zero(self):
-        """Entry/exit additives return zero when disabled."""
+        """Verifies entry/exit additives return zero when disabled."""
         params_entry = {"entry_additive_enabled": False, "entry_additive_scale": 1.0}
         val_entry = _compute_entry_additive(0.5, 0.3, params_entry)
         self.assertEqual(float(val_entry), 0.0)
@@ -174,7 +178,7 @@ class TestPBRS(RewardSpaceTestBase):
         self.assertEqual(float(val_exit), 0.0)
 
     def test_exit_potential_canonical(self):
-        """Canonical exit resets potential; additives auto-disabled."""
+        """Verifies canonical exit resets potential and auto-disables additives."""
         params = self.base_params(
             exit_potential_mode="canonical",
             hold_potential_enabled=True,
@@ -218,7 +222,7 @@ class TestPBRS(RewardSpaceTestBase):
         self.assertTrue(np.isfinite(total))
 
     def test_pbrs_invariance_internal_flag_set(self):
-        """Canonical path sets _pbrs_invariance_applied once; second call idempotent."""
+        """Verifies canonical path sets _pbrs_invariance_applied flag (idempotent)."""
         params = self.base_params(
             exit_potential_mode="canonical",
             hold_potential_enabled=True,
@@ -263,7 +267,7 @@ class TestPBRS(RewardSpaceTestBase):
         )
 
     def test_progressive_release_negative_decay_clamped(self):
-        """Negative decay clamps: next potential equals last potential (no release)."""
+        """Verifies negative decay clamping: next potential equals last potential."""
         params = self.base_params(
             exit_potential_mode="progressive_release",
             exit_potential_decay=-0.75,
@@ -291,7 +295,7 @@ class TestPBRS(RewardSpaceTestBase):
         self.assertPlacesEqual(total, shaping, places=12)
 
     def test_potential_gamma_nan_fallback(self):
-        """potential_gamma=NaN falls back to default value (indirect comparison)."""
+        """Verifies potential_gamma=NaN fallback to default value."""
         base_params_dict = self.base_params()
         default_gamma = base_params_dict.get("potential_gamma", 0.95)
         params_nan = self.base_params(potential_gamma=np.nan, hold_potential_enabled=True)
@@ -331,14 +335,11 @@ class TestPBRS(RewardSpaceTestBase):
 
     def test_validate_reward_parameters_batch_and_relaxed_aggregation(self):
         """Batch validate strict failures + relaxed multi-reason aggregation via helpers."""
-        # Build strict failure cases
         strict_failures = [
             build_validation_case({"potential_gamma": -0.2}, strict=True, expect_error=True),
             build_validation_case({"hold_potential_scale": -5.0}, strict=True, expect_error=True),
         ]
-        # Success default (strict) case
         success_case = build_validation_case({}, strict=True, expect_error=False)
-        # Relaxed multi-reason aggregation case
         relaxed_case = build_validation_case(
             {
                 "potential_gamma": "not-a-number",
@@ -354,13 +355,11 @@ class TestPBRS(RewardSpaceTestBase):
                 "derived_default",
             ],
         )
-        # Execute batch (strict successes + failures + relaxed case)
         execute_validation_batch(
             self,
             [success_case] + strict_failures + [relaxed_case],
             validate_reward_parameters,
         )
-        # Explicit aggregation assertions for relaxed case using helper
         params_relaxed = DEFAULT_MODEL_REWARD_PARAMETERS.copy()
         params_relaxed.update(
             {
@@ -449,6 +448,8 @@ class TestPBRS(RewardSpaceTestBase):
     # Owns invariant: pbrs-canonical-drift-correction-106
     def test_pbrs_106_canonical_drift_correction_zero_sum(self):
         """Invariant 106: canonical mode enforces near zero-sum shaping (drift correction)."""
+        from ..constants import SCENARIOS
+
         params = self.base_params(
             exit_potential_mode="canonical",
             hold_potential_enabled=True,
@@ -458,7 +459,7 @@ class TestPBRS(RewardSpaceTestBase):
         )
         df = simulate_samples(
             params={**params, "max_trade_duration_candles": 140},
-            num_samples=500,
+            num_samples=SCENARIOS.SAMPLE_SIZE_LARGE // 2,  # 400, close to the original 500
             seed=913,
             base_factor=self.TEST_BASE_FACTOR,
             profit_target=self.TEST_PROFIT_TARGET,
@@ -517,6 +518,8 @@ class TestPBRS(RewardSpaceTestBase):
     # Owns invariant (comparison path): pbrs-canonical-drift-correction-106
     def test_pbrs_106_canonical_drift_correction_uniform_offset(self):
         """Canonical drift correction reduces Σ shaping below tolerance vs non-canonical."""
+        from ..constants import SCENARIOS
+
         params_can = self.base_params(
             exit_potential_mode="canonical",
             hold_potential_enabled=True,
@@ -526,7 +529,7 @@ class TestPBRS(RewardSpaceTestBase):
         )
         df_can = simulate_samples(
             params={**params_can, "max_trade_duration_candles": 120},
-            num_samples=400,
+            num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE,
             seed=777,
             base_factor=self.TEST_BASE_FACTOR,
             profit_target=self.TEST_PROFIT_TARGET,
@@ -545,7 +548,7 @@ class TestPBRS(RewardSpaceTestBase):
         )
         df_non = simulate_samples(
             params={**params_non, "max_trade_duration_candles": 120},
-            num_samples=400,
+            num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE,
             seed=777,
             base_factor=self.TEST_BASE_FACTOR,
             profit_target=self.TEST_PROFIT_TARGET,
@@ -659,6 +662,8 @@ class TestPBRS(RewardSpaceTestBase):
 
     def test_report_cumulative_invariance_aggregation(self):
         """Canonical telescoping term: small per-step mean drift, bounded increments."""
+        from ..constants import SCENARIOS
+
         params = self.base_params(
             hold_potential_enabled=True,
             entry_additive_enabled=False,
@@ -673,7 +678,7 @@ class TestPBRS(RewardSpaceTestBase):
         telescoping_sum = 0.0
         max_abs_step = 0.0
         steps = 0
-        for _ in range(500):
+        for _ in range(SCENARIOS.PBRS_SIMULATION_STEPS):
             is_exit = rng.uniform() < 0.1
             current_pnl = float(rng.normal(0, 0.05))
             current_dur = float(rng.uniform(0, 1))
@@ -712,6 +717,8 @@ class TestPBRS(RewardSpaceTestBase):
 
     def test_report_explicit_non_invariance_progressive_release(self):
         """progressive_release cumulative shaping non-zero (release leak)."""
+        from ..constants import SCENARIOS
+
         params = self.base_params(
             hold_potential_enabled=True,
             entry_additive_enabled=False,
@@ -722,7 +729,7 @@ class TestPBRS(RewardSpaceTestBase):
         rng = np.random.default_rng(321)
         last_potential = 0.0
         shaping_sum = 0.0
-        for _ in range(160):
+        for _ in range(SCENARIOS.MONTE_CARLO_ITERATIONS):
             is_exit = rng.uniform() < 0.15
             next_pnl = 0.0 if is_exit else float(rng.normal(0, 0.07))
             next_dur = 0.0 if is_exit else float(rng.uniform(0, 1))
@@ -756,6 +763,8 @@ class TestPBRS(RewardSpaceTestBase):
 
         from reward_space_analysis import PBRS_INVARIANCE_TOL
 
+        from ..constants import SCENARIOS
+
         small_vals = [1.0e-7, -2.0e-7, 3.0e-7]  # sum = 2.0e-7 < tolerance
         total_shaping = float(sum(small_vals))
         self.assertLess(
@@ -796,7 +805,7 @@ class TestPBRS(RewardSpaceTestBase):
             seed=self.SEED,
             skip_feature_analysis=True,
             skip_partial_dependence=True,
-            bootstrap_resamples=25,
+            bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS,
         )
         report_path = out_dir / "statistical_analysis.md"
         self.assertTrue(report_path.exists(), "Report file missing for canonical near-zero test")
@@ -819,6 +828,8 @@ class TestPBRS(RewardSpaceTestBase):
 
         from reward_space_analysis import PBRS_INVARIANCE_TOL
 
+        from ..constants import SCENARIOS
+
         shaping_vals = [1.2e-4, 1.3e-4, 8.0e-5, -2.0e-5, 1.4e-4]  # sum = 4.5e-4 (> tol)
         total_shaping = sum(shaping_vals)
         self.assertGreater(abs(total_shaping), PBRS_INVARIANCE_TOL)
@@ -855,7 +866,7 @@ class TestPBRS(RewardSpaceTestBase):
             seed=self.SEED,
             skip_feature_analysis=True,
             skip_partial_dependence=True,
-            bootstrap_resamples=50,
+            bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS * 2,
         )
         report_path = out_dir / "statistical_analysis.md"
         self.assertTrue(report_path.exists(), "Report file missing for canonical warning test")
@@ -872,6 +883,8 @@ class TestPBRS(RewardSpaceTestBase):
         """Full report: Non-canonical classification aggregates mode + additives reasons."""
         import pandas as pd
 
+        from ..constants import SCENARIOS
+
         shaping_vals = [0.02, -0.005, 0.007]
         entry_add_vals = [0.003, 0.0, 0.004]
         exit_add_vals = [0.001, 0.002, 0.0]
@@ -908,7 +921,7 @@ class TestPBRS(RewardSpaceTestBase):
             seed=self.SEED,
             skip_feature_analysis=True,
             skip_partial_dependence=True,
-            bootstrap_resamples=25,
+            bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS,
         )
         report_path = out_dir / "statistical_analysis.md"
         self.assertTrue(
@@ -928,6 +941,8 @@ class TestPBRS(RewardSpaceTestBase):
 
         from reward_space_analysis import PBRS_INVARIANCE_TOL
 
+        from ..constants import SCENARIOS
+
         shaping_vals = [0.002, -0.0005, 0.0012]
         total_shaping = sum(shaping_vals)
         self.assertGreater(abs(total_shaping), PBRS_INVARIANCE_TOL)
@@ -964,7 +979,7 @@ class TestPBRS(RewardSpaceTestBase):
             seed=self.SEED,
             skip_feature_analysis=True,
             skip_partial_dependence=True,
-            bootstrap_resamples=25,
+            bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS,
         )
         report_path = out_dir / "statistical_analysis.md"
         self.assertTrue(
@@ -1007,6 +1022,8 @@ class TestPBRS(RewardSpaceTestBase):
         out_dir = self.output_path / "pbrs_absence_and_shift_placeholder"
         import reward_space_analysis as rsa
 
+        from ..constants import SCENARIOS
+
         original_compute_summary_stats = rsa._compute_summary_stats
 
         def _minimal_summary_stats(_df):
@@ -1038,7 +1055,7 @@ class TestPBRS(RewardSpaceTestBase):
                 seed=self.SEED,
                 skip_feature_analysis=True,
                 skip_partial_dependence=True,
-                bootstrap_resamples=10,
+                bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS // 2,
             )
         finally:
             rsa._compute_summary_stats = original_compute_summary_stats
index 63e7352ae4850aad719f1d73475b90611861512d..d136fddcf5f043475ff492e2dd45891848f6766e 100644 (file)
@@ -13,7 +13,11 @@ from reward_space_analysis import (
     validate_reward_parameters,
 )
 
-from ..helpers import run_strict_validation_failure_cases
+from ..helpers import (
+    assert_exit_factor_invariant_suite,
+    run_relaxed_validation_adjustment_cases,
+    run_strict_validation_failure_cases,
+)
 
 
 class _PyTestAdapter(unittest.TestCase):
@@ -42,9 +46,6 @@ def test_validate_reward_parameters_strict_failure_batch():
     run_strict_validation_failure_cases(adapter, failure_params, validate_reward_parameters)
 
 
-from ..helpers import run_relaxed_validation_adjustment_cases
-
-
 @pytest.mark.robustness
 def test_validate_reward_parameters_relaxed_adjustment_batch():
     """Batch relaxed validation adjustment scenarios using shared helper."""
@@ -141,9 +142,6 @@ def test_hold_penalty_short_duration_returns_zero():
     assert penalty == 0.0
 
 
-from ..helpers import assert_exit_factor_invariant_suite
-
-
 @pytest.mark.robustness
 def test_exit_factor_invariant_suite_grouped():
     """Grouped exit factor invariant scenarios using shared helper."""
index 3385d7f4ba14df49285260c7bf36e0a00d13f94f..3ec81481a3b28002466a88f194e8611278348145 100644 (file)
@@ -3,7 +3,6 @@
 
 import math
 import unittest
-import warnings
 
 import numpy as np
 import pytest
@@ -14,16 +13,17 @@ from reward_space_analysis import (
     Actions,
     Positions,
     RewardContext,
-    RewardDiagnosticsWarning,
     _get_exit_factor,
     calculate_reward,
     simulate_samples,
 )
 
 from ..helpers import (
+    assert_diagnostic_warning,
     assert_exit_factor_attenuation_modes,
     assert_exit_mode_mathematical_validation,
     assert_single_active_component_with_additives,
+    capture_warnings,
 )
 from ..test_base import RewardSpaceTestBase
 
@@ -205,8 +205,7 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase):
             position=Positions.Long,
             action=Actions.Long_exit,
         )
-        with warnings.catch_warnings(record=True) as caught:
-            warnings.simplefilter("always")
+        with capture_warnings() as caught:
             baseline = calculate_reward(
                 context,
                 params,
@@ -532,8 +531,7 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase):
         pnl = 0.05
         pnl_factor = 1.0
         duration_ratio = 0.8
-        with warnings.catch_warnings(record=True) as caught:
-            warnings.simplefilter("always", RewardDiagnosticsWarning)
+        with assert_diagnostic_warning(["Unknown exit_attenuation_mode"]):
             f_unknown = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params)
         linear_params = self.base_params(exit_attenuation_mode="linear", exit_plateau=False)
         f_linear = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, linear_params)
@@ -543,14 +541,6 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase):
             tolerance=self.TOL_IDENTITY_RELAXED,
             msg=f"Fallback linear mismatch unknown={f_unknown} linear={f_linear}",
         )
-        diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)]
-        self.assertTrue(
-            diag_warnings, "No RewardDiagnosticsWarning emitted for unknown mode fallback"
-        )
-        self.assertTrue(
-            any("Unknown exit_attenuation_mode" in str(w.message) for w in diag_warnings),
-            "Fallback warning message content mismatch",
-        )
 
     # Owns invariant: robustness-negative-grace-clamp-103
     def test_robustness_103_negative_plateau_grace_clamped(self):
@@ -565,8 +555,7 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase):
         pnl = 0.03
         pnl_factor = 1.0
         duration_ratio = 0.5
-        with warnings.catch_warnings(record=True) as caught:
-            warnings.simplefilter("always", RewardDiagnosticsWarning)
+        with assert_diagnostic_warning(["exit_plateau_grace < 0"]):
             f_neg = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params)
         # Reference with grace=0.0 (since negative should clamp)
         ref_params = self.base_params(
@@ -582,12 +571,6 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase):
             tolerance=self.TOL_IDENTITY_RELAXED,
             msg=f"Negative grace clamp mismatch f_neg={f_neg} f_ref={f_ref}",
         )
-        diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)]
-        self.assertTrue(diag_warnings, "No RewardDiagnosticsWarning for negative grace")
-        self.assertTrue(
-            any("exit_plateau_grace < 0" in str(w.message) for w in diag_warnings),
-            "Warning content missing for negative grace clamp",
-        )
 
     # Owns invariant: robustness-invalid-power-tau-104
     def test_robustness_104_invalid_power_tau_fallback_alpha_one(self):
@@ -603,13 +586,9 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase):
             params = self.base_params(
                 exit_attenuation_mode="power", exit_power_tau=tau, exit_plateau=False
             )
-            with warnings.catch_warnings(record=True) as caught:
-                warnings.simplefilter("always", RewardDiagnosticsWarning)
+            with assert_diagnostic_warning(["exit_power_tau"]):
                 f0 = _get_exit_factor(base_factor, pnl, pnl_factor, 0.0, params)
                 f1 = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params)
-            diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)]
-            self.assertTrue(diag_warnings, f"No RewardDiagnosticsWarning for invalid tau={tau}")
-            self.assertTrue(any("exit_power_tau" in str(w.message) for w in diag_warnings))
             ratio = f1 / max(f0, self.TOL_NUMERIC_GUARD)
             self.assertAlmostEqual(
                 ratio,
@@ -628,20 +607,9 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase):
         near_zero_values = [1e-15, 1e-12, 5e-14]
         for hl in near_zero_values:
             params = self.base_params(exit_attenuation_mode="half_life", exit_half_life=hl)
-            with warnings.catch_warnings(record=True) as caught:
-                warnings.simplefilter("always", RewardDiagnosticsWarning)
+            with assert_diagnostic_warning(["exit_half_life", "close to 0"]):
                 _ = _get_exit_factor(base_factor, pnl, pnl_factor, 0.0, params)
                 fdr = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params)
-            diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)]
-            self.assertTrue(
-                diag_warnings, f"No RewardDiagnosticsWarning for near-zero half-life hl={hl}"
-            )
-            self.assertTrue(
-                any(
-                    "exit_half_life" in str(w.message) and "close to 0" in str(w.message)
-                    for w in diag_warnings
-                )
-            )
             self.assertAlmostEqualFloat(
                 fdr,
                 1.0 * pnl_factor,  # Kernel returns 1.0 then * pnl_factor
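
The hunks above swap hand-rolled warnings.catch_warnings blocks for two shared helpers imported from tests/helpers. Their implementation is not part of this hunk; a minimal sketch, assuming a contextlib-based design in which assert_diagnostic_warning checks both the warning category and the expected message fragments (the strict_mode keyword accepted elsewhere is omitted, and the import path of RewardDiagnosticsWarning is an assumption), could read:

    import warnings
    from contextlib import contextmanager

    # Import path assumed; the tests obtain RewardDiagnosticsWarning from the
    # reward_space_analysis package.
    from reward_space_analysis import RewardDiagnosticsWarning


    @contextmanager
    def capture_warnings():
        """Record every warning raised inside the block (illustrative sketch)."""
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            yield caught


    @contextmanager
    def assert_diagnostic_warning(
        message_fragments, warning_category=RewardDiagnosticsWarning
    ):
        """Require a warning of `warning_category` containing all fragments (sketch)."""
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always", warning_category)
            yield caught
        # Assertions run after the block exits, once all warnings are recorded.
        matching = [w for w in caught if issubclass(w.category, warning_category)]
        assert matching, f"No {warning_category.__name__} emitted"
        assert any(
            all(fragment in str(w.message) for fragment in message_fragments)
            for w in matching
        ), f"Expected warning message containing {message_fragments!r}"

Centralizing the simplefilter call and the category/message assertions in one helper keeps each test focused on the behavior under scrutiny rather than on warning bookkeeping.
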
index 45219cd929d67f44ba76832867805d2eb4ebea3e..c0966ae21d9e3de7f797be8c008d2cb06cf7b2b6 100644 (file)
@@ -2,7 +2,6 @@
 """Statistical tests, distribution metrics, and bootstrap validation."""
 
 import unittest
-import warnings
 
 import numpy as np
 import pandas as pd
@@ -19,6 +18,7 @@ from reward_space_analysis import (
     statistical_hypothesis_tests,
 )
 
+from ..helpers import assert_diagnostic_warning
 from ..test_base import RewardSpaceTestBase
 
 pytestmark = pytest.mark.statistics
@@ -97,22 +97,6 @@ class TestStatistics(RewardSpaceTestBase):
                     float(metrics[p_key]), 1.0, places=12, msg=f"Expected 1.0 for {p_key}"
                 )
 
-    def _make_idle_variance_df(self, n: int = 100) -> pd.DataFrame:
-        """Synthetic dataframe focusing on idle_duration ↔ reward_idle correlation."""
-        self.seed_all(self.SEED)
-        idle_duration = np.random.exponential(10, n)
-        reward_idle = -0.01 * idle_duration + np.random.normal(0, 0.001, n)
-        return pd.DataFrame(
-            {
-                "idle_duration": idle_duration,
-                "reward_idle": reward_idle,
-                "position": np.random.choice([0.0, 0.5, 1.0], n),
-                "reward": np.random.normal(0, 1, n),
-                "pnl": np.random.normal(0, self.TEST_PNL_STD, n),
-                "trade_duration": np.random.exponential(20, n),
-            }
-        )
-
     def test_statistics_distribution_shift_metrics(self):
         """KL/JS/Wasserstein metrics."""
         df1 = self._make_idle_variance_df(100)
@@ -159,9 +143,11 @@ class TestStatistics(RewardSpaceTestBase):
                     f"Metric {name} expected ≈ 0 on identical distributions (got {val})",
                 )
             elif name.endswith("_ks_statistic"):
+                from ..constants import STAT_TOL
+
                 self.assertLess(
                     abs(val),
-                    0.005,
+                    STAT_TOL.KS_STATISTIC_IDENTITY,
                     f"KS statistic should be near 0 on identical distributions (got {val})",
                 )
 
@@ -264,19 +250,23 @@ class TestStatistics(RewardSpaceTestBase):
 
     def test_stats_variance_vs_duration_spearman_sign(self):
         """trade_duration up => pnl variance up (rank corr >0)."""
+        from ..constants import SCENARIOS, STAT_TOL
+
         rng = np.random.default_rng(99)
         n = 250
-        trade_duration = np.linspace(1, 300, n)
+        trade_duration = np.linspace(1, SCENARIOS.DURATION_LONG, n)
         pnl = rng.normal(0, 1 + trade_duration / 400.0, n)
         ranks_dur = pd.Series(trade_duration).rank().to_numpy()
         ranks_var = pd.Series(np.abs(pnl)).rank().to_numpy()
         rho = np.corrcoef(ranks_dur, ranks_var)[0, 1]
         self.assertFinite(rho, name="spearman_rho")
-        self.assertGreater(rho, 0.1)
+        self.assertGreater(rho, STAT_TOL.CORRELATION_SIGNIFICANCE)
 
     def test_stats_scaling_invariance_distribution_metrics(self):
         """Equal scaling keeps KL/JS ≈0."""
-        df1 = self._shift_scale_df(400)
+        from ..constants import SCENARIOS, STAT_TOL
+
+        df1 = self._shift_scale_df(SCENARIOS.DEFAULT_SAMPLE_SIZE)
         scale = 3.5
         df2 = df1.copy()
         df2["pnl"] *= scale
@@ -286,7 +276,7 @@ class TestStatistics(RewardSpaceTestBase):
             if k.endswith("_kl_divergence") or k.endswith("_js_distance"):
                 self.assertLess(
                     abs(v),
-                    0.0005,
+                    STAT_TOL.DISTRIBUTION_SHIFT,
                     f"Expected near-zero divergence after equal scaling (k={k}, v={v})",
                 )
 
@@ -306,8 +296,10 @@ class TestStatistics(RewardSpaceTestBase):
 
     def test_stats_bh_correction_null_false_positive_rate(self):
         """Null: low BH discovery rate."""
+        from ..constants import SCENARIOS
+
         rng = np.random.default_rng(1234)
-        n = 400
+        n = SCENARIOS.NULL_HYPOTHESIS_SAMPLE_SIZE
         df = pd.DataFrame(
             {
                 "pnl": rng.normal(0, 1, n),
@@ -416,9 +408,11 @@ class TestStatistics(RewardSpaceTestBase):
 
     def test_stats_heteroscedasticity_pnl_validation(self):
         """PnL variance increases with trade duration (heteroscedasticity)."""
+        from ..constants import SCENARIOS
+
         df = simulate_samples(
             params=self.base_params(max_trade_duration_candles=100),
-            num_samples=1000,
+            num_samples=SCENARIOS.SAMPLE_SIZE_LARGE + 200,
             seed=self.SEED_HETEROSCEDASTICITY,
             base_factor=self.TEST_BASE_FACTOR,
             profit_target=self.TEST_PROFIT_TARGET,
@@ -429,16 +423,18 @@ class TestStatistics(RewardSpaceTestBase):
             pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
         )
         exit_data = df[df["reward_exit"] != 0].copy()
-        if len(exit_data) < 50:
+        if len(exit_data) < SCENARIOS.HETEROSCEDASTICITY_MIN_EXITS:
             self.skipTest("Insufficient exit actions for heteroscedasticity test")
         exit_data["duration_bin"] = pd.cut(
             exit_data["duration_ratio"], bins=4, labels=["Q1", "Q2", "Q3", "Q4"]
         )
         variance_by_bin = exit_data.groupby("duration_bin")["pnl"].var().dropna()
         if "Q1" in variance_by_bin.index and "Q4" in variance_by_bin.index:
+            from ..constants import STAT_TOL
+
             self.assertGreater(
                 variance_by_bin["Q4"],
-                variance_by_bin["Q1"] * 0.8,
+                variance_by_bin["Q1"] * STAT_TOL.VARIANCE_RATIO_THRESHOLD,
                 "PnL heteroscedasticity: variance should increase with duration",
             )
 
@@ -474,9 +470,11 @@ class TestStatistics(RewardSpaceTestBase):
 
     def test_stats_benjamini_hochberg_adjustment(self):
         """BH adjustment adds p_value_adj & significant_adj with valid bounds."""
+        from ..constants import SCENARIOS
+
         df = simulate_samples(
             params=self.base_params(max_trade_duration_candles=100),
-            num_samples=600,
+            num_samples=SCENARIOS.SAMPLE_SIZE_LARGE - 200,
             seed=self.SEED_HETEROSCEDASTICITY,
             base_factor=self.TEST_BASE_FACTOR,
             profit_target=self.TEST_PROFIT_TARGET,
@@ -518,8 +516,10 @@ class TestStatistics(RewardSpaceTestBase):
 
     def test_stats_bootstrap_shrinkage_with_sample_size(self):
         """Bootstrap CI half-width decreases with larger sample (~1/sqrt(n) heuristic)."""
-        small = self._shift_scale_df(80)
-        large = self._shift_scale_df(800)
+        from ..constants import SCENARIOS
+
+        small = self._shift_scale_df(SCENARIOS.SAMPLE_SIZE_SMALL - 20)
+        large = self._shift_scale_df(SCENARIOS.SAMPLE_SIZE_LARGE)
         res_small = bootstrap_confidence_intervals(small, ["reward"], n_bootstrap=400)
         res_large = bootstrap_confidence_intervals(large, ["reward"], n_bootstrap=400)
         _, lo_s, hi_s = list(res_small.values())[0]
@@ -535,8 +535,11 @@ class TestStatistics(RewardSpaceTestBase):
         """Invariant 113 (non-strict): constant distribution CI widened with warning (positive epsilon width)."""
 
         df = self._const_df(80)
-        with warnings.catch_warnings(record=True) as caught:
-            warnings.simplefilter("always", RewardDiagnosticsWarning)
+        with assert_diagnostic_warning(
+            ["degenerate", "bootstrap", "CI"],
+            warning_category=RewardDiagnosticsWarning,
+            strict_mode=False,
+        ):
             res = bootstrap_confidence_intervals(
                 df,
                 ["reward", "pnl"],
@@ -544,11 +547,6 @@ class TestStatistics(RewardSpaceTestBase):
                 confidence_level=0.95,
                 strict_diagnostics=False,
             )
-        diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)]
-        self.assertTrue(
-            diag_warnings,
-            "Expected RewardDiagnosticsWarning for degenerate bootstrap CI widening",
-        )
         for _metric, (mean, lo, hi) in res.items():
             self.assertLess(
                 lo,
@@ -557,7 +555,11 @@ class TestStatistics(RewardSpaceTestBase):
             )
             width = hi - lo
             self.assertGreater(width, 0.0)
-            self.assertLessEqual(width, 3e-09, "Width should be small epsilon range (<=3e-9)")
+            from ..constants import STAT_TOL
+
+            self.assertLessEqual(
+                width, STAT_TOL.CI_WIDTH_EPSILON, "Width should be small epsilon range"
+            )
             # Mean should be centered (approx) within widened bounds
             self.assertGreaterEqual(mean, lo)
             self.assertLessEqual(mean, hi)
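
The literals removed above (0.005, 0.0005, 3e-09, 0.1, 0.8, 300, 400, 50, and the 80/600/800/1000 sample sizes) map onto named members of the STAT_TOL and SCENARIOS namespaces in tests/constants.py. That module is not shown in this diff; a minimal sketch consistent with the replaced values, using frozen dataclasses as an assumed layout, would be:

    from dataclasses import dataclass


    @dataclass(frozen=True)
    class _StatTol:
        # Values inferred from the literals replaced in test_statistics.py
        KS_STATISTIC_IDENTITY: float = 0.005
        DISTRIBUTION_SHIFT: float = 0.0005
        CI_WIDTH_EPSILON: float = 3e-09
        CORRELATION_SIGNIFICANCE: float = 0.1
        VARIANCE_RATIO_THRESHOLD: float = 0.8


    @dataclass(frozen=True)
    class _Scenarios:
        DURATION_LONG: int = 300
        DEFAULT_SAMPLE_SIZE: int = 400
        NULL_HYPOTHESIS_SAMPLE_SIZE: int = 400
        SAMPLE_SIZE_SMALL: int = 100   # 100 - 20 = 80 for the small bootstrap sample
        SAMPLE_SIZE_LARGE: int = 800   # 800 + 200 = 1000 and 800 - 200 = 600 in simulate_samples calls
        HETEROSCEDASTICITY_MIN_EXITS: int = 50


    STAT_TOL = _StatTol()
    SCENARIOS = _Scenarios()
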
index 214cc8058290ca3d4d8b4bfb4444b4f10aa8e941..c243e55569674f215ec6c7c39072b02c6889ef1c 100644 (file)
@@ -20,6 +20,13 @@ from reward_space_analysis import (
     apply_potential_shaping,
 )
 
+from .constants import (
+    CONTINUITY,
+    EXIT_FACTOR,
+    PBRS,
+    TOLERANCE,
+)
+
 # Global constants
 PBRS_INTEGRATION_PARAMS = [
     "potential_gamma",
@@ -67,22 +74,35 @@ class RewardSpaceTestBase(unittest.TestCase):
         """Clean up temporary files."""
         shutil.rmtree(self.temp_dir, ignore_errors=True)
 
-    PBRS_TERMINAL_TOL = 1e-12
-    PBRS_MAX_ABS_SHAPING = 5.0
+    # ===============================================
+    # Constants imported from the tests.constants module
+    # ===============================================
+
+    # Tolerance constants
+    TOL_IDENTITY_STRICT = TOLERANCE.IDENTITY_STRICT
+    TOL_IDENTITY_RELAXED = TOLERANCE.IDENTITY_RELAXED
+    TOL_GENERIC_EQ = TOLERANCE.GENERIC_EQ
+    TOL_NUMERIC_GUARD = TOLERANCE.NUMERIC_GUARD
+    TOL_NEGLIGIBLE = TOLERANCE.NEGLIGIBLE
+    TOL_RELATIVE = TOLERANCE.RELATIVE
+    TOL_DISTRIB_SHAPE = TOLERANCE.DISTRIB_SHAPE
+
+    # PBRS constants
+    PBRS_TERMINAL_TOL = PBRS.TERMINAL_TOL
+    PBRS_MAX_ABS_SHAPING = PBRS.MAX_ABS_SHAPING
+
+    # Continuity constants
+    CONTINUITY_EPS_SMALL = CONTINUITY.EPS_SMALL
+    CONTINUITY_EPS_LARGE = CONTINUITY.EPS_LARGE
+
+    # Exit factor constants
+    MIN_EXIT_POWER_TAU = EXIT_FACTOR.MIN_POWER_TAU
+
+    # Test-specific constants (not in constants.py)
     PBRS_TERMINAL_PROB = 0.08
     PBRS_SWEEP_ITER = 120
-    EPS_BASE = 1e-12
-    TOL_NUMERIC_GUARD = EPS_BASE
-    TOL_IDENTITY_STRICT = EPS_BASE
-    TOL_IDENTITY_RELAXED = 1e-09
-    TOL_GENERIC_EQ = 1e-06
-    TOL_NEGLIGIBLE = 1e-08
-    MIN_EXIT_POWER_TAU = 1e-06
-    TOL_DISTRIB_SHAPE = 0.05
+    EPS_BASE = TOLERANCE.IDENTITY_STRICT  # Alias for backward compatibility
     JS_DISTANCE_UPPER_BOUND = math.sqrt(math.log(2.0))
-    TOL_RELATIVE = 1e-09
-    CONTINUITY_EPS_SMALL = 0.0001
-    CONTINUITY_EPS_LARGE = 0.001
 
     def make_ctx(
         self,
@@ -412,3 +432,19 @@ class RewardSpaceTestBase(unittest.TestCase):
                 "idle_duration": rng.exponential(10, n),
             }
         )
+
+    def _make_idle_variance_df(self, n: int = 100) -> pd.DataFrame:
+        """Synthetic dataframe focusing on idle_duration ↔ reward_idle correlation."""
+        self.seed_all(self.SEED)
+        idle_duration = np.random.exponential(10, n)
+        reward_idle = -0.01 * idle_duration + np.random.normal(0, 0.001, n)
+        return pd.DataFrame(
+            {
+                "idle_duration": idle_duration,
+                "reward_idle": reward_idle,
+                "position": np.random.choice([0.0, 0.5, 1.0], n),
+                "reward": np.random.normal(0, 1, n),
+                "pnl": np.random.normal(0, self.TEST_PNL_STD, n),
+                "trade_duration": np.random.exponential(20, n),
+            }
+        )
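
The class attributes dropped from RewardSpaceTestBase are the values now carried by the TOLERANCE, PBRS, CONTINUITY and EXIT_FACTOR namespaces imported at the top of the file. A minimal sketch of the corresponding tests/constants.py entries, reusing the removed literals verbatim (the frozen-dataclass layout itself is an assumption), would be:

    from dataclasses import dataclass


    @dataclass(frozen=True)
    class _Tolerance:
        # Values taken from the attributes removed from RewardSpaceTestBase
        IDENTITY_STRICT: float = 1e-12
        IDENTITY_RELAXED: float = 1e-09
        GENERIC_EQ: float = 1e-06
        NUMERIC_GUARD: float = 1e-12
        NEGLIGIBLE: float = 1e-08
        RELATIVE: float = 1e-09
        DISTRIB_SHAPE: float = 0.05


    @dataclass(frozen=True)
    class _Pbrs:
        TERMINAL_TOL: float = 1e-12
        MAX_ABS_SHAPING: float = 5.0


    @dataclass(frozen=True)
    class _Continuity:
        EPS_SMALL: float = 1e-04
        EPS_LARGE: float = 1e-03


    @dataclass(frozen=True)
    class _ExitFactor:
        MIN_POWER_TAU: float = 1e-06


    TOLERANCE = _Tolerance()
    PBRS = _Pbrs()
    CONTINUITY = _Continuity()
    EXIT_FACTOR = _ExitFactor()

With such a layout, EPS_BASE = TOLERANCE.IDENTITY_STRICT keeps the historical 1e-12 value available under its old name while the canonical definition lives in one module.
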