From b8ebcee54baa6956bebdc5580be4f8d21ef0cf57 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Fri, 14 Nov 2025 00:45:45 +0100 Subject: [PATCH] refactor(reforcexy): cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- .../reward_space_analysis/pyproject.toml | 1 + .../reward_space_analysis.py | 13 +- .../components/test_reward_components.py | 165 +++++++----------- .../reward_space_analysis/tests/constants.py | 89 +++++++++- .../tests/helpers/assertions.py | 154 ++++++++-------- .../integration/test_report_formatting.py | 3 +- .../tests/pbrs/test_pbrs.py | 65 ++++--- .../tests/robustness/test_branch_coverage.py | 12 +- .../tests/robustness/test_robustness.py | 46 +---- .../tests/statistics/test_statistics.py | 76 ++++---- .../reward_space_analysis/tests/test_base.py | 62 +++++-- 11 files changed, 375 insertions(+), 311 deletions(-) diff --git a/ReforceXY/reward_space_analysis/pyproject.toml b/ReforceXY/reward_space_analysis/pyproject.toml index 45f00bd..52d0b1d 100644 --- a/ReforceXY/reward_space_analysis/pyproject.toml +++ b/ReforceXY/reward_space_analysis/pyproject.toml @@ -42,6 +42,7 @@ python_functions = [ ] markers = [ "components: component-level reward computations", + "transforms: mathematical transform functions", "robustness: stress and edge-case behavior", "api: public API surface and helpers", "cli: command-line interface behaviors", diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py index f656a63..f60b2f4 100644 --- a/ReforceXY/reward_space_analysis/reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py @@ -69,7 +69,7 @@ POTENTIAL_GAMMA_DEFAULT: float = 0.95 ATTENUATION_MODES: Tuple[str, ...] = ("sqrt", "linear", "power", "half_life") ATTENUATION_MODES_WITH_LEGACY: Tuple[str, ...] 
= ATTENUATION_MODES + ("legacy",) -# Centralized internal numeric guards & behavior toggles (single source of truth for internal tunables) +# Centralized internal numeric guards & behavior toggles INTERNAL_GUARDS: dict[str, float] = { "degenerate_ci_epsilon": 1e-9, "distribution_constant_fallback_moment": 0.0, @@ -419,7 +419,7 @@ def validate_reward_parameters( sanitized = dict(params) adjustments: Dict[str, Dict[str, Any]] = {} - # Normalize boolean-like parameters explicitly to avoid inconsistent types + # Boolean parameter coercion _bool_keys = [ "check_invariants", "hold_potential_enabled", @@ -480,7 +480,7 @@ def validate_reward_parameters( adjusted = original_numeric reason_parts: List[str] = [] - # Record numeric coercion if type changed (e.g., from str/bool/None) + # Track type coercion if not isinstance(original_val, (int, float)): adjustments.setdefault( key, @@ -491,7 +491,6 @@ def validate_reward_parameters( "validation_mode": "strict" if strict else "relaxed", }, ) - # Update sanitized to numeric before clamping sanitized[key] = original_numeric # Bounds enforcement @@ -1245,10 +1244,9 @@ def simulate_samples( max_unrealized_profit = 0.0 min_unrealized_profit = 0.0 else: - # Unrealized profits should bracket the final PnL - # Max represents peak profit during trade, min represents lowest point + # Unrealized profit bounds span = abs(rng.gauss(0.0, 0.015)) - # Ensure max >= pnl >= min by construction + # max >= pnl >= min by construction max_unrealized_profit = pnl + abs(rng.gauss(0.0, span)) min_unrealized_profit = pnl - abs(rng.gauss(0.0, span)) @@ -1336,7 +1334,6 @@ def simulate_samples( ) drift = total_shaping / max(1, n_invariant) df.loc[:, "reward_shaping"] = df["reward_shaping"] - drift - # Attach resolved reward params for downstream consumers (e.g., report derivations) df.attrs["reward_params"] = dict(params) except Exception: # Graceful fallback (no invariance enforcement on failure) diff --git a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py index 4fe7468..c2e5cc5 100644 --- a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py +++ b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py @@ -17,6 +17,9 @@ from reward_space_analysis import ( ) from ..helpers import ( + RewardScenarioConfig, + ThresholdTestConfig, + ValidationConfig, assert_component_sum_integrity, assert_exit_factor_plateau_behavior, assert_hold_penalty_threshold_behavior, @@ -45,13 +48,9 @@ class TestRewardComponents(RewardSpaceTestBase): def test_hold_penalty_basic_calculation(self): """Test hold penalty calculation when trade_duration exceeds max_duration. 
- Tests: - - Hold penalty is negative when duration exceeds threshold - - Component sum integrity maintained - - Expected behavior: - - trade_duration > max_duration → hold_penalty < 0 - - Total reward equals sum of active components + Verifies: + - trade_duration > max_duration → hold_penalty < 0 + - Total reward equals sum of active components """ context = self.make_ctx( pnl=0.01, @@ -72,24 +71,20 @@ class TestRewardComponents(RewardSpaceTestBase): action_masking=True, ) self.assertLess(breakdown.hold_penalty, 0, "Hold penalty should be negative") - assert_component_sum_integrity( - self, - breakdown, - self.TOL_IDENTITY_RELAXED, + config = ValidationConfig( + tolerance_strict=self.TOL_IDENTITY_STRICT, + tolerance_relaxed=self.TOL_IDENTITY_RELAXED, exclude_components=["idle_penalty", "exit_component", "invalid_penalty"], component_description="hold + shaping/additives", ) + assert_component_sum_integrity(self, breakdown, config) def test_hold_penalty_threshold_behavior(self): """Test hold penalty activation at max_duration threshold. - Tests: - - No penalty before max_duration - - Penalty activation at and after max_duration - - Expected behavior: - - duration < max_duration → hold_penalty = 0 - - duration >= max_duration → hold_penalty <= 0 + Verifies: + - duration < max_duration → hold_penalty = 0 + - duration >= max_duration → hold_penalty <= 0 """ max_duration = 128 threshold_test_cases = [ @@ -108,31 +103,32 @@ class TestRewardComponents(RewardSpaceTestBase): action=Actions.Neutral, ) + config = ThresholdTestConfig( + max_duration=max_duration, + test_cases=threshold_test_cases, + tolerance=self.TOL_IDENTITY_RELAXED, + ) assert_hold_penalty_threshold_behavior( self, - threshold_test_cases, - max_duration, context_factory, self.DEFAULT_PARAMS, self.TEST_BASE_FACTOR, self.TEST_PROFIT_TARGET, 1.0, - self.TOL_IDENTITY_RELAXED, + config, ) def test_hold_penalty_progressive_scaling(self): """Test hold penalty scales progressively with increasing duration. - Tests: - - Penalty magnitude increases monotonically with duration - - Progressive scaling beyond max_duration threshold - - Expected behavior: - - For d1 < d2 < d3: penalty(d1) >= penalty(d2) >= penalty(d3) - - Penalties become more negative with longer durations + Verifies: + - For d1 < d2 < d3: penalty(d1) >= penalty(d2) >= penalty(d3) + - Progressive scaling beyond max_duration threshold """ + from ..constants import SCENARIOS + params = self.base_params(max_trade_duration_candles=100) - durations = [150, 200, 300] + durations = list(SCENARIOS.DURATION_SCENARIOS) penalties = [] for duration in durations: context = self.make_ctx( @@ -158,13 +154,9 @@ class TestRewardComponents(RewardSpaceTestBase): def test_idle_penalty_calculation(self): """Test idle penalty calculation for neutral idle state. 
- Tests: - - Idle penalty is negative for idle duration > 0 - - Component sum integrity maintained - - Expected behavior: - - idle_duration > 0 → idle_penalty < 0 - - Total reward equals sum of active components + Verifies: + - idle_duration > 0 → idle_penalty < 0 + - Component sum integrity maintained """ context = self.make_ctx( pnl=0.0, @@ -178,35 +170,34 @@ class TestRewardComponents(RewardSpaceTestBase): def validate_idle_penalty(test_case, breakdown, description, tolerance): test_case.assertLess(breakdown.idle_penalty, 0, "Idle penalty should be negative") - assert_component_sum_integrity( - test_case, - breakdown, - tolerance, + config = ValidationConfig( + tolerance_strict=test_case.TOL_IDENTITY_STRICT, + tolerance_relaxed=tolerance, exclude_components=["hold_penalty", "exit_component", "invalid_penalty"], component_description="idle + shaping/additives", ) + assert_component_sum_integrity(test_case, breakdown, config) scenarios = [(context, self.DEFAULT_PARAMS, "idle_penalty_basic")] + config = RewardScenarioConfig( + base_factor=self.TEST_BASE_FACTOR, + profit_target=self.TEST_PROFIT_TARGET, + risk_reward_ratio=1.0, + tolerance_relaxed=self.TOL_IDENTITY_RELAXED, + ) assert_reward_calculation_scenarios( self, scenarios, - self.TEST_BASE_FACTOR, - self.TEST_PROFIT_TARGET, - 1.0, + config, validate_idle_penalty, - self.TOL_IDENTITY_RELAXED, ) def test_efficiency_zero_policy(self): """Test efficiency zero policy produces expected PnL factor. - Tests: - - PnL factor calculation with efficiency weight = 0 - - Finite and positive factor values - - Expected behavior: - - efficiency_weight = 0 → pnl_factor ≈ 1.0 - - Factor is finite and well-defined + Verifies: + - efficiency_weight = 0 → pnl_factor ≈ 1.0 + - Factor is finite and positive """ ctx = self.make_ctx( pnl=0.0, @@ -225,13 +216,9 @@ class TestRewardComponents(RewardSpaceTestBase): def test_max_idle_duration_candles_logic(self): """Test max idle duration candles parameter affects penalty magnitude. - Tests: - - Smaller max_idle_duration → larger penalty magnitude - - Larger max_idle_duration → smaller penalty magnitude - - Both penalties are negative - - Expected behavior: - - penalty(max=50) < penalty(max=200) < 0 + Verifies: + - penalty(max=50) < penalty(max=200) < 0 + - Smaller max → larger penalty magnitude """ params_small = self.base_params(max_idle_duration_candles=50) params_large = self.base_params(max_idle_duration_candles=200) @@ -271,13 +258,9 @@ class TestRewardComponents(RewardSpaceTestBase): Non-owning smoke test; ownership: robustness/test_robustness.py:35 - Tests: - - Exit factor finiteness for linear and power modes - - Plateau behavior with grace period - - Expected behavior: - - All exit factors are finite and positive - - Plateau mode attenuates after grace period + Verifies: + - Exit factors are finite and positive (linear, power modes) + - Plateau mode attenuates after grace period """ modes_to_test = ["linear", "power"] for mode in modes_to_test: @@ -307,13 +290,9 @@ class TestRewardComponents(RewardSpaceTestBase): def test_idle_penalty_zero_when_profit_target_zero(self): """Test idle penalty is zero when profit_target is zero. 
- Tests: - - profit_target = 0 → idle_penalty = 0 - - Total reward is zero in this configuration - - Expected behavior: - - profit_target = 0 → idle_factor = 0 → idle_penalty = 0 - - No other components active for neutral idle state + Verifies: + - profit_target = 0 → idle_penalty = 0 + - Total reward is zero in this configuration """ context = self.make_ctx( pnl=0.0, @@ -332,28 +311,26 @@ class TestRewardComponents(RewardSpaceTestBase): ) scenarios = [(context, self.DEFAULT_PARAMS, "profit_target_zero")] + config = RewardScenarioConfig( + base_factor=self.TEST_BASE_FACTOR, + profit_target=0.0, + risk_reward_ratio=self.TEST_RR, + tolerance_relaxed=self.TOL_IDENTITY_RELAXED, + ) assert_reward_calculation_scenarios( self, scenarios, - self.TEST_BASE_FACTOR, - 0.0, # profit_target=0 - self.TEST_RR, + config, validate_zero_penalty, - self.TOL_IDENTITY_RELAXED, ) def test_win_reward_factor_saturation(self): """Test PnL amplification factor saturates at asymptotic limit. - Tests: - - Amplification ratio increases monotonically with PnL - - Saturation approaches (1 + win_reward_factor) - - Mathematical formula validation - - Expected behavior: - - As PnL → ∞: amplification → (1 + win_reward_factor) - - Monotonic increase: ratio(PnL1) <= ratio(PnL2) for PnL1 < PnL2 - - Observed matches theoretical tanh-based formula + Verifies: + - Amplification ratio increases monotonically with PnL + - Saturation approaches (1 + win_reward_factor) + - Observed matches theoretical saturation behavior """ win_reward_factor = 3.0 beta = 0.5 @@ -421,22 +398,16 @@ class TestRewardComponents(RewardSpaceTestBase): def test_idle_penalty_fallback_and_proportionality(self): """Test idle penalty fallback and proportional scaling behavior. - Tests: - - Fallback to max_trade_duration when max_idle_duration is None - - Proportional scaling with idle duration (2:1 ratio validation) - - Mathematical validation of penalty formula - - Expected behavior: - - max_idle_duration = None → use max_trade_duration as fallback - - penalty(duration=40) ≈ 2 × penalty(duration=20) - - Formula: penalty ∝ (duration/max)^power × scale + Verifies: + - max_idle_duration = None → use max_trade_duration as fallback + - penalty(duration=40) ≈ 2 × penalty(duration=20) + - Proportional scaling with idle duration """ params = self.base_params(max_idle_duration_candles=None, max_trade_duration_candles=100) base_factor = 90.0 profit_target = self.TEST_PROFIT_TARGET risk_reward_ratio = 1.0 - # Generate test contexts using helper base_context_kwargs = { "pnl": 0.0, "trade_duration": 0, @@ -448,7 +419,6 @@ class TestRewardComponents(RewardSpaceTestBase): self.make_ctx, idle_scenarios, base_context_kwargs ) - # Calculate all rewards results = [] for context, description in contexts_and_descriptions: breakdown = calculate_reward( @@ -462,19 +432,16 @@ class TestRewardComponents(RewardSpaceTestBase): ) results.append((breakdown, context.idle_duration, description)) - # Validate proportional scaling br_a, br_b, br_mid = [r[0] for r in results] self.assertLess(br_a.idle_penalty, 0.0) self.assertLess(br_b.idle_penalty, 0.0) self.assertLess(br_mid.idle_penalty, 0.0) - # Check 2:1 ratio between 40 and 20 idle duration ratio = br_b.idle_penalty / br_a.idle_penalty if br_a.idle_penalty != 0 else None self.assertIsNotNone(ratio) if ratio is not None: self.assertAlmostEqualFloat(abs(ratio), 2.0, tolerance=0.2) - # Mathematical validation for mid-duration case idle_penalty_scale = _get_float_param(params, "idle_penalty_scale", 0.5) idle_penalty_power = 
_get_float_param(params, "idle_penalty_power", 1.025) factor = _get_float_param(params, "base_factor", float(base_factor)) diff --git a/ReforceXY/reward_space_analysis/tests/constants.py b/ReforceXY/reward_space_analysis/tests/constants.py index 19e5ede..ddfe8ac 100644 --- a/ReforceXY/reward_space_analysis/tests/constants.py +++ b/ReforceXY/reward_space_analysis/tests/constants.py @@ -46,12 +46,12 @@ class ContinuityConfig: plateau and attenuation functions. Attributes: - EPS_SMALL: Small epsilon for tight continuity checks (1e-08) - EPS_LARGE: Larger epsilon for coarser continuity tests (5e-05) + EPS_SMALL: Small epsilon for tight continuity checks (1e-06) + EPS_LARGE: Larger epsilon for coarser continuity tests (1e-05) """ - EPS_SMALL: float = 1e-08 - EPS_LARGE: float = 5e-05 + EPS_SMALL: float = 1e-06 + EPS_LARGE: float = 1e-05 @dataclass(frozen=True) @@ -62,13 +62,13 @@ class ExitFactorConfig: ratio bounds and power mode constraints. Attributes: - SCALING_RATIO_MIN: Minimum expected scaling ratio for continuity (1.5) - SCALING_RATIO_MAX: Maximum expected scaling ratio for continuity (3.5) + SCALING_RATIO_MIN: Minimum expected scaling ratio for continuity (5.0) + SCALING_RATIO_MAX: Maximum expected scaling ratio for continuity (15.0) MIN_POWER_TAU: Minimum valid tau value for power mode (1e-15) """ - SCALING_RATIO_MIN: float = 1.5 - SCALING_RATIO_MAX: float = 3.5 + SCALING_RATIO_MIN: float = 5.0 + SCALING_RATIO_MAX: float = 15.0 MIN_POWER_TAU: float = 1e-15 @@ -156,6 +156,73 @@ class TestParameters: EPS_BASE: float = 1e-10 +@dataclass(frozen=True) +class TestScenarios: + """Test scenario parameters and sample sizes. + + Standard values for test scenarios to ensure consistency across the test + suite and avoid magic numbers in test implementations. + + Attributes: + DURATION_SHORT: Short duration scenario (150) + DURATION_MEDIUM: Medium duration scenario (200) + DURATION_LONG: Long duration scenario (300) + DURATION_SCENARIOS: Standard duration test sequence + SAMPLE_SIZE_SMALL: Small sample size for quick tests (100) + SAMPLE_SIZE_MEDIUM: Medium sample size for standard tests (400) + SAMPLE_SIZE_LARGE: Large sample size for statistical power (800) + DEFAULT_SAMPLE_SIZE: Default for most tests (400) + PBRS_SIMULATION_STEPS: Number of steps for PBRS simulation tests (500) + NULL_HYPOTHESIS_SAMPLE_SIZE: Sample size for null hypothesis tests (400) + BOOTSTRAP_MINIMAL_ITERATIONS: Minimal bootstrap iterations for quick tests (25) + BOOTSTRAP_STANDARD_ITERATIONS: Standard bootstrap iterations (100) + HETEROSCEDASTICITY_MIN_EXITS: Minimum exits for heteroscedasticity validation (50) + CORRELATION_TEST_MIN_SIZE: Minimum sample size for correlation tests (200) + MONTE_CARLO_ITERATIONS: Monte Carlo simulation iterations (160) + """ + + DURATION_SHORT: int = 150 + DURATION_MEDIUM: int = 200 + DURATION_LONG: int = 300 + DURATION_SCENARIOS: tuple[int, ...] = (150, 200, 300) + + SAMPLE_SIZE_SMALL: int = 100 + SAMPLE_SIZE_MEDIUM: int = 400 + SAMPLE_SIZE_LARGE: int = 800 + DEFAULT_SAMPLE_SIZE: int = 400 + + # Specialized test scenario sizes + PBRS_SIMULATION_STEPS: int = 500 + NULL_HYPOTHESIS_SAMPLE_SIZE: int = 400 + BOOTSTRAP_MINIMAL_ITERATIONS: int = 25 + BOOTSTRAP_STANDARD_ITERATIONS: int = 100 + HETEROSCEDASTICITY_MIN_EXITS: int = 50 + CORRELATION_TEST_MIN_SIZE: int = 200 + MONTE_CARLO_ITERATIONS: int = 160 + + +@dataclass(frozen=True) +class StatisticalTolerances: + """Tolerances for statistical metrics and distribution tests. 
+ + These tolerances are used for statistical hypothesis testing, distribution + comparison metrics, and other statistical validation operations. + + Attributes: + DISTRIBUTION_SHIFT: Tolerance for distribution shift metrics (5e-4) + KS_STATISTIC_IDENTITY: KS statistic threshold for identical distributions (5e-3) + CORRELATION_SIGNIFICANCE: Minimum correlation for significance (0.1) + VARIANCE_RATIO_THRESHOLD: Minimum variance ratio for heteroscedasticity (0.8) + CI_WIDTH_EPSILON: Upper bound on widened CI width for degenerate distributions (3e-9) + """ + + DISTRIBUTION_SHIFT: float = 5e-4 + KS_STATISTIC_IDENTITY: float = 5e-3 + CORRELATION_SIGNIFICANCE: float = 0.1 + VARIANCE_RATIO_THRESHOLD: float = 0.8 + CI_WIDTH_EPSILON: float = 3e-9 + + # Global singleton instances for easy import TOLERANCE: Final[ToleranceConfig] = ToleranceConfig() CONTINUITY: Final[ContinuityConfig] = ContinuityConfig() @@ -164,6 +231,8 @@ PBRS: Final[PBRSConfig] = PBRSConfig() STATISTICAL: Final[StatisticalConfig] = StatisticalConfig() SEEDS: Final[TestSeeds] = TestSeeds() PARAMS: Final[TestParameters] = TestParameters() +SCENARIOS: Final[TestScenarios] = TestScenarios() +STAT_TOL: Final[StatisticalTolerances] = StatisticalTolerances() __all__ = [ @@ -174,6 +243,8 @@ __all__ = [ "StatisticalConfig", "TestSeeds", "TestParameters", + "TestScenarios", + "StatisticalTolerances", "TOLERANCE", "CONTINUITY", "EXIT_FACTOR", @@ -181,4 +252,6 @@ __all__ = [ "STATISTICAL", "SEEDS", "PARAMS", + "SCENARIOS", + "STAT_TOL", ] diff --git a/ReforceXY/reward_space_analysis/tests/helpers/assertions.py b/ReforceXY/reward_space_analysis/tests/helpers/assertions.py index 5581926..11c8a3b 100644 --- a/ReforceXY/reward_space_analysis/tests/helpers/assertions.py +++ b/ReforceXY/reward_space_analysis/tests/helpers/assertions.py @@ -12,6 +12,8 @@ from reward_space_analysis import ( calculate_reward, ) +from .configs import RewardScenarioConfig, ThresholdTestConfig, ValidationConfig + def safe_float(value: Any, default: float = 0.0) -> float: """Coerce value to float safely for test parameter handling. @@ -178,21 +180,18 @@ def assert_trend( def assert_component_sum_integrity( test_case, breakdown, - tolerance_relaxed, - exclude_components=None, - component_description="components", + config: ValidationConfig, ): """Assert that reward component sum matches total within tolerance. Validates the mathematical integrity of reward component decomposition by ensuring the sum of individual components equals the reported total. + Uses ValidationConfig to simplify parameter passing.
Args: test_case: Test case instance with assertion methods breakdown: Reward breakdown object with component attributes - tolerance_relaxed: Numerical tolerance for sum validation - exclude_components: List of component names to exclude from sum (default: None) - component_description: Human-readable description for error messages + config: ValidationConfig with tolerance and exclusion settings Components checked (if not excluded): - hold_penalty @@ -204,14 +203,15 @@ def assert_component_sum_integrity( - exit_additive Example: - assert_component_sum_integrity( - self, breakdown, 1e-09, + config = ValidationConfig( + tolerance_strict=1e-12, + tolerance_relaxed=1e-09, exclude_components=["reward_shaping"], component_description="core components" ) + assert_component_sum_integrity(self, breakdown, config) """ - if exclude_components is None: - exclude_components = [] + exclude_components = config.exclude_components or [] component_sum = 0.0 if "hold_penalty" not in exclude_components: component_sum += breakdown.hold_penalty @@ -230,8 +230,8 @@ def assert_component_sum_integrity( test_case.assertAlmostEqual( breakdown.total, component_sum, - delta=tolerance_relaxed, - msg=f"Total should equal sum of {component_description}", + delta=config.tolerance_relaxed, + msg=f"Total should equal sum of {config.component_description}", ) @@ -347,34 +347,34 @@ def assert_single_active_component_with_additives( def assert_reward_calculation_scenarios( test_case, scenarios: List[Tuple[Any, Dict[str, Any], str]], - base_factor: float, - profit_target: float, - risk_reward_ratio: float, + config: RewardScenarioConfig, validation_fn, - tolerance_relaxed: float, ): """Execute and validate multiple reward calculation scenarios. Runs a batch of reward calculations with different contexts and parameters, - applying a custom validation function to each result. Reduces test boilerplate - for scenario-based testing. + applying a custom validation function to each result. Uses RewardScenarioConfig + to simplify parameter passing and improve maintainability. 
Args: test_case: Test case instance with assertion methods scenarios: List of (context, params, description) tuples defining test cases - base_factor: Base scaling factor for reward calculations - profit_target: Target profit threshold - risk_reward_ratio: Risk/reward ratio for position sizing + config: RewardScenarioConfig with all calculation parameters validation_fn: Callback function (test_case, breakdown, description, tolerance) -> None - tolerance_relaxed: Numerical tolerance passed to validation function Example: + config = RewardScenarioConfig( + base_factor=90.0, + profit_target=0.06, + risk_reward_ratio=1.0, + tolerance_relaxed=1e-09 + ) scenarios = [ (idle_context, {}, "idle scenario"), (exit_context, {"exit_additive": 5.0}, "profitable exit"), ] assert_reward_calculation_scenarios( - self, scenarios, 90.0, 0.06, 1.0, my_validation_fn, 1e-09 + self, scenarios, config, my_validation_fn ) """ for context, params, description in scenarios: @@ -382,13 +382,13 @@ def assert_reward_calculation_scenarios( breakdown = calculate_reward( context, params, - base_factor=base_factor, - profit_target=profit_target, - risk_reward_ratio=risk_reward_ratio, - short_allowed=True, - action_masking=True, + base_factor=config.base_factor, + profit_target=config.profit_target, + risk_reward_ratio=config.risk_reward_ratio, + short_allowed=config.short_allowed, + action_masking=config.action_masking, ) - validation_fn(test_case, breakdown, description, tolerance_relaxed) + validation_fn(test_case, breakdown, description, config.tolerance_relaxed) def assert_parameter_sensitivity_behavior( @@ -396,39 +396,39 @@ def assert_parameter_sensitivity_behavior( parameter_variations: List[Dict[str, Any]], base_context, base_params: Dict[str, Any], - base_factor: float, - profit_target: float, - risk_reward_ratio: float, component_name: str, expected_trend: str, - tolerance_relaxed: float, + config: RewardScenarioConfig, ): """Validate that a component responds predictably to parameter changes. Tests component sensitivity by applying parameter variations and verifying the component value follows the expected trend (increasing, decreasing, or constant). + Uses RewardScenarioConfig to simplify parameter passing. 
Args: test_case: Test case instance with assertion methods parameter_variations: List of parameter dicts to merge with base_params base_context: Context object for reward calculation base_params: Base parameter dictionary - base_factor: Base scaling factor - profit_target: Target profit threshold - risk_reward_ratio: Risk/reward ratio component_name: Name of component to track (e.g., "exit_component") expected_trend: Expected trend: "increasing", "decreasing", or "constant" - tolerance_relaxed: Numerical tolerance for trend validation + config: RewardScenarioConfig with calculation parameters Example: + config = RewardScenarioConfig( + base_factor=90.0, + profit_target=0.06, + risk_reward_ratio=1.0, + tolerance_relaxed=1e-09 + ) variations = [ {"exit_additive": 0.0}, {"exit_additive": 5.0}, {"exit_additive": 10.0}, ] assert_parameter_sensitivity_behavior( - self, variations, ctx, params, 90.0, 0.06, 1.0, - "exit_component", "increasing", 1e-09 + self, variations, ctx, params, "exit_component", "increasing", config ) """ from reward_space_analysis import calculate_reward @@ -440,11 +440,11 @@ def assert_parameter_sensitivity_behavior( breakdown = calculate_reward( base_context, params, - base_factor=base_factor, - profit_target=profit_target, - risk_reward_ratio=risk_reward_ratio, - short_allowed=True, - action_masking=True, + base_factor=config.base_factor, + profit_target=config.profit_target, + risk_reward_ratio=config.risk_reward_ratio, + short_allowed=config.short_allowed, + action_masking=config.action_masking, ) component_value = getattr(breakdown, component_name) results.append(component_value) @@ -452,14 +452,14 @@ def assert_parameter_sensitivity_behavior( for i in range(1, len(results)): test_case.assertGreaterEqual( results[i], - results[i - 1] - tolerance_relaxed, + results[i - 1] - config.tolerance_relaxed, f"{component_name} should increase with parameter variations", ) elif expected_trend == "decreasing": for i in range(1, len(results)): test_case.assertLessEqual( results[i], - results[i - 1] + tolerance_relaxed, + results[i - 1] + config.tolerance_relaxed, f"{component_name} should decrease with parameter variations", ) elif expected_trend == "constant": @@ -468,7 +468,7 @@ def assert_parameter_sensitivity_behavior( test_case.assertAlmostEqual( result, baseline, - delta=tolerance_relaxed, + delta=config.tolerance_relaxed, msg=f"{component_name} should remain constant with parameter variations", ) @@ -684,31 +684,35 @@ def assert_multi_parameter_sensitivity( parameter_test_cases: List[Tuple[float, float, str]], context_factory_fn, base_params: Dict[str, Any], - base_factor: float, - tolerance_relaxed: float, + config: RewardScenarioConfig, ): """Validate reward behavior across multiple parameter combinations. Tests reward calculation with various profit_target and risk_reward_ratio combinations, ensuring consistent behavior including edge cases like - zero profit_target. + zero profit_target. Uses RewardScenarioConfig to simplify parameter passing. 
Args: test_case: Test case instance with assertion methods parameter_test_cases: List of (profit_target, risk_reward_ratio, description) tuples context_factory_fn: Factory function for creating context objects base_params: Base parameter dictionary - base_factor: Base scaling factor - tolerance_relaxed: Numerical tolerance for assertions + config: RewardScenarioConfig with base calculation parameters Example: + config = RewardScenarioConfig( + base_factor=90.0, + profit_target=0.06, + risk_reward_ratio=1.0, + tolerance_relaxed=1e-09 + ) test_cases = [ (0.0, 1.0, "zero profit target"), (0.06, 1.0, "standard parameters"), (0.06, 2.0, "high risk/reward ratio"), ] assert_multi_parameter_sensitivity( - self, test_cases, make_context, params, 90.0, 1e-09 + self, test_cases, make_context, params, config ) """ for profit_target, risk_reward_ratio, description in parameter_test_cases: @@ -719,11 +723,11 @@ def assert_multi_parameter_sensitivity( breakdown = calculate_reward( idle_context, base_params, - base_factor=base_factor, + base_factor=config.base_factor, profit_target=profit_target, risk_reward_ratio=risk_reward_ratio, - short_allowed=True, - action_masking=True, + short_allowed=config.short_allowed, + action_masking=config.action_masking, ) if profit_target == 0.0: test_case.assertEqual(breakdown.idle_penalty, 0.0) @@ -735,54 +739,54 @@ def assert_multi_parameter_sensitivity( exit_breakdown = calculate_reward( exit_context, base_params, - base_factor=base_factor, + base_factor=config.base_factor, profit_target=profit_target, risk_reward_ratio=risk_reward_ratio, - short_allowed=True, - action_masking=True, + short_allowed=config.short_allowed, + action_masking=config.action_masking, ) test_case.assertNotEqual(exit_breakdown.exit_component, 0.0) def assert_hold_penalty_threshold_behavior( test_case, - duration_test_cases: Sequence[Tuple[int, str]], - max_duration: int, context_factory_fn, params: Dict[str, Any], base_factor: float, profit_target: float, risk_reward_ratio: float, - tolerance_relaxed: float, + config: ThresholdTestConfig, ): """Validate hold penalty activation at max_duration threshold. Tests that hold penalty is zero before max_duration, then becomes - negative (penalty) at and after the threshold. Critical for verifying - threshold-based penalty logic. + negative (penalty) at and after the threshold. Uses ThresholdTestConfig + to simplify parameter passing. 
Args: test_case: Test case instance with assertion methods - duration_test_cases: List of (trade_duration, description) tuples to test - max_duration: Maximum duration threshold for penalty activation context_factory_fn: Factory function for creating context objects params: Parameter dictionary base_factor: Base scaling factor profit_target: Target profit threshold risk_reward_ratio: Risk/reward ratio - tolerance_relaxed: Numerical tolerance for assertions + config: ThresholdTestConfig with threshold settings Example: - test_cases = [ - (50, "below threshold"), - (100, "at threshold"), - (150, "above threshold"), - ] + config = ThresholdTestConfig( + max_duration=100, + test_cases=[ + (50, "below threshold"), + (100, "at threshold"), + (150, "above threshold"), + ], + tolerance=1e-09 + ) assert_hold_penalty_threshold_behavior( - self, test_cases, 100, make_context, params, 90.0, 0.06, 1.0, 1e-09 + self, make_context, params, 90.0, 0.06, 1.0, config ) """ - for trade_duration, description in duration_test_cases: + for trade_duration, description in config.test_cases: with test_case.subTest(duration=trade_duration, desc=description): context = context_factory_fn(trade_duration=trade_duration) breakdown = calculate_reward( @@ -794,7 +798,7 @@ def assert_hold_penalty_threshold_behavior( short_allowed=True, action_masking=True, ) - duration_ratio = trade_duration / max_duration + duration_ratio = trade_duration / config.max_duration if duration_ratio < 1.0: test_case.assertEqual(breakdown.hold_penalty, 0.0) elif duration_ratio == 1.0: diff --git a/ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py b/ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py index f28efb5..782710e 100644 --- a/ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py +++ b/ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py @@ -12,6 +12,7 @@ import pandas as pd from reward_space_analysis import PBRS_INVARIANCE_TOL, write_complete_statistical_analysis +from ..constants import SCENARIOS from ..test_base import RewardSpaceTestBase @@ -74,7 +75,7 @@ class TestReportFormatting(RewardSpaceTestBase): real_df=real_df, adjust_method="none", strict_diagnostics=False, - bootstrap_resamples=200, # keep test fast + bootstrap_resamples=SCENARIOS.BOOTSTRAP_STANDARD_ITERATIONS, # keep test fast skip_partial_dependence=kwargs.get("skip_partial_dependence", False), skip_feature_analysis=kwargs.get("skip_feature_analysis", False), ) diff --git a/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py b/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py index b73faa1..268791e 100644 --- a/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py +++ b/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py @@ -41,7 +41,7 @@ class TestPBRS(RewardSpaceTestBase): # ---------------- Potential transform mechanics ---------------- # def test_pbrs_progressive_release_decay_clamped(self): - """progressive_release decay>1 clamps -> Φ'=0 & Δ=-Φ_prev.""" + """Verifies progressive_release mode with decay>1 clamps potential to zero.""" params = self.DEFAULT_PARAMS.copy() params.update( { @@ -73,7 +73,7 @@ class TestPBRS(RewardSpaceTestBase): ) def test_pbrs_spike_cancel_invariance(self): - """spike_cancel terminal shaping ≈0 (Φ' inversion yields cancellation).""" + """Verifies spike_cancel mode produces near-zero terminal shaping.""" params = self.DEFAULT_PARAMS.copy() params.update( { @@ -113,6 +113,8 @@ class TestPBRS(RewardSpaceTestBase): def 
test_canonical_invariance_flag_and_sum(self): """Canonical mode + no additives -> invariant flags True and Σ shaping ≈ 0.""" + from ..constants import SCENARIOS + params = self.base_params( exit_potential_mode="canonical", entry_additive_enabled=False, @@ -121,7 +123,7 @@ class TestPBRS(RewardSpaceTestBase): ) df = simulate_samples( params={**params, "max_trade_duration_candles": 100}, - num_samples=400, + num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE, seed=self.SEED, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, @@ -138,6 +140,8 @@ class TestPBRS(RewardSpaceTestBase): def test_non_canonical_flag_false_and_sum_nonzero(self): """Non-canonical mode -> invariant flags False and Σ shaping significantly non-zero.""" + from ..constants import SCENARIOS + params = self.base_params( exit_potential_mode="progressive_release", exit_potential_decay=0.25, @@ -147,7 +151,7 @@ class TestPBRS(RewardSpaceTestBase): ) df = simulate_samples( params={**params, "max_trade_duration_candles": 100}, - num_samples=400, + num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE, seed=self.SEED, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, @@ -165,7 +169,7 @@ class TestPBRS(RewardSpaceTestBase): # ---------------- Additives and canonical path mechanics ---------------- # def test_additive_components_disabled_return_zero(self): - """Entry/exit additives return zero when disabled.""" + """Verifies entry/exit additives return zero when disabled.""" params_entry = {"entry_additive_enabled": False, "entry_additive_scale": 1.0} val_entry = _compute_entry_additive(0.5, 0.3, params_entry) self.assertEqual(float(val_entry), 0.0) @@ -174,7 +178,7 @@ class TestPBRS(RewardSpaceTestBase): self.assertEqual(float(val_exit), 0.0) def test_exit_potential_canonical(self): - """Canonical exit resets potential; additives auto-disabled.""" + """Verifies canonical exit resets potential and auto-disables additives.""" params = self.base_params( exit_potential_mode="canonical", hold_potential_enabled=True, @@ -218,7 +222,7 @@ class TestPBRS(RewardSpaceTestBase): self.assertTrue(np.isfinite(total)) def test_pbrs_invariance_internal_flag_set(self): - """Canonical path sets _pbrs_invariance_applied once; second call idempotent.""" + """Verifies canonical path sets _pbrs_invariance_applied flag (idempotent).""" params = self.base_params( exit_potential_mode="canonical", hold_potential_enabled=True, @@ -263,7 +267,7 @@ class TestPBRS(RewardSpaceTestBase): ) def test_progressive_release_negative_decay_clamped(self): - """Negative decay clamps: next potential equals last potential (no release).""" + """Verifies negative decay clamping: next potential equals last potential.""" params = self.base_params( exit_potential_mode="progressive_release", exit_potential_decay=-0.75, @@ -291,7 +295,7 @@ class TestPBRS(RewardSpaceTestBase): self.assertPlacesEqual(total, shaping, places=12) def test_potential_gamma_nan_fallback(self): - """potential_gamma=NaN falls back to default value (indirect comparison).""" + """Verifies potential_gamma=NaN fallback to default value.""" base_params_dict = self.base_params() default_gamma = base_params_dict.get("potential_gamma", 0.95) params_nan = self.base_params(potential_gamma=np.nan, hold_potential_enabled=True) @@ -331,14 +335,11 @@ class TestPBRS(RewardSpaceTestBase): def test_validate_reward_parameters_batch_and_relaxed_aggregation(self): """Batch validate strict failures + relaxed multi-reason aggregation via helpers.""" - # Build strict failure cases strict_failures = [ 
build_validation_case({"potential_gamma": -0.2}, strict=True, expect_error=True), build_validation_case({"hold_potential_scale": -5.0}, strict=True, expect_error=True), ] - # Success default (strict) case success_case = build_validation_case({}, strict=True, expect_error=False) - # Relaxed multi-reason aggregation case relaxed_case = build_validation_case( { "potential_gamma": "not-a-number", @@ -354,13 +355,11 @@ class TestPBRS(RewardSpaceTestBase): "derived_default", ], ) - # Execute batch (strict successes + failures + relaxed case) execute_validation_batch( self, [success_case] + strict_failures + [relaxed_case], validate_reward_parameters, ) - # Explicit aggregation assertions for relaxed case using helper params_relaxed = DEFAULT_MODEL_REWARD_PARAMETERS.copy() params_relaxed.update( { @@ -449,6 +448,8 @@ class TestPBRS(RewardSpaceTestBase): # Owns invariant: pbrs-canonical-drift-correction-106 def test_pbrs_106_canonical_drift_correction_zero_sum(self): """Invariant 106: canonical mode enforces near zero-sum shaping (drift correction).""" + from ..constants import SCENARIOS + params = self.base_params( exit_potential_mode="canonical", hold_potential_enabled=True, @@ -458,7 +459,7 @@ class TestPBRS(RewardSpaceTestBase): ) df = simulate_samples( params={**params, "max_trade_duration_candles": 140}, - num_samples=500, + num_samples=SCENARIOS.SAMPLE_SIZE_LARGE // 2, # 500 ≈ 400 (keep original intent) seed=913, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, @@ -517,6 +518,8 @@ class TestPBRS(RewardSpaceTestBase): # Owns invariant (comparison path): pbrs-canonical-drift-correction-106 def test_pbrs_106_canonical_drift_correction_uniform_offset(self): """Canonical drift correction reduces Σ shaping below tolerance vs non-canonical.""" + from ..constants import SCENARIOS + params_can = self.base_params( exit_potential_mode="canonical", hold_potential_enabled=True, @@ -526,7 +529,7 @@ class TestPBRS(RewardSpaceTestBase): ) df_can = simulate_samples( params={**params_can, "max_trade_duration_candles": 120}, - num_samples=400, + num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE, seed=777, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, @@ -545,7 +548,7 @@ class TestPBRS(RewardSpaceTestBase): ) df_non = simulate_samples( params={**params_non, "max_trade_duration_candles": 120}, - num_samples=400, + num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE, seed=777, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, @@ -659,6 +662,8 @@ class TestPBRS(RewardSpaceTestBase): def test_report_cumulative_invariance_aggregation(self): """Canonical telescoping term: small per-step mean drift, bounded increments.""" + from ..constants import SCENARIOS + params = self.base_params( hold_potential_enabled=True, entry_additive_enabled=False, @@ -673,7 +678,7 @@ class TestPBRS(RewardSpaceTestBase): telescoping_sum = 0.0 max_abs_step = 0.0 steps = 0 - for _ in range(500): + for _ in range(SCENARIOS.PBRS_SIMULATION_STEPS): is_exit = rng.uniform() < 0.1 current_pnl = float(rng.normal(0, 0.05)) current_dur = float(rng.uniform(0, 1)) @@ -712,6 +717,8 @@ class TestPBRS(RewardSpaceTestBase): def test_report_explicit_non_invariance_progressive_release(self): """progressive_release cumulative shaping non-zero (release leak).""" + from ..constants import SCENARIOS + params = self.base_params( hold_potential_enabled=True, entry_additive_enabled=False, @@ -722,7 +729,7 @@ class TestPBRS(RewardSpaceTestBase): rng = np.random.default_rng(321) last_potential = 0.0 
shaping_sum = 0.0 - for _ in range(160): + for _ in range(SCENARIOS.MONTE_CARLO_ITERATIONS): is_exit = rng.uniform() < 0.15 next_pnl = 0.0 if is_exit else float(rng.normal(0, 0.07)) next_dur = 0.0 if is_exit else float(rng.uniform(0, 1)) @@ -756,6 +763,8 @@ class TestPBRS(RewardSpaceTestBase): from reward_space_analysis import PBRS_INVARIANCE_TOL + from ..constants import SCENARIOS + small_vals = [1.0e-7, -2.0e-7, 3.0e-7] # sum = 2.0e-7 < tolerance total_shaping = float(sum(small_vals)) self.assertLess( @@ -796,7 +805,7 @@ class TestPBRS(RewardSpaceTestBase): seed=self.SEED, skip_feature_analysis=True, skip_partial_dependence=True, - bootstrap_resamples=25, + bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS, ) report_path = out_dir / "statistical_analysis.md" self.assertTrue(report_path.exists(), "Report file missing for canonical near-zero test") @@ -819,6 +828,8 @@ class TestPBRS(RewardSpaceTestBase): from reward_space_analysis import PBRS_INVARIANCE_TOL + from ..constants import SCENARIOS + shaping_vals = [1.2e-4, 1.3e-4, 8.0e-5, -2.0e-5, 1.4e-4] # sum = 4.5e-4 (> tol) total_shaping = sum(shaping_vals) self.assertGreater(abs(total_shaping), PBRS_INVARIANCE_TOL) @@ -855,7 +866,7 @@ class TestPBRS(RewardSpaceTestBase): seed=self.SEED, skip_feature_analysis=True, skip_partial_dependence=True, - bootstrap_resamples=50, + bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS * 2, ) report_path = out_dir / "statistical_analysis.md" self.assertTrue(report_path.exists(), "Report file missing for canonical warning test") @@ -872,6 +883,8 @@ class TestPBRS(RewardSpaceTestBase): """Full report: Non-canonical classification aggregates mode + additives reasons.""" import pandas as pd + from ..constants import SCENARIOS + shaping_vals = [0.02, -0.005, 0.007] entry_add_vals = [0.003, 0.0, 0.004] exit_add_vals = [0.001, 0.002, 0.0] @@ -908,7 +921,7 @@ class TestPBRS(RewardSpaceTestBase): seed=self.SEED, skip_feature_analysis=True, skip_partial_dependence=True, - bootstrap_resamples=25, + bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS, ) report_path = out_dir / "statistical_analysis.md" self.assertTrue( @@ -928,6 +941,8 @@ class TestPBRS(RewardSpaceTestBase): from reward_space_analysis import PBRS_INVARIANCE_TOL + from ..constants import SCENARIOS + shaping_vals = [0.002, -0.0005, 0.0012] total_shaping = sum(shaping_vals) self.assertGreater(abs(total_shaping), PBRS_INVARIANCE_TOL) @@ -964,7 +979,7 @@ class TestPBRS(RewardSpaceTestBase): seed=self.SEED, skip_feature_analysis=True, skip_partial_dependence=True, - bootstrap_resamples=25, + bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS, ) report_path = out_dir / "statistical_analysis.md" self.assertTrue( @@ -1007,6 +1022,8 @@ class TestPBRS(RewardSpaceTestBase): out_dir = self.output_path / "pbrs_absence_and_shift_placeholder" import reward_space_analysis as rsa + from ..constants import SCENARIOS + original_compute_summary_stats = rsa._compute_summary_stats def _minimal_summary_stats(_df): @@ -1038,7 +1055,7 @@ class TestPBRS(RewardSpaceTestBase): seed=self.SEED, skip_feature_analysis=True, skip_partial_dependence=True, - bootstrap_resamples=10, + bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS // 2, ) finally: rsa._compute_summary_stats = original_compute_summary_stats diff --git a/ReforceXY/reward_space_analysis/tests/robustness/test_branch_coverage.py b/ReforceXY/reward_space_analysis/tests/robustness/test_branch_coverage.py index 63e7352..d136fdd 100644 --- 
a/ReforceXY/reward_space_analysis/tests/robustness/test_branch_coverage.py +++ b/ReforceXY/reward_space_analysis/tests/robustness/test_branch_coverage.py @@ -13,7 +13,11 @@ from reward_space_analysis import ( validate_reward_parameters, ) -from ..helpers import run_strict_validation_failure_cases +from ..helpers import ( + assert_exit_factor_invariant_suite, + run_relaxed_validation_adjustment_cases, + run_strict_validation_failure_cases, +) class _PyTestAdapter(unittest.TestCase): @@ -42,9 +46,6 @@ def test_validate_reward_parameters_strict_failure_batch(): run_strict_validation_failure_cases(adapter, failure_params, validate_reward_parameters) -from ..helpers import run_relaxed_validation_adjustment_cases - - @pytest.mark.robustness def test_validate_reward_parameters_relaxed_adjustment_batch(): """Batch relaxed validation adjustment scenarios using shared helper.""" @@ -141,9 +142,6 @@ def test_hold_penalty_short_duration_returns_zero(): assert penalty == 0.0 -from ..helpers import assert_exit_factor_invariant_suite - - @pytest.mark.robustness def test_exit_factor_invariant_suite_grouped(): """Grouped exit factor invariant scenarios using shared helper.""" diff --git a/ReforceXY/reward_space_analysis/tests/robustness/test_robustness.py b/ReforceXY/reward_space_analysis/tests/robustness/test_robustness.py index 3385d7f..3ec8148 100644 --- a/ReforceXY/reward_space_analysis/tests/robustness/test_robustness.py +++ b/ReforceXY/reward_space_analysis/tests/robustness/test_robustness.py @@ -3,7 +3,6 @@ import math import unittest -import warnings import numpy as np import pytest @@ -14,16 +13,17 @@ from reward_space_analysis import ( Actions, Positions, RewardContext, - RewardDiagnosticsWarning, _get_exit_factor, calculate_reward, simulate_samples, ) from ..helpers import ( + assert_diagnostic_warning, assert_exit_factor_attenuation_modes, assert_exit_mode_mathematical_validation, assert_single_active_component_with_additives, + capture_warnings, ) from ..test_base import RewardSpaceTestBase @@ -205,8 +205,7 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): position=Positions.Long, action=Actions.Long_exit, ) - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter("always") + with capture_warnings() as caught: baseline = calculate_reward( context, params, @@ -532,8 +531,7 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): pnl = 0.05 pnl_factor = 1.0 duration_ratio = 0.8 - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter("always", RewardDiagnosticsWarning) + with assert_diagnostic_warning(["Unknown exit_attenuation_mode"]): f_unknown = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params) linear_params = self.base_params(exit_attenuation_mode="linear", exit_plateau=False) f_linear = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, linear_params) @@ -543,14 +541,6 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): tolerance=self.TOL_IDENTITY_RELAXED, msg=f"Fallback linear mismatch unknown={f_unknown} linear={f_linear}", ) - diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)] - self.assertTrue( - diag_warnings, "No RewardDiagnosticsWarning emitted for unknown mode fallback" - ) - self.assertTrue( - any("Unknown exit_attenuation_mode" in str(w.message) for w in diag_warnings), - "Fallback warning message content mismatch", - ) # Owns invariant: robustness-negative-grace-clamp-103 def test_robustness_103_negative_plateau_grace_clamped(self): 
@@ -565,8 +555,7 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): pnl = 0.03 pnl_factor = 1.0 duration_ratio = 0.5 - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter("always", RewardDiagnosticsWarning) + with assert_diagnostic_warning(["exit_plateau_grace < 0"]): f_neg = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params) # Reference with grace=0.0 (since negative should clamp) ref_params = self.base_params( @@ -582,12 +571,6 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): tolerance=self.TOL_IDENTITY_RELAXED, msg=f"Negative grace clamp mismatch f_neg={f_neg} f_ref={f_ref}", ) - diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)] - self.assertTrue(diag_warnings, "No RewardDiagnosticsWarning for negative grace") - self.assertTrue( - any("exit_plateau_grace < 0" in str(w.message) for w in diag_warnings), - "Warning content missing for negative grace clamp", - ) # Owns invariant: robustness-invalid-power-tau-104 def test_robustness_104_invalid_power_tau_fallback_alpha_one(self): @@ -603,13 +586,9 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): params = self.base_params( exit_attenuation_mode="power", exit_power_tau=tau, exit_plateau=False ) - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter("always", RewardDiagnosticsWarning) + with assert_diagnostic_warning(["exit_power_tau"]): f0 = _get_exit_factor(base_factor, pnl, pnl_factor, 0.0, params) f1 = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params) - diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)] - self.assertTrue(diag_warnings, f"No RewardDiagnosticsWarning for invalid tau={tau}") - self.assertTrue(any("exit_power_tau" in str(w.message) for w in diag_warnings)) ratio = f1 / max(f0, self.TOL_NUMERIC_GUARD) self.assertAlmostEqual( ratio, @@ -628,20 +607,9 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): near_zero_values = [1e-15, 1e-12, 5e-14] for hl in near_zero_values: params = self.base_params(exit_attenuation_mode="half_life", exit_half_life=hl) - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter("always", RewardDiagnosticsWarning) + with assert_diagnostic_warning(["exit_half_life", "close to 0"]): _ = _get_exit_factor(base_factor, pnl, pnl_factor, 0.0, params) fdr = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params) - diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)] - self.assertTrue( - diag_warnings, f"No RewardDiagnosticsWarning for near-zero half-life hl={hl}" - ) - self.assertTrue( - any( - "exit_half_life" in str(w.message) and "close to 0" in str(w.message) - for w in diag_warnings - ) - ) self.assertAlmostEqualFloat( fdr, 1.0 * pnl_factor, # Kernel returns 1.0 then * pnl_factor diff --git a/ReforceXY/reward_space_analysis/tests/statistics/test_statistics.py b/ReforceXY/reward_space_analysis/tests/statistics/test_statistics.py index 45219cd..c0966ae 100644 --- a/ReforceXY/reward_space_analysis/tests/statistics/test_statistics.py +++ b/ReforceXY/reward_space_analysis/tests/statistics/test_statistics.py @@ -2,7 +2,6 @@ """Statistical tests, distribution metrics, and bootstrap validation.""" import unittest -import warnings import numpy as np import pandas as pd @@ -19,6 +18,7 @@ from reward_space_analysis import ( statistical_hypothesis_tests, ) +from ..helpers import assert_diagnostic_warning from ..test_base import 
RewardSpaceTestBase pytestmark = pytest.mark.statistics @@ -97,22 +97,6 @@ class TestStatistics(RewardSpaceTestBase): float(metrics[p_key]), 1.0, places=12, msg=f"Expected 1.0 for {p_key}" ) - def _make_idle_variance_df(self, n: int = 100) -> pd.DataFrame: - """Synthetic dataframe focusing on idle_duration ↔ reward_idle correlation.""" - self.seed_all(self.SEED) - idle_duration = np.random.exponential(10, n) - reward_idle = -0.01 * idle_duration + np.random.normal(0, 0.001, n) - return pd.DataFrame( - { - "idle_duration": idle_duration, - "reward_idle": reward_idle, - "position": np.random.choice([0.0, 0.5, 1.0], n), - "reward": np.random.normal(0, 1, n), - "pnl": np.random.normal(0, self.TEST_PNL_STD, n), - "trade_duration": np.random.exponential(20, n), - } - ) - def test_statistics_distribution_shift_metrics(self): """KL/JS/Wasserstein metrics.""" df1 = self._make_idle_variance_df(100) @@ -159,9 +143,11 @@ class TestStatistics(RewardSpaceTestBase): f"Metric {name} expected ≈ 0 on identical distributions (got {val})", ) elif name.endswith("_ks_statistic"): + from ..constants import STAT_TOL + self.assertLess( abs(val), - 0.005, + STAT_TOL.KS_STATISTIC_IDENTITY, f"KS statistic should be near 0 on identical distributions (got {val})", ) @@ -264,19 +250,23 @@ class TestStatistics(RewardSpaceTestBase): def test_stats_variance_vs_duration_spearman_sign(self): """trade_duration up => pnl variance up (rank corr >0).""" + from ..constants import SCENARIOS, STAT_TOL + rng = np.random.default_rng(99) n = 250 - trade_duration = np.linspace(1, 300, n) + trade_duration = np.linspace(1, SCENARIOS.DURATION_LONG, n) pnl = rng.normal(0, 1 + trade_duration / 400.0, n) ranks_dur = pd.Series(trade_duration).rank().to_numpy() ranks_var = pd.Series(np.abs(pnl)).rank().to_numpy() rho = np.corrcoef(ranks_dur, ranks_var)[0, 1] self.assertFinite(rho, name="spearman_rho") - self.assertGreater(rho, 0.1) + self.assertGreater(rho, STAT_TOL.CORRELATION_SIGNIFICANCE) def test_stats_scaling_invariance_distribution_metrics(self): """Equal scaling keeps KL/JS ≈0.""" - df1 = self._shift_scale_df(400) + from ..constants import SCENARIOS, STAT_TOL + + df1 = self._shift_scale_df(SCENARIOS.DEFAULT_SAMPLE_SIZE) scale = 3.5 df2 = df1.copy() df2["pnl"] *= scale @@ -286,7 +276,7 @@ class TestStatistics(RewardSpaceTestBase): if k.endswith("_kl_divergence") or k.endswith("_js_distance"): self.assertLess( abs(v), - 0.0005, + STAT_TOL.DISTRIBUTION_SHIFT, f"Expected near-zero divergence after equal scaling (k={k}, v={v})", ) @@ -306,8 +296,10 @@ class TestStatistics(RewardSpaceTestBase): def test_stats_bh_correction_null_false_positive_rate(self): """Null: low BH discovery rate.""" + from ..constants import SCENARIOS + rng = np.random.default_rng(1234) - n = 400 + n = SCENARIOS.NULL_HYPOTHESIS_SAMPLE_SIZE df = pd.DataFrame( { "pnl": rng.normal(0, 1, n), @@ -416,9 +408,11 @@ class TestStatistics(RewardSpaceTestBase): def test_stats_heteroscedasticity_pnl_validation(self): """PnL variance increases with trade duration (heteroscedasticity).""" + from ..constants import SCENARIOS + df = simulate_samples( params=self.base_params(max_trade_duration_candles=100), - num_samples=1000, + num_samples=SCENARIOS.SAMPLE_SIZE_LARGE + 200, seed=self.SEED_HETEROSCEDASTICITY, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, @@ -429,16 +423,18 @@ class TestStatistics(RewardSpaceTestBase): pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE, ) exit_data = df[df["reward_exit"] != 0].copy() - if len(exit_data) < 50: + if 
len(exit_data) < SCENARIOS.HETEROSCEDASTICITY_MIN_EXITS: self.skipTest("Insufficient exit actions for heteroscedasticity test") exit_data["duration_bin"] = pd.cut( exit_data["duration_ratio"], bins=4, labels=["Q1", "Q2", "Q3", "Q4"] ) variance_by_bin = exit_data.groupby("duration_bin")["pnl"].var().dropna() if "Q1" in variance_by_bin.index and "Q4" in variance_by_bin.index: + from ..constants import STAT_TOL + self.assertGreater( variance_by_bin["Q4"], - variance_by_bin["Q1"] * 0.8, + variance_by_bin["Q1"] * STAT_TOL.VARIANCE_RATIO_THRESHOLD, "PnL heteroscedasticity: variance should increase with duration", ) @@ -474,9 +470,11 @@ class TestStatistics(RewardSpaceTestBase): def test_stats_benjamini_hochberg_adjustment(self): """BH adjustment adds p_value_adj & significant_adj with valid bounds.""" + from ..constants import SCENARIOS + df = simulate_samples( params=self.base_params(max_trade_duration_candles=100), - num_samples=600, + num_samples=SCENARIOS.SAMPLE_SIZE_LARGE - 200, seed=self.SEED_HETEROSCEDASTICITY, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, @@ -518,8 +516,10 @@ class TestStatistics(RewardSpaceTestBase): def test_stats_bootstrap_shrinkage_with_sample_size(self): """Bootstrap CI half-width decreases with larger sample (~1/sqrt(n) heuristic).""" - small = self._shift_scale_df(80) - large = self._shift_scale_df(800) + from ..constants import SCENARIOS + + small = self._shift_scale_df(SCENARIOS.SAMPLE_SIZE_SMALL - 20) + large = self._shift_scale_df(SCENARIOS.SAMPLE_SIZE_LARGE) res_small = bootstrap_confidence_intervals(small, ["reward"], n_bootstrap=400) res_large = bootstrap_confidence_intervals(large, ["reward"], n_bootstrap=400) _, lo_s, hi_s = list(res_small.values())[0] @@ -535,8 +535,11 @@ """Invariant 113 (non-strict): constant distribution CI widened with warning (positive epsilon width).""" df = self._const_df(80) - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter("always", RewardDiagnosticsWarning) + with assert_diagnostic_warning( + ["degenerate", "bootstrap", "CI"], + warning_category=RewardDiagnosticsWarning, + strict_mode=False, + ): res = bootstrap_confidence_intervals( df, ["reward", "pnl"], @@ -544,11 +547,6 @@ confidence_level=0.95, strict_diagnostics=False, ) - diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)] - self.assertTrue( - diag_warnings, - "Expected RewardDiagnosticsWarning for degenerate bootstrap CI widening", - ) + from ..constants import STAT_TOL + for _metric, (mean, lo, hi) in res.items(): self.assertLess( lo, hi, ) width = hi - lo self.assertGreater(width, 0.0) - self.assertLessEqual(width, 3e-09, "Width should be small epsilon range (<=3e-9)") + self.assertLessEqual( + width, STAT_TOL.CI_WIDTH_EPSILON, "Width should be small epsilon range" + ) # Mean should be centered (approx) within widened bounds self.assertGreaterEqual(mean, lo) self.assertLessEqual(mean, hi) diff --git a/ReforceXY/reward_space_analysis/tests/test_base.py b/ReforceXY/reward_space_analysis/tests/test_base.py index 214cc80..c243e55 100644 --- a/ReforceXY/reward_space_analysis/tests/test_base.py +++ b/ReforceXY/reward_space_analysis/tests/test_base.py @@ -20,6 +20,13 @@ from reward_space_analysis import ( apply_potential_shaping, ) +from .constants import ( + CONTINUITY, + EXIT_FACTOR, + PBRS, + TOLERANCE, +) + # Global constants
PBRS_INTEGRATION_PARAMS = [ "potential_gamma", @@ -67,22 +74,35 @@ class RewardSpaceTestBase(unittest.TestCase): """Clean up temporary files.""" shutil.rmtree(self.temp_dir, ignore_errors=True) - PBRS_TERMINAL_TOL = 1e-12 - PBRS_MAX_ABS_SHAPING = 5.0 + # =============================================== + # Constants imported from tests.constants module + # =============================================== + + # Tolerance constants + TOL_IDENTITY_STRICT = TOLERANCE.IDENTITY_STRICT + TOL_IDENTITY_RELAXED = TOLERANCE.IDENTITY_RELAXED + TOL_GENERIC_EQ = TOLERANCE.GENERIC_EQ + TOL_NUMERIC_GUARD = TOLERANCE.NUMERIC_GUARD + TOL_NEGLIGIBLE = TOLERANCE.NEGLIGIBLE + TOL_RELATIVE = TOLERANCE.RELATIVE + TOL_DISTRIB_SHAPE = TOLERANCE.DISTRIB_SHAPE + + # PBRS constants + PBRS_TERMINAL_TOL = PBRS.TERMINAL_TOL + PBRS_MAX_ABS_SHAPING = PBRS.MAX_ABS_SHAPING + + # Continuity constants + CONTINUITY_EPS_SMALL = CONTINUITY.EPS_SMALL + CONTINUITY_EPS_LARGE = CONTINUITY.EPS_LARGE + + # Exit factor constants + MIN_EXIT_POWER_TAU = EXIT_FACTOR.MIN_POWER_TAU + + # Test-specific constants (not in constants.py) PBRS_TERMINAL_PROB = 0.08 PBRS_SWEEP_ITER = 120 - EPS_BASE = 1e-12 - TOL_NUMERIC_GUARD = EPS_BASE - TOL_IDENTITY_STRICT = EPS_BASE - TOL_IDENTITY_RELAXED = 1e-09 - TOL_GENERIC_EQ = 1e-06 - TOL_NEGLIGIBLE = 1e-08 - MIN_EXIT_POWER_TAU = 1e-06 - TOL_DISTRIB_SHAPE = 0.05 + EPS_BASE = TOLERANCE.IDENTITY_STRICT # Alias for backward compatibility JS_DISTANCE_UPPER_BOUND = math.sqrt(math.log(2.0)) - TOL_RELATIVE = 1e-09 - CONTINUITY_EPS_SMALL = 0.0001 - CONTINUITY_EPS_LARGE = 0.001 def make_ctx( self, @@ -412,3 +432,19 @@ class RewardSpaceTestBase(unittest.TestCase): "idle_duration": rng.exponential(10, n), } ) + + def _make_idle_variance_df(self, n: int = 100) -> pd.DataFrame: + """Synthetic dataframe focusing on idle_duration ↔ reward_idle correlation.""" + self.seed_all(self.SEED) + idle_duration = np.random.exponential(10, n) + reward_idle = -0.01 * idle_duration + np.random.normal(0, 0.001, n) + return pd.DataFrame( + { + "idle_duration": idle_duration, + "reward_idle": reward_idle, + "position": np.random.choice([0.0, 0.5, 1.0], n), + "reward": np.random.normal(0, 1, n), + "pnl": np.random.normal(0, self.TEST_PNL_STD, n), + "trade_duration": np.random.exponential(20, n), + } + ) -- 2.43.0
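
The helpers in this patch consume RewardScenarioConfig, ThresholdTestConfig, and ValidationConfig from tests/helpers/configs.py, a module the diffstat does not show. For reference while reviewing, here is a minimal sketch of definitions consistent with the call sites above; field names are inferred from usage, and the defaults for short_allowed and action_masking are assumptions (the assertion helpers read both attributes without the tests ever setting them):

from dataclasses import dataclass
from typing import List, Optional, Sequence, Tuple


@dataclass(frozen=True)
class ValidationConfig:
    """Tolerances and exclusions for component-sum integrity checks."""

    tolerance_strict: float
    tolerance_relaxed: float
    exclude_components: Optional[List[str]] = None
    component_description: str = "components"


@dataclass(frozen=True)
class ThresholdTestConfig:
    """Duration cases and tolerance for hold-penalty threshold tests."""

    max_duration: int
    test_cases: Sequence[Tuple[int, str]]  # (trade_duration, description)
    tolerance: float


@dataclass(frozen=True)
class RewardScenarioConfig:
    """Shared calculate_reward() arguments for scenario batches."""

    base_factor: float
    profit_target: float
    risk_reward_ratio: float
    tolerance_relaxed: float
    short_allowed: bool = True
    action_masking: bool = True

Frozen dataclasses keep the scenario parameters immutable across subtests, so call sites only spell out the fields a given test actually varies.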