From bec090890850cb6b3ca3d464fdc848c4c0a551df Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Sat, 15 Nov 2025 21:32:27 +0100 Subject: [PATCH] test(reforcexy): improve PBRS impact analysis MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- .../reward_space_analysis.py | 113 ++++++++++++++---- .../reward_space_analysis/tests/README.md | 90 ++++++++++---- .../tests/api/test_api_helpers.py | 2 +- .../tests/cli/test_cli_params_and_csv.py | 48 ++++++++ .../tests/components/test_additives.py | 2 +- .../components/test_reward_components.py | 58 ++++++++- .../tests/components/test_transforms.py | 2 +- .../integration/test_report_formatting.py | 65 ++++++++++ .../integration/test_reward_calculation.py | 13 +- 9 files changed, 343 insertions(+), 50 deletions(-) diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py index edb26a1..7e0da69 100644 --- a/ReforceXY/reward_space_analysis/reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py @@ -637,6 +637,10 @@ class RewardBreakdown: exit_additive: float = 0.0 prev_potential: float = 0.0 next_potential: float = 0.0 + # PBRS helpers + base_reward: float = 0.0 + pbrs_delta: float = 0.0 # Δ(s,s') = γ·Φ(s') − Φ(s) + invariance_correction: float = 0.0 def _get_exit_factor( @@ -1085,28 +1089,30 @@ def calculate_reward( else float(current_potential) ) - total_reward, reward_shaping, next_potential = apply_potential_shaping( - base_reward=base_reward, - current_pnl=current_pnl, - current_duration_ratio=current_duration_ratio, - next_pnl=next_pnl, - next_duration_ratio=next_duration_ratio, - is_exit=is_exit, - is_entry=is_entry, - previous_potential=current_potential, - last_potential=last_potential, - params=params, + total_reward, reward_shaping, next_potential, pbrs_delta, entry_additive, exit_additive = ( + apply_potential_shaping( + 
base_reward=base_reward, + current_pnl=current_pnl, + current_duration_ratio=current_duration_ratio, + next_pnl=next_pnl, + next_duration_ratio=next_duration_ratio, + is_exit=is_exit, + is_entry=is_entry, + previous_potential=current_potential, + last_potential=last_potential, + params=params, + ) ) breakdown.reward_shaping = reward_shaping breakdown.prev_potential = current_potential breakdown.next_potential = next_potential - breakdown.entry_additive = ( - _compute_entry_additive(next_pnl, next_duration_ratio, params) if is_entry else 0.0 - ) - breakdown.exit_additive = ( - _compute_exit_additive(current_pnl, current_duration_ratio, params) if is_exit else 0.0 - ) + breakdown.entry_additive = entry_additive + breakdown.exit_additive = exit_additive + breakdown.base_reward = base_reward + breakdown.pbrs_delta = pbrs_delta + # In canonical mode with additives disabled, this should be ~0 + breakdown.invariance_correction = reward_shaping - pbrs_delta breakdown.total = total_reward else: breakdown.total = base_reward @@ -1291,6 +1297,10 @@ def simulate_samples( "reward_exit_additive": breakdown.exit_additive, "prev_potential": breakdown.prev_potential, "next_potential": breakdown.next_potential, + # PBRS columns + "reward_base": breakdown.base_reward, + "reward_pbrs_delta": breakdown.pbrs_delta, + "reward_invariance_correction": breakdown.invariance_correction, "is_invalid": float(breakdown.invalid_penalty != 0.0), "pbrs_invariant": bool(pbrs_invariant), } @@ -2731,9 +2741,15 @@ def apply_potential_shaping( is_entry: bool = False, previous_potential: float = np.nan, last_potential: Optional[float] = None, -) -> tuple[float, float, float]: +) -> tuple[float, float, float, float, float, float]: """Compute shaped reward with explicit PBRS semantics. 
+ Returns + ------- + tuple[float, float, float, float, float, float] + (reward, reward_shaping, next_potential, pbrs_delta, entry_additive, exit_additive) + where pbrs_delta = gamma * next_potential - prev_term is the pure PBRS component. + Notes ----- - Shaping Δ = γ·Φ(next) − Φ(prev) with prev = Φ(current_pnl, current_duration_ratio). @@ -2761,9 +2777,7 @@ def apply_potential_shaping( if not np.isfinite(prev_term): prev_term = 0.0 - # Next potential per transition type if is_exit: - # Exit potential is derived from the last potential if provided; otherwise from Φ(prev) (prev_term) last_potential = ( float(last_potential) if (last_potential is not None and np.isfinite(last_potential)) @@ -2774,7 +2788,8 @@ def apply_potential_shaping( next_potential = _compute_hold_potential(next_pnl, next_duration_ratio, params) # PBRS shaping Δ = γ·Φ(next) − Φ(prev) - reward_shaping = gamma * next_potential - float(prev_term) + pbrs_delta = gamma * next_potential - float(prev_term) + reward_shaping = pbrs_delta # Non-PBRS additives # Pre-compute candidate additives (return 0.0 if corresponding feature disabled) @@ -2786,10 +2801,18 @@ def apply_potential_shaping( reward = base_reward + reward_shaping + entry_additive + exit_additive if not np.isfinite(reward): - return float(base_reward), 0.0, 0.0 + return float(base_reward), 0.0, 0.0, 0.0, 0.0, 0.0 if np.isclose(reward_shaping, 0.0): reward_shaping = 0.0 - return float(reward), float(reward_shaping), float(next_potential) + pbrs_delta = 0.0 + return ( + float(reward), + float(reward_shaping), + float(next_potential), + float(pbrs_delta), + float(entry_additive), + float(exit_additive), + ) def _enforce_pbrs_invariance(params: RewardParams) -> RewardParams: @@ -3392,6 +3415,50 @@ def write_complete_statistical_analysis( pbrs_stats_df.index.name = "component" f.write(_df_to_md(pbrs_stats_df, index_name="component", ndigits=6)) + # PBRS metrics + pbrs_tracing_cols = ["reward_base", "reward_pbrs_delta", 
"reward_invariance_correction"] + if all(col in df.columns for col in pbrs_tracing_cols): + f.write("**PBRS Metrics (Tracing):**\n\n") + f.write("Internal decomposition of reward shaping for diagnostic analysis:\n\n") + + # Calculate key metrics + mean_base = df["reward_base"].mean() + std_base = df["reward_base"].std() + mean_pbrs = df["reward_pbrs_delta"].mean() + std_pbrs = df["reward_pbrs_delta"].std() + mean_inv_corr = df["reward_invariance_correction"].mean() + std_inv_corr = df["reward_invariance_correction"].std() + max_inv_corr = df["reward_invariance_correction"].abs().max() + + # Calculate ratio of |pbrs_delta| / |base_reward| (only where base_reward != 0) + base_nonzero = df[df["reward_base"].abs() > 1e-10] + if len(base_nonzero) > 0: + pbrs_to_base_ratio = ( + base_nonzero["reward_pbrs_delta"].abs() / base_nonzero["reward_base"].abs() + ).mean() + else: + pbrs_to_base_ratio = 0.0 + + f.write("| Metric | Value | Description |\n") + f.write("|--------|-------|-------------|\n") + f.write(f"| Mean Base Reward | {mean_base:.6f} | Average reward before PBRS |\n") + f.write(f"| Std Base Reward | {std_base:.6f} | Variability of base reward |\n") + f.write(f"| Mean PBRS Delta | {mean_pbrs:.6f} | Average γ·Φ(s')−Φ(s) |\n") + f.write(f"| Std PBRS Delta | {std_pbrs:.6f} | Variability of PBRS delta |\n") + f.write( + f"| Mean Invariance Correction | {mean_inv_corr:.6f} | Average reward_shaping − pbrs_delta |\n" + ) + f.write( + f"| Std Invariance Correction | {std_inv_corr:.6f} | Variability of correction |\n" + ) + f.write( + f"| Max \\|Invariance Correction\\| | {max_inv_corr:.6e} | Peak deviation from pure PBRS |\n" + ) + f.write( + f"| Mean \\|PBRS\\| / \\|Base\\| Ratio | {pbrs_to_base_ratio:.4f} | Shaping magnitude vs base reward |\n" + ) + f.write("\n") + # PBRS invariance check total_shaping = df["reward_shaping"].sum() entry_add_total = df.get("reward_entry_additive", pd.Series([0])).sum() diff --git a/ReforceXY/reward_space_analysis/tests/README.md 
b/ReforceXY/reward_space_analysis/tests/README.md index 3f4d554..f6fc86c 100644 --- a/ReforceXY/reward_space_analysis/tests/README.md +++ b/ReforceXY/reward_space_analysis/tests/README.md @@ -30,6 +30,49 @@ Single ownership per invariant is tracked in the Coverage Mapping section of thi Markers are declared in `pyproject.toml` and enforced with `--strict-markers`. +## Test Framework + +The test suite uses **pytest as the runner** with **unittest.TestCase as the base class** (via `RewardSpaceTestBase`). + +### Hybrid Approach Rationale + +This design provides: + +- **pytest features**: Rich fixture system, parametrization, markers, and selective execution +- **unittest assertions**: Familiar assertion methods (`assertAlmostEqual`, `assertIn`, `assertLess`, etc.) +- **Custom assertions**: Project-specific helpers (e.g., `assertFinite`, `assert_component_sum_integrity`) built on unittest base +- **Backward compatibility**: Gradual migration path from pure unittest + +### Base Class + +All test classes inherit from `RewardSpaceTestBase` (defined in `test_base.py`): + +```python +from ..test_base import RewardSpaceTestBase + +class TestMyFeature(RewardSpaceTestBase): + def test_something(self): + self.assertFinite(value) # project-specific unittest-style assertion +``` + +### Markers + +Module-level markers are declared via `pytestmark`: + +```python +import pytest + +pytestmark = pytest.mark.components +``` + +Individual tests can add additional markers: + +```python +@pytest.mark.smoke +def test_quick_check(self): + ... +``` + ## Running Tests Full suite (coverage ≥85% enforced): @@ -70,28 +113,31 @@ Columns: - Owning File: Path:line of primary declaration (prefer comment line `# Owns invariant:` when present; otherwise docstring line). - Notes: Clarifications (sub-modes, extensions, non-owning references elsewhere, line clusters for multi-path coverage). 
-| ID | Category | Description | Owning File | Notes | -| -------------------------------------------- | ----------- | ----------------------------------------------------------------------------------- | --------------------------------------- | --------------------------------------------------------------------------------------------------------------------------- | -| report-abs-shaping-line-091 | integration | Abs Σ Shaping Reward line present & formatted | integration/test_report_formatting.py:4 | Module docstring; primary test at line 84. PBRS report may render line; formatting owned here (core assertion lines 84–103) | -| report-additives-deterministic-092 | components | Additives deterministic report section | components/test_additives.py:4 | Integration/PBRS may reference outcome non-owning | -| robustness-decomposition-integrity-101 | robustness | Single active core component equals total reward under mutually exclusive scenarios | robustness/test_robustness.py:36 | Scenarios: idle, hold, exit, invalid; non-owning refs integration/test_reward_calculation.py | -| robustness-exit-mode-fallback-102 | robustness | Unknown exit_attenuation_mode falls back to linear w/ warning | robustness/test_robustness.py:525 | Comment line (function at :526) | -| robustness-negative-grace-clamp-103 | robustness | Negative exit_plateau_grace clamps to 0.0 w/ warning | robustness/test_robustness.py:555 | | -| robustness-invalid-power-tau-104 | robustness | Invalid power tau falls back alpha=1.0 w/ warning | robustness/test_robustness.py:592 | | -| robustness-near-zero-half-life-105 | robustness | Near-zero half life yields no attenuation (factor≈base) | robustness/test_robustness.py:621 | | -| pbrs-canonical-drift-correction-106 | pbrs | Canonical drift correction enforces near zero-sum shaping | pbrs/test_pbrs.py:449 | Multi-path: extension fallback (475), comparison path (517) | -| pbrs-canonical-near-zero-report-116 | pbrs | Canonical near-zero cumulative shaping 
classification | pbrs/test_pbrs.py:748 | Full report classification | -| statistics-partial-deps-skip-107 | statistics | skip_partial_dependence => empty PD structures | statistics/test_statistics.py:28 | Docstring line | -| helpers-duplicate-rows-drop-108 | helpers | Duplicate rows dropped w/ warning counting removals | helpers/test_utilities.py:26 | Docstring line | -| helpers-missing-cols-fill-109 | helpers | Missing required columns filled with NaN + single warning | helpers/test_utilities.py:50 | Docstring line | -| statistics-binned-stats-min-edges-110 | statistics | <2 bin edges raises ValueError | statistics/test_statistics.py:45 | Docstring line | -| statistics-constant-cols-exclusion-111 | statistics | Constant columns excluded & listed | statistics/test_statistics.py:57 | Docstring line | -| statistics-degenerate-distribution-shift-112 | statistics | Degenerate dist: zero shift metrics & KS p=1.0 | statistics/test_statistics.py:74 | Docstring line | -| statistics-constant-dist-widened-ci-113a | statistics | Non-strict: widened CI with warning | statistics/test_statistics.py:533 | Test docstring labels "Invariant 113 (non-strict)" | -| statistics-constant-dist-strict-omit-113b | statistics | Strict: omit metrics (no widened CI) | statistics/test_statistics.py:565 | Test docstring labels "Invariant 113 (strict)" | -| statistics-fallback-diagnostics-115 | statistics | Fallback diagnostics constant distribution (qq_r2=1.0 etc.) 
| statistics/test_statistics.py:190 | Docstring line | -| robustness-exit-pnl-only-117 | robustness | Only exit actions have non-zero PnL | robustness/test_robustness.py:126 | Newly assigned ID (previously unnumbered) | -| pbrs-absence-shift-placeholder-118 | pbrs | Placeholder shift line present (absence displayed) | pbrs/test_pbrs.py:979 | Ensures placeholder appears when shaping shift absent | +| ID | Category | Description | Owning File | Notes | +| -------------------------------------------- | ----------- | ----------------------------------------------------------------------------------- | ----------------------------------------- | --------------------------------------------------------------------------------------------------------------------------- | +| report-abs-shaping-line-091 | integration | Abs Σ Shaping Reward line present & formatted | integration/test_report_formatting.py:4 | Module docstring; primary test at line 84. PBRS report may render line; formatting owned here (core assertion lines 84–103) | +| report-additives-deterministic-092 | components | Additives deterministic report section | components/test_additives.py:4 | Integration/PBRS may reference outcome non-owning | +| robustness-decomposition-integrity-101 | robustness | Single active core component equals total reward under mutually exclusive scenarios | robustness/test_robustness.py:36 | Scenarios: idle, hold, exit, invalid; non-owning refs integration/test_reward_calculation.py | +| robustness-exit-mode-fallback-102 | robustness | Unknown exit_attenuation_mode falls back to linear w/ warning | robustness/test_robustness.py:525 | Comment line (function at :526) | +| robustness-negative-grace-clamp-103 | robustness | Negative exit_plateau_grace clamps to 0.0 w/ warning | robustness/test_robustness.py:555 | | +| robustness-invalid-power-tau-104 | robustness | Invalid power tau falls back alpha=1.0 w/ warning | robustness/test_robustness.py:592 | | +| 
robustness-near-zero-half-life-105 | robustness | Near-zero half life yields no attenuation (factor≈base) | robustness/test_robustness.py:621 | | +| pbrs-canonical-drift-correction-106 | pbrs | Canonical drift correction enforces near zero-sum shaping | pbrs/test_pbrs.py:449 | Multi-path: extension fallback (475), comparison path (517) | +| pbrs-canonical-near-zero-report-116 | pbrs | Canonical near-zero cumulative shaping classification | pbrs/test_pbrs.py:748 | Full report classification | +| statistics-partial-deps-skip-107 | statistics | skip_partial_dependence => empty PD structures | statistics/test_statistics.py:28 | Docstring line | +| helpers-duplicate-rows-drop-108 | helpers | Duplicate rows dropped w/ warning counting removals | helpers/test_utilities.py:26 | Docstring line | +| helpers-missing-cols-fill-109 | helpers | Missing required columns filled with NaN + single warning | helpers/test_utilities.py:50 | Docstring line | +| statistics-binned-stats-min-edges-110 | statistics | <2 bin edges raises ValueError | statistics/test_statistics.py:45 | Docstring line | +| statistics-constant-cols-exclusion-111 | statistics | Constant columns excluded & listed | statistics/test_statistics.py:57 | Docstring line | +| statistics-degenerate-distribution-shift-112 | statistics | Degenerate dist: zero shift metrics & KS p=1.0 | statistics/test_statistics.py:74 | Docstring line | +| statistics-constant-dist-widened-ci-113a | statistics | Non-strict: widened CI with warning | statistics/test_statistics.py:533 | Test docstring labels "Invariant 113 (non-strict)" | +| statistics-constant-dist-strict-omit-113b | statistics | Strict: omit metrics (no widened CI) | statistics/test_statistics.py:565 | Test docstring labels "Invariant 113 (strict)" | +| statistics-fallback-diagnostics-115 | statistics | Fallback diagnostics constant distribution (qq_r2=1.0 etc.) 
| statistics/test_statistics.py:190 | Docstring line | +| robustness-exit-pnl-only-117 | robustness | Only exit actions have non-zero PnL | robustness/test_robustness.py:126 | Newly assigned ID (previously unnumbered) | +| pbrs-absence-shift-placeholder-118 | pbrs | Placeholder shift line present (absence displayed) | pbrs/test_pbrs.py:979 | Ensures placeholder appears when shaping shift absent | +| components-pbrs-breakdown-fields-119 | components | PBRS breakdown fields finite and mathematically aligned | components/test_reward_components.py:454 | Tests base_reward, pbrs_delta, invariance_correction fields and their alignment | +| integration-pbrs-metrics-section-120 | integration | PBRS Metrics section present in report with tracing metrics | integration/test_report_formatting.py:156 | Verifies PBRS Metrics (Tracing) subsection rendering in statistical_analysis.md | +| cli-pbrs-csv-columns-121 | cli | PBRS columns in reward_samples.csv when shaping enabled | cli/test_cli_params_and_csv.py:240 | Ensures reward_base, reward_pbrs_delta, reward_invariance_correction columns exist and contain finite values | ### Non-Owning Smoke / Reference Checks diff --git a/ReforceXY/reward_space_analysis/tests/api/test_api_helpers.py b/ReforceXY/reward_space_analysis/tests/api/test_api_helpers.py index 9e3bb60..a93a26c 100644 --- a/ReforceXY/reward_space_analysis/tests/api/test_api_helpers.py +++ b/ReforceXY/reward_space_analysis/tests/api/test_api_helpers.py @@ -28,7 +28,7 @@ from reward_space_analysis import ( from ..test_base import RewardSpaceTestBase -pytestmark = pytest.mark.api # taxonomy classification +pytestmark = pytest.mark.api class TestAPIAndHelpers(RewardSpaceTestBase): diff --git a/ReforceXY/reward_space_analysis/tests/cli/test_cli_params_and_csv.py b/ReforceXY/reward_space_analysis/tests/cli/test_cli_params_and_csv.py index 7d041e9..e6a425a 100644 --- a/ReforceXY/reward_space_analysis/tests/cli/test_cli_params_and_csv.py +++ 
b/ReforceXY/reward_space_analysis/tests/cli/test_cli_params_and_csv.py @@ -236,6 +236,54 @@ class TestParamsPropagation(RewardSpaceTestBase): self.assertIn("max_trade_duration_candles", rp) self.assertEqual(int(rp["max_trade_duration_candles"]), 64) + # Owns invariant: cli-pbrs-csv-columns-121 + def test_csv_contains_pbrs_columns_when_shaping_present(self): + """Verify reward_samples.csv includes PBRS columns when shaping is enabled. + + Verifies: + - reward_base, reward_pbrs_delta, reward_invariance_correction columns exist + - All values are finite (no NaN/inf) + - Column values align mathematically + """ + out_dir = self.output_path / "pbrs_csv_columns" + cmd = [ + "uv", + "run", + sys.executable, + str(SCRIPT_PATH), + "--num_samples", + "150", + "--seed", + str(self.SEED), + "--out_dir", + str(out_dir), + # Enable PBRS shaping explicitly + "--params", + "exit_potential_mode=canonical", + ] + result = subprocess.run( + cmd, capture_output=True, text=True, cwd=Path(__file__).parent.parent + ) + self.assertEqual(result.returncode, 0, f"CLI failed: {result.stderr}") + + csv_path = out_dir / "reward_samples.csv" + self.assertTrue(csv_path.exists(), "Missing reward_samples.csv") + + df = pd.read_csv(csv_path) + + # Verify PBRS columns exist + required_cols = ["reward_base", "reward_pbrs_delta", "reward_invariance_correction"] + for col in required_cols: + self.assertIn(col, df.columns, f"Missing column: {col}") + + # Verify all values are finite + for col in required_cols: + self.assertFalse(df[col].isna().any(), f"Column {col} contains NaN values") + self.assertTrue( + df[col].apply(lambda x: abs(x) < float("inf")).all(), + f"Column {col} contains infinite values", + ) + if __name__ == "__main__": unittest.main() diff --git a/ReforceXY/reward_space_analysis/tests/components/test_additives.py b/ReforceXY/reward_space_analysis/tests/components/test_additives.py index 8d70f99..33a0869 100644 --- a/ReforceXY/reward_space_analysis/tests/components/test_additives.py +++ 
b/ReforceXY/reward_space_analysis/tests/components/test_additives.py @@ -12,7 +12,7 @@ from reward_space_analysis import apply_potential_shaping from ..test_base import RewardSpaceTestBase -pytestmark = pytest.mark.components # selective execution marker +pytestmark = pytest.mark.components class TestAdditivesDeterministicContribution(RewardSpaceTestBase): diff --git a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py index c2e5cc5..d7914dc 100644 --- a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py +++ b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py @@ -29,7 +29,7 @@ from ..helpers import ( ) from ..test_base import RewardSpaceTestBase -pytestmark = pytest.mark.components # selective execution marker +pytestmark = pytest.mark.components class TestRewardComponents(RewardSpaceTestBase): @@ -451,6 +451,62 @@ class TestRewardComponents(RewardSpaceTestBase): implied_D = 120 / observed_ratio ** (1 / idle_penalty_power) self.assertAlmostEqualFloat(implied_D, 400.0, tolerance=20.0) + # Owns invariant: components-pbrs-breakdown-fields-119 + def test_pbrs_breakdown_fields_finite_and_aligned(self): + """Test PBRS breakdown fields are finite and mathematically aligned. 
+ + Verifies: + - base_reward, pbrs_delta, invariance_correction are finite + - reward_shaping = pbrs_delta + invariance_correction (within tolerance) + - In canonical mode with no additives: invariance_correction ≈ 0 + """ + # Test with canonical PBRS (invariance_correction should be ~0) + canonical_params = self.base_params( + exit_potential_mode="canonical", + entry_additive_enabled=False, + exit_additive_enabled=False, + ) + context = self.make_ctx( + pnl=0.02, + trade_duration=50, + idle_duration=0, + max_unrealized_profit=0.03, + min_unrealized_profit=0.01, + position=Positions.Long, + action=Actions.Long_exit, + ) + breakdown = calculate_reward( + context, + canonical_params, + base_factor=self.TEST_BASE_FACTOR, + profit_target=self.TEST_PROFIT_TARGET, + risk_reward_ratio=self.TEST_RR, + short_allowed=True, + action_masking=True, + ) + + # Verify all PBRS fields are finite + self.assertFinite(breakdown.base_reward, name="base_reward") + self.assertFinite(breakdown.pbrs_delta, name="pbrs_delta") + self.assertFinite(breakdown.invariance_correction, name="invariance_correction") + + # Verify mathematical alignment: reward_shaping = pbrs_delta + invariance_correction + expected_shaping = breakdown.pbrs_delta + breakdown.invariance_correction + self.assertAlmostEqualFloat( + breakdown.reward_shaping, + expected_shaping, + tolerance=self.TOL_IDENTITY_STRICT, + msg="reward_shaping should equal pbrs_delta + invariance_correction", + ) + + # In canonical mode with no additives, invariance_correction should be ~0 + self.assertAlmostEqualFloat( + breakdown.invariance_correction, + 0.0, + tolerance=self.TOL_IDENTITY_STRICT, + msg="invariance_correction should be ~0 in canonical mode", + ) + if __name__ == "__main__": unittest.main() diff --git a/ReforceXY/reward_space_analysis/tests/components/test_transforms.py b/ReforceXY/reward_space_analysis/tests/components/test_transforms.py index f06e138..0b49410 100644 --- 
a/ReforceXY/reward_space_analysis/tests/components/test_transforms.py +++ b/ReforceXY/reward_space_analysis/tests/components/test_transforms.py @@ -12,7 +12,7 @@ from reward_space_analysis import apply_transform from ..test_base import RewardSpaceTestBase -pytestmark = pytest.mark.transforms # taxonomy classification +pytestmark = pytest.mark.transforms class TestTransforms(RewardSpaceTestBase): diff --git a/ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py b/ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py index 782710e..109f513 100644 --- a/ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py +++ b/ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py @@ -9,12 +9,15 @@ import unittest import numpy as np import pandas as pd +import pytest from reward_space_analysis import PBRS_INVARIANCE_TOL, write_complete_statistical_analysis from ..constants import SCENARIOS from ..test_base import RewardSpaceTestBase +pytestmark = pytest.mark.integration + class TestReportFormatting(RewardSpaceTestBase): def test_statistical_validation_section_absent_when_no_hypothesis_tests(self): @@ -153,6 +156,68 @@ class TestReportFormatting(RewardSpaceTestBase): # Ensure no partial dependence plots line for success path appears self.assertNotIn("partial_dependence_*.csv", content) + # Owns invariant: integration-pbrs-metrics-section-120 + def test_report_includes_pbrs_metrics_section(self): + """Verify statistical_analysis.md includes PBRS Metrics section with tracing metrics. 
+ + Verifies: + - PBRS Metrics subsection exists when PBRS columns present + - Section includes Mean Base Reward, Mean PBRS Delta, Mean Invariance Correction + - All metrics are formatted with proper precision + """ + # Create df with PBRS columns + n = 100 + df = pd.DataFrame( + { + "reward": np.random.normal(0, 0.1, n), + "reward_invalid": np.zeros(n), + "reward_idle": np.zeros(n), + "reward_hold": np.zeros(n), + "reward_exit": np.random.normal(0, 0.05, n), + "reward_shaping": np.random.normal(0, 0.02, n), + "reward_entry_additive": np.zeros(n), + "reward_exit_additive": np.zeros(n), + # PBRS columns + "reward_base": np.random.normal(0, 0.1, n), + "reward_pbrs_delta": np.random.normal(0, 0.02, n), + "reward_invariance_correction": np.random.normal(0, 1e-6, n), + "pnl": np.random.normal(0, 0.01, n), + "trade_duration": np.random.randint(10, 100, n).astype(float), + "idle_duration": np.zeros(n), + "position": np.random.choice([0, 1, 2], n).astype(float), + "action": np.random.choice([0, 1, 2, 3, 4], n).astype(float), + "duration_ratio": np.random.uniform(0, 1, n), + "idle_ratio": np.zeros(n), + } + ) + + content = self._write_report(df) + + # Verify PBRS Metrics section exists + self.assertIn("**PBRS Metrics (Tracing):**", content) + + # Verify key metrics are present + required_metrics = [ + "Mean Base Reward", + "Std Base Reward", + "Mean PBRS Delta", + "Std PBRS Delta", + "Mean Invariance Correction", + "Std Invariance Correction", + "Max \\|Invariance Correction\\|", + "Mean \\|PBRS\\| / \\|Base\\| Ratio", + ] + + for metric in required_metrics: + self.assertIn(metric, content, f"Missing metric in PBRS Metrics section: {metric}") + + # Verify proper formatting (values should be formatted with proper precision) + import re as _re + + # Check for at least one properly formatted metric line + m = _re.search(r"\| Mean Base Reward \| (-?[0-9]+\.[0-9]{6}) \|", content) + self.assertIsNotNone(m, "Mean Base Reward metric missing or misformatted") + if __name__ == 
"__main__": unittest.main() diff --git a/ReforceXY/reward_space_analysis/tests/integration/test_reward_calculation.py b/ReforceXY/reward_space_analysis/tests/integration/test_reward_calculation.py index 4d98897..f0050be 100644 --- a/ReforceXY/reward_space_analysis/tests/integration/test_reward_calculation.py +++ b/ReforceXY/reward_space_analysis/tests/integration/test_reward_calculation.py @@ -1,4 +1,13 @@ -"""Integration smoke tests: component activation and long/short symmetry.""" +"""Integration smoke tests: component activation and long/short symmetry. + +Non-owning smoke tests covering: +- Component activation scenarios (ownership: robustness/test_robustness.py) +- Long/short symmetry verification +- High-level reward calculation integration + +These tests verify integration behavior without owning specific invariants. +Detailed invariant ownership is tracked in tests/README.md Coverage Mapping. +""" import pytest @@ -10,6 +19,8 @@ from reward_space_analysis import ( from ..test_base import RewardSpaceTestBase +pytestmark = pytest.mark.integration + class TestRewardCalculation(RewardSpaceTestBase): """High-level integration smoke tests for reward calculation.""" -- 2.43.0