From f478f4015637ac2e4ac3ca7ae5d77644d54cdd71 Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@piment-noir.org>
Date: Tue, 7 Oct 2025 00:45:55 +0200
Subject: [PATCH] refactor(reforcexy): cleanup reward API removal leftover
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: JÃ©rÃ´me Benoit <jerome.benoit@piment-noir.org>
---
 ReforceXY/reward_space_analysis/README.md     |  2 +-
 .../test_reward_space_analysis.py             | 65 +++++++++++++++----
 2 files changed, 53 insertions(+), 14 deletions(-)

diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md
index fc6175a..683ab27 100644
--- a/ReforceXY/reward_space_analysis/README.md
+++ b/ReforceXY/reward_space_analysis/README.md
@@ -668,7 +668,7 @@ All runs execute a sequence of failâfast validations; a failure aborts with a
 | Distribution Metrics | Real vs synthetic shifts | Metrics within mathematical bounds (KL â¥0, JS â[0,1], Wasserstein â¥0, KS stats/p â¤[0,1]). Degenerate distributions handled safely (zeroed metrics). |
 | Distribution Diagnostics | Normality & moments | Finite mean/std/skew/kurtosis; Shapiro p-value â[0,1]; variance non-negative. |
 | Hypothesis Tests | Test result dicts | p-values & effect sizes within valid ranges; optional multiple-testing adjustment (BenjaminiâHochberg). |
-| Exit Factor Attenuation | Time-based scaling | Centralized piecewise divisor helper ensures single source of truth; threshold is warning-only (no hard cap). |
+| Exit Factor Attenuation | Time-based scaling | Centralized plateau/attenuation divisor helper ensures single source of truth; threshold is warning-only (no hard cap). |
 
 ### Statistical Method Notes
 
diff --git a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py
index 88a2117..563bb8d 100644
--- a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py
+++ b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py
@@ -663,9 +663,12 @@ class TestRewardAlignment(RewardSpaceTestBase):
         )
 
     def test_exit_factor_calculation(self):
-        """Test exit factor calculation consistency."""
-        # Test different exit factor modes
-        modes_to_test = ["linear", "piecewise", "power"]
+        """Test exit factor calculation consistency across core modes + plateau variant.
+
+        Plateau behavior expressed via exit_plateau=True with a base kernel (e.g. linear).
+        """
+        # Core attenuation kernels (excluding legacy which is step-based)
+        modes_to_test = ["linear", "power"]
 
         for mode in modes_to_test:
             test_params = self.DEFAULT_PARAMS.copy()
@@ -677,12 +680,43 @@ class TestRewardAlignment(RewardSpaceTestBase):
                 duration_ratio=0.3,
                 params=test_params,
             )
-
             self.assertTrue(
                 np.isfinite(factor), f"Exit factor for {mode} should be finite"
             )
             self.assertGreater(factor, 0, f"Exit factor for {mode} should be positive")
 
+        # Plateau+linear variant sanity check (grace region at 0.5)
+        plateau_params = self.DEFAULT_PARAMS.copy()
+        plateau_params.update(
+            {
+                "exit_attenuation_mode": "linear",
+                "exit_plateau": True,
+                "exit_plateau_grace": 0.5,
+                "exit_linear_slope": 1.0,
+            }
+        )
+        plateau_factor_pre = rsa.compute_exit_factor(
+            base_factor=1.0,
+            pnl=0.02,
+            pnl_factor=1.5,
+            duration_ratio=0.4,  # inside grace
+            params=plateau_params,
+        )
+        plateau_factor_post = rsa.compute_exit_factor(
+            base_factor=1.0,
+            pnl=0.02,
+            pnl_factor=1.5,
+            duration_ratio=0.8,  # after grace => attenuated or equal (slope may reduce)
+            params=plateau_params,
+        )
+        self.assertGreater(plateau_factor_pre, 0)
+        self.assertGreater(plateau_factor_post, 0)
+        self.assertGreaterEqual(
+            plateau_factor_pre,
+            plateau_factor_post - 1e-12,
+            "Plateau pre-grace factor should be >= post-grace factor",
+        )
+
     def test_negative_slope_sanitization(self):
         """Negative slopes for linear must be sanitized to positive default (1.0)."""
         from reward_space_analysis import compute_exit_factor
@@ -691,7 +725,7 @@ class TestRewardAlignment(RewardSpaceTestBase):
         pnl = 0.04
         pnl_factor = 1.0
         duration_ratio_linear = 1.2  # any positive ratio
-        duration_ratio_piecewise = 1.3  # > grace so slope matters
+        duration_ratio_plateau = 1.3  # > grace so slope matters
 
         # Linear mode: slope -5.0 should behave like slope=1.0 (sanitized)
         params_lin_neg = self.DEFAULT_PARAMS.copy()
@@ -735,10 +769,10 @@ class TestRewardAlignment(RewardSpaceTestBase):
             }
         )
         val_pl_neg = compute_exit_factor(
-            base_factor, pnl, pnl_factor, duration_ratio_piecewise, params_pl_neg
+            base_factor, pnl, pnl_factor, duration_ratio_plateau, params_pl_neg
         )
         val_pl_pos = compute_exit_factor(
-            base_factor, pnl, pnl_factor, duration_ratio_piecewise, params_pl_pos
+            base_factor, pnl, pnl_factor, duration_ratio_plateau, params_pl_pos
         )
         self.assertAlmostEqualFloat(
             val_pl_neg,
@@ -1399,8 +1433,8 @@ class TestBoundaryConditions(RewardSpaceTestBase):
             "Reward should be finite even with extreme parameters",
         )
 
-    def test_different_exit_factor_modes(self):
-        """Test different exit factor calculation modes."""
+    def test_different_exit_attenuation_modes(self):
+        """Test different exit attenuation modes (legacy, sqrt, linear, power, half_life)."""
         modes = ["legacy", "sqrt", "linear", "power", "half_life"]
 
         for mode in modes:
@@ -1994,7 +2028,7 @@ class TestRewardRobustness(RewardSpaceTestBase):
     Covers:
     - Reward decomposition integrity (total == sum of active component exactly)
     - Exit factor monotonic attenuation per mode where mathematically expected
-    - Boundary parameter conditions (tau extremes, piecewise grace edges, linear slope = 0)
+    - Boundary parameter conditions (tau extremes, plateau grace edges, linear slope = 0)
     - Non-linear power tests for idle & holding penalties (power != 1)
     - Public wrapper `compute_exit_factor` (avoids private function usage in new tests)
     - Warning emission (exit_factor_threshold) without capping
@@ -2207,7 +2241,7 @@ class TestRewardRobustness(RewardSpaceTestBase):
         self.assertGreater(
             val_g1,
             val_g0,
-            "Piecewise grace=1.0 should delay attenuation vs grace=0.0",
+            "Plateau grace=1.0 should delay attenuation vs grace=0.0",
         )
         # Linear slope zero vs positive
         params_lin0 = self.DEFAULT_PARAMS.copy()
@@ -2327,13 +2361,18 @@ class TestRewardRobustness(RewardSpaceTestBase):
 
         params = self.DEFAULT_PARAMS.copy()
         # Try multiple modes / extreme params
-        modes = ["linear", "power", "piecewise", "half_life", "sqrt", "legacy"]
+        modes = ["linear", "power", "half_life", "sqrt", "legacy", "linear_plateau"]
         base_factor = 100.0
         pnl = 0.05
         pnl_factor = 2.0  # amplified
         for mode in modes:
             params_mode = params.copy()
-            params_mode["exit_factor_mode"] = mode
+            if mode == "linear_plateau":
+                params_mode["exit_attenuation_mode"] = "linear"
+                params_mode["exit_plateau"] = True
+                params_mode["exit_plateau_grace"] = 0.4
+            else:
+                params_mode["exit_attenuation_mode"] = mode
             val = compute_exit_factor(base_factor, pnl, pnl_factor, 2.0, params_mode)
             self.assertGreaterEqual(
                 val,
-- 
2.43.0