From: Jérôme Benoit <jerome.benoit@piment-noir.org>
Date: Mon, 6 Oct 2025 12:55:28 +0000 (+0200)
Subject: feat(reward): allow exit_piecewise_grace >1 and extend no-attenuation region; docs... 
X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=0a8763f9dd98dd1366321d6dd8a35d5a102d33fa;p=freqai-strategies.git

feat(reward): allow exit_piecewise_grace >1 and extend no-attenuation region; docs+tests updated
---

diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md
index cab7e5c..d77e8f6 100644
--- a/ReforceXY/reward_space_analysis/README.md
+++ b/ReforceXY/reward_space_analysis/README.md
@@ -230,7 +230,7 @@ _Exit factor configuration:_
 
 - `exit_factor_mode` (default: piecewise) - Time attenuation mode for exit factor (legacy|sqrt|linear|power|piecewise|half_life)
 - `exit_linear_slope` (default: 1.0) - Slope for linear exit attenuation
-- `exit_piecewise_grace` (default: 1.0) - Grace region fraction [0,1]; divisor=1 within grace
+- `exit_piecewise_grace` (default: 1.0) - Grace region boundary (duration ratio); values >1.0 extend no-attenuation period
 - `exit_piecewise_slope` (default: 1.0) - Slope after grace for piecewise mode (0 → flat beyond grace)
 - `exit_power_tau` (default: 0.5) - Tau in (0,1] mapped to alpha = -ln(tau)/ln(2)
 - `exit_half_life` (default: 0.5) - Half-life for exponential decay exit mode (factor *= 2^(-r/half_life))
@@ -664,7 +664,7 @@ Design intent: maintain a single canonical defaults map + explicit bounds; no si
 | `holding_penalty_scale` | 0.0 | ∞ | Scale ≥ 0 |
 | `holding_penalty_power` | 0.0 | ∞ | Power exponent ≥ 0 |
 | `exit_linear_slope` | 0.0 | ∞ | Slope ≥ 0 |
-| `exit_piecewise_grace` | 0.0 | 1.0 | Fraction of max duration (grace region) |
+| `exit_piecewise_grace` | 0.0 | ∞ | Grace boundary expressed in duration ratio units (can exceed 1.0 to extend full-strength region) |
 | `exit_piecewise_slope` | 0.0 | ∞ | Slope ≥ 0 |
 | `exit_power_tau` | 1e-6 | 1.0 | Mapped to alpha = -ln(tau) |
 | `exit_half_life` | 1e-6 | ∞ | Half-life in duration ratio units |
diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py
index 9c698fb..6456db6 100644
--- a/ReforceXY/reward_space_analysis/reward_space_analysis.py
+++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py
@@ -96,8 +96,9 @@ def _piecewise_duration_divisor(
     exception fallback in ``_get_exit_factor`` without duplicating logic.
     """
     exit_piecewise_grace = _get_param_float(params, "exit_piecewise_grace", 1.0)
-    if not (0.0 <= exit_piecewise_grace <= 1.0):  # sanitize grace range
-        exit_piecewise_grace = 1.0
+    # Only enforce a lower bound; values >1.0 extend the grace region beyond max duration ratio.
+    if exit_piecewise_grace < 0.0:
+        exit_piecewise_grace = 0.0
     exit_piecewise_slope = _get_param_float(params, "exit_piecewise_slope", 1.0)
     if exit_piecewise_slope < 0.0:  # sanitize slope sign
         exit_piecewise_slope = 1.0
@@ -387,7 +388,7 @@ def _get_exit_factor(
             "power",
             "half_life",
         }:
-            # Default & fallback behaviour consolidated
+            # Default behaviour
             factor /= _piecewise_duration_divisor(duration_ratio, params)
         elif exit_factor_mode == "half_life":
             exit_half_life = _get_param_float(params, "exit_half_life", 0.5)
@@ -487,16 +488,11 @@ def _idle_penalty(
     """Mirror the environment's idle penalty behaviour."""
     idle_penalty_scale = _get_param_float(params, "idle_penalty_scale", 1.0)
     idle_penalty_power = _get_param_float(params, "idle_penalty_power", 1.0)
-    max_idle_duration_cfg = int(
+    max_idle_duration = int(
         params.get(
-            "max_idle_duration_candles", params.get("max_trade_duration_candles", 0)
+            "max_idle_duration_candles", params.get("max_trade_duration_candles", 128)
         )
     )
-    # Fallback: align with documented intent -> use context.max_trade_duration when cfg <= 0
-    if max_idle_duration_cfg <= 0:
-        max_idle_duration = context.max_trade_duration
-    else:
-        max_idle_duration = max_idle_duration_cfg
     idle_duration_ratio = context.idle_duration / max(1, max_idle_duration)
     return -idle_factor * idle_penalty_scale * idle_duration_ratio**idle_penalty_power
 
diff --git a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py
index 5e61c74..2d39e9e 100644
--- a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py
+++ b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py
@@ -2119,6 +2119,7 @@ class TestRewardRobustness(RewardSpaceTestBase):
     def test_piecewise_slope_zero_constant_after_grace(self):
         """Piecewise slope=0 should yield flat factor after grace boundary."""
         from reward_space_analysis import compute_exit_factor
+
         params = self.DEFAULT_PARAMS.copy()
         params.update(
             {
@@ -2144,9 +2145,37 @@ class TestRewardRobustness(RewardSpaceTestBase):
                 msg=f"Piecewise slope=0 factor drift at ratio set {ratios} => {values}",
             )
 
+    def test_piecewise_grace_extends_beyond_one(self):
+        """Grace >1.0 should keep divisor=1 (no attenuation) past duration_ratio=1."""
+        from reward_space_analysis import compute_exit_factor
+
+        params = self.DEFAULT_PARAMS.copy()
+        params.update(
+            {
+                "exit_factor_mode": "piecewise",
+                "exit_piecewise_grace": 1.5,  # extend grace beyond max duration ratio 1.0
+                "exit_piecewise_slope": 2.0,
+            }
+        )
+        base_factor = 80.0
+        pnl = 0.03
+        pnl_factor = 1.1
+        # Ratios straddling 1.0 but below grace=1.5 plus one beyond grace
+        ratios = [0.8, 1.0, 1.2, 1.4, 1.6]
+        vals = [compute_exit_factor(base_factor, pnl, pnl_factor, r, params) for r in ratios]
+        # All ratios <=1.5 should yield identical factor
+        ref = vals[0]
+        for i, r in enumerate(ratios[:-1]):  # exclude last (1.6)
+            self.assertAlmostEqualFloat(
+                vals[i], ref, 1e-9, msg=f"Unexpected attenuation before grace end at ratio {r}"
+            )
+        # Last ratio (1.6) should be attenuated (strictly less than ref)
+        self.assertLess(vals[-1], ref, "Attenuation should begin after grace boundary")
+
     def test_legacy_step_non_monotonic(self):
         """Legacy mode applies step change at duration_ratio=1 (should not be monotonic)."""
         from reward_space_analysis import compute_exit_factor
+
         params = self.DEFAULT_PARAMS.copy()
         params["exit_factor_mode"] = "legacy"
         base_factor = 100.0
@@ -2169,6 +2198,7 @@ class TestRewardRobustness(RewardSpaceTestBase):
     def test_exit_factor_non_negative_with_positive_pnl(self):
         """Exit factor must not be negative when pnl >= 0 (invariant clamp)."""
         from reward_space_analysis import compute_exit_factor
+
         params = self.DEFAULT_PARAMS.copy()
         # Try multiple modes / extreme params
         modes = ["linear", "power", "piecewise", "half_life", "sqrt", "legacy"]
@@ -2180,7 +2210,9 @@ class TestRewardRobustness(RewardSpaceTestBase):
             params_mode["exit_factor_mode"] = mode
             val = compute_exit_factor(base_factor, pnl, pnl_factor, 2.0, params_mode)
             self.assertGreaterEqual(
-                val, 0.0, f"Exit factor should be >=0 for non-negative pnl in mode {mode}"
+                val,
+                0.0,
+                f"Exit factor should be >=0 for non-negative pnl in mode {mode}",
             )
 
 
@@ -2309,7 +2341,18 @@ class TestParameterValidation(RewardSpaceTestBase):
         params["exit_factor_threshold"] = 10.0  # low threshold to trigger easily
         # Remove base_factor to allow argument override
         params.pop("base_factor", None)
-        context = self._mk_context(pnl=0.06, trade_duration=10)
+        from reward_space_analysis import RewardContext, Actions, Positions
+        context = RewardContext(
+            pnl=0.06,
+            trade_duration=10,
+            idle_duration=0,
+            max_trade_duration=128,
+            max_unrealized_profit=0.08,
+            min_unrealized_profit=0.0,
+            position=Positions.Long,
+            action=Actions.Long_exit,
+            force_action=None,
+        )
         with _warnings.catch_warnings(record=True) as w:
             _warnings.simplefilter("always")
             br = calculate_reward(