From a53ef8190acdf42e2b67347d8d2db31037b18676 Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?=
Date: Tue, 21 Oct 2025 13:19:26 +0200
Subject: [PATCH] refactor(reforcexy): cleanup RSA implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Cleanup of the reward_space_analysis (RSA) module:

- Derive the PBRS potential Φ(s) from the current state so the shaping
  term keeps its telescoping form Δ = γ·Φ(s') − Φ(s); rename the locals
  to current_potential/last_potential and pass both explicitly to
  apply_potential_shaping().
- Report the derived current potential as breakdown.prev_potential.
- Accept "benjaminihochberg" as an alias of "benjamini_hochberg" for the
  adjust_method argument of statistical_hypothesis_tests().
- Normalize whitespace in a README parameter table row.

Signed-off-by: Jérôme Benoit
---
 ReforceXY/reward_space_analysis/README.md    |  2 +-
 .../reward_space_analysis.py                 | 29 +++++++++++--------
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md
index d8dcab7..4f18b9e 100644
--- a/ReforceXY/reward_space_analysis/README.md
+++ b/ReforceXY/reward_space_analysis/README.md
@@ -182,7 +182,7 @@ Core frequently tuned parameters:
 | `pnl_factor_beta` | 0.5 | PnL amplification beta |
 | `idle_penalty_scale` | 0.5 | Idle penalty scale |
 | `idle_penalty_power` | 1.025 | Idle penalty exponent |
-| `max_trade_duration_candles` | 128 | Trade duration cap | 
+| `max_trade_duration_candles` | 128 | Trade duration cap |
 | `max_idle_duration_candles` | None | Idle duration cap; fallback 4× max trade duration |
 | `hold_penalty_scale` | 0.25 | Hold penalty scale |
 | `hold_penalty_power` | 1.025 | Hold penalty exponent |
diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py
index 6ae5799..576ed67 100644
--- a/ReforceXY/reward_space_analysis/reward_space_analysis.py
+++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py
@@ -1082,13 +1082,15 @@ def calculate_reward(
     )
 
     if pbrs_enabled and not is_neutral:
-        # Derive Φ(prev) from current state to ensure telescoping semantics
-        prev_potential = _compute_hold_potential(current_pnl, current_duration_ratio, params)
-        if not np.isfinite(prev_potential):
-            prev_potential = 0.0
-        # Effective previous potential used for reporting: prefer provided previous_potential if finite
-        prev_potential = (
-            float(previous_potential) if np.isfinite(previous_potential) else float(prev_potential)
+        # Compute Φ(s) for the current state to preserve telescoping semantics Δ = γ·Φ(s') − Φ(s)
+        current_potential = _compute_hold_potential(current_pnl, current_duration_ratio, params)
+        if not np.isfinite(current_potential):
+            current_potential = 0.0
+
+        last_potential = (
+            float(previous_potential)
+            if np.isfinite(previous_potential)
+            else float(current_potential)
         )
 
         total_reward, reward_shaping, next_potential = apply_potential_shaping(
@@ -1099,12 +1101,13 @@
             next_duration_ratio=next_duration_ratio,
             is_exit=is_exit,
             is_entry=is_entry,
-            previous_potential=previous_potential,
+            previous_potential=current_potential,
+            last_potential=last_potential,
             params=params,
         )
 
         breakdown.reward_shaping = reward_shaping
-        breakdown.prev_potential = prev_potential
+        breakdown.prev_potential = current_potential
         breakdown.next_potential = next_potential
         breakdown.entry_additive = (
             _compute_entry_additive(next_pnl, next_duration_ratio, params) if is_entry else 0.0
@@ -2095,9 +2098,11 @@ def statistical_hypothesis_tests(
     }
 
     # Optional multiple testing correction (Benjamini-Hochberg)
-    if adjust_method not in {"none", "benjamini_hochberg"}:
-        raise ValueError("Unsupported adjust_method. Use 'none' or 'benjamini_hochberg'.")
-    if adjust_method == "benjamini_hochberg" and results:
+    if adjust_method not in {"none", "benjamini_hochberg", "benjaminihochberg"}:
+        raise ValueError(
+            "Unsupported adjust_method. Use 'none', 'benjamini_hochberg', or 'benjaminihochberg'."
+        )
+    if adjust_method in {"benjamini_hochberg", "benjaminihochberg"} and results:
         # Collect p-values
         items = list(results.items())
         pvals = np.array([v[1]["p_value"] for v in items])
-- 
2.43.0
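
The PBRS hunk derives Φ(s) from the current state so the shaping term
keeps its telescoping form. A minimal sketch of that term, assuming a
hypothetical potential phi() in place of _compute_hold_potential and an
assumed discount factor (neither value comes from this patch):

    import numpy as np

    GAMMA = 0.95  # assumed discount factor, not taken from the patch

    def phi(pnl: float, duration_ratio: float) -> float:
        # Hypothetical stand-in for _compute_hold_potential.
        return pnl * (1.0 - duration_ratio)

    def shaping_term(current_pnl, current_dr, next_pnl, next_dr) -> float:
        # Δ = γ·Φ(s') − Φ(s). Recomputing Φ(s) from the current state,
        # and zeroing it when non-finite as the hunk does, keeps the
        # per-step terms summing to γ^T·Φ(s_T) − Φ(s_0) over an episode,
        # the property that makes PBRS policy-invariant (Ng et al., 1999).
        current_potential = phi(current_pnl, current_dr)
        if not np.isfinite(current_potential):
            current_potential = 0.0
        return GAMMA * phi(next_pnl, next_dr) - current_potential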
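
Likewise for the adjust_method change: a self-contained sketch of the
Benjamini-Hochberg step-up adjustment that the corrected branch gates.
The name-to-p-value mapping is illustrative, not the module's actual
results structure:

    import numpy as np

    def benjamini_hochberg(pvals: np.ndarray) -> np.ndarray:
        # BH-adjusted p-values: sort ascending, scale p_(i) by m/i,
        # enforce monotonicity from the largest rank down, cap at 1.0,
        # and restore the original order.
        m = pvals.size
        order = np.argsort(pvals)
        scaled = pvals[order] * m / np.arange(1, m + 1)
        adjusted = np.minimum.accumulate(scaled[::-1])[::-1]
        out = np.empty(m)
        out[order] = np.clip(adjusted, 0.0, 1.0)
        return out

    raw = {"idle_penalty": 0.003, "hold_penalty": 0.04, "pnl_factor": 0.20}
    adjusted = benjamini_hochberg(np.array(list(raw.values())))
    print(dict(zip(raw, adjusted.round(3).tolist())))
    # {'idle_penalty': 0.009, 'hold_penalty': 0.06, 'pnl_factor': 0.2}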