Piment Noir Git Repositories - freqai-strategies.git/commitdiff
refactor(reforcexy): cleanup RSA implementation
author    Jérôme Benoit <jerome.benoit@piment-noir.org>
          Tue, 21 Oct 2025 11:19:26 +0000 (13:19 +0200)
committer Jérôme Benoit <jerome.benoit@piment-noir.org>
          Tue, 21 Oct 2025 11:19:26 +0000 (13:19 +0200)
Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
ReforceXY/reward_space_analysis/README.md
ReforceXY/reward_space_analysis/reward_space_analysis.py

index d8dcab7bf28f8ac9fa5a8d3e576588e782cd75cc..4f18b9e2dbac8e030b21a2d299760173b92041f3 100644 (file)
@@ -182,7 +182,7 @@ Core frequently tuned parameters:
 | `pnl_factor_beta` | 0.5 | PnL amplification beta |
 | `idle_penalty_scale` | 0.5 | Idle penalty scale |
 | `idle_penalty_power` | 1.025 | Idle penalty exponent |
-| `max_trade_duration_candles` | 128 | Trade duration cap | 
+| `max_trade_duration_candles` | 128 | Trade duration cap |
 | `max_idle_duration_candles` | None | Idle duration cap; fallback 4× max trade duration |
 | `hold_penalty_scale` | 0.25 | Hold penalty scale |
 | `hold_penalty_power` | 1.025 | Hold penalty exponent |
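For orientation, a minimal sketch of how the tuned parameters from the table rows above might be grouped into a Python dict passed to the reward calculation; the key names and defaults mirror the table, but the surrounding `reward_params` structure is an assumption for illustration, not the repository's actual configuration layout.

    # Hypothetical grouping of the tuned parameters documented above.
    # Key names follow the table; values mirror the listed defaults.
    reward_params = {
        "pnl_factor_beta": 0.5,             # PnL amplification beta
        "idle_penalty_scale": 0.5,          # idle penalty scale
        "idle_penalty_power": 1.025,        # idle penalty exponent
        "max_trade_duration_candles": 128,  # trade duration cap
        "max_idle_duration_candles": None,  # falls back to 4x max trade duration
        "hold_penalty_scale": 0.25,         # hold penalty scale
        "hold_penalty_power": 1.025,        # hold penalty exponent
    }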
index 6ae5799a9948cb82a4569d411b4670a96341f735..576ed675f89c9611a2cbf52302f8073e803b77c1 100644 (file)
@@ -1082,13 +1082,15 @@ def calculate_reward(
     )
 
     if pbrs_enabled and not is_neutral:
-        # Derive Φ(prev) from current state to ensure telescoping semantics
-        prev_potential = _compute_hold_potential(current_pnl, current_duration_ratio, params)
-        if not np.isfinite(prev_potential):
-            prev_potential = 0.0
-        # Effective previous potential used for reporting: prefer provided previous_potential if finite
-        prev_potential = (
-            float(previous_potential) if np.isfinite(previous_potential) else float(prev_potential)
+        # Compute Φ(s) for the current state to preserve telescoping semantics Δ = γ·Φ(s') − Φ(s)
+        current_potential = _compute_hold_potential(current_pnl, current_duration_ratio, params)
+        if not np.isfinite(current_potential):
+            current_potential = 0.0
+
+        last_potential = (
+            float(previous_potential)
+            if np.isfinite(previous_potential)
+            else float(current_potential)
         )
 
         total_reward, reward_shaping, next_potential = apply_potential_shaping(
@@ -1099,12 +1101,13 @@ def calculate_reward(
             next_duration_ratio=next_duration_ratio,
             is_exit=is_exit,
             is_entry=is_entry,
-            previous_potential=previous_potential,
+            previous_potential=current_potential,
+            last_potential=last_potential,
             params=params,
         )
 
         breakdown.reward_shaping = reward_shaping
-        breakdown.prev_potential = prev_potential
+        breakdown.prev_potential = current_potential
         breakdown.next_potential = next_potential
         breakdown.entry_additive = (
             _compute_entry_additive(next_pnl, next_duration_ratio, params) if is_entry else 0.0
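The comment added in this hunk refers to potential-based reward shaping, where the shaping term is Δ = γ·Φ(s') − Φ(s) and the terms telescope over an episode, leaving the optimal policy unchanged. A minimal self-contained sketch of that telescoping property follows; the function and variable names are illustrative, not the module's actual API.

    def shaping_term(gamma: float, phi_next: float, phi_curr: float) -> float:
        # Potential-based shaping: delta = gamma * Phi(s') - Phi(s)
        return gamma * phi_next - phi_curr

    # In the discounted return, the shaping terms telescope: only the first
    # and last potentials survive, sum_t gamma^t * delta_t = gamma^T * Phi_T - Phi_0.
    gamma = 0.95
    potentials = [0.0, 0.4, 0.7, 0.2]  # Phi(s_0), ..., Phi(s_T), illustrative values
    T = len(potentials) - 1
    discounted = sum(
        gamma**t * shaping_term(gamma, potentials[t + 1], potentials[t])
        for t in range(T)
    )
    assert abs(discounted - (gamma**T * potentials[T] - potentials[0])) < 1e-12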
@@ -2095,9 +2098,11 @@ def statistical_hypothesis_tests(
         }
 
     # Optional multiple testing correction (Benjamini-Hochberg)
-    if adjust_method not in {"none", "benjamini_hochberg"}:
-        raise ValueError("Unsupported adjust_method. Use 'none' or 'benjamini_hochberg'.")
-    if adjust_method == "benjamini_hochberg" and results:
+    if adjust_method not in {"none", "benjamini_hochberg", "benjaminihochberg"}:
+        raise ValueError(
+            "Unsupported adjust_method. Use 'none', 'benjamini_hochberg', or 'benjaminihochberg'."
+        )
+    if adjust_method in {"benjamini_hochberg", "benjaminihochberg"} and results:
         # Collect p-values
         items = list(results.items())
         pvals = np.array([v[1]["p_value"] for v in items])
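The alias strings accepted above gate the Benjamini-Hochberg correction applied to the collected p-values. For reference, a minimal sketch of the BH adjustment over a p-value vector; the helper name and return shape are illustrative, not the script's actual implementation.

    import numpy as np

    def benjamini_hochberg(pvals: np.ndarray) -> np.ndarray:
        # Adjusted p-values: p_(i) * m / i on the sorted values, then a
        # cumulative minimum from the largest rank downward for monotonicity.
        m = len(pvals)
        order = np.argsort(pvals)
        ranked = pvals[order] * m / np.arange(1, m + 1)
        adjusted = np.minimum.accumulate(ranked[::-1])[::-1]
        adjusted = np.clip(adjusted, 0.0, 1.0)
        out = np.empty(m)
        out[order] = adjusted
        return out

    # Example: adjusted values are then compared against the chosen alpha.
    print(benjamini_hochberg(np.array([0.01, 0.04, 0.03, 0.20])))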