From a53ef8190acdf42e2b67347d8d2db31037b18676 Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?=
Date: Tue, 21 Oct 2025 13:19:26 +0200
Subject: [PATCH] refactor(reforcexy): cleanup RSA implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Cleanup of the reward_space_analysis (RSA) module:

- Derive the PBRS potential Φ(s) from the current state so the shaping
  term keeps its telescoping form Δ = γ·Φ(s') − Φ(s); rename the locals
  to current_potential/last_potential and pass both explicitly to
  apply_potential_shaping().
- Report the derived current potential as breakdown.prev_potential.
- Accept "benjaminihochberg" as an alias of "benjamini_hochberg" for the
  adjust_method argument of statistical_hypothesis_tests().
- Normalize whitespace in a README parameter table row.

Signed-off-by: Jérôme Benoit
---
 ReforceXY/reward_space_analysis/README.md    |  2 +-
 .../reward_space_analysis.py                 | 29 +++++++++++--------
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md
index d8dcab7..4f18b9e 100644
--- a/ReforceXY/reward_space_analysis/README.md
+++ b/ReforceXY/reward_space_analysis/README.md
@@ -182,7 +182,7 @@ Core frequently tuned parameters:
 | `pnl_factor_beta` | 0.5 | PnL amplification beta |
 | `idle_penalty_scale` | 0.5 | Idle penalty scale |
 | `idle_penalty_power` | 1.025 | Idle penalty exponent |
-| `max_trade_duration_candles` | 128 | Trade duration cap | 
+| `max_trade_duration_candles` | 128 | Trade duration cap |
 | `max_idle_duration_candles` | None | Idle duration cap; fallback 4× max trade duration |
 | `hold_penalty_scale` | 0.25 | Hold penalty scale |
 | `hold_penalty_power` | 1.025 | Hold penalty exponent |
diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py
index 6ae5799..576ed67 100644
--- a/ReforceXY/reward_space_analysis/reward_space_analysis.py
+++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py
@@ -1082,13 +1082,15 @@ def calculate_reward(
     )
 
     if pbrs_enabled and not is_neutral:
-        # Derive Φ(prev) from current state to ensure telescoping semantics
-        prev_potential = _compute_hold_potential(current_pnl, current_duration_ratio, params)
-        if not np.isfinite(prev_potential):
-            prev_potential = 0.0
-        # Effective previous potential used for reporting: prefer provided previous_potential if finite
-        prev_potential = (
-            float(previous_potential) if np.isfinite(previous_potential) else float(prev_potential)
+        # Compute Φ(s) for the current state to preserve telescoping semantics Δ = γ·Φ(s') − Φ(s)
+        current_potential = _compute_hold_potential(current_pnl, current_duration_ratio, params)
+        if not np.isfinite(current_potential):
+            current_potential = 0.0
+
+        last_potential = (
+            float(previous_potential)
+            if np.isfinite(previous_potential)
+            else float(current_potential)
         )
 
         total_reward, reward_shaping, next_potential = apply_potential_shaping(
@@ -1099,12 +1101,13 @@
             next_duration_ratio=next_duration_ratio,
             is_exit=is_exit,
             is_entry=is_entry,
-            previous_potential=previous_potential,
+            previous_potential=current_potential,
+            last_potential=last_potential,
             params=params,
         )
 
         breakdown.reward_shaping = reward_shaping
-        breakdown.prev_potential = prev_potential
+        breakdown.prev_potential = current_potential
         breakdown.next_potential = next_potential
         breakdown.entry_additive = (
             _compute_entry_additive(next_pnl, next_duration_ratio, params) if is_entry else 0.0
@@ -2095,9 +2098,11 @@ def statistical_hypothesis_tests(
     }
 
     # Optional multiple testing correction (Benjamini-Hochberg)
-    if adjust_method not in {"none", "benjamini_hochberg"}:
-        raise ValueError("Unsupported adjust_method. Use 'none' or 'benjamini_hochberg'.")
-    if adjust_method == "benjamini_hochberg" and results:
+    if adjust_method not in {"none", "benjamini_hochberg", "benjaminihochberg"}:
+        raise ValueError(
+            "Unsupported adjust_method. Use 'none', 'benjamini_hochberg', or 'benjaminihochberg'."
+        )
+    if adjust_method in {"benjamini_hochberg", "benjaminihochberg"} and results:
         # Collect p-values
         items = list(results.items())
         pvals = np.array([v[1]["p_value"] for v in items])
-- 
2.43.0
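
The PBRS hunk derives Φ(s) from the current state so the shaping term
keeps its telescoping form. A minimal sketch of that term, assuming a
hypothetical potential phi() in place of _compute_hold_potential and an
assumed discount factor (neither value comes from this patch):

    import numpy as np

    GAMMA = 0.95  # assumed discount factor, not taken from the patch

    def phi(pnl: float, duration_ratio: float) -> float:
        # Hypothetical stand-in for _compute_hold_potential.
        return pnl * (1.0 - duration_ratio)

    def shaping_term(current_pnl, current_dr, next_pnl, next_dr) -> float:
        # Δ = γ·Φ(s') − Φ(s). Recomputing Φ(s) from the current state,
        # and zeroing it when non-finite as the hunk does, keeps the
        # per-step terms summing to γ^T·Φ(s_T) − Φ(s_0) over an episode,
        # the property that makes PBRS policy-invariant (Ng et al., 1999).
        current_potential = phi(current_pnl, current_dr)
        if not np.isfinite(current_potential):
            current_potential = 0.0
        return GAMMA * phi(next_pnl, next_dr) - current_potential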
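
Likewise for the adjust_method change: a self-contained sketch of the
Benjamini-Hochberg step-up adjustment that the corrected branch gates.
The name-to-p-value mapping is illustrative, not the module's actual
results structure:

    import numpy as np

    def benjamini_hochberg(pvals: np.ndarray) -> np.ndarray:
        # BH-adjusted p-values: sort ascending, scale p_(i) by m/i,
        # enforce monotonicity from the largest rank down, cap at 1.0,
        # and restore the original order.
        m = pvals.size
        order = np.argsort(pvals)
        scaled = pvals[order] * m / np.arange(1, m + 1)
        adjusted = np.minimum.accumulate(scaled[::-1])[::-1]
        out = np.empty(m)
        out[order] = np.clip(adjusted, 0.0, 1.0)
        return out

    raw = {"idle_penalty": 0.003, "hold_penalty": 0.04, "pnl_factor": 0.20}
    adjusted = benjamini_hochberg(np.array(list(raw.values())))
    print(dict(zip(raw, adjusted.round(3).tolist())))
    # {'idle_penalty': 0.009, 'hold_penalty': 0.06, 'pnl_factor': 0.2}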