From 86ae6477e40f7e9d7af4d77eff37176227b8821e Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@piment-noir.org>
Date: Mon, 6 Oct 2025 20:12:29 +0200
Subject: [PATCH] fix(reforcexy): reward analysis idle penalty
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
---
 .../reward_space_analysis.py                  | 26 ++++++++++++-------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py
index e671f53..261a397 100644
--- a/ReforceXY/reward_space_analysis/reward_space_analysis.py
+++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py
@@ -489,18 +489,26 @@ def _idle_penalty(
     """Mirror the environment's idle penalty behaviour."""
     idle_penalty_scale = _get_param_float(params, "idle_penalty_scale", 0.75)
     idle_penalty_power = _get_param_float(params, "idle_penalty_power", 1.0)
-    max_trade_duration_candles = params.get("max_trade_duration_candles", 128)
-    max_idle_duration_candles = params.get("max_idle_duration_candles")
+    max_trade_duration_candles = params.get("max_trade_duration_candles")
     try:
-        max_idle_duration = (
-            int(max_idle_duration_candles)
-            if max_idle_duration_candles is not None
-            else 2 * max_trade_duration_candles
-        )
+        if max_trade_duration_candles is not None:
+            max_trade_duration_candles = int(max_trade_duration_candles)
+        else:
+            max_trade_duration_candles = int(context.max_trade_duration)
     except (TypeError, ValueError):
-        max_idle_duration = max_trade_duration_candles
-    if max_idle_duration <= 0:
+        max_trade_duration_candles = int(context.max_trade_duration)
+
+    max_idle_duration_candles = params.get("max_idle_duration_candles")
+    if max_idle_duration_candles is None:
         max_idle_duration = 2 * max_trade_duration_candles
+    else:
+        try:
+            max_idle_duration = int(max_idle_duration_candles)
+        except (TypeError, ValueError):
+            max_idle_duration = 2 * max_trade_duration_candles
+        if max_idle_duration <= 0:
+            max_idle_duration = 2 * max_trade_duration_candles
+
     idle_duration_ratio = context.idle_duration / max(1, max_idle_duration)
     return -idle_factor * idle_penalty_scale * idle_duration_ratio**idle_penalty_power
 
-- 
2.43.0