From 86ae6477e40f7e9d7af4d77eff37176227b8821e Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Mon, 6 Oct 2025 20:12:29 +0200 Subject: [PATCH] fix(reforcexy): reward analysis idle penalty MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- .../reward_space_analysis.py | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py index e671f53..261a397 100644 --- a/ReforceXY/reward_space_analysis/reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py @@ -489,18 +489,26 @@ def _idle_penalty( """Mirror the environment's idle penalty behaviour.""" idle_penalty_scale = _get_param_float(params, "idle_penalty_scale", 0.75) idle_penalty_power = _get_param_float(params, "idle_penalty_power", 1.0) - max_trade_duration_candles = params.get("max_trade_duration_candles", 128) - max_idle_duration_candles = params.get("max_idle_duration_candles") + max_trade_duration_candles = params.get("max_trade_duration_candles") try: - max_idle_duration = ( - int(max_idle_duration_candles) - if max_idle_duration_candles is not None - else 2 * max_trade_duration_candles - ) + if max_trade_duration_candles is not None: + max_trade_duration_candles = int(max_trade_duration_candles) + else: + max_trade_duration_candles = int(context.max_trade_duration) except (TypeError, ValueError): - max_idle_duration = max_trade_duration_candles - if max_idle_duration <= 0: + max_trade_duration_candles = int(context.max_trade_duration) + + max_idle_duration_candles = params.get("max_idle_duration_candles") + if max_idle_duration_candles is None: max_idle_duration = 2 * max_trade_duration_candles + else: + try: + max_idle_duration = int(max_idle_duration_candles) + except (TypeError, ValueError): + max_idle_duration = 2 * max_trade_duration_candles + if max_idle_duration <= 0: + max_idle_duration = 2 * max_trade_duration_candles + idle_duration_ratio = context.idle_duration / max(1, max_idle_duration) return -idle_factor * idle_penalty_scale * idle_duration_ratio**idle_penalty_power -- 2.43.0