From 542d9ad2cd162e836632780a6225ca76ab3c13b6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Wed, 24 Sep 2025 22:06:32 +0200 Subject: [PATCH] fix(reforcexy): ensure penalty at holding trade with pnl target reached MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- ReforceXY/user_data/freqaimodels/ReforceXY.py | 36 +++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index d79e6a8..d931920 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -1331,21 +1331,37 @@ class MyRLEnv(Base5ActionRLEnv): self._position in (Positions.Short, Positions.Long) and action == Actions.Neutral.value ): - holding_duration_grace = float( - model_reward_parameters.get("holding_duration_grace", 1.0) + holding_duration_ratio_grace = float( + model_reward_parameters.get("holding_duration_ratio_grace", 1.0) ) - holding_overage_scale = float( - model_reward_parameters.get("holding_overage_scale", 1.0) + holding_penalty_scale = float( + model_reward_parameters.get("holding_penalty_scale", 0.3) ) - holding_overage_power = float( - model_reward_parameters.get("holding_overage_power", 1.1) + holding_penalty_power = float( + model_reward_parameters.get("holding_penalty_power", 1.0) ) - duration_overage_ratio = max(0.0, duration_ratio - holding_duration_grace) - if duration_overage_ratio > 0.0 or pnl > pnl_target: + if pnl >= pnl_target: + if duration_ratio <= holding_duration_ratio_grace and not np.isclose( + holding_duration_ratio_grace, 0.0 + ): + effective_duration_ratio = ( + duration_ratio / holding_duration_ratio_grace + ) + else: + effective_duration_ratio = 1.0 + ( + duration_ratio - holding_duration_ratio_grace + ) + return ( + -holding_factor + * holding_penalty_scale + * effective_duration_ratio**holding_penalty_power + ) + if duration_ratio > holding_duration_ratio_grace: return ( -holding_factor - * holding_overage_scale - * duration_overage_ratio**holding_overage_power + * holding_penalty_scale + * (1.0 + (duration_ratio - holding_duration_ratio_grace)) + ** holding_penalty_power ) return 0.0 -- 2.43.0