From 6a765f96ff1a1b7e1503a01c3d803900b96d61cb Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?=
Date: Sat, 13 Sep 2025 14:18:50 +0200
Subject: [PATCH] perf(reforcexy): refine reward calculation
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jérôme Benoit
---
 ReforceXY/user_data/freqaimodels/ReforceXY.py | 33 +++++++++----------
 .../user_data/strategies/QuickAdapterV3.py    |  2 +-
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py
index 730ef81..b9b08e1 100644
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -968,16 +968,7 @@ class ReforceXY(BaseReinforcementLearningModel):
             #     factor = 1
             # return 25.0 * factor
 
-            # reward agent for entering trades
-            if (
-                action in (Actions.Long_enter.value, Actions.Short_enter.value)
-                and self._position == Positions.Neutral
-            ):
-                return self.rl_config.get("model_reward_parameters", {}).get(
-                    "enter_action", 1.0
-                )
-
-            # discourage agent from not entering trades
+            # discourage agent from sitting idle too long
             if action == Actions.Neutral.value and self._position == Positions.Neutral:
                 return float(
                     self.rl_config.get("model_reward_parameters", {}).get(
@@ -992,23 +983,31 @@
             ):
                 duration_fraction = trade_duration / max_trade_duration
                 max_pnl = max(self.get_most_recent_max_pnl(), pnl)
+
                 if max_pnl > 0:
-                    drawdown_penalty = 0.0025 * factor * (max_pnl - pnl) * duration_fraction
+                    drawdown_penalty = (
+                        0.0025 * factor * (max_pnl - pnl) * duration_fraction
+                    )
                 else:
                     drawdown_penalty = 0.0
+
                 lambda1 = 0.05
                 lambda2 = 0.1
                 if pnl >= 0:
-                    if duration_fraction < 0.75:
+                    if duration_fraction <= 1.0:
                         duration_penalty_factor = 1.0
                     else:
-                        duration_penalty_factor = 1.0 / (1.0 + lambda1 * duration_fraction)
-                    return factor * pnl * duration_penalty_factor - lambda2 * duration_fraction - drawdown_penalty
+                        duration_penalty_factor = 1.0 / (
+                            1.0 + lambda1 * (duration_fraction - 1.0)
+                        )
+                    return (
+                        factor * pnl * duration_penalty_factor
+                        - lambda2 * duration_fraction
+                        - drawdown_penalty
+                    )
                 else:
                     return (
-                        factor
-                        * pnl
-                        * (1 + lambda1 * duration_fraction)
+                        factor * pnl * (1 + lambda1 * duration_fraction)
                         - 2 * lambda2 * duration_fraction
                         - drawdown_penalty
                     )
diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py
index 743750e..1ac569d 100644
--- a/quickadapter/user_data/strategies/QuickAdapterV3.py
+++ b/quickadapter/user_data/strategies/QuickAdapterV3.py
@@ -1122,7 +1122,7 @@ class QuickAdapterV3(IStrategy):
         side: str,
         order: Literal["entry", "exit"],
         rate: float,
-        min_natr_ratio_percent: float = 0.00999,
+        min_natr_ratio_percent: float = 0.0099,
         max_natr_ratio_percent: float = 0.095,
         lookback_period: int = 1,
         decay_ratio: float = 0.5,
-- 
2.43.0
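
For quick reference, the refined exit-reward shaping introduced in the ReforceXY.py hunks above can be read as the following standalone sketch. It only restates the arithmetic visible in the diff; the helper name exit_reward_sketch and the way its inputs (pnl, max_pnl, factor, trade_duration, max_trade_duration) are produced upstream are assumptions for illustration, not part of ReforceXY's actual interface.

# Illustrative sketch only -- not part of the patch. Parameter names mirror
# the variables visible in the diff context; lambda1/lambda2 defaults match
# the constants introduced by the patch.
def exit_reward_sketch(
    pnl: float,
    max_pnl: float,
    factor: float,
    trade_duration: int,
    max_trade_duration: int,
    lambda1: float = 0.05,
    lambda2: float = 0.1,
) -> float:
    duration_fraction = trade_duration / max_trade_duration

    # Penalize giving back unrealized gains, scaled by how long the trade ran.
    if max_pnl > 0:
        drawdown_penalty = 0.0025 * factor * (max_pnl - pnl) * duration_fraction
    else:
        drawdown_penalty = 0.0

    if pnl >= 0:
        # No duration discount until the trade overruns its target duration;
        # past that point only the overrun (duration_fraction - 1.0) is discounted.
        if duration_fraction <= 1.0:
            duration_penalty_factor = 1.0
        else:
            duration_penalty_factor = 1.0 / (
                1.0 + lambda1 * (duration_fraction - 1.0)
            )
        return (
            factor * pnl * duration_penalty_factor
            - lambda2 * duration_fraction
            - drawdown_penalty
        )
    # Losing exits: the negative pnl is amplified with duration and the linear
    # duration penalty is doubled.
    return (
        factor * pnl * (1 + lambda1 * duration_fraction)
        - 2 * lambda2 * duration_fraction
        - drawdown_penalty
    )

For example, with pnl=0.02, max_pnl=0.03, factor=100.0, trade_duration=96 and max_trade_duration=128 (duration_fraction 0.75), the sketch returns 2.0 - 0.075 - 0.001875 ≈ 1.92.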