From 66cfe51d26240615499f5eccb4604ab90e062cb9 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Thu, 27 Feb 2025 10:28:26 +0100 Subject: [PATCH] perf(reforcexy): fine tune reward on opened positions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- ReforceXY/user_data/freqaimodels/ReforceXY.py | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index a8dcc41..17b9a70 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -809,21 +809,28 @@ class ReforceXY(BaseReinforcementLearningModel): self._position in (Positions.Short, Positions.Long) and action == Actions.Neutral.value ): - # peak_pnl = np.max(self.get_most_recent_peak_pnl(), pnl) - # if peak_pnl > 0: - # drawdown_penalty = 0.01 * factor * (peak_pnl - pnl) - # else: - # drawdown_penalty = 0.0 + peak_pnl = max(self.get_most_recent_peak_pnl(), pnl) + if peak_pnl > 0: + drawdown_penalty = 0.01 * factor * (peak_pnl - pnl) + else: + drawdown_penalty = 0.0 lambda1 = 0.05 + lambda2 = 0.1 if pnl >= 0: return ( - factor * pnl * np.exp(-lambda1 * trade_duration) - - trade_duration / max_trade_duration + factor + * pnl + * np.exp(-lambda1 * (trade_duration / max_trade_duration)) + - lambda2 * (trade_duration / max_trade_duration) + - drawdown_penalty ) else: return ( - factor * pnl * (1 + lambda1 * trade_duration) - - 2 * trade_duration / max_trade_duration + factor + * pnl + * (1 + lambda1 * (trade_duration / max_trade_duration)) + - 2 * lambda2 * (trade_duration / max_trade_duration) + - drawdown_penalty ) # close long -- 2.43.0