From 5c1d15690c790fff9bd5b90583c7f1c13d0dd16f Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Wed, 26 Feb 2025 13:17:50 +0100 Subject: [PATCH] perf(reforcexy): improve reward for opened positions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- ReforceXY/user_data/freqaimodels/ReforceXY.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index 46d94bc..b3575f9 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -663,7 +663,6 @@ class ReforceXY(BaseReinforcementLearningModel): self.timeout: int = self.rl_config.get("max_trade_duration_candles", 128) self._last_closed_position: Positions = None self._last_closed_trade_tick: int = 0 - self._non_profit_steps: int = 0 # self.reward_range = (-1, 1) if self.force_actions: logger.info( @@ -707,7 +706,6 @@ class ReforceXY(BaseReinforcementLearningModel): self._force_action: Optional[ForceActions] = None self._last_closed_position: Positions = None self._last_closed_trade_tick: int = 0 - self._non_profit_steps: int = 0 return self._get_observation(), history def _get_reward_factor_at_trade_exit( @@ -806,19 +804,15 @@ class ReforceXY(BaseReinforcementLearningModel): ) ) - # discourage sitting in position + # pnl aware reward for sitting in position if ( self._position in (Positions.Short, Positions.Long) and action == Actions.Neutral.value ): - if pnl < 0: - self._non_profit_steps += 1 - return factor * ( - pnl - (0.1 * (self._non_profit_steps**2) * abs(pnl)) - ) # time aggressive (quadratic) and loss magnitude aware penalty - else: - self._non_profit_steps = 0 - return -1.0 * trade_duration / max_trade_duration + return ( + factor * pnl * np.exp(-0.05 * trade_duration) + - trade_duration / max_trade_duration + ) # close long if action == Actions.Long_exit.value and self._position == Positions.Long: @@ -895,7 +889,6 @@ class ReforceXY(BaseReinforcementLearningModel): self._position = Positions.Neutral self._last_closed_trade_tick = self._last_trade_tick self._last_trade_tick = None - self._non_profit_steps = 0 def execute_trade(self, action: int) -> None: """ -- 2.43.0