Piment Noir Git Repositories - freqai-strategies.git/commitdiff
perf(reforcexy): improve reward for open positions
author Jérôme Benoit <jerome.benoit@piment-noir.org>
Wed, 26 Feb 2025 12:17:50 +0000 (13:17 +0100)
committer Jérôme Benoit <jerome.benoit@piment-noir.org>
Wed, 26 Feb 2025 12:17:50 +0000 (13:17 +0100)
Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
ReforceXY/user_data/freqaimodels/ReforceXY.py

index 46d94bcb39fc55f0bc46436618f4a8b9de5004b8..b3575f9087b76cc093f1edcda23c8bc6441061f6 100644
@@ -663,7 +663,6 @@ class ReforceXY(BaseReinforcementLearningModel):
             self.timeout: int = self.rl_config.get("max_trade_duration_candles", 128)
             self._last_closed_position: Positions = None
             self._last_closed_trade_tick: int = 0
-            self._non_profit_steps: int = 0
             # self.reward_range = (-1, 1)
             if self.force_actions:
                 logger.info(
@@ -707,7 +706,6 @@ class ReforceXY(BaseReinforcementLearningModel):
             self._force_action: Optional[ForceActions] = None
             self._last_closed_position: Positions = None
             self._last_closed_trade_tick: int = 0
-            self._non_profit_steps: int = 0
             return self._get_observation(), history
 
         def _get_reward_factor_at_trade_exit(
@@ -806,19 +804,15 @@ class ReforceXY(BaseReinforcementLearningModel):
                     )
                 )
 
-            # discourage sitting in position
+            # pnl aware reward for sitting in position
             if (
                 self._position in (Positions.Short, Positions.Long)
                 and action == Actions.Neutral.value
             ):
-                if pnl < 0:
-                    self._non_profit_steps += 1
-                    return factor * (
-                        pnl - (0.1 * (self._non_profit_steps**2) * abs(pnl))
-                    )  # time aggressive (quadratic) and loss magnitude aware penalty
-                else:
-                    self._non_profit_steps = 0
-                    return -1.0 * trade_duration / max_trade_duration
+                return (
+                    factor * pnl * np.exp(-0.05 * trade_duration)
+                    - trade_duration / max_trade_duration
+                )
 
             # close long
             if action == Actions.Long_exit.value and self._position == Positions.Long:
@@ -895,7 +889,6 @@ class ReforceXY(BaseReinforcementLearningModel):
             self._position = Positions.Neutral
             self._last_closed_trade_tick = self._last_trade_tick
             self._last_trade_tick = None
-            self._non_profit_steps = 0
 
         def execute_trade(self, action: int) -> None:
             """