feat(reforcexy): penalize holding non profitable position

author Jérôme Benoit <jerome.benoit@piment-noir.org>

Tue, 18 Feb 2025 21:03:37 +0000 (22:03 +0100)

committer Jérôme Benoit <jerome.benoit@piment-noir.org>

Tue, 18 Feb 2025 21:03:37 +0000 (22:03 +0100)
author Jérôme Benoit <jerome.benoit@piment-noir.org>
Tue, 18 Feb 2025 21:03:37 +0000 (22:03 +0100)
committer Jérôme Benoit <jerome.benoit@piment-noir.org>
Tue, 18 Feb 2025 21:03:37 +0000 (22:03 +0100)
diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py

index 5de1acc0a0b328d8f8592c3073b5a136d6cdcdc3..8a7a1d80f915e7aeaa8d97262cef2a6cc13e7454 100644 (file)
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -600,6 +600,7 @@ class ReforceXY(BaseReinforcementLearningModel):
              self.timeout: int = self.rl_config.get("max_trade_duration_candles", 128)
              self._last_closed_position: Positions = None
              self._last_closed_trade_tick: int = 0
+            self._non_profit_steps: int = 0
              # self.reward_range = (-1, 1)
              if self.force_actions:
                  logger.info(
@@ -643,6 +644,7 @@ class ReforceXY(BaseReinforcementLearningModel):
              self._force_action: Optional[ForceActions] = None
              self._last_closed_position: Positions = None
              self._last_closed_trade_tick: int = 0
+            self._non_profit_steps: int = 0
              return self._get_observation(), history
  
          def calculate_reward(self, action) -> float:
@@ -700,15 +702,28 @@ class ReforceXY(BaseReinforcementLearningModel):
              if action == Actions.Neutral.value and self._position == Positions.Neutral:
                  return -1
  
+            # discourage sitting in non profitable position
+            if (
+                self._position in (Positions.Short, Positions.Long)
+                and action == Actions.Neutral.value
+            ):
+                if pnl < 0:
+                    self._non_profit_steps += 1
+                else:
+                    self._non_profit_steps = 0
+            if self._non_profit_steps > 0:
+                return pnl - (
+                    0.1 * (self._non_profit_steps**2) * max(0, pnl)
+                )  # time aggressive (quadratic) and loss magnitude aware penalty
+
+            # discourage sitting in position
              max_trade_duration = self.rl_config.get("max_trade_duration_candles", 300)
              trade_duration = self.get_trade_duration()
-
              if trade_duration <= max_trade_duration:
                  factor *= 1.5
              elif trade_duration > max_trade_duration:
                  factor *= 0.5
  
-            # discourage sitting in position
              if (
                  self._position in (Positions.Short, Positions.Long)
                  and action == Actions.Neutral.value
author	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Tue, 18 Feb 2025 21:03:37 +0000 (22:03 +0100)
committer	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Tue, 18 Feb 2025 21:03:37 +0000 (22:03 +0100)