perf(reforcexy): fine-tune reward on open positions

author Jérôme Benoit <jerome.benoit@piment-noir.org>

Thu, 27 Feb 2025 09:28:26 +0000 (10:28 +0100)

committer Jérôme Benoit <jerome.benoit@piment-noir.org>

Thu, 27 Feb 2025 09:28:26 +0000 (10:28 +0100)
author Jérôme Benoit <jerome.benoit@piment-noir.org>
Thu, 27 Feb 2025 09:28:26 +0000 (10:28 +0100)
committer Jérôme Benoit <jerome.benoit@piment-noir.org>
Thu, 27 Feb 2025 09:28:26 +0000 (10:28 +0100)
diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py

index a8dcc41534b4881a541c7084b270540a244c70f5..17b9a70ade3db3c4ab85b611cc58ff54c583ab18 100644 (file)
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -809,21 +809,28 @@ class ReforceXY(BaseReinforcementLearningModel):
                  self._position in (Positions.Short, Positions.Long)
                  and action == Actions.Neutral.value
              ):
-                # peak_pnl = np.max(self.get_most_recent_peak_pnl(), pnl)
-                # if peak_pnl > 0:
-                #     drawdown_penalty = 0.01 * factor * (peak_pnl - pnl)
-                # else:
-                #     drawdown_penalty = 0.0
+                peak_pnl = np.max(self.get_most_recent_peak_pnl(), pnl)
+                if peak_pnl > 0:
+                    drawdown_penalty = 0.01 * factor * (peak_pnl - pnl)
+                else:
+                    drawdown_penalty = 0.0
                  lambda1 = 0.05
+                lambda2 = 0.1
                  if pnl >= 0:
                      return (
-                        factor * pnl * np.exp(-lambda1 * trade_duration)
-                        - trade_duration / max_trade_duration
+                        factor
+                        * pnl
+                        * np.exp(-lambda1 * (trade_duration / max_trade_duration))
+                        - lambda2 * (trade_duration / max_trade_duration)
+                        - drawdown_penalty
                      )
                  else:
                      return (
-                        factor * pnl * (1 + lambda1 * trade_duration)
-                        - 2 * trade_duration / max_trade_duration
+                        factor
+                        * pnl
+                        * (1 + lambda1 * (trade_duration / max_trade_duration))
+                        - 2 * lambda2 * (trade_duration / max_trade_duration)
+                        - drawdown_penalty
                      )
  
              # close long
author	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Thu, 27 Feb 2025 09:28:26 +0000 (10:28 +0100)
committer	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Thu, 27 Feb 2025 09:28:26 +0000 (10:28 +0100)