refactor(reforcexy): revert incorrect change

author Jérôme Benoit <jerome.benoit@piment-noir.org>
Sat, 1 Mar 2025 11:54:16 +0000 (12:54 +0100)
committer Jérôme Benoit <jerome.benoit@piment-noir.org>
Sat, 1 Mar 2025 11:54:16 +0000 (12:54 +0100)
diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py

index 2deb6c93d61e62d47f6de1f857a54d75bcfcc0b4..6b68b12fa2377a5a5f8bdf611a92cc3c0d04a88f 100644 (file)
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -813,14 +813,14 @@ class ReforceXY(BaseReinforcementLearningModel):
                  self._position in (Positions.Short, Positions.Long)
                  and action == Actions.Neutral.value
              ):
+                max_pnl = max(self.get_most_recent_max_pnl(), pnl)
+                if max_pnl > 0:
+                    drawdown_penalty = 0.01 * factor * (max_pnl - pnl)
+                else:
+                    drawdown_penalty = 0.0
                  lambda1 = 0.05
                  lambda2 = 0.1
                  if pnl >= 0:
-                    max_pnl = max(self.get_most_recent_max_pnl(), pnl)
-                    if max_pnl > 0:
-                        drawdown_penalty = 0.01 * factor * (max_pnl - pnl)
-                    else:
-                        drawdown_penalty = 0.0
                      return (
                          factor
                          * pnl
@@ -829,9 +829,13 @@ class ReforceXY(BaseReinforcementLearningModel):
                          - drawdown_penalty
                      )
                  else:
-                    return factor * pnl * (
-                        1 + lambda1 * (trade_duration / max_trade_duration)
-                    ) - 2 * lambda2 * (trade_duration / max_trade_duration)
+                    return (
+                        factor
+                        * pnl
+                        * (1 + lambda1 * (trade_duration / max_trade_duration))
+                        - 2 * lambda2 * (trade_duration / max_trade_duration)
+                        - drawdown_penalty
+                    )
  
              # close long
              if action == Actions.Long_exit.value and self._position == Positions.Long:
author	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Sat, 1 Mar 2025 11:54:16 +0000 (12:54 +0100)
committer	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Sat, 1 Mar 2025 11:54:16 +0000 (12:54 +0100)