From 61244af76979c7b22a7c546196b243a5bf87db26 Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@piment-noir.org>
Date: Fri, 19 Sep 2025 01:48:29 +0200
Subject: [PATCH] fix(reforcexy): fix get_most_recent_max_pnl() semantic
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: JÃ©rÃ´me Benoit <jerome.benoit@piment-noir.org>
---
 ReforceXY/user_data/freqaimodels/ReforceXY.py | 45 ++++++++++++++-----
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py
index cd7aa37..be1e0ba 100644
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -1093,8 +1093,7 @@ class ReforceXY(BaseReinforcementLearningModel):
 
             # discourage agent from sitting idle too long
             if action == Actions.Neutral.value and self._position == Positions.Neutral:
-                idle_duration = self.get_idle_duration()
-                return float(-0.01 * idle_duration**1.05)
+                return -0.01 * self.get_idle_duration() ** 1.05
 
             # pnl and duration aware agent reward while sitting in position
             if (
@@ -1357,21 +1356,41 @@ class ReforceXY(BaseReinforcementLearningModel):
             return self._current_tick - self._last_closed_trade_tick
 
         def get_most_recent_max_pnl(self) -> float:
+            """
+            Calculate the most recent maximum unrealized profit if in a trade
+            """
+            if self._last_trade_tick is None:
+                return -np.inf
+            if self._position == Positions.Neutral:
+                return -np.inf
             pnl_history = self.history.get("pnl")
-            return (
-                np.max(pnl_history) if pnl_history and len(pnl_history) > 0 else -np.inf
-            )
+            if not pnl_history or len(pnl_history) == 0:
+                return -np.inf
+
+            pnl_history = np.asarray(pnl_history)
+            ticks = self.history.get("tick")
+            if not ticks:
+                return -np.inf
+            ticks = np.asarray(ticks)
+            start = np.searchsorted(ticks, self._last_trade_tick, side="left")
+            if start >= ticks.shape[0]:
+                return -np.inf
+            trade_slice = pnl_history[start:]
+            if trade_slice.size == 0:
+                return -np.inf
+            return np.max(trade_slice)
 
         def get_most_recent_return(self) -> float:
             """
             Calculate the tick to tick return if in a trade.
-            Return is generated from rising prices in Long
-            and falling prices in Short positions.
+            Return is generated from rising prices in Long and falling prices in Short positions.
             The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee.
             """
-            if self._current_tick <= 0:
+            if self._last_trade_tick is None:
                 return 0.0
-            if self._position == Positions.Long:
+            if self._position == Positions.Neutral:
+                return 0.0
+            elif self._position == Positions.Long:
                 current_price = self.current_price()
                 previous_price = self.previous_price()
                 if (
@@ -1381,7 +1400,7 @@ class ReforceXY(BaseReinforcementLearningModel):
                 ):
                     previous_price = self.add_entry_fee(previous_price)
                 return np.log(current_price) - np.log(previous_price)
-            if self._position == Positions.Short:
+            elif self._position == Positions.Short:
                 current_price = self.current_price()
                 previous_price = self.previous_price()
                 if (
@@ -1402,7 +1421,11 @@ class ReforceXY(BaseReinforcementLearningModel):
             """
             Calculate the tick to tick unrealized profit if in a trade
             """
-            if self._position == Positions.Long:
+            if self._last_trade_tick is None:
+                return 0.0
+            if self._position == Positions.Neutral:
+                return 0.0
+            elif self._position == Positions.Long:
                 current_price = self.add_exit_fee(self.current_price())
                 previous_price = self.add_entry_fee(self.previous_price())
                 return (current_price - previous_price) / previous_price
-- 
2.43.0