From 5e5749ccc8cea3719acac17fa1f7d7da08efc69b Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@piment-noir.org>
Date: Mon, 12 Jan 2026 14:50:20 +0100
Subject: [PATCH] fix(reforcexy): correct short fee handling in returns/PnL;
 use log-diff for %-close_pct_change

---
 ReforceXY/user_data/freqaimodels/ReforceXY.py | 75 +++++++++++++++++--
 .../user_data/strategies/RLAgentStrategy.py   |  5 +-
 2 files changed, 72 insertions(+), 8 deletions(-)

diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py
index 5b2e9a3..000775b 100644
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -3483,14 +3483,23 @@ class MyRLEnv(Base5ActionRLEnv):
 
     def get_most_recent_return(self) -> float:
         """
-        Calculate the tick to tick return if the agent is in a trade.
-        Return is generated from rising prices in Long and falling prices in Short positions.
-        The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee.
+        Calculate tick-to-tick log-return for the current position.
+
+        Entry fees are applied on position transitions only (Neutral/opposite → current).
+
+        Returns
+        -------
+        float
+            Log-return: ln(current/previous)
+            - Long: positive when price rises
+            - Short: positive when price falls
+            - 0.0 if no trade, neutral position, or invalid prices
         """
         if self._last_trade_tick is None:
             return 0.0
         if self._position == Positions.Neutral:
             return 0.0
+
         elif self._position == Positions.Long:
             current_price = self.current_price()
             previous_price = self.previous_price()
@@ -3500,7 +3509,17 @@ class MyRLEnv(Base5ActionRLEnv):
                 or self._position_history[previous_tick] == Positions.Neutral
             ):
                 previous_price = self.add_entry_fee(previous_price)
+
+            if (
+                previous_price <= 0.0
+                or not np.isfinite(previous_price)
+                or current_price <= 0.0
+                or not np.isfinite(current_price)
+            ):
+                return 0.0
+
             return np.log(current_price) - np.log(previous_price)
+
         elif self._position == Positions.Short:
             current_price = self.current_price()
             previous_price = self.previous_price()
@@ -3509,8 +3528,18 @@ class MyRLEnv(Base5ActionRLEnv):
                 self._position_history[previous_tick] == Positions.Long
                 or self._position_history[previous_tick] == Positions.Neutral
             ):
-                previous_price = self.add_exit_fee(previous_price)
+                previous_price = self.add_entry_fee(previous_price)
+
+            if (
+                previous_price <= 0.0
+                or not np.isfinite(previous_price)
+                or current_price <= 0.0
+                or not np.isfinite(current_price)
+            ):
+                return 0.0
+
             return np.log(previous_price) - np.log(current_price)
+
         return 0.0
 
     def _update_portfolio_log_returns(self):
@@ -3518,20 +3547,52 @@ class MyRLEnv(Base5ActionRLEnv):
 
     def get_most_recent_profit(self) -> float:
         """
-        Calculate the tick to tick unrealized profit if the agent is in a trade
+        Calculate tick-to-tick unrealized profit ratio with fees.
+
+        Returns simple return: (current - previous) / previous
+        Entry/exit fees are always applied to simulate closing the position.
+
+        Returns
+        -------
+        float
+            Profit ratio (not log-return)
+            - Long: (current_with_exit_fee - previous_with_entry_fee) / previous_with_entry_fee
+            - Short: (previous_with_entry_fee - current_with_exit_fee) / previous_with_entry_fee
+            - 0.0 if no trade, neutral position, or invalid prices
         """
         if self._last_trade_tick is None:
             return 0.0
         if self._position == Positions.Neutral:
             return 0.0
+
         elif self._position == Positions.Long:
             current_price = self.add_exit_fee(self.current_price())
             previous_price = self.add_entry_fee(self.previous_price())
+
+            if (
+                previous_price <= 0.0
+                or not np.isfinite(previous_price)
+                or current_price <= 0.0
+                or not np.isfinite(current_price)
+            ):
+                return 0.0
+
             return (current_price - previous_price) / previous_price
+
         elif self._position == Positions.Short:
-            current_price = self.add_entry_fee(self.current_price())
-            previous_price = self.add_exit_fee(self.previous_price())
+            current_price = self.add_exit_fee(self.current_price())
+            previous_price = self.add_entry_fee(self.previous_price())
+
+            if (
+                previous_price <= 0.0
+                or not np.isfinite(previous_price)
+                or current_price <= 0.0
+                or not np.isfinite(current_price)
+            ):
+                return 0.0
+
             return (previous_price - current_price) / previous_price
+
         return 0.0
 
     def previous_tick(self) -> int:
diff --git a/ReforceXY/user_data/strategies/RLAgentStrategy.py b/ReforceXY/user_data/strategies/RLAgentStrategy.py
index dadcce6..769e84c 100644
--- a/ReforceXY/user_data/strategies/RLAgentStrategy.py
+++ b/ReforceXY/user_data/strategies/RLAgentStrategy.py
@@ -3,6 +3,8 @@ import logging
 from functools import reduce
 from typing import Any, Final, Literal, Optional
 
+import numpy as np
+
 # import talib.abstract as ta
 from freqtrade.persistence import Trade
 from freqtrade.strategy import IStrategy
@@ -44,7 +46,8 @@ class RLAgentStrategy(IStrategy):
     def feature_engineering_expand_basic(
         self, dataframe: DataFrame, metadata: dict[str, Any], **kwargs
     ) -> DataFrame:
-        dataframe["%-close_pct_change"] = dataframe.get("close").pct_change()
+        # TODO [BREAKING]: Rename %-close_pct_change -> %-close_log_return
+        dataframe["%-close_pct_change"] = np.log(dataframe.get("close")).diff()
         dataframe["%-raw_volume"] = dataframe.get("volume")
 
         return dataframe
-- 
2.43.0