Piment Noir Git Repositories - freqai-strategies.git/commitdiff
perf(reforcexy): refine reward calculation
author: Jérôme Benoit <jerome.benoit@piment-noir.org>
Sat, 13 Sep 2025 12:18:50 +0000 (14:18 +0200)
committer: Jérôme Benoit <jerome.benoit@piment-noir.org>
Sat, 13 Sep 2025 12:18:50 +0000 (14:18 +0200)
Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
ReforceXY/user_data/freqaimodels/ReforceXY.py
quickadapter/user_data/strategies/QuickAdapterV3.py

index 730ef81088b8fd8d008e0248fea8798a2ee246e3..b9b08e16d94a7d59a607e1bd4108def4a6d56243 100644 (file)
@@ -968,16 +968,7 @@ class ReforceXY(BaseReinforcementLearningModel):
             #         factor = 1
             #     return 25.0 * factor
 
-            # reward agent for entering trades
-            if (
-                action in (Actions.Long_enter.value, Actions.Short_enter.value)
-                and self._position == Positions.Neutral
-            ):
-                return self.rl_config.get("model_reward_parameters", {}).get(
-                    "enter_action", 1.0
-                )
-
-            # discourage agent from not entering trades
+            # discourage agent from sitting idle too long
             if action == Actions.Neutral.value and self._position == Positions.Neutral:
                 return float(
                     self.rl_config.get("model_reward_parameters", {}).get(
@@ -992,23 +983,31 @@ class ReforceXY(BaseReinforcementLearningModel):
             ):
                 duration_fraction = trade_duration / max_trade_duration
                 max_pnl = max(self.get_most_recent_max_pnl(), pnl)
+
                 if max_pnl > 0:
-                    drawdown_penalty = 0.0025 * factor * (max_pnl - pnl) * duration_fraction
+                    drawdown_penalty = (
+                        0.0025 * factor * (max_pnl - pnl) * duration_fraction
+                    )
                 else:
                     drawdown_penalty = 0.0
+
                 lambda1 = 0.05
                 lambda2 = 0.1
                 if pnl >= 0:
-                    if duration_fraction < 0.75:
+                    if duration_fraction <= 1.0:
                         duration_penalty_factor = 1.0
                     else:
-                        duration_penalty_factor = 1.0 / (1.0 + lambda1 * duration_fraction)
-                    return factor * pnl * duration_penalty_factor - lambda2 * duration_fraction - drawdown_penalty
+                        duration_penalty_factor = 1.0 / (
+                            1.0 + lambda1 * (duration_fraction - 1.0)
+                        )
+                    return (
+                        factor * pnl * duration_penalty_factor
+                        - lambda2 * duration_fraction
+                        - drawdown_penalty
+                    )
                 else:
                     return (
-                        factor
-                        * pnl
-                        * (1 + lambda1 * duration_fraction)
+                        factor * pnl * (1 + lambda1 * duration_fraction)
                         - 2 * lambda2 * duration_fraction
                         - drawdown_penalty
                     )
index 743750e23fd421f0990fd5d55f969b3dcbc56c7d..1ac569d606946d36d8af9643b08bcba45e168f66 100644 (file)
@@ -1122,7 +1122,7 @@ class QuickAdapterV3(IStrategy):
         side: str,
         order: Literal["entry", "exit"],
         rate: float,
-        min_natr_ratio_percent: float = 0.00999,
+        min_natr_ratio_percent: float = 0.0099,
         max_natr_ratio_percent: float = 0.095,
         lookback_period: int = 1,
         decay_ratio: float = 0.5,