perf(reforcexy): refine reward calculation

author Jérôme Benoit <jerome.benoit@piment-noir.org>

Sat, 13 Sep 2025 12:18:50 +0000 (14:18 +0200)

committer Jérôme Benoit <jerome.benoit@piment-noir.org>

Sat, 13 Sep 2025 12:18:50 +0000 (14:18 +0200)
author Jérôme Benoit <jerome.benoit@piment-noir.org>
Sat, 13 Sep 2025 12:18:50 +0000 (14:18 +0200)
committer Jérôme Benoit <jerome.benoit@piment-noir.org>
Sat, 13 Sep 2025 12:18:50 +0000 (14:18 +0200)
diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py

index 730ef81088b8fd8d008e0248fea8798a2ee246e3..b9b08e16d94a7d59a607e1bd4108def4a6d56243 100644 (file)
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -968,16 +968,7 @@ class ReforceXY(BaseReinforcementLearningModel):
              #         factor = 1
              #     return 25.0 * factor
  
-            # reward agent for entering trades
-            if (
-                action in (Actions.Long_enter.value, Actions.Short_enter.value)
-                and self._position == Positions.Neutral
-            ):
-                return self.rl_config.get("model_reward_parameters", {}).get(
-                    "enter_action", 1.0
-                )
-
-            # discourage agent from not entering trades
+            # discourage agent from sitting idle too long
              if action == Actions.Neutral.value and self._position == Positions.Neutral:
                  return float(
                      self.rl_config.get("model_reward_parameters", {}).get(
@@ -992,23 +983,31 @@ class ReforceXY(BaseReinforcementLearningModel):
              ):
                  duration_fraction = trade_duration / max_trade_duration
                  max_pnl = max(self.get_most_recent_max_pnl(), pnl)
+
                  if max_pnl > 0:
-                    drawdown_penalty = 0.0025 * factor * (max_pnl - pnl) * duration_fraction
+                    drawdown_penalty = (
+                        0.0025 * factor * (max_pnl - pnl) * duration_fraction
+                    )
                  else:
                      drawdown_penalty = 0.0
+
                  lambda1 = 0.05
                  lambda2 = 0.1
                  if pnl >= 0:
-                    if duration_fraction < 0.75:
+                    if duration_fraction <= 1.0:
                          duration_penalty_factor = 1.0
                      else:
-                        duration_penalty_factor = 1.0 / (1.0 + lambda1 * duration_fraction)
-                    return factor * pnl * duration_penalty_factor - lambda2 * duration_fraction - drawdown_penalty
+                        duration_penalty_factor = 1.0 / (
+                            1.0 + lambda1 * (duration_fraction - 1.0)
+                        )
+                    return (
+                        factor * pnl * duration_penalty_factor
+                        - lambda2 * duration_fraction
+                        - drawdown_penalty
+                    )
                  else:
                      return (
-                        factor
-                        * pnl
-                        * (1 + lambda1 * duration_fraction)
+                        factor * pnl * (1 + lambda1 * duration_fraction)
                          - 2 * lambda2 * duration_fraction
                          - drawdown_penalty
                      )
diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py

index 743750e23fd421f0990fd5d55f969b3dcbc56c7d..1ac569d606946d36d8af9643b08bcba45e168f66 100644 (file)
--- a/quickadapter/user_data/strategies/QuickAdapterV3.py
+++ b/quickadapter/user_data/strategies/QuickAdapterV3.py
@@ -1122,7 +1122,7 @@ class QuickAdapterV3(IStrategy):
          side: str,
          order: Literal["entry", "exit"],
          rate: float,
-        min_natr_ratio_percent: float = 0.00999,
+        min_natr_ratio_percent: float = 0.0099,
          max_natr_ratio_percent: float = 0.095,
          lookback_period: int = 1,
          decay_ratio: float = 0.5,
author	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Sat, 13 Sep 2025 12:18:50 +0000 (14:18 +0200)
committer	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Sat, 13 Sep 2025 12:18:50 +0000 (14:18 +0200)
ReforceXY/user_data/freqaimodels/ReforceXY.py		patch | blob | blame | history
quickadapter/user_data/strategies/QuickAdapterV3.py		patch | blob | blame | history