Piment Noir Git Repositories - freqai-strategies.git/commitdiff
perf(reforcexy): asymmetric reward for opened trades at loss
author Jérôme Benoit <jerome.benoit@piment-noir.org>
Wed, 26 Feb 2025 14:25:17 +0000 (15:25 +0100)
committer Jérôme Benoit <jerome.benoit@piment-noir.org>
Wed, 26 Feb 2025 14:25:17 +0000 (15:25 +0100)
Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
ReforceXY/user_data/freqaimodels/ReforceXY.py
quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py
quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py

index b3575f9087b76cc093f1edcda23c8bc6441061f6..a8dcc41534b4881a541c7084b270540a244c70f5 100644 (file)
@@ -804,15 +804,27 @@ class ReforceXY(BaseReinforcementLearningModel):
                     )
                 )
 
-            # pnl aware reward for sitting in position
+            # pnl and duration aware reward for sitting in position
             if (
                 self._position in (Positions.Short, Positions.Long)
                 and action == Actions.Neutral.value
             ):
-                return (
-                    factor * pnl * np.exp(-0.05 * trade_duration)
-                    - trade_duration / max_trade_duration
-                )
+                # peak_pnl = np.max(self.get_most_recent_peak_pnl(), pnl)
+                # if peak_pnl > 0:
+                #     drawdown_penalty = 0.01 * factor * (peak_pnl - pnl)
+                # else:
+                #     drawdown_penalty = 0.0
+                lambda1 = 0.05
+                if pnl >= 0:
+                    return (
+                        factor * pnl * np.exp(-lambda1 * trade_duration)
+                        - trade_duration / max_trade_duration
+                    )
+                else:
+                    return (
+                        factor * pnl * (1 + lambda1 * trade_duration)
+                        - 2 * trade_duration / max_trade_duration
+                    )
 
             # close long
             if action == Actions.Long_exit.value and self._position == Positions.Long:
@@ -1023,6 +1035,11 @@ class ReforceXY(BaseReinforcementLearningModel):
                 return self._current_tick - self._start_tick
             return self._current_tick - self._last_closed_trade_tick
 
+        def get_most_recent_peak_pnl(self) -> float:
+            return (
+                np.max(self.history.get("pnl")) if self.history.get("pnl") else -np.inf
+            )
+
         def get_most_recent_return(self) -> float:
             """
             Calculate the tick to tick return if in a trade.
index 8a50f79226736cb799a9cc18c58f2ac74edff460..625ddc3df0a1d60e55298e0e1f1a986c7c4d9ee5 100644 (file)
@@ -199,14 +199,13 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel):
             dk.data["extra_returns_per_train"][MAXIMA_THRESHOLD_COLUMN] = max_pred
 
         dk.data["labels_mean"], dk.data["labels_std"] = {}, {}
-        # for label in dk.label_list + dk.unique_class_list:
-        for label in dk.label_list:
+        for label in dk.label_list + dk.unique_class_list:
             if pred_df_full[label].dtype == object:
                 continue
-            if not warmed_up:
-            f = [0, 0]
-            else:
-                f = spy.stats.norm.fit(pred_df_full[label])
+            if not warmed_up:
+                f = [0, 0]
+            else:
+                f = spy.stats.norm.fit(pred_df_full[label])
             dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1]
 
         # fit the DI_threshold
index a1438bdc0c105349953f34324377a65fd8988d60..58dd4175a3474944bbcbeebdac82f4af63a3b0f9 100644 (file)
@@ -200,14 +200,13 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel):
             dk.data["extra_returns_per_train"][MAXIMA_THRESHOLD_COLUMN] = max_pred
 
         dk.data["labels_mean"], dk.data["labels_std"] = {}, {}
-        # for label in dk.label_list + dk.unique_class_list:
-        for label in dk.label_list:
+        for label in dk.label_list + dk.unique_class_list:
             if pred_df_full[label].dtype == object:
                 continue
-            if not warmed_up:
-            f = [0, 0]
-            else:
-                f = spy.stats.norm.fit(pred_df_full[label])
+            if not warmed_up:
+                f = [0, 0]
+            else:
+                f = spy.stats.norm.fit(pred_df_full[label])
             dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1]
 
         # fit the DI_threshold