From: Jérôme Benoit Date: Wed, 26 Feb 2025 14:25:17 +0000 (+0100) Subject: perf(reforcexy): asymmetric reward for opened trades at loss X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=8eff5e9747230ceb03330340231057b384a1db71;p=freqai-strategies.git perf(reforcexy): asymmetric reward for opened trades at loss Signed-off-by: Jérôme Benoit --- diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index b3575f9..a8dcc41 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -804,15 +804,27 @@ class ReforceXY(BaseReinforcementLearningModel): ) ) - # pnl aware reward for sitting in position + # pnl and duration aware reward for sitting in position if ( self._position in (Positions.Short, Positions.Long) and action == Actions.Neutral.value ): - return ( - factor * pnl * np.exp(-0.05 * trade_duration) - - trade_duration / max_trade_duration - ) + # peak_pnl = np.max(self.get_most_recent_peak_pnl(), pnl) + # if peak_pnl > 0: + # drawdown_penalty = 0.01 * factor * (peak_pnl - pnl) + # else: + # drawdown_penalty = 0.0 + lambda1 = 0.05 + if pnl >= 0: + return ( + factor * pnl * np.exp(-lambda1 * trade_duration) + - trade_duration / max_trade_duration + ) + else: + return ( + factor * pnl * (1 + lambda1 * trade_duration) + - 2 * trade_duration / max_trade_duration + ) # close long if action == Actions.Long_exit.value and self._position == Positions.Long: @@ -1023,6 +1035,11 @@ class ReforceXY(BaseReinforcementLearningModel): return self._current_tick - self._start_tick return self._current_tick - self._last_closed_trade_tick + def get_most_recent_peak_pnl(self) -> float: + return ( + np.max(self.history.get("pnl")) if self.history.get("pnl") else -np.inf + ) + def get_most_recent_return(self) -> float: """ Calculate the tick to tick return if in a trade. 
diff --git a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py index 8a50f79..625ddc3 100644 --- a/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/LightGBMRegressorQuickAdapterV35.py @@ -199,14 +199,13 @@ class LightGBMRegressorQuickAdapterV35(BaseRegressionModel): dk.data["extra_returns_per_train"][MAXIMA_THRESHOLD_COLUMN] = max_pred dk.data["labels_mean"], dk.data["labels_std"] = {}, {} - # for label in dk.label_list + dk.unique_class_list: - for label in dk.label_list: + for label in dk.label_list + dk.unique_class_list: if pred_df_full[label].dtype == object: continue - # if not warmed_up: - f = [0, 0] - # else: - # f = spy.stats.norm.fit(pred_df_full[label]) + if not warmed_up: + f = [0, 0] + else: + f = spy.stats.norm.fit(pred_df_full[label]) dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1] # fit the DI_threshold diff --git a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py index a1438bd..58dd417 100644 --- a/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py +++ b/quickadapter/user_data/freqaimodels/XGBoostRegressorQuickAdapterV35.py @@ -200,14 +200,13 @@ class XGBoostRegressorQuickAdapterV35(BaseRegressionModel): dk.data["extra_returns_per_train"][MAXIMA_THRESHOLD_COLUMN] = max_pred dk.data["labels_mean"], dk.data["labels_std"] = {}, {} - # for label in dk.label_list + dk.unique_class_list: - for label in dk.label_list: + for label in dk.label_list + dk.unique_class_list: if pred_df_full[label].dtype == object: continue - # if not warmed_up: - f = [0, 0] - # else: - # f = spy.stats.norm.fit(pred_df_full[label]) + if not warmed_up: + f = [0, 0] + else: + f = spy.stats.norm.fit(pred_df_full[label]) dk.data["labels_mean"][label], dk.data["labels_std"][label] = 
f[0], f[1] # fit the DI_threshold