From 9b7fb4bcee63f6276c4af2da962ecae02dc07a98 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Sat, 11 Oct 2025 21:18:58 +0200 Subject: [PATCH] fix(reforcexy): use a stable reward metric for optuna best trial selection MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- ReforceXY/user_data/freqaimodels/ReforceXY.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index 503551e..1388032 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -1232,7 +1232,7 @@ class ReforceXY(BaseReinforcementLearningModel): if self.optuna_eval_callback.is_pruned: raise TrialPruned() - return self.optuna_eval_callback.last_mean_reward + return self.optuna_eval_callback.best_mean_reward def close_envs(self) -> None: """ -- 2.43.0