From f35597490a162610f7a415f18e5bb1e5e04f7e0d Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?=
Date: Thu, 25 Sep 2025 11:32:39 +0200
Subject: [PATCH] refactor(qav3): ensure safe hyperband pruner configuration
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jérôme Benoit
---
 ReforceXY/user_data/freqaimodels/ReforceXY.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py
index ec3fc13..fd7d60f 100644
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -742,8 +742,11 @@ class ReforceXY(BaseReinforcementLearningModel):
             resource_eval_freq = max(PPO_N_STEPS)
         else:
             resource_eval_freq = self.get_eval_freq(total_timesteps, hyperopt=True)
-        max_resource = max(1, total_timesteps // (resource_eval_freq * self.n_envs))
-        min_resource = min(3, max_resource)
+        reduction_factor = 3
+        max_resource = max(
+            reduction_factor * 2, total_timesteps // (resource_eval_freq * self.n_envs)
+        )
+        min_resource = min(reduction_factor, max_resource // reduction_factor)
         study: Study = create_study(
             study_name=study_name,
             sampler=TPESampler(
@@ -755,7 +758,7 @@ class ReforceXY(BaseReinforcementLearningModel):
             pruner=HyperbandPruner(
                 min_resource=min_resource,
                 max_resource=max_resource,
-                reduction_factor=3,
+                reduction_factor=reduction_factor,
             ),
             direction=StudyDirection.MAXIMIZE,
             storage=storage,
-- 
2.43.0