From 4677eee45aef7d0a0a3068a9fbcb0f80402ecf9d Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Mon, 20 Oct 2025 23:28:11 +0200 Subject: [PATCH] fix(reforcexy): avoid potential divide by 0 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- .../reward_space_analysis.py | 10 +++++----- ReforceXY/user_data/freqaimodels/ReforceXY.py | 20 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py index b118fe8..5ac7416 100644 --- a/ReforceXY/reward_space_analysis/reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py @@ -730,15 +730,15 @@ def _get_exit_factor( "exit_half_life", DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_half_life", 0.5), ) - if hl <= 0.0: - if _is_strict_validation(params): - raise ValueError(f"exit_half_life={hl} must be > 0 in strict mode") + if hl <= 0.0 and _is_strict_validation(params): + raise ValueError(f"exit_half_life={hl} must be > 0 in strict mode") + if np.isclose(hl, 0.0): warnings.warn( - f"exit_half_life={hl} <= 0; falling back to 0.0", + f"exit_half_life={hl} close to 0; falling back to 1.0", RewardDiagnosticsWarning, stacklevel=2, ) - hl = 0.0 + return 1.0 return f * math.pow(2.0, -dr / hl) kernels = { diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index 9fa91b7..4b3be9a 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -2148,8 +2148,8 @@ class MyRLEnv(Base5ActionRLEnv): def _half_life(f: float, dr: float, p: Mapping) -> float: hl = float(p.get("exit_half_life", 0.5)) - if hl <= 0.0: - hl = 0.0 + if np.isclose(hl, 0.0) or hl < 0.0: + return 1.0 return f * math.pow(2.0, -dr / hl) strategies: Dict[str, Callable[[float, float, Mapping], float]] = { @@ -2522,14 +2522,14 @@ class MyRLEnv(Base5ActionRLEnv): if terminated: # Enforce Φ(terminal)=0 for PBRS invariance (Wiewiora et al. 2003) self._last_potential = 0.0 - eps = 1e-6 - if self.is_pbrs_invariant_mode() and abs(self._total_reward_shaping) > eps: - logger.warning( - "PBRS mode %s invariance deviation: |sum Δ|=%.6f > eps=%.6f", - self._exit_potential_mode, - self._total_reward_shaping, - eps, - ) + # eps = np.finfo(float).eps + # if self.is_pbrs_invariant_mode() and abs(self._total_reward_shaping) > eps: + # logger.warning( + # "PBRS mode %s invariance deviation: |sum Δ|=%.6f > eps=%.6f", + # self._exit_potential_mode, + # abs(self._total_reward_shaping), + # eps, + # ) return ( self._get_observation(), reward, -- 2.43.0