Piment Noir Git Repositories - freqai-strategies.git/commitdiff
fix(reforcexy): avoid potential divide by 0
author Jérôme Benoit <jerome.benoit@piment-noir.org>
Mon, 20 Oct 2025 21:28:11 +0000 (23:28 +0200)
committer Jérôme Benoit <jerome.benoit@piment-noir.org>
Mon, 20 Oct 2025 21:28:11 +0000 (23:28 +0200)
Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
ReforceXY/reward_space_analysis/reward_space_analysis.py
ReforceXY/user_data/freqaimodels/ReforceXY.py

index b118fe89c0771def63df386da9b294a85c00e29e..5ac7416d17a2001612005b54ecd7ebf63308e958 100644 (file)
@@ -730,15 +730,15 @@ def _get_exit_factor(
             "exit_half_life",
             DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_half_life", 0.5),
         )
-        if hl <= 0.0:
-            if _is_strict_validation(params):
-                raise ValueError(f"exit_half_life={hl} must be > 0 in strict mode")
+        if hl <= 0.0 and _is_strict_validation(params):
+            raise ValueError(f"exit_half_life={hl} must be > 0 in strict mode")
+        if np.isclose(hl, 0.0):
             warnings.warn(
-                f"exit_half_life={hl} <= 0; falling back to 0.0",
+                f"exit_half_life={hl} close to 0; falling back to 1.0",
                 RewardDiagnosticsWarning,
                 stacklevel=2,
             )
-            hl = 0.0
+            return 1.0
         return f * math.pow(2.0, -dr / hl)
 
     kernels = {
index 9fa91b729084980753861b51f8efc26300a1974f..4b3be9ab506800a0d9d2578c2fdfdbd71ec4bb4c 100644 (file)
@@ -2148,8 +2148,8 @@ class MyRLEnv(Base5ActionRLEnv):
 
         def _half_life(f: float, dr: float, p: Mapping) -> float:
             hl = float(p.get("exit_half_life", 0.5))
-            if hl <= 0.0:
-                hl = 0.0
+            if np.isclose(hl, 0.0) or hl < 0.0:
+                return 1.0
             return f * math.pow(2.0, -dr / hl)
 
         strategies: Dict[str, Callable[[float, float, Mapping], float]] = {
@@ -2522,14 +2522,14 @@ class MyRLEnv(Base5ActionRLEnv):
         if terminated:
             # Enforce Φ(terminal)=0 for PBRS invariance (Wiewiora et al. 2003)
             self._last_potential = 0.0
-            eps = 1e-6
-            if self.is_pbrs_invariant_mode() and abs(self._total_reward_shaping) > eps:
-                logger.warning(
-                    "PBRS mode %s invariance deviation: |sum Δ|=%.6f > eps=%.6f",
-                    self._exit_potential_mode,
-                    self._total_reward_shaping,
-                    eps,
-                )
+            eps = np.finfo(float).eps
+            if self.is_pbrs_invariant_mode() and abs(self._total_reward_shaping) > eps:
+                logger.warning(
+                    "PBRS mode %s invariance deviation: |sum Δ|=%.6f > eps=%.6f",
+                    self._exit_potential_mode,
+                    abs(self._total_reward_shaping),
+                    eps,
+                )
         return (
             self._get_observation(),
             reward,