"exit_half_life",
DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_half_life", 0.5),
)
- if hl <= 0.0:
- if _is_strict_validation(params):
- raise ValueError(f"exit_half_life={hl} must be > 0 in strict mode")
+ if hl <= 0.0 and _is_strict_validation(params):
+ raise ValueError(f"exit_half_life={hl} must be > 0 in strict mode")
+ if np.isclose(hl, 0.0):
warnings.warn(
- f"exit_half_life={hl} <= 0; falling back to 0.0",
+ f"exit_half_life={hl} close to 0; falling back to 1.0",
RewardDiagnosticsWarning,
stacklevel=2,
)
- hl = 0.0
+ return 1.0
return f * math.pow(2.0, -dr / hl)
kernels = {
def _half_life(f: float, dr: float, p: Mapping) -> float:
hl = float(p.get("exit_half_life", 0.5))
- if hl <= 0.0:
- hl = 0.0
+ if np.isclose(hl, 0.0) or hl < 0.0:
+ return 1.0
return f * math.pow(2.0, -dr / hl)
strategies: Dict[str, Callable[[float, float, Mapping], float]] = {
if terminated:
# Enforce Φ(terminal)=0 for PBRS invariance (Wiewiora et al. 2003)
self._last_potential = 0.0
- eps = 1e-6
- if self.is_pbrs_invariant_mode() and abs(self._total_reward_shaping) > eps:
- logger.warning(
- "PBRS mode %s invariance deviation: |sum Δ|=%.6f > eps=%.6f",
- self._exit_potential_mode,
- self._total_reward_shaping,
- eps,
- )
+ # eps = np.finfo(float).eps
+ # if self.is_pbrs_invariant_mode() and abs(self._total_reward_shaping) > eps:
+ # logger.warning(
+ # "PBRS mode %s invariance deviation: |sum Δ|=%.6f > eps=%.6f",
+ # self._exit_potential_mode,
+ # abs(self._total_reward_shaping),
+ # eps,
+ # )
return (
self._get_observation(),
reward,