- Scalable synthetic scenario generation (reproducible)
- Reward component decomposition & bounds checks
-- PBRS modes: canonical, non-canonical, progressive_release, spike_cancel, retain_previous
+- PBRS modes: canonical, non_canonical, progressive_release, spike_cancel, retain_previous
- Feature importance & optional partial dependence
- Statistical tests (hypothesis, bootstrap CIs, distribution diagnostics)
- Real vs synthetic shift metrics
| `exit_linear_slope` | 1.0 | Linear kernel slope |
| `exit_power_tau` | 0.5 | Tau controlling power kernel decay; must lie in (0,1] |
| `exit_half_life` | 0.5 | Half-life for half_life kernel |
-| `potential_gamma` | 0.9 | PBRS discount γ |
+| `potential_gamma` | 0.95 | PBRS discount γ |
| `exit_potential_mode` | canonical | Exit potential mode |
| `efficiency_weight` | 1.0 | Efficiency contribution weight |
| `efficiency_center` | 0.5 | Efficiency pivot in [0,1] |
# Non-canonical PBRS (allows additives with Φ(terminal)=0, breaks invariance)
python reward_space_analysis.py \
--num_samples 25000 \
- --params hold_potential_enabled=true entry_additive_enabled=true exit_additive_enabled=true exit_potential_mode=non-canonical \
+ --params hold_potential_enabled=true entry_additive_enabled=true exit_additive_enabled=true exit_potential_mode=non_canonical \
--out_dir pbrs_non_canonical
python reward_space_analysis.py \
}
ALLOWED_EXIT_POTENTIAL_MODES = {
"canonical",
- "non-canonical",
+ "non_canonical",
"progressive_release",
"spike_cancel",
"retain_previous",
# Potential-based reward shaping core parameters
# Discount factor γ for potential term (0 ≤ γ ≤ 1)
"potential_gamma": POTENTIAL_GAMMA_DEFAULT,
- # Exit potential modes: canonical | non-canonical | progressive_release | spike_cancel | retain_previous
+ # Exit potential modes: canonical | non_canonical | progressive_release | spike_cancel | retain_previous
"exit_potential_mode": "canonical",
"exit_potential_decay": 0.5,
# Hold potential (PBRS function Φ)
"exit_factor_threshold": "Warn if |exit_factor| exceeds",
# PBRS parameters
"potential_gamma": "PBRS discount γ (0–1)",
- "exit_potential_mode": "Exit potential mode (canonical|non-canonical|progressive_release|spike_cancel|retain_previous)",
+ "exit_potential_mode": "Exit potential mode (canonical|non_canonical|progressive_release|spike_cancel|retain_previous)",
"exit_potential_decay": "Decay for progressive_release (0–1)",
"hold_potential_enabled": "Enable hold potential Φ",
"hold_potential_scale": "Hold potential scale",
def _compute_exit_potential(last_potential: float, params: RewardParams) -> float:
- """Exit potential per mode (canonical/non-canonical -> 0; others transform Φ)."""
+ """Exit potential per mode (canonical/non_canonical -> 0; others transform Φ)."""
mode = _get_str_param(
params,
"exit_potential_mode",
str(DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_potential_mode", "canonical")),
)
- if mode == "canonical" or mode == "non-canonical":
+ if mode == "canonical" or mode == "non_canonical":
return _fail_safely("canonical_exit_potential")
if mode == "progressive_release":
) -> List[ConfigTuple]:
exit_potential_modes = [
"canonical",
- "non-canonical",
+ "non_canonical",
"progressive_release",
"retain_previous",
"spike_cancel",
def test_additive_activation_deterministic_contribution(self):
"""Additives enabled increase total reward; shaping impact limited."""
- # Use a non-canonical exit mode to avoid automatic invariance enforcement
+ # Use the 'non_canonical' exit mode: the canonical path enforces PBRS invariance
# by auto-disabling the additive components on first call.
base = self.base_params(
hold_potential_enabled=True,
entry_additive_enabled=False,
exit_additive_enabled=False,
- exit_potential_mode="non-canonical",
+ exit_potential_mode="non_canonical",
)
with_add = base.copy()
with_add.update(
# === EXIT POTENTIAL MODE ===
# exit_potential_mode options:
# 'canonical' -> Φ(s')=0 (preserves invariance, disables additives)
- # 'non-canonical' -> Φ(s')=0 (allows additives, breaks invariance)
+ # 'non_canonical' -> Φ(s')=0 (allows additives, breaks invariance)
# 'progressive_release' -> Φ(s')=Φ(s)*(1-decay_factor)
# 'spike_cancel' -> Φ(s')=Φ(s)/γ (Δ ≈ 0, cancels shaping)
# 'retain_previous' -> Φ(s')=Φ(s)
)
_allowed_exit_modes = {
"canonical",
- "non-canonical",
+ "non_canonical",
"progressive_release",
"spike_cancel",
"retain_previous",
if self._entry_additive_enabled or self._exit_additive_enabled:
logger.info(
"Canonical mode: additive rewards disabled with Φ(terminal)=0. PBRS invariance is preserved. "
- "To use additive rewards, set exit_potential_mode='non-canonical'."
+ "To use additive rewards, set exit_potential_mode='non_canonical'."
)
self._entry_additive_enabled = False
self._exit_additive_enabled = False
- elif self._exit_potential_mode == "non-canonical":
+ elif self._exit_potential_mode == "non_canonical":
if self._entry_additive_enabled or self._exit_additive_enabled:
logger.info(
"Non-canonical mode: additive rewards enabled with Φ(terminal)=0. PBRS invariance is intentionally broken."
See ``_apply_potential_shaping`` for complete PBRS documentation.
"""
mode = self._exit_potential_mode
- if mode == "canonical" or mode == "non-canonical":
+ if mode == "canonical" or mode == "non_canonical":
return 0.0
if mode == "progressive_release":
decay = self._exit_potential_decay
elif is_exit:
if (
self._exit_potential_mode == "canonical"
- or self._exit_potential_mode == "non-canonical"
+ or self._exit_potential_mode == "non_canonical"
):
next_potential = 0.0
exit_shaping_reward = -prev_potential