]> Piment Noir Git Repositories - freqai-strategies.git/commitdiff
refactor(reforcexy): align PBRS params namespace
author Jérôme Benoit <jerome.benoit@piment-noir.org>
Wed, 15 Oct 2025 21:23:42 +0000 (23:23 +0200)
committer Jérôme Benoit <jerome.benoit@piment-noir.org>
Wed, 15 Oct 2025 21:23:42 +0000 (23:23 +0200)
Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
ReforceXY/reward_space_analysis/README.md
ReforceXY/reward_space_analysis/reward_space_analysis.py
ReforceXY/reward_space_analysis/test_cli.py
ReforceXY/reward_space_analysis/test_reward_space_analysis.py
ReforceXY/user_data/freqaimodels/ReforceXY.py

index 6ddaa4b8abe4d44d1ac0dc743ef55ebd17f0542d..68e87e7f4382d7cf9f4203a64d53516abcf321ee 100644 (file)
@@ -6,7 +6,7 @@ Deterministic synthetic sampling with diagnostics for reward shaping, penalties,
 
 - Scalable synthetic scenario generation (reproducible)
 - Reward component decomposition & bounds checks
-- PBRS modes: canonical, non-canonical, progressive_release, spike_cancel, retain_previous
+- PBRS modes: canonical, non_canonical, progressive_release, spike_cancel, retain_previous
 - Feature importance & optional partial dependence
 - Statistical tests (hypothesis, bootstrap CIs, distribution diagnostics)
 - Real vs synthetic shift metrics
@@ -193,7 +193,7 @@ Core frequently tuned parameters:
 | `exit_linear_slope` | 1.0 | Linear kernel slope |
 | `exit_power_tau` | 0.5 | Tau controlling power kernel decay (0,1] |
 | `exit_half_life` | 0.5 | Half-life for half_life kernel |
-| `potential_gamma` | 0.9 | PBRS discount γ |
+| `potential_gamma` | 0.95 | PBRS discount γ |
 | `exit_potential_mode` | canonical | Exit potential mode |
 | `efficiency_weight` | 1.0 | Efficiency contribution weight |
 | `efficiency_center` | 0.5 | Efficiency pivot in [0,1] |
@@ -391,7 +391,7 @@ python reward_space_analysis.py \
 # Non-canonical PBRS (allows additives with Φ(terminal)=0, breaks invariance)
 python reward_space_analysis.py \
     --num_samples 25000 \
-    --params hold_potential_enabled=true entry_additive_enabled=true exit_additive_enabled=true exit_potential_mode=non-canonical \
+    --params hold_potential_enabled=true entry_additive_enabled=true exit_additive_enabled=true exit_potential_mode=non_canonical \
     --out_dir pbrs_non_canonical
 
 python reward_space_analysis.py \
index b51cd50bf9158b01f918381ab041a0b7882edb7c..e878df7b7d5dda36f505d671f1159fbe02d4001b 100644 (file)
@@ -78,7 +78,7 @@ ALLOWED_TRANSFORMS = {
 }
 ALLOWED_EXIT_POTENTIAL_MODES = {
     "canonical",
-    "non-canonical",
+    "non_canonical",
     "progressive_release",
     "spike_cancel",
     "retain_previous",
@@ -115,7 +115,7 @@ DEFAULT_MODEL_REWARD_PARAMETERS: RewardParams = {
     # Potential-based reward shaping core parameters
     # Discount factor γ for potential term (0 ≤ γ ≤ 1)
     "potential_gamma": POTENTIAL_GAMMA_DEFAULT,
-    # Exit potential modes: canonical | non-canonical | progressive_release | spike_cancel | retain_previous
+    # Exit potential modes: canonical | non_canonical | progressive_release | spike_cancel | retain_previous
     "exit_potential_mode": "canonical",
     "exit_potential_decay": 0.5,
     # Hold potential (PBRS function Φ)
@@ -160,7 +160,7 @@ DEFAULT_MODEL_REWARD_PARAMETERS_HELP: Dict[str, str] = {
     "exit_factor_threshold": "Warn if |exit_factor| exceeds",
     # PBRS parameters
     "potential_gamma": "PBRS discount γ (0–1)",
-    "exit_potential_mode": "Exit potential mode (canonical|non-canonical|progressive_release|spike_cancel|retain_previous)",
+    "exit_potential_mode": "Exit potential mode (canonical|non_canonical|progressive_release|spike_cancel|retain_previous)",
     "exit_potential_decay": "Decay for progressive_release (0–1)",
     "hold_potential_enabled": "Enable hold potential Φ",
     "hold_potential_scale": "Hold potential scale",
@@ -2381,13 +2381,13 @@ def _compute_exit_additive(
 
 
 def _compute_exit_potential(last_potential: float, params: RewardParams) -> float:
-    """Exit potential per mode (canonical/non-canonical -> 0; others transform Φ)."""
+    """Exit potential per mode (canonical/non_canonical -> 0; others transform Φ)."""
     mode = _get_str_param(
         params,
         "exit_potential_mode",
         str(DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_potential_mode", "canonical")),
     )
-    if mode == "canonical" or mode == "non-canonical":
+    if mode == "canonical" or mode == "non_canonical":
         return _fail_safely("canonical_exit_potential")
 
     if mode == "progressive_release":
index da0c37d27509b546e892cd59cedba5b6bd2d4210..f7b40b40b95bf7b1411a172531f04f07323ba312 100644 (file)
@@ -79,7 +79,7 @@ def build_arg_matrix(
 ) -> List[ConfigTuple]:
     exit_potential_modes = [
         "canonical",
-        "non-canonical",
+        "non_canonical",
         "progressive_release",
         "retain_previous",
         "spike_cancel",
index 400a79f8635d644f963cf3b74cf1221a5bf6f169..c1f99b8259f31e5fb44c808c8b15581b723fa9df 100644 (file)
@@ -3398,13 +3398,13 @@ class TestReportFormatting(RewardSpaceTestBase):
 
     def test_additive_activation_deterministic_contribution(self):
         """Additives enabled increase total reward; shaping impact limited."""
-        # Use a non-canonical exit mode to avoid automatic invariance enforcement
+        # Use a non_canonical exit mode to avoid automatic invariance enforcement
         # disabling the additive components on first call (canonical path auto-disables).
         base = self.base_params(
             hold_potential_enabled=True,
             entry_additive_enabled=False,
             exit_additive_enabled=False,
-            exit_potential_mode="non-canonical",
+            exit_potential_mode="non_canonical",
         )
         with_add = base.copy()
         with_add.update(
index da2c66b9158cfb0e4432dd607e2080bc3b8962fb..08b4ac42ff4384a7b2fedd6182e021f60aec7d3b 100644 (file)
@@ -1363,7 +1363,7 @@ class MyRLEnv(Base5ActionRLEnv):
         # === EXIT POTENTIAL MODE ===
         # exit_potential_mode options:
         #   'canonical'           -> Φ(s')=0 (preserves invariance, disables additives)
-        #   'non-canonical'       -> Φ(s')=0 (allows additives, breaks invariance)
+        #   'non_canonical'       -> Φ(s')=0 (allows additives, breaks invariance)
         #   'progressive_release' -> Φ(s')=Φ(s)*(1-decay_factor)
         #   'spike_cancel'        -> Φ(s')=Φ(s)/γ (Δ ≈ 0, cancels shaping)
         #   'retain_previous'     -> Φ(s')=Φ(s)
@@ -1372,7 +1372,7 @@ class MyRLEnv(Base5ActionRLEnv):
         )
         _allowed_exit_modes = {
             "canonical",
-            "non-canonical",
+            "non_canonical",
             "progressive_release",
             "spike_cancel",
             "retain_previous",
@@ -1439,11 +1439,11 @@ class MyRLEnv(Base5ActionRLEnv):
             if self._entry_additive_enabled or self._exit_additive_enabled:
                 logger.info(
                     "Canonical mode: additive rewards disabled with Φ(terminal)=0. PBRS invariance is preserved. "
-                    "To use additive rewards, set exit_potential_mode='non-canonical'."
+                    "To use additive rewards, set exit_potential_mode='non_canonical'."
                 )
                 self._entry_additive_enabled = False
                 self._exit_additive_enabled = False
-        elif self._exit_potential_mode == "non-canonical":
+        elif self._exit_potential_mode == "non_canonical":
             if self._entry_additive_enabled or self._exit_additive_enabled:
                 logger.info(
                     "Non-canonical mode: additive rewards enabled with Φ(terminal)=0. PBRS invariance is intentionally broken."
@@ -1701,7 +1701,7 @@ class MyRLEnv(Base5ActionRLEnv):
         See ``_apply_potential_shaping`` for complete PBRS documentation.
         """
         mode = self._exit_potential_mode
-        if mode == "canonical" or mode == "non-canonical":
+        if mode == "canonical" or mode == "non_canonical":
             return 0.0
         if mode == "progressive_release":
             decay = self._exit_potential_decay
@@ -1959,7 +1959,7 @@ class MyRLEnv(Base5ActionRLEnv):
         elif is_exit:
             if (
                 self._exit_potential_mode == "canonical"
-                or self._exit_potential_mode == "non-canonical"
+                or self._exit_potential_mode == "non_canonical"
             ):
                 next_potential = 0.0
                 exit_shaping_reward = -prev_potential