test(reforcexy): unpack properly after signature change

author Jérôme Benoit <jerome.benoit@piment-noir.org>
Sat, 15 Nov 2025 21:26:17 +0000 (22:26 +0100)

committer Jérôme Benoit <jerome.benoit@piment-noir.org>
Sat, 15 Nov 2025 21:26:17 +0000 (22:26 +0100)
diff --git a/ReforceXY/reward_space_analysis/tests/components/test_additives.py b/ReforceXY/reward_space_analysis/tests/components/test_additives.py

index 33a0869d3b146ba7e41c7d60b58f2249e36f3ce3..e675f47916a1fb0a5702e87ed52e76336a21e5de 100644 (file)
--- a/ReforceXY/reward_space_analysis/tests/components/test_additives.py
+++ b/ReforceXY/reward_space_analysis/tests/components/test_additives.py
@@ -45,8 +45,12 @@ class TestAdditivesDeterministicContribution(RewardSpaceTestBase):
              "is_entry": True,
              "is_exit": False,
          }
-        _t0, s0, _n0 = apply_potential_shaping(last_potential=0.0, params=base, **ctx)
-        t1, s1, _n1 = apply_potential_shaping(last_potential=0.0, params=with_add, **ctx)
+        _t0, s0, _n0, _pbrs0, _entry0, _exit0 = apply_potential_shaping(
+            last_potential=0.0, params=base, **ctx
+        )
+        t1, s1, _n1, _pbrs1, _entry1, _exit1 = apply_potential_shaping(
+            last_potential=0.0, params=with_add, **ctx
+        )
          self.assertFinite(t1)
          self.assertFinite(s1)
          self.assertLess(abs(s1 - s0), 0.2)
diff --git a/ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py b/ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py

index 109f51387b25bd933fb33aca3fb3946b04f5caf3..7a59e9f86591bbe084327d9b65f7bc62ad8e0276 100644 (file)
--- a/ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py
+++ b/ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py
@@ -194,7 +194,7 @@ class TestReportFormatting(RewardSpaceTestBase):
          content = self._write_report(df)
  
          # Verify PBRS Metrics section exists
-        self.assertIn("**PBRS Metrics (Tracing):**", content)
+        self.assertIn("**PBRS Metrics:**", content)
  
          # Verify key metrics are present
          required_metrics = [
diff --git a/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py b/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py

index 268791e8f34b593559725adb2b08c0d53c765ca9..8329bb4f6bb0c03ca0964338d33bbe1176df2044 100644 (file)
--- a/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py
+++ b/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py
@@ -56,7 +56,14 @@ class TestPBRS(RewardSpaceTestBase):
          current_pnl = 0.02
          current_dur = 0.5
          prev_potential = _compute_hold_potential(current_pnl, current_dur, params)
-        _total_reward, reward_shaping, next_potential = apply_potential_shaping(
+        (
+            _total_reward,
+            reward_shaping,
+            next_potential,
+            _pbrs_delta,
+            _entry_additive,
+            _exit_additive,
+        ) = apply_potential_shaping(
              base_reward=0.0,
              current_pnl=current_pnl,
              current_duration_ratio=current_dur,
@@ -93,7 +100,14 @@ class TestPBRS(RewardSpaceTestBase):
          expected_next_potential = (
              prev_potential / gamma if gamma not in (0.0, None) else prev_potential
          )
-        _total_reward, reward_shaping, next_potential = apply_potential_shaping(
+        (
+            _total_reward,
+            reward_shaping,
+            next_potential,
+            _pbrs_delta,
+            _entry_additive,
+            _exit_additive,
+        ) = apply_potential_shaping(
              base_reward=0.0,
              current_pnl=current_pnl,
              current_duration_ratio=current_dur,
@@ -190,16 +204,18 @@ class TestPBRS(RewardSpaceTestBase):
          current_duration_ratio = 0.4
          next_pnl = 0.0
          next_duration_ratio = 0.0
-        total, shaping, next_potential = apply_potential_shaping(
-            base_reward=base_reward,
-            current_pnl=current_pnl,
-            current_duration_ratio=current_duration_ratio,
-            next_pnl=next_pnl,
-            next_duration_ratio=next_duration_ratio,
-            is_exit=True,
-            is_entry=False,
-            last_potential=0.789,
-            params=params,
+        total, shaping, next_potential, _pbrs_delta, _entry_additive, _exit_additive = (
+            apply_potential_shaping(
+                base_reward=base_reward,
+                current_pnl=current_pnl,
+                current_duration_ratio=current_duration_ratio,
+                next_pnl=next_pnl,
+                next_duration_ratio=next_duration_ratio,
+                is_exit=True,
+                is_entry=False,
+                last_potential=0.789,
+                params=params,
+            )
          )
          self.assertIn("_pbrs_invariance_applied", params)
          self.assertFalse(
@@ -230,7 +246,7 @@ class TestPBRS(RewardSpaceTestBase):
              exit_additive_enabled=True,
          )
          terminal_next_potentials, shaping_values = self._canonical_sweep(params)
-        _t1, _s1, _n1 = apply_potential_shaping(
+        _t1, _s1, _n1, _pbrs_delta, _entry_additive, _exit_additive = apply_potential_shaping(
              base_reward=0.0,
              current_pnl=0.05,
              current_duration_ratio=0.3,
@@ -251,7 +267,7 @@ class TestPBRS(RewardSpaceTestBase):
          max_abs = max((abs(v) for v in shaping_values)) if shaping_values else 0.0
          self.assertLessEqual(max_abs, self.PBRS_MAX_ABS_SHAPING)
          state_after = (params["entry_additive_enabled"], params["exit_additive_enabled"])
-        _t2, _s2, _n2 = apply_potential_shaping(
+        _t2, _s2, _n2, _pbrs_delta2, _entry_additive2, _exit_additive2 = apply_potential_shaping(
              base_reward=0.0,
              current_pnl=0.02,
              current_duration_ratio=0.1,
@@ -274,15 +290,17 @@ class TestPBRS(RewardSpaceTestBase):
              hold_potential_enabled=True,
          )
          last_potential = 0.42
-        total, shaping, next_potential = apply_potential_shaping(
-            base_reward=0.0,
-            current_pnl=0.0,
-            current_duration_ratio=0.0,
-            next_pnl=0.0,
-            next_duration_ratio=0.0,
-            is_exit=True,
-            last_potential=last_potential,
-            params=params,
+        total, shaping, next_potential, _pbrs_delta, _entry_additive, _exit_additive = (
+            apply_potential_shaping(
+                base_reward=0.0,
+                current_pnl=0.0,
+                current_duration_ratio=0.0,
+                next_pnl=0.0,
+                next_duration_ratio=0.0,
+                is_exit=True,
+                last_potential=last_potential,
+                params=params,
+            )
          )
          self.assertPlacesEqual(next_potential, last_potential, places=12)
          gamma_raw = DEFAULT_MODEL_REWARD_PARAMETERS.get("potential_gamma", 0.95)
@@ -646,15 +664,17 @@ class TestPBRS(RewardSpaceTestBase):
                  exit_potential_mode="canonical",
                  potential_gamma=gamma,
              )
-            _tot, shap, next_pot = apply_potential_shaping(
-                base_reward=0.0,
-                current_pnl=0.02,
-                current_duration_ratio=0.3,
-                next_pnl=0.025,
-                next_duration_ratio=0.35,
-                is_exit=False,
-                last_potential=0.0,
-                params=params,
+            _tot, shap, next_pot, _pbrs_delta, _entry_additive, _exit_additive = (
+                apply_potential_shaping(
+                    base_reward=0.0,
+                    current_pnl=0.02,
+                    current_duration_ratio=0.3,
+                    next_pnl=0.025,
+                    next_duration_ratio=0.35,
+                    is_exit=False,
+                    last_potential=0.0,
+                    params=params,
+                )
              )
              self.assertTrue(np.isfinite(shap))
              self.assertTrue(np.isfinite(next_pot))
@@ -684,15 +704,17 @@ class TestPBRS(RewardSpaceTestBase):
              current_dur = float(rng.uniform(0, 1))
              next_pnl = 0.0 if is_exit else float(rng.normal(0, 0.05))
              next_dur = 0.0 if is_exit else float(rng.uniform(0, 1))
-            _tot, _shap, next_potential = apply_potential_shaping(
-                base_reward=0.0,
-                current_pnl=current_pnl,
-                current_duration_ratio=current_dur,
-                next_pnl=next_pnl,
-                next_duration_ratio=next_dur,
-                is_exit=is_exit,
-                last_potential=last_potential,
-                params=params,
+            _tot, _shap, next_potential, _pbrs_delta, _entry_additive, _exit_additive = (
+                apply_potential_shaping(
+                    base_reward=0.0,
+                    current_pnl=current_pnl,
+                    current_duration_ratio=current_dur,
+                    next_pnl=next_pnl,
+                    next_duration_ratio=next_dur,
+                    is_exit=is_exit,
+                    last_potential=last_potential,
+                    params=params,
+                )
              )
              inc = gamma * next_potential - last_potential
              telescoping_sum += inc
@@ -733,15 +755,17 @@ class TestPBRS(RewardSpaceTestBase):
              is_exit = rng.uniform() < 0.15
              next_pnl = 0.0 if is_exit else float(rng.normal(0, 0.07))
              next_dur = 0.0 if is_exit else float(rng.uniform(0, 1))
-            _tot, shap, next_pot = apply_potential_shaping(
-                base_reward=0.0,
-                current_pnl=float(rng.normal(0, 0.07)),
-                current_duration_ratio=float(rng.uniform(0, 1)),
-                next_pnl=next_pnl,
-                next_duration_ratio=next_dur,
-                is_exit=is_exit,
-                last_potential=last_potential,
-                params=params,
+            _tot, shap, next_pot, _pbrs_delta, _entry_additive, _exit_additive = (
+                apply_potential_shaping(
+                    base_reward=0.0,
+                    current_pnl=float(rng.normal(0, 0.07)),
+                    current_duration_ratio=float(rng.uniform(0, 1)),
+                    next_pnl=next_pnl,
+                    next_duration_ratio=next_dur,
+                    is_exit=is_exit,
+                    last_potential=last_potential,
+                    params=params,
+                )
              )
              shaping_sum += shap
              last_potential = 0.0 if is_exit else next_pot
diff --git a/ReforceXY/reward_space_analysis/tests/test_base.py b/ReforceXY/reward_space_analysis/tests/test_base.py

index 99913dcb06809e1b94246ad764b4c97503e54b66..1cf8b5696da658e787f960172299b255b36e804d 100644 (file)
--- a/ReforceXY/reward_space_analysis/tests/test_base.py
+++ b/ReforceXY/reward_space_analysis/tests/test_base.py
@@ -156,16 +156,18 @@ class RewardSpaceTestBase(unittest.TestCase):
              next_pnl = 0.0 if is_exit else float(rng.normal(0, 0.2))
              inc = rng.uniform(0, 0.12)
              next_dur = 0.0 if is_exit else float(min(1.0, current_dur + inc))
-            _tot, shap_val, next_pot = apply_potential_shaping(
-                base_reward=0.0,
-                current_pnl=current_pnl,
-                current_duration_ratio=current_dur,
-                next_pnl=next_pnl,
-                next_duration_ratio=next_dur,
-                is_exit=is_exit,
-                is_entry=False,
-                last_potential=last_potential,
-                params=params,
+            _tot, shap_val, next_pot, _pbrs_delta, _entry_additive, _exit_additive = (
+                apply_potential_shaping(
+                    base_reward=0.0,
+                    current_pnl=current_pnl,
+                    current_duration_ratio=current_dur,
+                    next_pnl=next_pnl,
+                    next_duration_ratio=next_dur,
+                    is_exit=is_exit,
+                    is_entry=False,
+                    last_potential=last_potential,
+                    params=params,
+                )
              )
              shaping_vals.append(shap_val)
              if is_exit:
author	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Sat, 15 Nov 2025 21:26:17 +0000 (22:26 +0100)
committer	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Sat, 15 Nov 2025 21:26:17 +0000 (22:26 +0100)
ReforceXY/reward_space_analysis/tests/components/test_additives.py		patch | blob | blame | history
ReforceXY/reward_space_analysis/tests/integration/test_report_formatting.py		patch | blob | blame | history
ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py		patch | blob | blame | history
ReforceXY/reward_space_analysis/tests/test_base.py		patch | blob | blame | history