refactor(reforcexy): PBRS transform namespace cleanup

author Jérôme Benoit <jerome.benoit@piment-noir.org>

Wed, 15 Oct 2025 09:42:46 +0000 (11:42 +0200)

committer Jérôme Benoit <jerome.benoit@piment-noir.org>

Wed, 15 Oct 2025 09:42:46 +0000 (11:42 +0200)
author Jérôme Benoit <jerome.benoit@piment-noir.org>
Wed, 15 Oct 2025 09:42:46 +0000 (11:42 +0200)
committer Jérôme Benoit <jerome.benoit@piment-noir.org>
Wed, 15 Oct 2025 09:42:46 +0000 (11:42 +0200)
diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md

index 5e0fcf1a93e989ad1a1adb08189adca1198388a1..2175765a338627ab612c5750138b7ee94e42bf11 100644 (file)
--- a/ReforceXY/reward_space_analysis/README.md
+++ b/ReforceXY/reward_space_analysis/README.md
@@ -322,7 +322,7 @@ _PBRS (Potential-Based Reward Shaping) configuration:_
  - `hold_potential_enabled` (default: true) - Enable PBRS hold potential function Φ(s)
  - `hold_potential_scale` (default: 1.0) - Scale factor for hold potential function
  - `hold_potential_gain` (default: 1.0) - Gain factor applied before transforms in hold potential
-- `hold_potential_transform_pnl` (default: tanh) - Transform function for PnL: tanh, softsign, arctan, sigmoid, asinh_norm, clip
+- `hold_potential_transform_pnl` (default: tanh) - Transform function for PnL: tanh, softsign, arctan, sigmoid, asinh, clip
  - `hold_potential_transform_duration` (default: tanh) - Transform function for duration ratio
  - `entry_additive_enabled` (default: false) - Enable entry additive reward (non-PBRS component)
  - `entry_additive_scale` (default: 1.0) - Scale factor for entry additive reward
@@ -343,7 +343,7 @@ _PBRS (Potential-Based Reward Shaping) configuration:_
  | `softsign` | x / (1 + |x|) | (-1, 1) | Smoother than tanh, linear near 0 | Less aggressive saturation |
  | `arctan` | (2/pi) * arctan(x) | (-1, 1) | Slower saturation than tanh | Wide dynamic range |
  | `sigmoid` | 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) | (-1, 1) | Sigmoid mapped to (-1, 1) | Standard sigmoid activation |
-| `asinh_norm` | x / sqrt(1 + x^2) | (-1, 1) | Normalized asinh-like transform | Extreme outlier robustness |
+| `asinh` | x / sqrt(1 + x^2) | (-1, 1) | Normalized asinh-like transform, i.e. tanh(asinh(x)); not the unbounded asinh(x) | Extreme outlier robustness |
  | `clip` | clip(x, -1, 1) | [-1, 1] | Hard clipping at ±1 | Preserve linearity within bounds |
  
  _Invariant / safety controls:_
diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py

index 3292953496cc16ae21ddc801171e0afba9625d7e..70d5ddfa9e30765aeb57469d339480ed1067feab 100644 (file)
--- a/ReforceXY/reward_space_analysis/reward_space_analysis.py
+++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py
@@ -170,8 +170,8 @@ ALLOWED_TRANSFORMS = {
      "softsign",
      "arctan",
      "sigmoid",
-    "asinh_norm",
      "clip",
+    "asinh",
  }
  ALLOWED_EXIT_POTENTIAL_MODES = {
      "canonical",
@@ -262,17 +262,17 @@ DEFAULT_MODEL_REWARD_PARAMETERS_HELP: Dict[str, str] = {
      "hold_potential_enabled": "Enable PBRS hold potential function Φ(s).",
      "hold_potential_scale": "Scale factor for hold potential function.",
      "hold_potential_gain": "Gain factor applied before transforms in hold potential.",
-    "hold_potential_transform_pnl": "Transform function for PnL in hold potential: tanh, softsign, arctan, sigmoid, asinh_norm, clip.",
+    "hold_potential_transform_pnl": "Transform function for PnL in hold potential: tanh, softsign, arctan, sigmoid, asinh, clip.",
      "hold_potential_transform_duration": "Transform function for duration ratio in hold potential.",
      "entry_additive_enabled": "Enable entry additive reward (non-PBRS component).",
      "entry_additive_scale": "Scale factor for entry additive reward.",
      "entry_additive_gain": "Gain factor for entry additive reward.",
-    "entry_additive_transform_pnl": "Transform function for PnL in entry additive.",
+    "entry_additive_transform_pnl": "Transform function for PnL in entry additive (tanh, softsign, arctan, sigmoid, asinh, clip).",
      "entry_additive_transform_duration": "Transform function for duration ratio in entry additive.",
      "exit_additive_enabled": "Enable exit additive reward (non-PBRS component).",
      "exit_additive_scale": "Scale factor for exit additive reward.",
      "exit_additive_gain": "Gain factor for exit additive reward.",
-    "exit_additive_transform_pnl": "Transform function for PnL in exit additive.",
+    "exit_additive_transform_pnl": "Transform function for PnL in exit additive (tanh, softsign, arctan, sigmoid, asinh, clip).",
      "exit_additive_transform_duration": "Transform function for duration ratio in exit additive.",
  }
  
@@ -3373,8 +3373,8 @@ def _apply_transform_sigmoid(value: float) -> float:
          return 1.0 if x > 0 else -1.0
  
  
-def _apply_transform_asinh_norm(value: float) -> float:
-    """asinh_norm: x / sqrt(1 + x^2) in (-1, 1)."""
+def _apply_transform_asinh(value: float) -> float:
+    """asinh: x / sqrt(1 + x^2) in (-1, 1)."""
      return float(value / math.hypot(1.0, value))
  
  
@@ -3390,7 +3390,7 @@ def apply_transform(transform_name: str, value: float, **kwargs: Any) -> float:
          "softsign": _apply_transform_softsign,
          "arctan": _apply_transform_arctan,
          "sigmoid": _apply_transform_sigmoid,
-        "asinh_norm": _apply_transform_asinh_norm,
+        "asinh": _apply_transform_asinh,
          "clip": _apply_transform_clip,
      }
  
diff --git a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py

index 6b332ae6b5677b8c00d9f452fbca210f7e582fb5..c9241e86735c3ec4e7bd9d76dc9b1d3489ffa018 100644 (file)
--- a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py
+++ b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py
@@ -3095,21 +3095,21 @@ class TestPBRSIntegration(RewardSpaceTestBase):
          self.assertTrue(abs(apply_transform("softsign", 100.0)) < 1.0)
          self.assertTrue(abs(apply_transform("softsign", -100.0)) < 1.0)
  
-    def test_asinh_norm_transform(self):
-        """asinh_norm transform: x / sqrt(1 + x^2) in (-1, 1)."""
-        self.assertAlmostEqualFloat(apply_transform("asinh_norm", 0.0), 0.0)
+    def test_asinh_transform(self):
+        """asinh transform: x / sqrt(1 + x^2) in (-1, 1)."""
+        self.assertAlmostEqualFloat(apply_transform("asinh", 0.0), 0.0)
          # Symmetry
          self.assertAlmostEqualFloat(
-            apply_transform("asinh_norm", 1.2345),
-            -apply_transform("asinh_norm", -1.2345),
+            apply_transform("asinh", 1.2345),
+            -apply_transform("asinh", -1.2345),
              tolerance=1e-12,
          )
          # Monotonicity
-        vals = [apply_transform("asinh_norm", x) for x in [-5.0, -1.0, 0.0, 1.0, 5.0]]
+        vals = [apply_transform("asinh", x) for x in [-5.0, -1.0, 0.0, 1.0, 5.0]]
          self.assertTrue(all(vals[i] < vals[i + 1] for i in range(len(vals) - 1)))
          # Bounded
-        self.assertTrue(abs(apply_transform("asinh_norm", 1e6)) < 1.0)
-        self.assertTrue(abs(apply_transform("asinh_norm", -1e6)) < 1.0)
+        self.assertTrue(abs(apply_transform("asinh", 1e6)) < 1.0)
+        self.assertTrue(abs(apply_transform("asinh", -1e6)) < 1.0)
  
      def test_arctan_transform(self):
          """arctan transform: (2/pi) * arctan(x) in (-1, 1)."""
diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py

index 5a8555ce62e0b10a3269409d7c8d719cc4e1a1f5..5f838b018522ca5283640c61f08e80405c587bab 100644 (file)
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -1655,7 +1655,7 @@ class MyRLEnv(Base5ActionRLEnv):
          ----------
          name : str
              Transform function name: 'tanh', 'softsign', 'arctan', 'sigmoid',
-            'asinh_norm', or 'clip'
+            'asinh', or 'clip'
          x : float
              Input value to transform
  
@@ -1686,7 +1686,7 @@ class MyRLEnv(Base5ActionRLEnv):
              except OverflowError:
                  return 1.0 if x > 0 else -1.0
  
-        if name == "asinh_norm":
+        if name == "asinh":
              return x / math.hypot(1.0, x)
  
          if name == "clip":
@@ -1814,7 +1814,7 @@ class MyRLEnv(Base5ActionRLEnv):
          - softsign: x / (1 + |x|)
          - arctan: (2/pi) * arctan(x)
          - sigmoid: 2σ(x) - 1, σ(x) = 1/(1 + e^(-x))
-        - asinh_norm: x / sqrt(1 + x^2)
+        - asinh: x / sqrt(1 + x^2) (normalized, bounded in (-1, 1); equals tanh(asinh(x)), not math.asinh)
          - clip: clip(x, -1, 1)
  
          **Parameters**:
author	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Wed, 15 Oct 2025 09:42:46 +0000 (11:42 +0200)
committer	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Wed, 15 Oct 2025 09:42:46 +0000 (11:42 +0200)
ReforceXY/reward_space_analysis/README.md		patch \| blob \| blame \| history
ReforceXY/reward_space_analysis/reward_space_analysis.py		patch \| blob \| blame \| history
ReforceXY/reward_space_analysis/test_reward_space_analysis.py		patch \| blob \| blame \| history
ReforceXY/user_data/freqaimodels/ReforceXY.py		patch \| blob \| blame \| history