From 330369ba00ff015ccdbef6bdf9ee1d938cc336bc Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@piment-noir.org>
Date: Wed, 15 Oct 2025 11:42:46 +0200
Subject: [PATCH] refactor(reforcexy): PBRS transform namespace cleanup
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
---
 ReforceXY/reward_space_analysis/README.md        |  4 ++--
 .../reward_space_analysis.py                     | 14 +++++++-------
 .../test_reward_space_analysis.py                | 16 ++++++++--------
 ReforceXY/user_data/freqaimodels/ReforceXY.py    |  6 +++---
 4 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md
index 5e0fcf1..2175765 100644
--- a/ReforceXY/reward_space_analysis/README.md
+++ b/ReforceXY/reward_space_analysis/README.md
@@ -322,7 +322,7 @@ _PBRS (Potential-Based Reward Shaping) configuration:_
 - `hold_potential_enabled` (default: true) - Enable PBRS hold potential function Φ(s)
 - `hold_potential_scale` (default: 1.0) - Scale factor for hold potential function
 - `hold_potential_gain` (default: 1.0) - Gain factor applied before transforms in hold potential
-- `hold_potential_transform_pnl` (default: tanh) - Transform function for PnL: tanh, softsign, arctan, sigmoid, asinh_norm, clip
+- `hold_potential_transform_pnl` (default: tanh) - Transform function for PnL: tanh, softsign, arctan, sigmoid, asinh, clip
 - `hold_potential_transform_duration` (default: tanh) - Transform function for duration ratio
 - `entry_additive_enabled` (default: false) - Enable entry additive reward (non-PBRS component)
 - `entry_additive_scale` (default: 1.0) - Scale factor for entry additive reward
@@ -343,7 +343,7 @@ _PBRS (Potential-Based Reward Shaping) configuration:_
 | `softsign` | x / (1 + |x|) | (-1, 1) | Smoother than tanh, linear near 0 | Less aggressive saturation |
 | `arctan` | (2/pi) * arctan(x) | (-1, 1) | Slower saturation than tanh | Wide dynamic range |
 | `sigmoid` | 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) | (-1, 1) | Sigmoid mapped to (-1, 1) | Standard sigmoid activation |
-| `asinh_norm` | x / sqrt(1 + x^2) | (-1, 1) | Normalized asinh-like transform | Extreme outlier robustness |
+| `asinh` | x / sqrt(1 + x^2) | (-1, 1) | Normalized asinh-like transform | Extreme outlier robustness |
 | `clip` | clip(x, -1, 1) | [-1, 1] | Hard clipping at ±1 | Preserve linearity within bounds |
 
 _Invariant / safety controls:_
diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py
index 3292953..70d5ddf 100644
--- a/ReforceXY/reward_space_analysis/reward_space_analysis.py
+++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py
@@ -170,8 +170,8 @@ ALLOWED_TRANSFORMS = {
     "softsign",
     "arctan",
     "sigmoid",
-    "asinh_norm",
     "clip",
+    "asinh",
 }
 ALLOWED_EXIT_POTENTIAL_MODES = {
     "canonical",
@@ -262,17 +262,17 @@ DEFAULT_MODEL_REWARD_PARAMETERS_HELP: Dict[str, str] = {
     "hold_potential_enabled": "Enable PBRS hold potential function Φ(s).",
     "hold_potential_scale": "Scale factor for hold potential function.",
     "hold_potential_gain": "Gain factor applied before transforms in hold potential.",
-    "hold_potential_transform_pnl": "Transform function for PnL in hold potential: tanh, softsign, arctan, sigmoid, asinh_norm, clip.",
+    "hold_potential_transform_pnl": "Transform function for PnL in hold potential: tanh, softsign, arctan, sigmoid, asinh, clip.",
     "hold_potential_transform_duration": "Transform function for duration ratio in hold potential.",
     "entry_additive_enabled": "Enable entry additive reward (non-PBRS component).",
     "entry_additive_scale": "Scale factor for entry additive reward.",
     "entry_additive_gain": "Gain factor for entry additive reward.",
-    "entry_additive_transform_pnl": "Transform function for PnL in entry additive.",
+    "entry_additive_transform_pnl": "Transform function for PnL in entry additive (tanh, softsign, arctan, sigmoid, asinh, clip).",
     "entry_additive_transform_duration": "Transform function for duration ratio in entry additive.",
     "exit_additive_enabled": "Enable exit additive reward (non-PBRS component).",
     "exit_additive_scale": "Scale factor for exit additive reward.",
     "exit_additive_gain": "Gain factor for exit additive reward.",
-    "exit_additive_transform_pnl": "Transform function for PnL in exit additive.",
+    "exit_additive_transform_pnl": "Transform function for PnL in exit additive (tanh, softsign, arctan, sigmoid, asinh, clip).",
     "exit_additive_transform_duration": "Transform function for duration ratio in exit additive.",
 }
 
@@ -3373,8 +3373,8 @@ def _apply_transform_sigmoid(value: float) -> float:
         return 1.0 if x > 0 else -1.0
 
 
-def _apply_transform_asinh_norm(value: float) -> float:
-    """asinh_norm: x / sqrt(1 + x^2) in (-1, 1)."""
+def _apply_transform_asinh(value: float) -> float:
+    """asinh: x / sqrt(1 + x^2) in (-1, 1)."""
     return float(value / math.hypot(1.0, value))
 
 
@@ -3390,7 +3390,7 @@ def apply_transform(transform_name: str, value: float, **kwargs: Any) -> float:
         "softsign": _apply_transform_softsign,
         "arctan": _apply_transform_arctan,
         "sigmoid": _apply_transform_sigmoid,
-        "asinh_norm": _apply_transform_asinh_norm,
+        "asinh": _apply_transform_asinh,
         "clip": _apply_transform_clip,
     }
 
diff --git a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py
index 6b332ae..c9241e8 100644
--- a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py
+++ b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py
@@ -3095,21 +3095,21 @@ class TestPBRSIntegration(RewardSpaceTestBase):
         self.assertTrue(abs(apply_transform("softsign", 100.0)) < 1.0)
         self.assertTrue(abs(apply_transform("softsign", -100.0)) < 1.0)
 
-    def test_asinh_norm_transform(self):
-        """asinh_norm transform: x / sqrt(1 + x^2) in (-1, 1)."""
-        self.assertAlmostEqualFloat(apply_transform("asinh_norm", 0.0), 0.0)
+    def test_asinh_transform(self):
+        """asinh transform: x / sqrt(1 + x^2) in (-1, 1)."""
+        self.assertAlmostEqualFloat(apply_transform("asinh", 0.0), 0.0)
         # Symmetry
         self.assertAlmostEqualFloat(
-            apply_transform("asinh_norm", 1.2345),
-            -apply_transform("asinh_norm", -1.2345),
+            apply_transform("asinh", 1.2345),
+            -apply_transform("asinh", -1.2345),
             tolerance=1e-12,
         )
         # Monotonicity
-        vals = [apply_transform("asinh_norm", x) for x in [-5.0, -1.0, 0.0, 1.0, 5.0]]
+        vals = [apply_transform("asinh", x) for x in [-5.0, -1.0, 0.0, 1.0, 5.0]]
         self.assertTrue(all(vals[i] < vals[i + 1] for i in range(len(vals) - 1)))
         # Bounded
-        self.assertTrue(abs(apply_transform("asinh_norm", 1e6)) < 1.0)
-        self.assertTrue(abs(apply_transform("asinh_norm", -1e6)) < 1.0)
+        self.assertTrue(abs(apply_transform("asinh", 1e6)) < 1.0)
+        self.assertTrue(abs(apply_transform("asinh", -1e6)) < 1.0)
 
     def test_arctan_transform(self):
         """arctan transform: (2/pi) * arctan(x) in (-1, 1)."""
diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py
index 5a8555c..5f838b0 100644
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -1655,7 +1655,7 @@ class MyRLEnv(Base5ActionRLEnv):
         ----------
         name : str
             Transform function name: 'tanh', 'softsign', 'arctan', 'sigmoid',
-            'asinh_norm', or 'clip'
+            'asinh', or 'clip'
         x : float
             Input value to transform
 
@@ -1686,7 +1686,7 @@ class MyRLEnv(Base5ActionRLEnv):
             except OverflowError:
                 return 1.0 if x > 0 else -1.0
 
-        if name == "asinh_norm":
+        if name == "asinh":
             return x / math.hypot(1.0, x)
 
         if name == "clip":
@@ -1814,7 +1814,7 @@ class MyRLEnv(Base5ActionRLEnv):
         - softsign: x / (1 + |x|)
         - arctan: (2/pi) * arctan(x)
         - sigmoid: 2σ(x) - 1, σ(x) = 1/(1 + e^(-x))
-        - asinh_norm: x / sqrt(1 + x^2)
+        - asinh: x / sqrt(1 + x^2)
         - clip: clip(x, -1, 1)
 
         **Parameters**:
-- 
2.43.0