From 330369ba00ff015ccdbef6bdf9ee1d938cc336bc Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Wed, 15 Oct 2025 11:42:46 +0200 Subject: [PATCH] refactor(reforcexy): PBRS transform namespace cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- ReforceXY/reward_space_analysis/README.md | 4 ++-- .../reward_space_analysis.py | 14 +++++++------- .../test_reward_space_analysis.py | 16 ++++++++-------- ReforceXY/user_data/freqaimodels/ReforceXY.py | 6 +++--- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md index 5e0fcf1..2175765 100644 --- a/ReforceXY/reward_space_analysis/README.md +++ b/ReforceXY/reward_space_analysis/README.md @@ -322,7 +322,7 @@ _PBRS (Potential-Based Reward Shaping) configuration:_ - `hold_potential_enabled` (default: true) - Enable PBRS hold potential function Φ(s) - `hold_potential_scale` (default: 1.0) - Scale factor for hold potential function - `hold_potential_gain` (default: 1.0) - Gain factor applied before transforms in hold potential -- `hold_potential_transform_pnl` (default: tanh) - Transform function for PnL: tanh, softsign, arctan, sigmoid, asinh_norm, clip +- `hold_potential_transform_pnl` (default: tanh) - Transform function for PnL: tanh, softsign, arctan, sigmoid, asinh, clip - `hold_potential_transform_duration` (default: tanh) - Transform function for duration ratio - `entry_additive_enabled` (default: false) - Enable entry additive reward (non-PBRS component) - `entry_additive_scale` (default: 1.0) - Scale factor for entry additive reward @@ -343,7 +343,7 @@ _PBRS (Potential-Based Reward Shaping) configuration:_ | `softsign` | x / (1 + |x|) | (-1, 1) | Smoother than tanh, linear near 0 | Less aggressive saturation | | `arctan` | (2/pi) * arctan(x) | (-1, 1) | Slower saturation than tanh | Wide dynamic range | | `sigmoid` | 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) | (-1, 1) | Sigmoid mapped to (-1, 1) | Standard sigmoid activation | -| `asinh_norm` | x / sqrt(1 + x^2) | (-1, 1) | Normalized asinh-like transform | Extreme outlier robustness | +| `asinh` | x / sqrt(1 + x^2) | (-1, 1) | Normalized asinh-like transform | Extreme outlier robustness | | `clip` | clip(x, -1, 1) | [-1, 1] | Hard clipping at ±1 | Preserve linearity within bounds | _Invariant / safety controls:_ diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py index 3292953..70d5ddf 100644 --- a/ReforceXY/reward_space_analysis/reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py @@ -170,8 +170,8 @@ ALLOWED_TRANSFORMS = { "softsign", "arctan", "sigmoid", - "asinh_norm", "clip", + "asinh", } ALLOWED_EXIT_POTENTIAL_MODES = { "canonical", @@ -262,17 +262,17 @@ DEFAULT_MODEL_REWARD_PARAMETERS_HELP: Dict[str, str] = { "hold_potential_enabled": "Enable PBRS hold potential function Φ(s).", "hold_potential_scale": "Scale factor for hold potential function.", "hold_potential_gain": "Gain factor applied before transforms in hold potential.", - "hold_potential_transform_pnl": "Transform function for PnL in hold potential: tanh, softsign, arctan, sigmoid, asinh_norm, clip.", + "hold_potential_transform_pnl": "Transform function for PnL in hold potential: tanh, softsign, arctan, sigmoid, asinh, clip.", "hold_potential_transform_duration": "Transform function for duration ratio in hold potential.", "entry_additive_enabled": "Enable entry additive reward (non-PBRS component).", "entry_additive_scale": "Scale factor for entry additive reward.", "entry_additive_gain": "Gain factor for entry additive reward.", - "entry_additive_transform_pnl": "Transform function for PnL in entry additive.", + "entry_additive_transform_pnl": "Transform function for PnL in entry additive (tanh, softsign, arctan, sigmoid, asinh, clip).", "entry_additive_transform_duration": "Transform function for duration ratio in entry additive.", "exit_additive_enabled": "Enable exit additive reward (non-PBRS component).", "exit_additive_scale": "Scale factor for exit additive reward.", "exit_additive_gain": "Gain factor for exit additive reward.", - "exit_additive_transform_pnl": "Transform function for PnL in exit additive.", + "exit_additive_transform_pnl": "Transform function for PnL in exit additive (tanh, softsign, arctan, sigmoid, asinh, clip).", "exit_additive_transform_duration": "Transform function for duration ratio in exit additive.", } @@ -3373,8 +3373,8 @@ def _apply_transform_sigmoid(value: float) -> float: return 1.0 if x > 0 else -1.0 -def _apply_transform_asinh_norm(value: float) -> float: - """asinh_norm: x / sqrt(1 + x^2) in (-1, 1).""" +def _apply_transform_asinh(value: float) -> float: + """asinh: x / sqrt(1 + x^2) in (-1, 1).""" return float(value / math.hypot(1.0, value)) @@ -3390,7 +3390,7 @@ def apply_transform(transform_name: str, value: float, **kwargs: Any) -> float: "softsign": _apply_transform_softsign, "arctan": _apply_transform_arctan, "sigmoid": _apply_transform_sigmoid, - "asinh_norm": _apply_transform_asinh_norm, + "asinh": _apply_transform_asinh, "clip": _apply_transform_clip, } diff --git a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py index 6b332ae..c9241e8 100644 --- a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py @@ -3095,21 +3095,21 @@ class TestPBRSIntegration(RewardSpaceTestBase): self.assertTrue(abs(apply_transform("softsign", 100.0)) < 1.0) self.assertTrue(abs(apply_transform("softsign", -100.0)) < 1.0) - def test_asinh_norm_transform(self): - """asinh_norm transform: x / sqrt(1 + x^2) in (-1, 1).""" - self.assertAlmostEqualFloat(apply_transform("asinh_norm", 0.0), 0.0) + def test_asinh_transform(self): + """asinh transform: x / sqrt(1 + x^2) in (-1, 1).""" + self.assertAlmostEqualFloat(apply_transform("asinh", 0.0), 0.0) # Symmetry self.assertAlmostEqualFloat( - apply_transform("asinh_norm", 1.2345), - -apply_transform("asinh_norm", -1.2345), + apply_transform("asinh", 1.2345), + -apply_transform("asinh", -1.2345), tolerance=1e-12, ) # Monotonicity - vals = [apply_transform("asinh_norm", x) for x in [-5.0, -1.0, 0.0, 1.0, 5.0]] + vals = [apply_transform("asinh", x) for x in [-5.0, -1.0, 0.0, 1.0, 5.0]] self.assertTrue(all(vals[i] < vals[i + 1] for i in range(len(vals) - 1))) # Bounded - self.assertTrue(abs(apply_transform("asinh_norm", 1e6)) < 1.0) - self.assertTrue(abs(apply_transform("asinh_norm", -1e6)) < 1.0) + self.assertTrue(abs(apply_transform("asinh", 1e6)) < 1.0) + self.assertTrue(abs(apply_transform("asinh", -1e6)) < 1.0) def test_arctan_transform(self): """arctan transform: (2/pi) * arctan(x) in (-1, 1).""" diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index 5a8555c..5f838b0 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -1655,7 +1655,7 @@ class MyRLEnv(Base5ActionRLEnv): ---------- name : str Transform function name: 'tanh', 'softsign', 'arctan', 'sigmoid', - 'asinh_norm', or 'clip' + 'asinh', or 'clip' x : float Input value to transform @@ -1686,7 +1686,7 @@ class MyRLEnv(Base5ActionRLEnv): except OverflowError: return 1.0 if x > 0 else -1.0 - if name == "asinh_norm": + if name == "asinh": return x / math.hypot(1.0, x) if name == "clip": @@ -1814,7 +1814,7 @@ class MyRLEnv(Base5ActionRLEnv): - softsign: x / (1 + |x|) - arctan: (2/pi) * arctan(x) - sigmoid: 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) - - asinh_norm: x / sqrt(1 + x^2) + - asinh: x / sqrt(1 + x^2) - clip: clip(x, -1, 1) **Parameters**: -- 2.43.0