Piment Noir Git Repositories - freqai-strategies.git/commitdiff
refactor(reforcexy): rationalize PBRS transforms list
authorJérôme Benoit <jerome.benoit@piment-noir.org>
Wed, 15 Oct 2025 09:24:48 +0000 (11:24 +0200)
committerJérôme Benoit <jerome.benoit@piment-noir.org>
Wed, 15 Oct 2025 09:24:48 +0000 (11:24 +0200)
Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
ReforceXY/reward_space_analysis/README.md
ReforceXY/reward_space_analysis/reward_space_analysis.py
ReforceXY/reward_space_analysis/test_reward_space_analysis.py
ReforceXY/user_data/freqaimodels/ReforceXY.py

index a498e0213020e5096cd2b33815a2ed764132e590..5e0fcf1a93e989ad1a1adb08189adca1198388a1 100644 (file)
@@ -299,7 +299,7 @@ effective_r = r            if not exit_plateau
 | legacy | step: ×1.5 if r* ≤ 1 else ×0.5 | No | Historical discontinuity retained (not smoothed) |
 | sqrt | 1 / sqrt(1 + r*) | Yes | Sub-linear decay |
 | linear | 1 / (1 + slope * r*) | Yes | slope = `exit_linear_slope` (≥0) |
-| power | (1 + r*)^(-alpha) | Yes | alpha = -ln(tau)/ln(2), tau = `exit_power_tau` ∈ (0,1]; tau=1 ⇒ alpha=0 (flat) |
+| power | (1 + r*)^(-alpha) | Yes | alpha = -ln(tau)/ln(2), tau = `exit_power_tau` ∈ (0,1]; tau=1 ⇒ alpha=0 (flat); invalid tau ⇒ alpha=1 (default) |
 | half_life | 2^(- r* / hl) | Yes | hl = `exit_half_life`; r* = hl ⇒ factor × 0.5 |
 
 Where r* = `effective_r` above.
@@ -317,13 +317,12 @@ _Profit factor configuration:_
 _PBRS (Potential-Based Reward Shaping) configuration:_
 
 - `potential_gamma` (default: 0.95) - Discount factor γ for PBRS potential term (0 ≤ γ ≤ 1)
-- `potential_softsign_sharpness` (default: 1.0) - Sharpness parameter for softsign_sharp transform (smaller = sharper)
 - `exit_potential_mode` (default: canonical) - Exit potential mode: 'canonical' (Φ=0, preserves invariance, disables additives), 'non-canonical' (Φ=0, allows additives, breaks invariance), 'progressive_release', 'spike_cancel', 'retain_previous'
 - `exit_potential_decay` (default: 0.5) - Decay factor for progressive_release exit mode (0 ≤ decay ≤ 1)
 - `hold_potential_enabled` (default: true) - Enable PBRS hold potential function Φ(s)
 - `hold_potential_scale` (default: 1.0) - Scale factor for hold potential function
 - `hold_potential_gain` (default: 1.0) - Gain factor applied before transforms in hold potential
-- `hold_potential_transform_pnl` (default: tanh) - Transform function for PnL: tanh, softsign, softsign_sharp, arctan, logistic, asinh_norm, clip
+- `hold_potential_transform_pnl` (default: tanh) - Transform function for PnL: tanh, softsign, arctan, sigmoid, asinh_norm, clip
 - `hold_potential_transform_duration` (default: tanh) - Transform function for duration ratio
 - `entry_additive_enabled` (default: false) - Enable entry additive reward (non-PBRS component)
 - `entry_additive_scale` (default: 1.0) - Scale factor for entry additive reward
@@ -341,11 +340,10 @@ _PBRS (Potential-Based Reward Shaping) configuration:_
 | Transform | Formula | Range | Characteristics | Use Case |
 |-----------|---------|-------|-----------------|----------|
 | `tanh` | tanh(x) | (-1, 1) | Smooth sigmoid, symmetric around 0 | Balanced PnL/duration transforms (default) |
-| `softsign` | x / (1 + \|x\|) | (-1, 1) | Smoother than tanh, linear near 0 | Less aggressive saturation |
-| `softsign_sharp` | (sharpness * x) / (1 + \|sharpness * x\|) | (-1, 1) | Tunable sharpness via `potential_softsign_sharpness` | Custom saturation control |
-| `arctan` | (2/π) × arctan(x) | (-1, 1) | Slower saturation than tanh | Wide dynamic range |
-| `logistic` | 2 / (1 + e^(-x)) - 1 | (-1, 1) | Equivalent to tanh(x/2), gentler curve | Mild non-linearity |
-| `asinh_norm` | x / √(1 + x²) | (-1, 1) | Normalized asinh-like transform | Extreme outlier robustness |
+| `softsign` | x / (1 + \|x\|) | (-1, 1) | Smoother than tanh, linear near 0 | Less aggressive saturation |
+| `arctan` | (2/pi) * arctan(x) | (-1, 1) | Slower saturation than tanh | Wide dynamic range |
+| `sigmoid` | 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) | (-1, 1) | Sigmoid mapped to (-1, 1) | Standard sigmoid activation |
+| `asinh_norm` | x / sqrt(1 + x^2) | (-1, 1) | Normalized asinh-like transform | Extreme outlier robustness |
 | `clip` | clip(x, -1, 1) | [-1, 1] | Hard clipping at ±1 | Preserve linearity within bounds |
 
 _Invariant / safety controls:_
@@ -581,8 +579,8 @@ python reward_space_analysis.py \
 
 python reward_space_analysis.py \
     --num_samples 25000 \
-    --params hold_potential_transform_pnl=softsign_sharp potential_softsign_sharpness=0.5 \
-    --output pbrs_sharp_transforms
+    --params hold_potential_transform_pnl=sigmoid hold_potential_gain=2.0 \
+    --output pbrs_sigmoid_transforms
 ```
 
 ### Real Data Comparison
index 9e5c252ed6933c6d80a8a51803008206e946d0e4..3292953496cc16ae21ddc801171e0afba9625d7e 100644 (file)
@@ -168,9 +168,8 @@ def _fail_safely(reason: str) -> float:
 ALLOWED_TRANSFORMS = {
     "tanh",
     "softsign",
-    "softsign_sharp",
     "arctan",
-    "logistic",
+    "sigmoid",
     "asinh_norm",
     "clip",
 }
@@ -213,7 +212,6 @@ DEFAULT_MODEL_REWARD_PARAMETERS: RewardParams = {
     # Potential-based reward shaping core parameters
     # Discount factor γ for potential term (0 ≤ γ ≤ 1)
     "potential_gamma": POTENTIAL_GAMMA_DEFAULT,
-    "potential_softsign_sharpness": 1.0,
     # Exit potential modes: canonical | non-canonical | progressive_release | spike_cancel | retain_previous
     "exit_potential_mode": "canonical",
     "exit_potential_decay": 0.5,
@@ -259,13 +257,12 @@ DEFAULT_MODEL_REWARD_PARAMETERS_HELP: Dict[str, str] = {
     "exit_factor_threshold": "If |exit factor| exceeds this threshold, emit warning.",
     # PBRS parameters
     "potential_gamma": "Discount factor γ for PBRS potential-based reward shaping (0 ≤ γ ≤ 1).",
-    "potential_softsign_sharpness": "Sharpness parameter for softsign_sharp transform (smaller = sharper).",
     "exit_potential_mode": "Exit potential mode: 'canonical' (Φ=0 & additives disabled), 'non-canonical' (Φ=0 & additives allowed), 'progressive_release', 'spike_cancel', 'retain_previous'.",
     "exit_potential_decay": "Decay factor for progressive_release exit mode (0 ≤ decay ≤ 1).",
     "hold_potential_enabled": "Enable PBRS hold potential function Φ(s).",
     "hold_potential_scale": "Scale factor for hold potential function.",
     "hold_potential_gain": "Gain factor applied before transforms in hold potential.",
-    "hold_potential_transform_pnl": "Transform function for PnL in hold potential: tanh, softsign, softsign_sharp, arctan, logistic, asinh_norm, clip.",
+    "hold_potential_transform_pnl": "Transform function for PnL in hold potential: tanh, softsign, arctan, sigmoid, asinh_norm, clip.",
     "hold_potential_transform_duration": "Transform function for duration ratio in hold potential.",
     "entry_additive_enabled": "Enable entry additive reward (non-PBRS component).",
     "entry_additive_scale": "Scale factor for entry additive reward.",
@@ -303,8 +300,6 @@ _PARAMETER_BOUNDS: Dict[str, Dict[str, float]] = {
     "pnl_factor_beta": {"min": 1e-6},
     # PBRS parameter bounds
     "potential_gamma": {"min": 0.0, "max": 1.0},
-    # Softsign sharpness: only lower bound enforced (upper bound limited implicitly by transform stability)
-    "potential_softsign_sharpness": {"min": 1e-6},
     "exit_potential_decay": {"min": 0.0, "max": 1.0},
     "hold_potential_scale": {"min": 0.0},
     "hold_potential_gain": {"min": 0.0},
@@ -3348,48 +3343,43 @@ def main() -> None:
 
 
 def _apply_transform_tanh(value: float) -> float:
-    """tanh(value) ∈ (-1,1)."""
-    return float(np.tanh(value))
+    """tanh: tanh(x) in (-1, 1)."""
+    return float(math.tanh(value))
 
 
 def _apply_transform_softsign(value: float) -> float:
-    """softsign: value/(1+|value|)."""
+    """softsign: x / (1 + |x|) in (-1, 1)."""
     x = value
     return float(x / (1.0 + abs(x)))
 
 
-def _apply_transform_softsign_sharp(value: float, sharpness: float = 1.0) -> float:
-    """softsign_sharp: (sharpness*value)/(1+|sharpness*value|) - multiplicative sharpness."""
-    xs = sharpness * value
-    return float(xs / (1.0 + abs(xs)))
-
-
 def _apply_transform_arctan(value: float) -> float:
-    """arctan normalized: (2/pi)*atan(value) ∈ (-1,1)."""
+    """arctan: (2/pi) * arctan(x) in (-1, 1)."""
     return float((2.0 / math.pi) * math.atan(value))
 
 
-def _apply_transform_logistic(value: float) -> float:
-    """Overflow‑safe logistic transform mapped to (-1,1): 2σ(x)−1."""
+def _apply_transform_sigmoid(value: float) -> float:
+    """sigmoid: 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) in (-1, 1)."""
     x = value
     try:
         if x >= 0:
-            z = math.exp(-x)  # z in (0,1]
-            return float((1.0 - z) / (1.0 + z))
+            exp_neg_x = math.exp(-x)
+            sigma_x = 1.0 / (1.0 + exp_neg_x)
         else:
-            z = math.exp(x)  # z in (0,1]
-            return float((z - 1.0) / (z + 1.0))
+            exp_x = math.exp(x)
+            sigma_x = exp_x / (exp_x + 1.0)
+        return 2.0 * sigma_x - 1.0
     except OverflowError:
         return 1.0 if x > 0 else -1.0
 
 
 def _apply_transform_asinh_norm(value: float) -> float:
-    """Normalized asinh: value / sqrt(1 + value²) producing range (-1,1)."""
+    """asinh_norm: x / sqrt(1 + x^2) in (-1, 1)."""
     return float(value / math.hypot(1.0, value))
 
 
 def _apply_transform_clip(value: float) -> float:
-    """clip(value) to [-1,1]."""
+    """clip: clip(x, -1, 1) in [-1, 1]."""
     return float(np.clip(value, -1.0, 1.0))
 
 
@@ -3398,9 +3388,8 @@ def apply_transform(transform_name: str, value: float, **kwargs: Any) -> float:
     transforms = {
         "tanh": _apply_transform_tanh,
         "softsign": _apply_transform_softsign,
-        "softsign_sharp": _apply_transform_softsign_sharp,
         "arctan": _apply_transform_arctan,
-        "logistic": _apply_transform_logistic,
+        "sigmoid": _apply_transform_sigmoid,
         "asinh_norm": _apply_transform_asinh_norm,
         "clip": _apply_transform_clip,
     }
@@ -3709,18 +3698,9 @@ def _compute_bi_component(
     gain = _get_float_param(params, gain_key, 1.0)
     transform_pnl = _get_str_param(params, transform_pnl_key, "tanh")
     transform_duration = _get_str_param(params, transform_dur_key, "tanh")
-    sharpness = _get_float_param(params, "potential_softsign_sharpness", 1.0)
 
-    if transform_pnl == "softsign_sharp":
-        t_pnl = apply_transform(transform_pnl, gain * pnl, sharpness=sharpness)
-    else:
-        t_pnl = apply_transform(transform_pnl, gain * pnl)
-    if transform_duration == "softsign_sharp":
-        t_dur = apply_transform(
-            transform_duration, gain * duration_ratio, sharpness=sharpness
-        )
-    else:
-        t_dur = apply_transform(transform_duration, gain * duration_ratio)
+    t_pnl = apply_transform(transform_pnl, gain * pnl)
+    t_dur = apply_transform(transform_duration, gain * duration_ratio)
     value = scale * 0.5 * (t_pnl + t_dur)
     if not np.isfinite(value):
         return _fail_safely(non_finite_key)
index 068375dd2834ddc2e3524351fe48c4104cbabed0..6b332ae6b5677b8c00d9f452fbca210f7e582fb5 100644 (file)
@@ -3080,7 +3080,7 @@ class TestPBRSIntegration(RewardSpaceTestBase):
     """Tests for PBRS (Potential-Based Reward Shaping) integration."""
 
     def test_tanh_transform(self):
-        """tanh transform: bounded in (-1,1), symmetric."""
+        """tanh transform: tanh(x) in (-1, 1)."""
         self.assertAlmostEqualFloat(apply_transform("tanh", 0.0), 0.0)
         self.assertAlmostEqualFloat(apply_transform("tanh", 1.0), math.tanh(1.0))
         self.assertAlmostEqualFloat(apply_transform("tanh", -1.0), math.tanh(-1.0))
@@ -3088,37 +3088,15 @@ class TestPBRSIntegration(RewardSpaceTestBase):
         self.assertTrue(abs(apply_transform("tanh", -100.0)) <= 1.0)
 
     def test_softsign_transform(self):
-        """softsign transform: x/(1+|x|) in (-1,1)."""
+        """softsign transform: x / (1 + |x|) in (-1, 1)."""
         self.assertAlmostEqualFloat(apply_transform("softsign", 0.0), 0.0)
         self.assertAlmostEqualFloat(apply_transform("softsign", 1.0), 0.5)
         self.assertAlmostEqualFloat(apply_transform("softsign", -1.0), -0.5)
         self.assertTrue(abs(apply_transform("softsign", 100.0)) < 1.0)
         self.assertTrue(abs(apply_transform("softsign", -100.0)) < 1.0)
 
-    def test_softsign_sharp_transform(self):
-        """softsign_sharp transform: (s*x)/(1+|s*x|) in (-1,1) with sharpness s."""
-        # Baseline: s=1 should match softsign
-        self.assertAlmostEqualFloat(
-            apply_transform("softsign_sharp", 0.0, sharpness=1.0), 0.0
-        )
-        self.assertAlmostEqualFloat(
-            apply_transform("softsign_sharp", 1.0, sharpness=1.0),
-            apply_transform("softsign", 1.0),
-        )
-        # Higher sharpness => faster saturation
-        v_low = apply_transform("softsign_sharp", 0.5, sharpness=1.0)
-        v_high = apply_transform("softsign_sharp", 0.5, sharpness=4.0)
-        self.assertTrue(abs(v_high) > abs(v_low))
-        # Boundedness stress
-        self.assertTrue(
-            abs(apply_transform("softsign_sharp", 100.0, sharpness=10.0)) < 1.0
-        )
-        self.assertTrue(
-            abs(apply_transform("softsign_sharp", -100.0, sharpness=10.0)) < 1.0
-        )
-
     def test_asinh_norm_transform(self):
-        """asinh_norm transform: x/sqrt(1+x^2) in (-1,1)."""
+        """asinh_norm transform: x / sqrt(1 + x^2) in (-1, 1)."""
         self.assertAlmostEqualFloat(apply_transform("asinh_norm", 0.0), 0.0)
         # Symmetry
         self.assertAlmostEqualFloat(
@@ -3126,7 +3104,7 @@ class TestPBRSIntegration(RewardSpaceTestBase):
             -apply_transform("asinh_norm", -1.2345),
             tolerance=1e-12,
         )
-        # Monotonicity (sampled)
+        # Monotonicity
         vals = [apply_transform("asinh_norm", x) for x in [-5.0, -1.0, 0.0, 1.0, 5.0]]
         self.assertTrue(all(vals[i] < vals[i + 1] for i in range(len(vals) - 1)))
         # Bounded
@@ -3134,7 +3112,7 @@ class TestPBRSIntegration(RewardSpaceTestBase):
         self.assertTrue(abs(apply_transform("asinh_norm", -1e6)) < 1.0)
 
     def test_arctan_transform(self):
-        """arctan transform: normalized (2/pi)atan(x) bounded (-1,1)."""
+        """arctan transform: (2/pi) * arctan(x) in (-1, 1)."""
         self.assertAlmostEqualFloat(apply_transform("arctan", 0.0), 0.0)
         self.assertAlmostEqualFloat(
             apply_transform("arctan", 1.0),
@@ -3144,49 +3122,16 @@ class TestPBRSIntegration(RewardSpaceTestBase):
         self.assertTrue(abs(apply_transform("arctan", 100.0)) <= 1.0)
         self.assertTrue(abs(apply_transform("arctan", -100.0)) <= 1.0)
 
-    def test_logistic_transform(self):
-        """logistic transform: 2σ(x)-1 in (-1,1)."""
-        # Environment logistic returns 2σ(x)-1 centered at 0 in (-1,1)
-        self.assertAlmostEqualFloat(apply_transform("logistic", 0.0), 0.0)
-        self.assertTrue(apply_transform("logistic", 100.0) > 0.99)
-        self.assertTrue(apply_transform("logistic", -100.0) < -0.99)
-        self.assertTrue(-1 < apply_transform("logistic", 10.0) < 1)
-        self.assertTrue(-1 < apply_transform("logistic", -10.0) < 1)
-
-    def test_logistic_equivalence_tanh_half(self):
-        """logistic(x) must equal tanh(x/2) within tight tolerance across representative domain.
-
-        Uses identity: 2/(1+e^{-x}) - 1 = tanh(x/2).
-        """
-        samples = [
-            0.0,
-            1e-6,
-            -1e-6,
-            0.5,
-            -0.5,
-            1.0,
-            -1.0,
-            2.5,
-            -2.5,
-            5.0,
-            -5.0,
-            10.0,
-            -10.0,
-        ]
-        for x in samples:
-            with self.subTest(x=x):
-                v_log = apply_transform("logistic", x)
-                v_tanh = math.tanh(x / 2.0)
-                tol = 1e-12 if abs(x) <= 5 else 1e-10
-                self.assertAlmostEqualFloat(
-                    v_log,
-                    v_tanh,
-                    tolerance=tol,
-                    msg=f"Mismatch logistic vs tanh(x/2) at x={x}: {v_log} vs {v_tanh}",
-                )
+    def test_sigmoid_transform(self):
+        """sigmoid transform: 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) in (-1, 1)."""
+        self.assertAlmostEqualFloat(apply_transform("sigmoid", 0.0), 0.0)
+        self.assertTrue(apply_transform("sigmoid", 100.0) > 0.99)
+        self.assertTrue(apply_transform("sigmoid", -100.0) < -0.99)
+        self.assertTrue(-1 < apply_transform("sigmoid", 10.0) < 1)
+        self.assertTrue(-1 < apply_transform("sigmoid", -10.0) < 1)
 
     def test_clip_transform(self):
-        """clip transform: clamp to [-1,1]."""
+        """clip transform: clip(x, -1, 1) in [-1, 1]."""
         self.assertAlmostEqualFloat(apply_transform("clip", 0.0), 0.0)
         self.assertAlmostEqualFloat(apply_transform("clip", 0.5), 0.5)
         self.assertAlmostEqualFloat(apply_transform("clip", 2.0), 1.0)
index 3512947f2032a1667fe7991eba11bea71a622913..5a8555ce62e0b10a3269409d7c8d719cc4e1a1f5 100644 (file)
@@ -1334,9 +1334,6 @@ class MyRLEnv(Base5ActionRLEnv):
         self.max_trade_duration_candles: int = self.rl_config.get(
             "max_trade_duration_candles", 128
         )
-        # === Constants ===
-        self.MIN_SOFTSIGN_SHARPNESS: float = 0.01
-        self.MAX_SOFTSIGN_SHARPNESS: float = 100.0
         # === INTERNAL STATE ===
         self._last_closed_position: Optional[Positions] = None
         self._last_closed_trade_tick: int = 0
@@ -1363,13 +1360,6 @@ class MyRLEnv(Base5ActionRLEnv):
                 original_gamma,
                 self._potential_gamma,
             )
-        self._potential_softsign_sharpness: float = float(
-            model_reward_parameters.get("potential_softsign_sharpness", 1.0)
-        )
-        self._potential_softsign_sharpness = max(
-            self.MIN_SOFTSIGN_SHARPNESS,
-            min(self.MAX_SOFTSIGN_SHARPNESS, self._potential_softsign_sharpness),
-        )
         # === EXIT POTENTIAL MODE ===
         # exit_potential_mode options:
         #   'canonical'           -> Φ(s')=0 (preserves invariance, disables additives)
@@ -1664,8 +1654,8 @@ class MyRLEnv(Base5ActionRLEnv):
         Parameters
         ----------
         name : str
-            Transform function name: 'tanh', 'softsign', 'softsign_sharp',
-            'arctan', 'logistic', 'asinh_norm', or 'clip'
+            Transform function name: 'tanh', 'softsign', 'arctan', 'sigmoid',
+            'asinh_norm', or 'clip'
         x : float
             Input value to transform
 
@@ -1681,23 +1671,18 @@ class MyRLEnv(Base5ActionRLEnv):
             ax = abs(x)
             return x / (1.0 + ax)
 
-        if name == "softsign_sharp":
-            s = self._potential_softsign_sharpness
-            xs = s * x
-            ax = abs(xs)
-            return xs / (1.0 + ax)
-
         if name == "arctan":
             return (2.0 / math.pi) * math.atan(x)
 
-        if name == "logistic":
+        if name == "sigmoid":
             try:
                 if x >= 0:
-                    z = math.exp(-x)  # z in (0,1]
-                    return (1.0 - z) / (1.0 + z)
+                    exp_neg_x = math.exp(-x)
+                    sigma_x = 1.0 / (1.0 + exp_neg_x)
                 else:
-                    z = math.exp(x)  # z in (0,1]
-                    return (z - 1.0) / (z + 1.0)
+                    exp_x = math.exp(x)
+                    sigma_x = exp_x / (exp_x + 1.0)
+                return 2.0 * sigma_x - 1.0
             except OverflowError:
                 return 1.0 if x > 0 else -1.0
 
@@ -1824,19 +1809,17 @@ class MyRLEnv(Base5ActionRLEnv):
         -----------------------
         Hold potential formula: Φ(s) = scale * 0.5 * [T_pnl(g*pnl_ratio) + T_dur(g*duration_ratio)]
 
-        **Bounded Transform Functions** (range [-1,1]):
-        - tanh: smooth saturation, tanh(x)
-        - softsign: x/(1+|x|), gentler than tanh
-        - softsign_sharp: (sharpness*x)/(1+|sharpness*x|), custom saturation control
-        - arctan: (2/π)*arctan(x), linear near origin
-        - logistic: 2σ(x)-1 where σ(x)=1/(1+e^(-x)), numerically stable implementation
-        - asinh_norm: x/√(1+x²), normalized asinh-like
-        - clip: hard clamp to [-1,1]
+        **Bounded Transform Functions** (each maps R -> (-1, 1) except clip which is [-1, 1]):
+        - tanh: tanh(x)
+        - softsign: x / (1 + |x|)
+        - arctan: (2/pi) * arctan(x)
+        - sigmoid: 2σ(x) - 1, σ(x) = 1/(1 + e^(-x))
+        - asinh_norm: x / sqrt(1 + x^2)
+        - clip: clip(x, -1, 1)
 
         **Parameters**:
         - gain g: sharpens (g>1) or softens (g<1) transform input
         - scale: multiplies final potential value
-        - sharpness: affects softsign_sharp transform (must be >0)
 
         Exit Potential Modes
         --------------------