From f675f3cf8a4e60f1c5509b20776c73226f613d4f Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Wed, 15 Oct 2025 11:24:48 +0200 Subject: [PATCH] refactor(reforcexy): rationalize PBRS transforms list MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- ReforceXY/reward_space_analysis/README.md | 18 ++--- .../reward_space_analysis.py | 56 +++++-------- .../test_reward_space_analysis.py | 81 +++---------------- ReforceXY/user_data/freqaimodels/ReforceXY.py | 47 ++++------- 4 files changed, 54 insertions(+), 148 deletions(-) diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md index a498e02..5e0fcf1 100644 --- a/ReforceXY/reward_space_analysis/README.md +++ b/ReforceXY/reward_space_analysis/README.md @@ -299,7 +299,7 @@ effective_r = r if not exit_plateau | legacy | step: ×1.5 if r* ≤ 1 else ×0.5 | No | Historical discontinuity retained (not smoothed) | | sqrt | 1 / sqrt(1 + r*) | Yes | Sub-linear decay | | linear | 1 / (1 + slope * r*) | Yes | slope = `exit_linear_slope` (≥0) | -| power | (1 + r*)^(-alpha) | Yes | alpha = -ln(tau)/ln(2), tau = `exit_power_tau` ∈ (0,1]; tau=1 ⇒ alpha=0 (flat) | +| power | (1 + r*)^(-alpha) | Yes | alpha = -ln(tau)/ln(2), tau = `exit_power_tau` ∈ (0,1]; tau=1 ⇒ alpha=0 (flat); invalid tau ⇒ alpha=1 (default) | | half_life | 2^(- r* / hl) | Yes | hl = `exit_half_life`; r* = hl ⇒ factor × 0.5 | Where r* = `effective_r` above. 
@@ -317,13 +317,12 @@ _Profit factor configuration:_ _PBRS (Potential-Based Reward Shaping) configuration:_ - `potential_gamma` (default: 0.95) - Discount factor γ for PBRS potential term (0 ≤ γ ≤ 1) -- `potential_softsign_sharpness` (default: 1.0) - Sharpness parameter for softsign_sharp transform (smaller = sharper) - `exit_potential_mode` (default: canonical) - Exit potential mode: 'canonical' (Φ=0, preserves invariance, disables additives), 'non-canonical' (Φ=0, allows additives, breaks invariance), 'progressive_release', 'spike_cancel', 'retain_previous' - `exit_potential_decay` (default: 0.5) - Decay factor for progressive_release exit mode (0 ≤ decay ≤ 1) - `hold_potential_enabled` (default: true) - Enable PBRS hold potential function Φ(s) - `hold_potential_scale` (default: 1.0) - Scale factor for hold potential function - `hold_potential_gain` (default: 1.0) - Gain factor applied before transforms in hold potential -- `hold_potential_transform_pnl` (default: tanh) - Transform function for PnL: tanh, softsign, softsign_sharp, arctan, logistic, asinh_norm, clip +- `hold_potential_transform_pnl` (default: tanh) - Transform function for PnL: tanh, softsign, arctan, sigmoid, asinh_norm, clip - `hold_potential_transform_duration` (default: tanh) - Transform function for duration ratio - `entry_additive_enabled` (default: false) - Enable entry additive reward (non-PBRS component) - `entry_additive_scale` (default: 1.0) - Scale factor for entry additive reward @@ -341,11 +340,10 @@ _PBRS (Potential-Based Reward Shaping) configuration:_ | Transform | Formula | Range | Characteristics | Use Case | |-----------|---------|-------|-----------------|----------| | `tanh` | tanh(x) | (-1, 1) | Smooth sigmoid, symmetric around 0 | Balanced PnL/duration transforms (default) | -| `softsign` | x / (1 + \|x\|) | (-1, 1) | Smoother than tanh, linear near 0 | Less aggressive saturation | -| `softsign_sharp` | (sharpness * x) / (1 + \|sharpness * x\|) | (-1, 1) | Tunable sharpness 
via `potential_softsign_sharpness` | Custom saturation control | -| `arctan` | (2/π) × arctan(x) | (-1, 1) | Slower saturation than tanh | Wide dynamic range | -| `logistic` | 2 / (1 + e^(-x)) - 1 | (-1, 1) | Equivalent to tanh(x/2), gentler curve | Mild non-linearity | -| `asinh_norm` | x / √(1 + x²) | (-1, 1) | Normalized asinh-like transform | Extreme outlier robustness | +| `softsign` | x / (1 + \|x\|) | (-1, 1) | Smoother than tanh, linear near 0 | Less aggressive saturation | +| `arctan` | (2/pi) * arctan(x) | (-1, 1) | Slower saturation than tanh | Wide dynamic range | +| `sigmoid` | 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) | (-1, 1) | Sigmoid mapped to (-1, 1) | Standard sigmoid activation | +| `asinh_norm` | x / sqrt(1 + x^2) | (-1, 1) | Normalized asinh-like transform | Extreme outlier robustness | +| `clip` | clip(x, -1, 1) | [-1, 1] | Hard clipping at ±1 | Preserve linearity within bounds | _Invariant / safety controls:_ @@ -581,8 +579,8 @@ python reward_space_analysis.py \ python reward_space_analysis.py \ --num_samples 25000 \ - --params hold_potential_transform_pnl=softsign_sharp potential_softsign_sharpness=0.5 \ - --output pbrs_sharp_transforms + --params hold_potential_transform_pnl=sigmoid hold_potential_gain=2.0 \ + --output pbrs_sigmoid_transforms ``` ### Real Data Comparison diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py index 9e5c252..3292953 100644 --- a/ReforceXY/reward_space_analysis/reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py @@ -168,9 +168,8 @@ def _fail_safely(reason: str) -> float: ALLOWED_TRANSFORMS = { "tanh", "softsign", - "softsign_sharp", "arctan", - "logistic", + "sigmoid", "asinh_norm", "clip", } @@ -213,7 +212,6 @@ DEFAULT_MODEL_REWARD_PARAMETERS: RewardParams = { # Potential-based reward shaping core parameters # Discount factor γ for potential term (0 ≤ γ ≤ 1) "potential_gamma": POTENTIAL_GAMMA_DEFAULT, - 
"potential_softsign_sharpness": 1.0, # Exit potential modes: canonical | non-canonical | progressive_release | spike_cancel | retain_previous "exit_potential_mode": "canonical", "exit_potential_decay": 0.5, @@ -259,13 +257,12 @@ DEFAULT_MODEL_REWARD_PARAMETERS_HELP: Dict[str, str] = { "exit_factor_threshold": "If |exit factor| exceeds this threshold, emit warning.", # PBRS parameters "potential_gamma": "Discount factor γ for PBRS potential-based reward shaping (0 ≤ γ ≤ 1).", - "potential_softsign_sharpness": "Sharpness parameter for softsign_sharp transform (smaller = sharper).", "exit_potential_mode": "Exit potential mode: 'canonical' (Φ=0 & additives disabled), 'non-canonical' (Φ=0 & additives allowed), 'progressive_release', 'spike_cancel', 'retain_previous'.", "exit_potential_decay": "Decay factor for progressive_release exit mode (0 ≤ decay ≤ 1).", "hold_potential_enabled": "Enable PBRS hold potential function Φ(s).", "hold_potential_scale": "Scale factor for hold potential function.", "hold_potential_gain": "Gain factor applied before transforms in hold potential.", - "hold_potential_transform_pnl": "Transform function for PnL in hold potential: tanh, softsign, softsign_sharp, arctan, logistic, asinh_norm, clip.", + "hold_potential_transform_pnl": "Transform function for PnL in hold potential: tanh, softsign, arctan, sigmoid, asinh_norm, clip.", "hold_potential_transform_duration": "Transform function for duration ratio in hold potential.", "entry_additive_enabled": "Enable entry additive reward (non-PBRS component).", "entry_additive_scale": "Scale factor for entry additive reward.", @@ -303,8 +300,6 @@ _PARAMETER_BOUNDS: Dict[str, Dict[str, float]] = { "pnl_factor_beta": {"min": 1e-6}, # PBRS parameter bounds "potential_gamma": {"min": 0.0, "max": 1.0}, - # Softsign sharpness: only lower bound enforced (upper bound limited implicitly by transform stability) - "potential_softsign_sharpness": {"min": 1e-6}, "exit_potential_decay": {"min": 0.0, "max": 1.0}, 
"hold_potential_scale": {"min": 0.0}, "hold_potential_gain": {"min": 0.0}, @@ -3348,48 +3343,43 @@ def main() -> None: def _apply_transform_tanh(value: float) -> float: - """tanh(value) ∈ (-1,1).""" - return float(np.tanh(value)) + """tanh: tanh(x) in (-1, 1).""" + return float(math.tanh(value)) def _apply_transform_softsign(value: float) -> float: - """softsign: value/(1+|value|).""" + """softsign: x / (1 + |x|) in (-1, 1).""" x = value return float(x / (1.0 + abs(x))) -def _apply_transform_softsign_sharp(value: float, sharpness: float = 1.0) -> float: - """softsign_sharp: (sharpness*value)/(1+|sharpness*value|) - multiplicative sharpness.""" - xs = sharpness * value - return float(xs / (1.0 + abs(xs))) - - def _apply_transform_arctan(value: float) -> float: - """arctan normalized: (2/pi)*atan(value) ∈ (-1,1).""" + """arctan: (2/pi) * arctan(x) in (-1, 1).""" return float((2.0 / math.pi) * math.atan(value)) -def _apply_transform_logistic(value: float) -> float: - """Overflow‑safe logistic transform mapped to (-1,1): 2σ(x)−1.""" +def _apply_transform_sigmoid(value: float) -> float: + """sigmoid: 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) in (-1, 1).""" x = value try: if x >= 0: - z = math.exp(-x) # z in (0,1] - return float((1.0 - z) / (1.0 + z)) + exp_neg_x = math.exp(-x) + sigma_x = 1.0 / (1.0 + exp_neg_x) else: - z = math.exp(x) # z in (0,1] - return float((z - 1.0) / (z + 1.0)) + exp_x = math.exp(x) + sigma_x = exp_x / (exp_x + 1.0) + return 2.0 * sigma_x - 1.0 except OverflowError: return 1.0 if x > 0 else -1.0 def _apply_transform_asinh_norm(value: float) -> float: - """Normalized asinh: value / sqrt(1 + value²) producing range (-1,1).""" + """asinh_norm: x / sqrt(1 + x^2) in (-1, 1).""" return float(value / math.hypot(1.0, value)) def _apply_transform_clip(value: float) -> float: - """clip(value) to [-1,1].""" + """clip: clip(x, -1, 1) in [-1, 1].""" return float(np.clip(value, -1.0, 1.0)) @@ -3398,9 +3388,8 @@ def apply_transform(transform_name: str, value: float, 
**kwargs: Any) -> float: transforms = { "tanh": _apply_transform_tanh, "softsign": _apply_transform_softsign, - "softsign_sharp": _apply_transform_softsign_sharp, "arctan": _apply_transform_arctan, - "logistic": _apply_transform_logistic, + "sigmoid": _apply_transform_sigmoid, "asinh_norm": _apply_transform_asinh_norm, "clip": _apply_transform_clip, } @@ -3709,18 +3698,9 @@ def _compute_bi_component( gain = _get_float_param(params, gain_key, 1.0) transform_pnl = _get_str_param(params, transform_pnl_key, "tanh") transform_duration = _get_str_param(params, transform_dur_key, "tanh") - sharpness = _get_float_param(params, "potential_softsign_sharpness", 1.0) - if transform_pnl == "softsign_sharp": - t_pnl = apply_transform(transform_pnl, gain * pnl, sharpness=sharpness) - else: - t_pnl = apply_transform(transform_pnl, gain * pnl) - if transform_duration == "softsign_sharp": - t_dur = apply_transform( - transform_duration, gain * duration_ratio, sharpness=sharpness - ) - else: - t_dur = apply_transform(transform_duration, gain * duration_ratio) + t_pnl = apply_transform(transform_pnl, gain * pnl) + t_dur = apply_transform(transform_duration, gain * duration_ratio) value = scale * 0.5 * (t_pnl + t_dur) if not np.isfinite(value): return _fail_safely(non_finite_key) diff --git a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py index 068375d..6b332ae 100644 --- a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py @@ -3080,7 +3080,7 @@ class TestPBRSIntegration(RewardSpaceTestBase): """Tests for PBRS (Potential-Based Reward Shaping) integration.""" def test_tanh_transform(self): - """tanh transform: bounded in (-1,1), symmetric.""" + """tanh transform: tanh(x) in (-1, 1).""" self.assertAlmostEqualFloat(apply_transform("tanh", 0.0), 0.0) self.assertAlmostEqualFloat(apply_transform("tanh", 1.0), math.tanh(1.0)) 
self.assertAlmostEqualFloat(apply_transform("tanh", -1.0), math.tanh(-1.0)) @@ -3088,37 +3088,15 @@ class TestPBRSIntegration(RewardSpaceTestBase): self.assertTrue(abs(apply_transform("tanh", -100.0)) <= 1.0) def test_softsign_transform(self): - """softsign transform: x/(1+|x|) in (-1,1).""" + """softsign transform: x / (1 + |x|) in (-1, 1).""" self.assertAlmostEqualFloat(apply_transform("softsign", 0.0), 0.0) self.assertAlmostEqualFloat(apply_transform("softsign", 1.0), 0.5) self.assertAlmostEqualFloat(apply_transform("softsign", -1.0), -0.5) self.assertTrue(abs(apply_transform("softsign", 100.0)) < 1.0) self.assertTrue(abs(apply_transform("softsign", -100.0)) < 1.0) - def test_softsign_sharp_transform(self): - """softsign_sharp transform: (s*x)/(1+|s*x|) in (-1,1) with sharpness s.""" - # Baseline: s=1 should match softsign - self.assertAlmostEqualFloat( - apply_transform("softsign_sharp", 0.0, sharpness=1.0), 0.0 - ) - self.assertAlmostEqualFloat( - apply_transform("softsign_sharp", 1.0, sharpness=1.0), - apply_transform("softsign", 1.0), - ) - # Higher sharpness => faster saturation - v_low = apply_transform("softsign_sharp", 0.5, sharpness=1.0) - v_high = apply_transform("softsign_sharp", 0.5, sharpness=4.0) - self.assertTrue(abs(v_high) > abs(v_low)) - # Boundedness stress - self.assertTrue( - abs(apply_transform("softsign_sharp", 100.0, sharpness=10.0)) < 1.0 - ) - self.assertTrue( - abs(apply_transform("softsign_sharp", -100.0, sharpness=10.0)) < 1.0 - ) - def test_asinh_norm_transform(self): - """asinh_norm transform: x/sqrt(1+x^2) in (-1,1).""" + """asinh_norm transform: x / sqrt(1 + x^2) in (-1, 1).""" self.assertAlmostEqualFloat(apply_transform("asinh_norm", 0.0), 0.0) # Symmetry self.assertAlmostEqualFloat( @@ -3126,7 +3104,7 @@ class TestPBRSIntegration(RewardSpaceTestBase): -apply_transform("asinh_norm", -1.2345), tolerance=1e-12, ) - # Monotonicity (sampled) + # Monotonicity vals = [apply_transform("asinh_norm", x) for x in [-5.0, -1.0, 0.0, 1.0, 
5.0]] self.assertTrue(all(vals[i] < vals[i + 1] for i in range(len(vals) - 1))) # Bounded @@ -3134,7 +3112,7 @@ class TestPBRSIntegration(RewardSpaceTestBase): self.assertTrue(abs(apply_transform("asinh_norm", -1e6)) < 1.0) def test_arctan_transform(self): - """arctan transform: normalized (2/pi)atan(x) bounded (-1,1).""" + """arctan transform: (2/pi) * arctan(x) in (-1, 1).""" self.assertAlmostEqualFloat(apply_transform("arctan", 0.0), 0.0) self.assertAlmostEqualFloat( apply_transform("arctan", 1.0), @@ -3144,49 +3122,16 @@ class TestPBRSIntegration(RewardSpaceTestBase): self.assertTrue(abs(apply_transform("arctan", 100.0)) <= 1.0) self.assertTrue(abs(apply_transform("arctan", -100.0)) <= 1.0) - def test_logistic_transform(self): - """logistic transform: 2σ(x)-1 in (-1,1).""" - # Environment logistic returns 2σ(x)-1 centered at 0 in (-1,1) - self.assertAlmostEqualFloat(apply_transform("logistic", 0.0), 0.0) - self.assertTrue(apply_transform("logistic", 100.0) > 0.99) - self.assertTrue(apply_transform("logistic", -100.0) < -0.99) - self.assertTrue(-1 < apply_transform("logistic", 10.0) < 1) - self.assertTrue(-1 < apply_transform("logistic", -10.0) < 1) - - def test_logistic_equivalence_tanh_half(self): - """logistic(x) must equal tanh(x/2) within tight tolerance across representative domain. - - Uses identity: 2/(1+e^{-x}) - 1 = tanh(x/2). 
- """ - samples = [ - 0.0, - 1e-6, - -1e-6, - 0.5, - -0.5, - 1.0, - -1.0, - 2.5, - -2.5, - 5.0, - -5.0, - 10.0, - -10.0, - ] - for x in samples: - with self.subTest(x=x): - v_log = apply_transform("logistic", x) - v_tanh = math.tanh(x / 2.0) - tol = 1e-12 if abs(x) <= 5 else 1e-10 - self.assertAlmostEqualFloat( - v_log, - v_tanh, - tolerance=tol, - msg=f"Mismatch logistic vs tanh(x/2) at x={x}: {v_log} vs {v_tanh}", - ) + def test_sigmoid_transform(self): + """sigmoid transform: 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) in (-1, 1).""" + self.assertAlmostEqualFloat(apply_transform("sigmoid", 0.0), 0.0) + self.assertTrue(apply_transform("sigmoid", 100.0) > 0.99) + self.assertTrue(apply_transform("sigmoid", -100.0) < -0.99) + self.assertTrue(-1 < apply_transform("sigmoid", 10.0) < 1) + self.assertTrue(-1 < apply_transform("sigmoid", -10.0) < 1) def test_clip_transform(self): - """clip transform: clamp to [-1,1].""" + """clip transform: clip(x, -1, 1) in [-1, 1].""" self.assertAlmostEqualFloat(apply_transform("clip", 0.0), 0.0) self.assertAlmostEqualFloat(apply_transform("clip", 0.5), 0.5) self.assertAlmostEqualFloat(apply_transform("clip", 2.0), 1.0) diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index 3512947..5a8555c 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -1334,9 +1334,6 @@ class MyRLEnv(Base5ActionRLEnv): self.max_trade_duration_candles: int = self.rl_config.get( "max_trade_duration_candles", 128 ) - # === Constants === - self.MIN_SOFTSIGN_SHARPNESS: float = 0.01 - self.MAX_SOFTSIGN_SHARPNESS: float = 100.0 # === INTERNAL STATE === self._last_closed_position: Optional[Positions] = None self._last_closed_trade_tick: int = 0 @@ -1363,13 +1360,6 @@ class MyRLEnv(Base5ActionRLEnv): original_gamma, self._potential_gamma, ) - self._potential_softsign_sharpness: float = float( - model_reward_parameters.get("potential_softsign_sharpness", 1.0) - ) - 
self._potential_softsign_sharpness = max( - self.MIN_SOFTSIGN_SHARPNESS, - min(self.MAX_SOFTSIGN_SHARPNESS, self._potential_softsign_sharpness), - ) # === EXIT POTENTIAL MODE === # exit_potential_mode options: # 'canonical' -> Φ(s')=0 (preserves invariance, disables additives) @@ -1664,8 +1654,8 @@ class MyRLEnv(Base5ActionRLEnv): Parameters ---------- name : str - Transform function name: 'tanh', 'softsign', 'softsign_sharp', - 'arctan', 'logistic', 'asinh_norm', or 'clip' + Transform function name: 'tanh', 'softsign', 'arctan', 'sigmoid', + 'asinh_norm', or 'clip' x : float Input value to transform @@ -1681,23 +1671,18 @@ class MyRLEnv(Base5ActionRLEnv): ax = abs(x) return x / (1.0 + ax) - if name == "softsign_sharp": - s = self._potential_softsign_sharpness - xs = s * x - ax = abs(xs) - return xs / (1.0 + ax) - if name == "arctan": return (2.0 / math.pi) * math.atan(x) - if name == "logistic": + if name == "sigmoid": try: if x >= 0: - z = math.exp(-x) # z in (0,1] - return (1.0 - z) / (1.0 + z) + exp_neg_x = math.exp(-x) + sigma_x = 1.0 / (1.0 + exp_neg_x) else: - z = math.exp(x) # z in (0,1] - return (z - 1.0) / (z + 1.0) + exp_x = math.exp(x) + sigma_x = exp_x / (exp_x + 1.0) + return 2.0 * sigma_x - 1.0 except OverflowError: return 1.0 if x > 0 else -1.0 @@ -1824,19 +1809,17 @@ class MyRLEnv(Base5ActionRLEnv): ----------------------- Hold potential formula: Φ(s) = scale * 0.5 * [T_pnl(g*pnl_ratio) + T_dur(g*duration_ratio)] - **Bounded Transform Functions** (range [-1,1]): - - tanh: smooth saturation, tanh(x) - - softsign: x/(1+|x|), gentler than tanh - - softsign_sharp: (sharpness*x)/(1+|sharpness*x|), custom saturation control - - arctan: (2/π)*arctan(x), linear near origin - - logistic: 2σ(x)-1 where σ(x)=1/(1+e^(-x)), numerically stable implementation - - asinh_norm: x/√(1+x²), normalized asinh-like - - clip: hard clamp to [-1,1] + **Bounded Transform Functions** (each maps R -> (-1, 1) except clip which is [-1, 1]): + - tanh: tanh(x) + - softsign: x / (1 
+ |x|) + - arctan: (2/pi) * arctan(x) + - sigmoid: 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) + - asinh_norm: x / sqrt(1 + x^2) + - clip: clip(x, -1, 1) **Parameters**: - gain g: sharpens (g>1) or softens (g<1) transform input - scale: multiplies final potential value - - sharpness: affects softsign_sharp transform (must be >0) Exit Potential Modes -------------------- -- 2.43.0