From 3e7be4caf0c7114fa126984d913973d37f7f73f0 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Tue, 30 Dec 2025 17:50:34 +0100 Subject: [PATCH] docs(ReforceXY): more aligned mathematical notation in README and code comments MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- ReforceXY/reward_space_analysis/README.md | 32 +++++++++---------- .../reward_space_analysis.py | 2 +- .../components/test_reward_components.py | 4 +-- .../tests/pbrs/test_pbrs.py | 2 +- .../tests/transforms/test_transforms.py | 2 +- ReforceXY/user_data/freqaimodels/ReforceXY.py | 2 +- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md index f080852..e8d625c 100644 --- a/ReforceXY/reward_space_analysis/README.md +++ b/ReforceXY/reward_space_analysis/README.md @@ -233,7 +233,7 @@ be overridden via `--params`. The exit factor is computed as: -`exit_factor` = `base_factor ` × `pnl_target_coefficient` × `efficiency_coefficient` × `time_attenuation_coefficient` +`exit_factor` = `base_factor` · `pnl_target_coefficient` · `efficiency_coefficient` · `time_attenuation_coefficient` ##### PnL Target @@ -248,13 +248,13 @@ The exit factor is computed as: **Formula:** -Let `pnl_target = profit_aim × risk_reward_ratio`, `pnl_ratio = pnl / pnl_target`. +Let `pnl_target = profit_aim · risk_reward_ratio`, `pnl_ratio = pnl / pnl_target`. 
- If `pnl_target ≤ 0`: `pnl_target_coefficient = 1.0` - If `pnl_ratio > 1.0`: - `pnl_target_coefficient = 1.0 + win_reward_factor * tanh(pnl_amplification_sensitivity * (pnl_ratio − 1.0))` -- If `pnl_ratio < −(1.0 / risk_reward_ratio)`: - `pnl_target_coefficient = 1.0 + (win_reward_factor * risk_reward_ratio) * tanh(pnl_amplification_sensitivity * (|pnl_ratio| − 1.0))` + `pnl_target_coefficient = 1.0 + win_reward_factor · tanh(pnl_amplification_sensitivity · (pnl_ratio - 1.0))` +- If `pnl_ratio < -(1.0 / risk_reward_ratio)`: + `pnl_target_coefficient = 1.0 + (win_reward_factor · risk_reward_ratio) · tanh(pnl_amplification_sensitivity · (|pnl_ratio| - 1.0))` - Else: `pnl_target_coefficient = 1.0` ##### Efficiency @@ -270,9 +270,9 @@ Let `max_u = max_unrealized_profit`, `min_u = min_unrealized_profit`, `range = max_u - min_u`, `ratio = (pnl - min_u)/range`. Then: - If `pnl > 0`: - `efficiency_coefficient = 1 + efficiency_weight * (ratio - efficiency_center)` + `efficiency_coefficient = 1 + efficiency_weight · (ratio - efficiency_center)` - If `pnl < 0`: - `efficiency_coefficient = 1 + efficiency_weight * (efficiency_center - ratio)` + `efficiency_coefficient = 1 + efficiency_weight · (efficiency_center - ratio)` - Else: `efficiency_coefficient = 1` ##### Exit Attenuation @@ -318,7 +318,7 @@ where `kernel_function` depends on `exit_attenuation_mode`. See [Exit Attenuatio | `exit_potential_mode` | canonical | Potential release mode | | `exit_potential_decay` | 0.5 | Decay for progressive_release | | `hold_potential_enabled` | true | Enable hold potential Φ | -| `entry_fee_rate` | 0.0 | Entry fee rate (`price * (1 + fee)`) | +| `entry_fee_rate` | 0.0 | Entry fee rate (`price · (1 + fee)`) | | `exit_fee_rate` | 0.0 | Exit fee rate (`price / (1 + fee)`) | PBRS invariance holds when: `exit_potential_mode=canonical`. 
@@ -392,13 +392,13 @@ r* = r - grace if exit_plateau and r > grace r* = r if not exit_plateau ``` -| Mode | Formula | Monotonic | Notes | Use Case | -| --------- | ------------------------------- | --------- | ------------------------------------------- | ------------------------------------ | -| legacy | step: ×1.5 if r\* ≤ 1 else ×0.5 | No | Non-monotonic legacy mode (not recommended) | Backward compatibility only | -| sqrt | 1 / √(1 + r\*) | Yes | Sub-linear decay | Gentle long-trade penalty | -| linear | 1 / (1 + slope \* r\*) | Yes | slope = `exit_linear_slope` | Balanced duration penalty (default) | -| power | (1 + r\*)^(-alpha) | Yes | alpha = -ln(tau)/ln(2); tau=1 ⇒ alpha=0 | Tunable decay rate via tau parameter | -| half_life | 2^(- r\* / hl) | Yes | hl = `exit_half_life`; r\*=hl ⇒ factor ×0.5 | Time-based exponential discount | +| Mode | Formula | Monotonic | Notes | Use Case | +| --------- | ----------------------------- | --------- | ------------------------------------------- | ------------------------------------ | +| legacy | step: 1.5 if r\* ≤ 1 else 0.5 | No | Non-monotonic legacy mode (not recommended) | Backward compatibility only | +| sqrt | 1 / √(1 + r\*) | Yes | Sub-linear decay | Gentle long-trade penalty | +| linear | 1 / (1 + slope · r\*) | Yes | slope = `exit_linear_slope` | Balanced duration penalty (default) | +| power | (1 + r\*)^(-alpha) | Yes | alpha = -ln(tau)/ln(2); tau=1 ⇒ alpha=0 | Tunable decay rate via tau parameter | +| half_life | 2^(-r\* / hl) | Yes | hl = `exit_half_life`; r\*=hl ⇒ factor 0.5 | Time-based exponential discount | ### Transform Functions @@ -406,7 +406,7 @@ r* = r if not exit_plateau | ---------- | -------------------------------- | ------- | ----------------- | ----------------------------- | | `tanh` | tanh(x) | (-1, 1) | Smooth sigmoid | Balanced transforms (default) | | `softsign` | x / (1 + \|x\|) | (-1, 1) | Linear near 0 | Less aggressive saturation | -| `arctan` | (2/π) \* arctan(x) | (-1, 1) | Slower 
saturation | Wide dynamic range | +| `arctan` | (2/π) · arctan(x) | (-1, 1) | Slower saturation | Wide dynamic range | | `sigmoid` | 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) | (-1, 1) | Standard sigmoid | Generic shaping | | `asinh` | x / √(1 + x²) | (-1, 1) | Outlier robust | Extreme stability | | `clip` | clip(x, -1, 1) | [-1, 1] | Hard clipping | Preserve linearity | diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py index 5a20b0e..fcb2539 100644 --- a/ReforceXY/reward_space_analysis/reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py @@ -2918,7 +2918,7 @@ def _apply_transform_softsign(value: float) -> float: def _apply_transform_arctan(value: float) -> float: - """arctan: (2/pi) * arctan(x) in (-1, 1).""" + """arctan: (2/π) · arctan(x) in (-1, 1).""" return float((2.0 / math.pi) * math.atan(value)) diff --git a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py index da99aa1..3004f75 100644 --- a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py +++ b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py @@ -352,7 +352,7 @@ class TestRewardComponents(RewardSpaceTestBase): ) # Verify exact values at bounds using the formula - # coefficient = 1.0 + weight * (ratio - center) + # coefficient = 1.0 + weight · (ratio - center) # ratio = (pnl - min_pnl) / range_pnl range_pnl = max_unrealized_profit - min_unrealized_profit @@ -443,7 +443,7 @@ class TestRewardComponents(RewardSpaceTestBase): ) # Verify exact values at bounds using the INVERTED formula for losses - # coefficient = 1.0 + weight * (center - ratio) + # coefficient = 1.0 + weight · (center - ratio) range_pnl = max_unrealized_profit - min_unrealized_profit # Worst exit bound (first element: largest loss) diff --git 
a/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py b/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py index fb56d63..2b81534 100644 --- a/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py +++ b/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py @@ -530,7 +530,7 @@ class TestPBRS(RewardSpaceTestBase): gamma = float(gamma_fallback) except Exception: gamma = 0.95 - # PBRS shaping Δ = γ·Φ(next) − Φ(prev). Here Φ(next)=Φ(prev) since decay clamps to 0. # noqa: RUF003 + # PBRS shaping Δ = γ·Φ(next) - Φ(prev). Here Φ(next)=Φ(prev) since decay clamps to 0. self.assertLessEqual( abs(shaping - ((gamma - 1.0) * prev_potential)), TOLERANCE.GENERIC_EQ, diff --git a/ReforceXY/reward_space_analysis/tests/transforms/test_transforms.py b/ReforceXY/reward_space_analysis/tests/transforms/test_transforms.py index 3004241..6d72d1e 100644 --- a/ReforceXY/reward_space_analysis/tests/transforms/test_transforms.py +++ b/ReforceXY/reward_space_analysis/tests/transforms/test_transforms.py @@ -33,7 +33,7 @@ class TestTransforms(RewardSpaceTestBase): ("softsign", [0.0, 1.0, -1.0], [0.0, 0.5, -0.5]), # asinh transform: x / sqrt(1 + x^2) in (-1, 1) ("asinh", [0.0], [0.0]), # More complex calculations tested separately - # arctan transform: (2/pi) * arctan(x) in (-1, 1) + # arctan transform: (2/π) · arctan(x) in (-1, 1) ("arctan", [0.0, 1.0], [0.0, 2.0 / math.pi * math.atan(1.0)]), # sigmoid transform: 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) in (-1, 1) # noqa: RUF003 ("sigmoid", [0.0], [0.0]), # More complex calculations tested separately diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index 3818ecd..c3022d8 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -2817,7 +2817,7 @@ class MyRLEnv(Base5ActionRLEnv): model_reward_parameters: Mapping[str, Any], ) -> float: """ - Compute exit factor: base_factor × time_attenuation_coefficient × pnl_target_coefficient × 
efficiency_coefficient. + Compute exit factor: base_factor · pnl_target_coefficient · efficiency_coefficient · time_attenuation_coefficient. """ if not ( np.isfinite(base_factor) -- 2.43.0