From 3e7be4caf0c7114fa126984d913973d37f7f73f0 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Tue, 30 Dec 2025 17:50:34 +0100 Subject: [PATCH] docs(ReforceXY): more aligned mathematical notation in README and code comments MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- ReforceXY/reward_space_analysis/README.md | 32 +++++++++---------- .../reward_space_analysis.py | 2 +- .../components/test_reward_components.py | 4 +-- .../tests/pbrs/test_pbrs.py | 2 +- .../tests/transforms/test_transforms.py | 2 +- ReforceXY/user_data/freqaimodels/ReforceXY.py | 2 +- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md index f080852..e8d625c 100644 --- a/ReforceXY/reward_space_analysis/README.md +++ b/ReforceXY/reward_space_analysis/README.md @@ -233,7 +233,7 @@ be overridden via `--params`. The exit factor is computed as: -`exit_factor` = `base_factor ` × `pnl_target_coefficient` × `efficiency_coefficient` × `time_attenuation_coefficient` +`exit_factor` = `base_factor` · `pnl_target_coefficient` · `efficiency_coefficient` · `time_attenuation_coefficient` ##### PnL Target @@ -248,13 +248,13 @@ The exit factor is computed as: **Formula:** -Let `pnl_target = profit_aim × risk_reward_ratio`, `pnl_ratio = pnl / pnl_target`. +Let `pnl_target = profit_aim · risk_reward_ratio`, `pnl_ratio = pnl / pnl_target`. 
- If `pnl_target ≤ 0`: `pnl_target_coefficient = 1.0` - If `pnl_ratio > 1.0`: - `pnl_target_coefficient = 1.0 + win_reward_factor * tanh(pnl_amplification_sensitivity * (pnl_ratio − 1.0))` -- If `pnl_ratio < −(1.0 / risk_reward_ratio)`: - `pnl_target_coefficient = 1.0 + (win_reward_factor * risk_reward_ratio) * tanh(pnl_amplification_sensitivity * (|pnl_ratio| − 1.0))` + `pnl_target_coefficient = 1.0 + win_reward_factor · tanh(pnl_amplification_sensitivity · (pnl_ratio - 1.0))` +- If `pnl_ratio < -(1.0 / risk_reward_ratio)`: + `pnl_target_coefficient = 1.0 + (win_reward_factor · risk_reward_ratio) · tanh(pnl_amplification_sensitivity · (|pnl_ratio| - 1.0))` - Else: `pnl_target_coefficient = 1.0` ##### Efficiency @@ -270,9 +270,9 @@ Let `max_u = max_unrealized_profit`, `min_u = min_unrealized_profit`, `range = max_u - min_u`, `ratio = (pnl - min_u)/range`. Then: - If `pnl > 0`: - `efficiency_coefficient = 1 + efficiency_weight * (ratio - efficiency_center)` + `efficiency_coefficient = 1 + efficiency_weight · (ratio - efficiency_center)` - If `pnl < 0`: - `efficiency_coefficient = 1 + efficiency_weight * (efficiency_center - ratio)` + `efficiency_coefficient = 1 + efficiency_weight · (efficiency_center - ratio)` - Else: `efficiency_coefficient = 1` ##### Exit Attenuation @@ -318,7 +318,7 @@ where `kernel_function` depends on `exit_attenuation_mode`. See [Exit Attenuatio | `exit_potential_mode` | canonical | Potential release mode | | `exit_potential_decay` | 0.5 | Decay for progressive_release | | `hold_potential_enabled` | true | Enable hold potential Φ | -| `entry_fee_rate` | 0.0 | Entry fee rate (`price * (1 + fee)`) | +| `entry_fee_rate` | 0.0 | Entry fee rate (`price · (1 + fee)`) | | `exit_fee_rate` | 0.0 | Exit fee rate (`price / (1 + fee)`) | PBRS invariance holds when: `exit_potential_mode=canonical`. 
@@ -392,13 +392,13 @@ r* = r - grace if exit_plateau and r > grace r* = r if not exit_plateau ``` -| Mode | Formula | Monotonic | Notes | Use Case | -| --------- | ------------------------------- | --------- | ------------------------------------------- | ------------------------------------ | -| legacy | step: ×1.5 if r\* ≤ 1 else ×0.5 | No | Non-monotonic legacy mode (not recommended) | Backward compatibility only | -| sqrt | 1 / √(1 + r\*) | Yes | Sub-linear decay | Gentle long-trade penalty | -| linear | 1 / (1 + slope \* r\*) | Yes | slope = `exit_linear_slope` | Balanced duration penalty (default) | -| power | (1 + r\*)^(-alpha) | Yes | alpha = -ln(tau)/ln(2); tau=1 ⇒ alpha=0 | Tunable decay rate via tau parameter | -| half_life | 2^(- r\* / hl) | Yes | hl = `exit_half_life`; r\*=hl ⇒ factor ×0.5 | Time-based exponential discount | +| Mode | Formula | Monotonic | Notes | Use Case | +| --------- | ----------------------------- | --------- | ------------------------------------------- | ------------------------------------ | +| legacy | step: 1.5 if r\* ≤ 1 else 0.5 | No | Non-monotonic legacy mode (not recommended) | Backward compatibility only | +| sqrt | 1 / √(1 + r\*) | Yes | Sub-linear decay | Gentle long-trade penalty | +| linear | 1 / (1 + slope · r\*) | Yes | slope = `exit_linear_slope` | Balanced duration penalty (default) | +| power | (1 + r\*)^(-alpha) | Yes | alpha = -ln(tau)/ln(2); tau=1 ⇒ alpha=0 | Tunable decay rate via tau parameter | +| half_life | 2^(-r\* / hl) | Yes | hl = `exit_half_life`; r\*=hl ⇒ factor 0.5 | Time-based exponential discount | ### Transform Functions @@ -406,7 +406,7 @@ r* = r if not exit_plateau | ---------- | -------------------------------- | ------- | ----------------- | ----------------------------- | | `tanh` | tanh(x) | (-1, 1) | Smooth sigmoid | Balanced transforms (default) | | `softsign` | x / (1 + \|x\|) | (-1, 1) | Linear near 0 | Less aggressive saturation | -| `arctan` | (2/π) \* arctan(x) | (-1, 1) | Slower 
saturation | Wide dynamic range | +| `arctan` | (2/π) · arctan(x) | (-1, 1) | Slower saturation | Wide dynamic range | | `sigmoid` | 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) | (-1, 1) | Standard sigmoid | Generic shaping | | `asinh` | x / √(1 + x²) | (-1, 1) | Outlier robust | Extreme stability | | `clip` | clip(x, -1, 1) | [-1, 1] | Hard clipping | Preserve linearity | diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py index 5a20b0e..fcb2539 100644 --- a/ReforceXY/reward_space_analysis/reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py @@ -2918,7 +2918,7 @@ def _apply_transform_softsign(value: float) -> float: def _apply_transform_arctan(value: float) -> float: - """arctan: (2/pi) * arctan(x) in (-1, 1).""" + """arctan: (2/π) · arctan(x) in (-1, 1).""" return float((2.0 / math.pi) * math.atan(value)) diff --git a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py index da99aa1..3004f75 100644 --- a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py +++ b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py @@ -352,7 +352,7 @@ class TestRewardComponents(RewardSpaceTestBase): ) # Verify exact values at bounds using the formula - # coefficient = 1.0 + weight * (ratio - center) + # coefficient = 1.0 + weight · (ratio - center) # ratio = (pnl - min_pnl) / range_pnl range_pnl = max_unrealized_profit - min_unrealized_profit @@ -443,7 +443,7 @@ class TestRewardComponents(RewardSpaceTestBase): ) # Verify exact values at bounds using the INVERTED formula for losses - # coefficient = 1.0 + weight * (center - ratio) + # coefficient = 1.0 + weight · (center - ratio) range_pnl = max_unrealized_profit - min_unrealized_profit # Worst exit bound (first element: largest loss) diff --git 
a/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py b/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py index fb56d63..2b81534 100644 --- a/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py +++ b/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py @@ -530,7 +530,7 @@ class TestPBRS(RewardSpaceTestBase): gamma = float(gamma_fallback) except Exception: gamma = 0.95 - # PBRS shaping Δ = γ·Φ(next) − Φ(prev). Here Φ(next)=Φ(prev) since decay clamps to 0. # noqa: RUF003 + # PBRS shaping Δ = γ·Φ(next) - Φ(prev). Here Φ(next)=Φ(prev) since decay clamps to 0. self.assertLessEqual( abs(shaping - ((gamma - 1.0) * prev_potential)), TOLERANCE.GENERIC_EQ, diff --git a/ReforceXY/reward_space_analysis/tests/transforms/test_transforms.py b/ReforceXY/reward_space_analysis/tests/transforms/test_transforms.py index 3004241..6d72d1e 100644 --- a/ReforceXY/reward_space_analysis/tests/transforms/test_transforms.py +++ b/ReforceXY/reward_space_analysis/tests/transforms/test_transforms.py @@ -33,7 +33,7 @@ class TestTransforms(RewardSpaceTestBase): ("softsign", [0.0, 1.0, -1.0], [0.0, 0.5, -0.5]), # asinh transform: x / sqrt(1 + x^2) in (-1, 1) ("asinh", [0.0], [0.0]), # More complex calculations tested separately - # arctan transform: (2/pi) * arctan(x) in (-1, 1) + # arctan transform: (2/π) · arctan(x) in (-1, 1) ("arctan", [0.0, 1.0], [0.0, 2.0 / math.pi * math.atan(1.0)]), # sigmoid transform: 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) in (-1, 1) # noqa: RUF003 ("sigmoid", [0.0], [0.0]), # More complex calculations tested separately diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index 3818ecd..c3022d8 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -2817,7 +2817,7 @@ class MyRLEnv(Base5ActionRLEnv): model_reward_parameters: Mapping[str, Any], ) -> float: """ - Compute exit factor: base_factor × time_attenuation_coefficient × pnl_target_coefficient × 
efficiency_coefficient. + Compute exit factor: base_factor · pnl_target_coefficient · efficiency_coefficient · time_attenuation_coefficient. """ if not ( np.isfinite(base_factor) -- 2.43.0