From ee8eb270a567b2b6573a8f71dcc47ca240f6a594 Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@piment-noir.org>
Date: Wed, 24 Dec 2025 01:06:15 +0100
Subject: [PATCH] test(ReforceXY): finish factor decoupling properly
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: JÃ©rÃ´me Benoit <jerome.benoit@piment-noir.org>
---
 .../components/test_reward_components.py      | 227 ++++++++++++++++--
 .../tests/helpers/assertions.py               |  15 +-
 .../tests/helpers/configs.py                  |  13 +-
 ReforceXY/user_data/freqaimodels/ReforceXY.py |   2 +-
 4 files changed, 233 insertions(+), 24 deletions(-)

diff --git a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py
index 9937938..4f398b9 100644
--- a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py
+++ b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py
@@ -199,30 +199,225 @@ class TestRewardComponents(RewardSpaceTestBase):
             validate_idle_penalty,
         )
 
-    def test_efficiency_zero_policy(self):
-        """Test efficiency zero policy produces expected PnL coefficient.
+    def test_pnl_target_coefficient_zero_pnl(self):
+        """PnL target coefficient returns neutral value for zero PnL.
 
-        Verifies:
-        - efficiency_weight = 0 -> pnl_coefficient ~= 1.0
-        - Coefficient is finite and positive
+        Validates that zero realized profit/loss produces coefficient = 1.0,
+        ensuring no amplification or attenuation of base exit factor.
+
+        **Setup:**
+        - PnL: 0.0 (breakeven)
+        - pnl_target: profit_aim Ã risk_reward_ratio
+        - Parameters: default base_params
+
+        **Assertions:**
+        - Coefficient is finite
+        - Coefficient equals 1.0 within TOLERANCE.GENERIC_EQ
+        """
+        params = self.base_params()
+        pnl_target = PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO
+
+        coefficient = _compute_pnl_target_coefficient(
+            params, pnl=0.0, pnl_target=pnl_target, risk_reward_ratio=PARAMS.RISK_REWARD_RATIO
+        )
+
+        self.assertFinite(coefficient, name="pnl_target_coefficient")
+        self.assertAlmostEqualFloat(coefficient, 1.0, tolerance=TOLERANCE.GENERIC_EQ)
+
+    def test_pnl_target_coefficient_exceeds_target(self):
+        """PnL target coefficient rewards exits that exceed profit target.
+
+        Validates amplification behavior when realized PnL exceeds the target,
+        incentivizing the agent to achieve higher profits than baseline.
+
+        **Setup:**
+        - PnL: 150% of pnl_target (exceeds target by 50%)
+        - pnl_target: 0.045 (profit_aim=0.03 Ã risk_reward_ratio=1.5)
+        - Parameters: win_reward_factor=2.0, pnl_factor_beta=0.5
+
+        **Assertions:**
+        - Coefficient is finite
+        - Coefficient > 1.0 (rewards exceeding target)
+        """
+        params = self.base_params(win_reward_factor=2.0, pnl_factor_beta=0.5)
+        profit_aim = 0.03
+        risk_reward_ratio = 1.5
+        pnl_target = profit_aim * risk_reward_ratio
+        pnl = pnl_target * 1.5  # 50% above target
+
+        coefficient = _compute_pnl_target_coefficient(
+            params, pnl=pnl, pnl_target=pnl_target, risk_reward_ratio=risk_reward_ratio
+        )
+
+        self.assertFinite(coefficient, name="pnl_target_coefficient")
+        self.assertGreater(
+            coefficient, 1.0, "PnL exceeding target should reward with coefficient > 1.0"
+        )
+
+    def test_pnl_target_coefficient_below_loss_threshold(self):
+        """PnL target coefficient amplifies penalty for excessive losses.
+
+        Validates that losses exceeding risk-adjusted threshold produce
+        coefficient > 1.0 to amplify negative reward signal. Penalty applies
+        when BOTH conditions met: abs(pnl_ratio) > 1.0 AND pnl_ratio < -(1/rr).
+
+        **Setup:**
+        - PnL: -0.06 (exceeds pnl_target magnitude)
+        - pnl_target: 0.045 (profit_aim=0.03 Ã risk_reward_ratio=1.5)
+        - Penalty threshold: pnl < -pnl_target = -0.045
+        - Parameters: win_reward_factor=2.0, pnl_factor_beta=0.5
+
+        **Assertions:**
+        - Coefficient is finite
+        - Coefficient > 1.0 (amplifies loss penalty)
         """
+        params = self.base_params(win_reward_factor=2.0, pnl_factor_beta=0.5)
+        profit_aim = 0.03
+        risk_reward_ratio = 1.5
+        pnl_target = profit_aim * risk_reward_ratio  # 0.045
+        # Need abs(pnl / pnl_target) > 1.0 AND pnl / pnl_target < -1/1.5
+        # So pnl < -0.045 (exceeds pnl_target in magnitude)
+        pnl = -0.06  # Much more negative than pnl_target
+
+        coefficient = _compute_pnl_target_coefficient(
+            params, pnl=pnl, pnl_target=pnl_target, risk_reward_ratio=risk_reward_ratio
+        )
+
+        self.assertFinite(coefficient, name="pnl_target_coefficient")
+        self.assertGreater(
+            coefficient, 1.0, "Excessive loss should amplify penalty with coefficient > 1.0"
+        )
+
+    def test_efficiency_coefficient_zero_weight(self):
+        """Efficiency coefficient returns neutral value when efficiency disabled.
+
+        Validates that efficiency_weight=0 disables exit timing efficiency
+        adjustments, returning coefficient = 1.0 regardless of exit position
+        relative to unrealized PnL extremes.
+
+        **Setup:**
+        - efficiency_weight: 0.0 (disabled)
+        - PnL: 0.02 (between min=-0.01 and max=0.03)
+        - Trade context: Long position with unrealized range
+
+        **Assertions:**
+        - Coefficient is finite
+        - Coefficient equals 1.0 within TOLERANCE.GENERIC_EQ
+        """
+        params = self.base_params(efficiency_weight=0.0)
         ctx = self.make_ctx(
-            pnl=0.0,
-            trade_duration=1,
+            pnl=0.02,
+            trade_duration=10,
+            max_unrealized_profit=0.03,
+            min_unrealized_profit=-0.01,
+            position=Positions.Long,
+            action=Actions.Long_exit,
+        )
+
+        coefficient = _compute_efficiency_coefficient(params, ctx, ctx.pnl)
+
+        self.assertFinite(coefficient, name="efficiency_coefficient")
+        self.assertAlmostEqualFloat(coefficient, 1.0, tolerance=TOLERANCE.GENERIC_EQ)
+
+    def test_efficiency_coefficient_optimal_profit_exit(self):
+        """Efficiency coefficient rewards exits near peak unrealized profit.
+
+        Validates that exiting close to maximum unrealized profit produces
+        coefficient > 1.0, incentivizing optimal exit timing for profitable trades.
+
+        **Setup:**
+        - PnL: 0.029 (very close to max_unrealized_profit=0.03)
+        - Efficiency ratio: (0.029 - 0.0) / (0.03 - 0.0) â 0.967 (high)
+        - efficiency_weight: 1.0, efficiency_center: 0.5
+        - Trade context: Long position exiting near peak
+
+        **Assertions:**
+        - Coefficient is finite
+        - Coefficient > 1.0 (rewards optimal timing)
+        """
+        params = self.base_params(efficiency_weight=1.0, efficiency_center=0.5)
+        ctx = self.make_ctx(
+            pnl=0.029,  # Close to max
+            trade_duration=10,
+            max_unrealized_profit=0.03,
+            min_unrealized_profit=0.0,
+            position=Positions.Long,
+            action=Actions.Long_exit,
+        )
+
+        coefficient = _compute_efficiency_coefficient(params, ctx, ctx.pnl)
+
+        self.assertFinite(coefficient, name="efficiency_coefficient")
+        self.assertGreater(
+            coefficient, 1.0, "Exit near max profit should reward with coefficient > 1.0"
+        )
+
+    def test_efficiency_coefficient_poor_profit_exit(self):
+        """Efficiency coefficient penalizes exits far from peak unrealized profit.
+
+        Validates that exiting far below maximum unrealized profit produces
+        coefficient < 1.0, penalizing poor exit timing that leaves profit on the table.
+
+        **Setup:**
+        - PnL: 0.005 (far from max_unrealized_profit=0.03)
+        - Efficiency ratio: (0.005 - 0.0) / (0.03 - 0.0) â 0.167 (low)
+        - efficiency_weight: 1.0, efficiency_center: 0.5
+        - Trade context: Long position exiting prematurely
+
+        **Assertions:**
+        - Coefficient is finite
+        - Coefficient < 1.0 (penalizes suboptimal timing)
+        """
+        params = self.base_params(efficiency_weight=1.0, efficiency_center=0.5)
+        ctx = self.make_ctx(
+            pnl=0.005,  # Far from max
+            trade_duration=10,
+            max_unrealized_profit=0.03,
+            min_unrealized_profit=0.0,
+            position=Positions.Long,
+            action=Actions.Long_exit,
+        )
+
+        coefficient = _compute_efficiency_coefficient(params, ctx, ctx.pnl)
+
+        self.assertFinite(coefficient, name="efficiency_coefficient")
+        self.assertLess(
+            coefficient, 1.0, "Exit far from max profit should penalize with coefficient < 1.0"
+        )
+
+    def test_efficiency_coefficient_optimal_loss_exit(self):
+        """Efficiency coefficient rewards loss exits near minimum unrealized loss.
+
+        Validates that exiting close to minimum unrealized loss produces
+        coefficient > 1.0, rewarding quick loss-cutting behavior for losing trades.
+
+        **Setup:**
+        - PnL: -0.005 (very close to min_unrealized_profit=-0.006)
+        - Efficiency ratio: (-0.005 - (-0.006)) / (0.0 - (-0.006)) â 0.167 (low)
+        - For losses: coefficient = 1 + weight Ã (center - ratio) â rewards low ratio
+        - efficiency_weight: 1.0, efficiency_center: 0.5
+        - Trade context: Long position cutting losses quickly
+
+        **Assertions:**
+        - Coefficient is finite
+        - Coefficient > 1.0 (rewards optimal loss exit)
+        """
+        params = self.base_params(efficiency_weight=1.0, efficiency_center=0.5)
+        ctx = self.make_ctx(
+            pnl=-0.005,  # Close to min loss
+            trade_duration=10,
             max_unrealized_profit=0.0,
-            min_unrealized_profit=-0.02,
+            min_unrealized_profit=-0.006,
             position=Positions.Long,
             action=Actions.Long_exit,
         )
-        params = self.base_params(efficiency_weight=0.0)
-        pnl_target = PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO
-        pnl_target_coefficient = _compute_pnl_target_coefficient(
-            params, ctx.pnl, pnl_target, PARAMS.RISK_REWARD_RATIO
+
+        coefficient = _compute_efficiency_coefficient(params, ctx, ctx.pnl)
+
+        self.assertFinite(coefficient, name="efficiency_coefficient")
+        self.assertGreater(
+            coefficient, 1.0, "Exit near min loss should reward with coefficient > 1.0"
         )
-        efficiency_coefficient = _compute_efficiency_coefficient(params, ctx, ctx.pnl)
-        pnl_coefficient = pnl_target_coefficient * efficiency_coefficient
-        self.assertFinite(pnl_coefficient, name="pnl_coefficient")
-        self.assertAlmostEqualFloat(pnl_coefficient, 1.0, tolerance=TOLERANCE.GENERIC_EQ)
 
     def test_exit_reward_never_positive_for_loss_due_to_efficiency(self):
         """Exit reward should not become positive for a loss trade.
diff --git a/ReforceXY/reward_space_analysis/tests/helpers/assertions.py b/ReforceXY/reward_space_analysis/tests/helpers/assertions.py
index 3a124fd..9ba9398 100644
--- a/ReforceXY/reward_space_analysis/tests/helpers/assertions.py
+++ b/ReforceXY/reward_space_analysis/tests/helpers/assertions.py
@@ -658,12 +658,21 @@ def assert_exit_mode_mathematical_validation(
         params, context.pnl, pnl_target, risk_reward_ratio
     )
     efficiency_coefficient = _compute_efficiency_coefficient(params, context, context.pnl)
-    pnl_coefficient = pnl_target_coefficient * efficiency_coefficient
+
     observed_exit_factor = _get_exit_factor(
-        base_factor, context.pnl, pnl_target, duration_ratio, context, params, risk_reward_ratio
+        base_factor,
+        context.pnl,
+        pnl_target,
+        duration_ratio,
+        context,
+        params,
+        risk_reward_ratio,
     )
+    # Isolate time attenuation by dividing out base_factor and the two semantic coefficients
     observed_half_life_factor = observed_exit_factor / (
-        base_factor * max(pnl_coefficient, np.finfo(float).eps)
+        base_factor
+        * max(pnl_target_coefficient, np.finfo(float).eps)
+        * max(efficiency_coefficient, np.finfo(float).eps)
     )
     expected_half_life_factor = 2 ** (-duration_ratio / params["exit_half_life"])
     test_case.assertAlmostEqual(
diff --git a/ReforceXY/reward_space_analysis/tests/helpers/configs.py b/ReforceXY/reward_space_analysis/tests/helpers/configs.py
index 1b336ab..227bbfc 100644
--- a/ReforceXY/reward_space_analysis/tests/helpers/configs.py
+++ b/ReforceXY/reward_space_analysis/tests/helpers/configs.py
@@ -113,13 +113,17 @@ class ProgressiveScalingConfig:
 class ExitFactorConfig:
     """Configuration for exit factor validation.
 
-    Parameters specific to exit factor calculations, including attenuation
-    mode and plateau behavior.
+    Parameters specific to exit factor calculations, including coefficient
+    decomposition, attenuation mode and plateau behavior.
+
+    The exit factor is computed as:
+        exit_factor = base_factor Ã time_attenuation Ã pnl_target Ã efficiency
 
     Attributes:
         base_factor: Base scaling factor
         pnl: Realized profit/loss
-        pnl_coefficient: PnL amplification coefficient
+        pnl_target_coefficient: PnL target amplification coefficient (typically 0.5-2.0)
+        efficiency_coefficient: Exit timing efficiency coefficient (typically 0.5-1.5)
         duration_ratio: Ratio of current to maximum duration
         attenuation_mode: Mode of attenuation ("linear", "power", etc.)
         plateau_enabled: Whether plateau behavior is active
@@ -129,7 +133,8 @@ class ExitFactorConfig:
 
     base_factor: float
     pnl: float
-    pnl_coefficient: float
+    pnl_target_coefficient: float
+    efficiency_coefficient: float
     duration_ratio: float
     attenuation_mode: str
     plateau_enabled: bool = False
diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py
index d0a2453..9c8ca33 100644
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -2216,7 +2216,7 @@ class MyRLEnv(Base5ActionRLEnv):
 
         Canonical PBRS Formula
         ----------------------
-        R'(s,a,s') = R(s,a,s') + Î³Â·Î¦(s') - Î(s,a,s')
+        R'(s,a,s') = R(s,a,s') + Î(s,a,s')
 
         where:
             Î(s,a,s') = Î³Â·Î¦(s') - Î¦(s)  (PBRS shaping term)
-- 
2.43.0