Piment Noir Git Repositories - freqai-strategies.git/commitdiff
test(ReforceXY): finish factor decoupling properly
author Jérôme Benoit <jerome.benoit@piment-noir.org>
Wed, 24 Dec 2025 00:06:15 +0000 (01:06 +0100)
committer Jérôme Benoit <jerome.benoit@piment-noir.org>
Wed, 24 Dec 2025 00:06:15 +0000 (01:06 +0100)
Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
ReforceXY/reward_space_analysis/tests/components/test_reward_components.py
ReforceXY/reward_space_analysis/tests/helpers/assertions.py
ReforceXY/reward_space_analysis/tests/helpers/configs.py
ReforceXY/user_data/freqaimodels/ReforceXY.py

index 9937938505a40f8dc0a8e60668c6d38eef36da13..4f398b97ea560ae6d7942c5949c874407fee7d42 100644 (file)
@@ -199,30 +199,225 @@ class TestRewardComponents(RewardSpaceTestBase):
             validate_idle_penalty,
         )
 
-    def test_efficiency_zero_policy(self):
-        """Test efficiency zero policy produces expected PnL coefficient.
+    def test_pnl_target_coefficient_zero_pnl(self):
+        """PnL target coefficient returns neutral value for zero PnL.
 
-        Verifies:
-        - efficiency_weight = 0 -> pnl_coefficient ~= 1.0
-        - Coefficient is finite and positive
+        Validates that zero realized profit/loss produces coefficient = 1.0,
+        ensuring no amplification or attenuation of base exit factor.
+
+        **Setup:**
+        - PnL: 0.0 (breakeven)
+        - pnl_target: profit_aim × risk_reward_ratio
+        - Parameters: default base_params
+
+        **Assertions:**
+        - Coefficient is finite
+        - Coefficient equals 1.0 within TOLERANCE.GENERIC_EQ
+        """
+        params = self.base_params()
+        pnl_target = PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO
+
+        coefficient = _compute_pnl_target_coefficient(
+            params, pnl=0.0, pnl_target=pnl_target, risk_reward_ratio=PARAMS.RISK_REWARD_RATIO
+        )
+
+        self.assertFinite(coefficient, name="pnl_target_coefficient")
+        self.assertAlmostEqualFloat(coefficient, 1.0, tolerance=TOLERANCE.GENERIC_EQ)
+
+    def test_pnl_target_coefficient_exceeds_target(self):
+        """PnL target coefficient rewards exits that exceed profit target.
+
+        Validates amplification behavior when realized PnL exceeds the target,
+        incentivizing the agent to achieve higher profits than baseline.
+
+        **Setup:**
+        - PnL: 150% of pnl_target (exceeds target by 50%)
+        - pnl_target: 0.045 (profit_aim=0.03 × risk_reward_ratio=1.5)
+        - Parameters: win_reward_factor=2.0, pnl_factor_beta=0.5
+
+        **Assertions:**
+        - Coefficient is finite
+        - Coefficient > 1.0 (rewards exceeding target)
+        """
+        params = self.base_params(win_reward_factor=2.0, pnl_factor_beta=0.5)
+        profit_aim = 0.03
+        risk_reward_ratio = 1.5
+        pnl_target = profit_aim * risk_reward_ratio
+        pnl = pnl_target * 1.5  # 50% above target
+
+        coefficient = _compute_pnl_target_coefficient(
+            params, pnl=pnl, pnl_target=pnl_target, risk_reward_ratio=risk_reward_ratio
+        )
+
+        self.assertFinite(coefficient, name="pnl_target_coefficient")
+        self.assertGreater(
+            coefficient, 1.0, "PnL exceeding target should reward with coefficient > 1.0"
+        )
+
+    def test_pnl_target_coefficient_below_loss_threshold(self):
+        """PnL target coefficient amplifies penalty for excessive losses.
+
+        Validates that losses exceeding risk-adjusted threshold produce
+        coefficient > 1.0 to amplify negative reward signal. Penalty applies
+        when BOTH conditions met: abs(pnl_ratio) > 1.0 AND pnl_ratio < -(1/rr).
+
+        **Setup:**
+        - PnL: -0.06 (exceeds pnl_target magnitude)
+        - pnl_target: 0.045 (profit_aim=0.03 × risk_reward_ratio=1.5)
+        - Penalty threshold: pnl < -pnl_target = -0.045
+        - Parameters: win_reward_factor=2.0, pnl_factor_beta=0.5
+
+        **Assertions:**
+        - Coefficient is finite
+        - Coefficient > 1.0 (amplifies loss penalty)
         """
+        params = self.base_params(win_reward_factor=2.0, pnl_factor_beta=0.5)
+        profit_aim = 0.03
+        risk_reward_ratio = 1.5
+        pnl_target = profit_aim * risk_reward_ratio  # 0.045
+        # Need abs(pnl / pnl_target) > 1.0 AND pnl / pnl_target < -1/1.5
+        # So pnl < -0.045 (exceeds pnl_target in magnitude)
+        pnl = -0.06  # Much more negative than pnl_target
+
+        coefficient = _compute_pnl_target_coefficient(
+            params, pnl=pnl, pnl_target=pnl_target, risk_reward_ratio=risk_reward_ratio
+        )
+
+        self.assertFinite(coefficient, name="pnl_target_coefficient")
+        self.assertGreater(
+            coefficient, 1.0, "Excessive loss should amplify penalty with coefficient > 1.0"
+        )
+
+    def test_efficiency_coefficient_zero_weight(self):
+        """Efficiency coefficient returns neutral value when efficiency disabled.
+
+        Validates that efficiency_weight=0 disables exit timing efficiency
+        adjustments, returning coefficient = 1.0 regardless of exit position
+        relative to unrealized PnL extremes.
+
+        **Setup:**
+        - efficiency_weight: 0.0 (disabled)
+        - PnL: 0.02 (between min=-0.01 and max=0.03)
+        - Trade context: Long position with unrealized range
+
+        **Assertions:**
+        - Coefficient is finite
+        - Coefficient equals 1.0 within TOLERANCE.GENERIC_EQ
+        """
+        params = self.base_params(efficiency_weight=0.0)
         ctx = self.make_ctx(
-            pnl=0.0,
-            trade_duration=1,
+            pnl=0.02,
+            trade_duration=10,
+            max_unrealized_profit=0.03,
+            min_unrealized_profit=-0.01,
+            position=Positions.Long,
+            action=Actions.Long_exit,
+        )
+
+        coefficient = _compute_efficiency_coefficient(params, ctx, ctx.pnl)
+
+        self.assertFinite(coefficient, name="efficiency_coefficient")
+        self.assertAlmostEqualFloat(coefficient, 1.0, tolerance=TOLERANCE.GENERIC_EQ)
+
+    def test_efficiency_coefficient_optimal_profit_exit(self):
+        """Efficiency coefficient rewards exits near peak unrealized profit.
+
+        Validates that exiting close to maximum unrealized profit produces
+        coefficient > 1.0, incentivizing optimal exit timing for profitable trades.
+
+        **Setup:**
+        - PnL: 0.029 (very close to max_unrealized_profit=0.03)
+        - Efficiency ratio: (0.029 - 0.0) / (0.03 - 0.0) ≈ 0.967 (high)
+        - efficiency_weight: 1.0, efficiency_center: 0.5
+        - Trade context: Long position exiting near peak
+
+        **Assertions:**
+        - Coefficient is finite
+        - Coefficient > 1.0 (rewards optimal timing)
+        """
+        params = self.base_params(efficiency_weight=1.0, efficiency_center=0.5)
+        ctx = self.make_ctx(
+            pnl=0.029,  # Close to max
+            trade_duration=10,
+            max_unrealized_profit=0.03,
+            min_unrealized_profit=0.0,
+            position=Positions.Long,
+            action=Actions.Long_exit,
+        )
+
+        coefficient = _compute_efficiency_coefficient(params, ctx, ctx.pnl)
+
+        self.assertFinite(coefficient, name="efficiency_coefficient")
+        self.assertGreater(
+            coefficient, 1.0, "Exit near max profit should reward with coefficient > 1.0"
+        )
+
+    def test_efficiency_coefficient_poor_profit_exit(self):
+        """Efficiency coefficient penalizes exits far from peak unrealized profit.
+
+        Validates that exiting far below maximum unrealized profit produces
+        coefficient < 1.0, penalizing poor exit timing that leaves profit on the table.
+
+        **Setup:**
+        - PnL: 0.005 (far from max_unrealized_profit=0.03)
+        - Efficiency ratio: (0.005 - 0.0) / (0.03 - 0.0) ≈ 0.167 (low)
+        - efficiency_weight: 1.0, efficiency_center: 0.5
+        - Trade context: Long position exiting prematurely
+
+        **Assertions:**
+        - Coefficient is finite
+        - Coefficient < 1.0 (penalizes suboptimal timing)
+        """
+        params = self.base_params(efficiency_weight=1.0, efficiency_center=0.5)
+        ctx = self.make_ctx(
+            pnl=0.005,  # Far from max
+            trade_duration=10,
+            max_unrealized_profit=0.03,
+            min_unrealized_profit=0.0,
+            position=Positions.Long,
+            action=Actions.Long_exit,
+        )
+
+        coefficient = _compute_efficiency_coefficient(params, ctx, ctx.pnl)
+
+        self.assertFinite(coefficient, name="efficiency_coefficient")
+        self.assertLess(
+            coefficient, 1.0, "Exit far from max profit should penalize with coefficient < 1.0"
+        )
+
+    def test_efficiency_coefficient_optimal_loss_exit(self):
+        """Efficiency coefficient rewards loss exits near minimum unrealized loss.
+
+        Validates that exiting close to minimum unrealized loss produces
+        coefficient > 1.0, rewarding quick loss-cutting behavior for losing trades.
+
+        **Setup:**
+        - PnL: -0.005 (very close to min_unrealized_profit=-0.006)
+        - Efficiency ratio: (-0.005 - (-0.006)) / (0.0 - (-0.006)) ≈ 0.167 (low)
+        - For losses: coefficient = 1 + weight × (center - ratio) → rewards low ratio
+        - efficiency_weight: 1.0, efficiency_center: 0.5
+        - Trade context: Long position cutting losses quickly
+
+        **Assertions:**
+        - Coefficient is finite
+        - Coefficient > 1.0 (rewards optimal loss exit)
+        """
+        params = self.base_params(efficiency_weight=1.0, efficiency_center=0.5)
+        ctx = self.make_ctx(
+            pnl=-0.005,  # Close to min loss
+            trade_duration=10,
             max_unrealized_profit=0.0,
-            min_unrealized_profit=-0.02,
+            min_unrealized_profit=-0.006,
             position=Positions.Long,
             action=Actions.Long_exit,
         )
-        params = self.base_params(efficiency_weight=0.0)
-        pnl_target = PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO
-        pnl_target_coefficient = _compute_pnl_target_coefficient(
-            params, ctx.pnl, pnl_target, PARAMS.RISK_REWARD_RATIO
+
+        coefficient = _compute_efficiency_coefficient(params, ctx, ctx.pnl)
+
+        self.assertFinite(coefficient, name="efficiency_coefficient")
+        self.assertGreater(
+            coefficient, 1.0, "Exit near min loss should reward with coefficient > 1.0"
         )
-        efficiency_coefficient = _compute_efficiency_coefficient(params, ctx, ctx.pnl)
-        pnl_coefficient = pnl_target_coefficient * efficiency_coefficient
-        self.assertFinite(pnl_coefficient, name="pnl_coefficient")
-        self.assertAlmostEqualFloat(pnl_coefficient, 1.0, tolerance=TOLERANCE.GENERIC_EQ)
 
     def test_exit_reward_never_positive_for_loss_due_to_efficiency(self):
         """Exit reward should not become positive for a loss trade.
index 3a124fdb61f1c8f7f8476e189c852bb47debfb86..9ba93980ff3688e2a98e7364dde482615eee3104 100644 (file)
@@ -658,12 +658,21 @@ def assert_exit_mode_mathematical_validation(
         params, context.pnl, pnl_target, risk_reward_ratio
     )
     efficiency_coefficient = _compute_efficiency_coefficient(params, context, context.pnl)
-    pnl_coefficient = pnl_target_coefficient * efficiency_coefficient
+
     observed_exit_factor = _get_exit_factor(
-        base_factor, context.pnl, pnl_target, duration_ratio, context, params, risk_reward_ratio
+        base_factor,
+        context.pnl,
+        pnl_target,
+        duration_ratio,
+        context,
+        params,
+        risk_reward_ratio,
     )
+    # Isolate time attenuation by dividing out base_factor and the two semantic coefficients
     observed_half_life_factor = observed_exit_factor / (
-        base_factor * max(pnl_coefficient, np.finfo(float).eps)
+        base_factor
+        * max(pnl_target_coefficient, np.finfo(float).eps)
+        * max(efficiency_coefficient, np.finfo(float).eps)
     )
     expected_half_life_factor = 2 ** (-duration_ratio / params["exit_half_life"])
     test_case.assertAlmostEqual(
index 1b336abcc250ee39f45961cd53f5b87481cccc16..227bbfc5a6cb483504424b5716d7adf00f148ec9 100644 (file)
@@ -113,13 +113,17 @@ class ProgressiveScalingConfig:
 class ExitFactorConfig:
     """Configuration for exit factor validation.
 
-    Parameters specific to exit factor calculations, including attenuation
-    mode and plateau behavior.
+    Parameters specific to exit factor calculations, including coefficient
+    decomposition, attenuation mode and plateau behavior.
+
+    The exit factor is computed as:
+        exit_factor = base_factor × time_attenuation × pnl_target × efficiency
 
     Attributes:
         base_factor: Base scaling factor
         pnl: Realized profit/loss
-        pnl_coefficient: PnL amplification coefficient
+        pnl_target_coefficient: PnL target amplification coefficient (typically 0.5-2.0)
+        efficiency_coefficient: Exit timing efficiency coefficient (typically 0.5-1.5)
         duration_ratio: Ratio of current to maximum duration
         attenuation_mode: Mode of attenuation ("linear", "power", etc.)
         plateau_enabled: Whether plateau behavior is active
@@ -129,7 +133,8 @@ class ExitFactorConfig:
 
     base_factor: float
     pnl: float
-    pnl_coefficient: float
+    pnl_target_coefficient: float
+    efficiency_coefficient: float
     duration_ratio: float
     attenuation_mode: str
     plateau_enabled: bool = False
index d0a2453ce46a33021c323b67de303df74ac19886..9c8ca33cc468dc9cf751ab194515df11f1756953 100644 (file)
@@ -2216,7 +2216,7 @@ class MyRLEnv(Base5ActionRLEnv):
 
         Canonical PBRS Formula
         ----------------------
-        R'(s,a,s') = R(s,a,s') + γ·Φ(s') - Δ(s,a,s')
+        R'(s,a,s') = R(s,a,s') + Δ(s,a,s')
 
         where:
             Δ(s,a,s') = γ·Φ(s') - Φ(s)  (PBRS shaping term)