From ee8eb270a567b2b6573a8f71dcc47ca240f6a594 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Wed, 24 Dec 2025 01:06:15 +0100 Subject: [PATCH] test(ReforceXY): finish factor decoupling properly MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- .../components/test_reward_components.py | 227 ++++++++++++++++-- .../tests/helpers/assertions.py | 15 +- .../tests/helpers/configs.py | 13 +- ReforceXY/user_data/freqaimodels/ReforceXY.py | 2 +- 4 files changed, 233 insertions(+), 24 deletions(-) diff --git a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py index 9937938..4f398b9 100644 --- a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py +++ b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py @@ -199,30 +199,225 @@ class TestRewardComponents(RewardSpaceTestBase): validate_idle_penalty, ) - def test_efficiency_zero_policy(self): - """Test efficiency zero policy produces expected PnL coefficient. + def test_pnl_target_coefficient_zero_pnl(self): + """PnL target coefficient returns neutral value for zero PnL. - Verifies: - - efficiency_weight = 0 -> pnl_coefficient ~= 1.0 - - Coefficient is finite and positive + Validates that zero realized profit/loss produces coefficient = 1.0, + ensuring no amplification or attenuation of base exit factor. + + **Setup:** + - PnL: 0.0 (breakeven) + - pnl_target: profit_aim × risk_reward_ratio + - Parameters: default base_params + + **Assertions:** + - Coefficient is finite + - Coefficient equals 1.0 within TOLERANCE.GENERIC_EQ + """ + params = self.base_params() + pnl_target = PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO + + coefficient = _compute_pnl_target_coefficient( + params, pnl=0.0, pnl_target=pnl_target, risk_reward_ratio=PARAMS.RISK_REWARD_RATIO + ) + + self.assertFinite(coefficient, name="pnl_target_coefficient") + self.assertAlmostEqualFloat(coefficient, 1.0, tolerance=TOLERANCE.GENERIC_EQ) + + def test_pnl_target_coefficient_exceeds_target(self): + """PnL target coefficient rewards exits that exceed profit target. + + Validates amplification behavior when realized PnL exceeds the target, + incentivizing the agent to achieve higher profits than baseline. + + **Setup:** + - PnL: 150% of pnl_target (exceeds target by 50%) + - pnl_target: 0.045 (profit_aim=0.03 × risk_reward_ratio=1.5) + - Parameters: win_reward_factor=2.0, pnl_factor_beta=0.5 + + **Assertions:** + - Coefficient is finite + - Coefficient > 1.0 (rewards exceeding target) + """ + params = self.base_params(win_reward_factor=2.0, pnl_factor_beta=0.5) + profit_aim = 0.03 + risk_reward_ratio = 1.5 + pnl_target = profit_aim * risk_reward_ratio + pnl = pnl_target * 1.5 # 50% above target + + coefficient = _compute_pnl_target_coefficient( + params, pnl=pnl, pnl_target=pnl_target, risk_reward_ratio=risk_reward_ratio + ) + + self.assertFinite(coefficient, name="pnl_target_coefficient") + self.assertGreater( + coefficient, 1.0, "PnL exceeding target should reward with coefficient > 1.0" + ) + + def test_pnl_target_coefficient_below_loss_threshold(self): + """PnL target coefficient amplifies penalty for excessive losses. + + Validates that losses exceeding risk-adjusted threshold produce + coefficient > 1.0 to amplify negative reward signal. Penalty applies + when BOTH conditions met: abs(pnl_ratio) > 1.0 AND pnl_ratio < -(1/rr). + + **Setup:** + - PnL: -0.06 (exceeds pnl_target magnitude) + - pnl_target: 0.045 (profit_aim=0.03 × risk_reward_ratio=1.5) + - Penalty threshold: pnl < -pnl_target = -0.045 + - Parameters: win_reward_factor=2.0, pnl_factor_beta=0.5 + + **Assertions:** + - Coefficient is finite + - Coefficient > 1.0 (amplifies loss penalty) """ + params = self.base_params(win_reward_factor=2.0, pnl_factor_beta=0.5) + profit_aim = 0.03 + risk_reward_ratio = 1.5 + pnl_target = profit_aim * risk_reward_ratio # 0.045 + # Need abs(pnl / pnl_target) > 1.0 AND pnl / pnl_target < -1/1.5 + # So pnl < -0.045 (exceeds pnl_target in magnitude) + pnl = -0.06 # Much more negative than pnl_target + + coefficient = _compute_pnl_target_coefficient( + params, pnl=pnl, pnl_target=pnl_target, risk_reward_ratio=risk_reward_ratio + ) + + self.assertFinite(coefficient, name="pnl_target_coefficient") + self.assertGreater( + coefficient, 1.0, "Excessive loss should amplify penalty with coefficient > 1.0" + ) + + def test_efficiency_coefficient_zero_weight(self): + """Efficiency coefficient returns neutral value when efficiency disabled. + + Validates that efficiency_weight=0 disables exit timing efficiency + adjustments, returning coefficient = 1.0 regardless of exit position + relative to unrealized PnL extremes. + + **Setup:** + - efficiency_weight: 0.0 (disabled) + - PnL: 0.02 (between min=-0.01 and max=0.03) + - Trade context: Long position with unrealized range + + **Assertions:** + - Coefficient is finite + - Coefficient equals 1.0 within TOLERANCE.GENERIC_EQ + """ + params = self.base_params(efficiency_weight=0.0) ctx = self.make_ctx( - pnl=0.0, - trade_duration=1, + pnl=0.02, + trade_duration=10, + max_unrealized_profit=0.03, + min_unrealized_profit=-0.01, + position=Positions.Long, + action=Actions.Long_exit, + ) + + coefficient = _compute_efficiency_coefficient(params, ctx, ctx.pnl) + + self.assertFinite(coefficient, name="efficiency_coefficient") + self.assertAlmostEqualFloat(coefficient, 1.0, tolerance=TOLERANCE.GENERIC_EQ) + + def test_efficiency_coefficient_optimal_profit_exit(self): + """Efficiency coefficient rewards exits near peak unrealized profit. + + Validates that exiting close to maximum unrealized profit produces + coefficient > 1.0, incentivizing optimal exit timing for profitable trades. + + **Setup:** + - PnL: 0.029 (very close to max_unrealized_profit=0.03) + - Efficiency ratio: (0.029 - 0.0) / (0.03 - 0.0) ≈ 0.967 (high) + - efficiency_weight: 1.0, efficiency_center: 0.5 + - Trade context: Long position exiting near peak + + **Assertions:** + - Coefficient is finite + - Coefficient > 1.0 (rewards optimal timing) + """ + params = self.base_params(efficiency_weight=1.0, efficiency_center=0.5) + ctx = self.make_ctx( + pnl=0.029, # Close to max + trade_duration=10, + max_unrealized_profit=0.03, + min_unrealized_profit=0.0, + position=Positions.Long, + action=Actions.Long_exit, + ) + + coefficient = _compute_efficiency_coefficient(params, ctx, ctx.pnl) + + self.assertFinite(coefficient, name="efficiency_coefficient") + self.assertGreater( + coefficient, 1.0, "Exit near max profit should reward with coefficient > 1.0" + ) + + def test_efficiency_coefficient_poor_profit_exit(self): + """Efficiency coefficient penalizes exits far from peak unrealized profit. + + Validates that exiting far below maximum unrealized profit produces + coefficient < 1.0, penalizing poor exit timing that leaves profit on the table. + + **Setup:** + - PnL: 0.005 (far from max_unrealized_profit=0.03) + - Efficiency ratio: (0.005 - 0.0) / (0.03 - 0.0) ≈ 0.167 (low) + - efficiency_weight: 1.0, efficiency_center: 0.5 + - Trade context: Long position exiting prematurely + + **Assertions:** + - Coefficient is finite + - Coefficient < 1.0 (penalizes suboptimal timing) + """ + params = self.base_params(efficiency_weight=1.0, efficiency_center=0.5) + ctx = self.make_ctx( + pnl=0.005, # Far from max + trade_duration=10, + max_unrealized_profit=0.03, + min_unrealized_profit=0.0, + position=Positions.Long, + action=Actions.Long_exit, + ) + + coefficient = _compute_efficiency_coefficient(params, ctx, ctx.pnl) + + self.assertFinite(coefficient, name="efficiency_coefficient") + self.assertLess( + coefficient, 1.0, "Exit far from max profit should penalize with coefficient < 1.0" + ) + + def test_efficiency_coefficient_optimal_loss_exit(self): + """Efficiency coefficient rewards loss exits near minimum unrealized loss. + + Validates that exiting close to minimum unrealized loss produces + coefficient > 1.0, rewarding quick loss-cutting behavior for losing trades. + + **Setup:** + - PnL: -0.005 (very close to min_unrealized_profit=-0.006) + - Efficiency ratio: (-0.005 - (-0.006)) / (0.0 - (-0.006)) ≈ 0.167 (low) + - For losses: coefficient = 1 + weight × (center - ratio) → rewards low ratio + - efficiency_weight: 1.0, efficiency_center: 0.5 + - Trade context: Long position cutting losses quickly + + **Assertions:** + - Coefficient is finite + - Coefficient > 1.0 (rewards optimal loss exit) + """ + params = self.base_params(efficiency_weight=1.0, efficiency_center=0.5) + ctx = self.make_ctx( + pnl=-0.005, # Close to min loss + trade_duration=10, max_unrealized_profit=0.0, - min_unrealized_profit=-0.02, + min_unrealized_profit=-0.006, position=Positions.Long, action=Actions.Long_exit, ) - params = self.base_params(efficiency_weight=0.0) - pnl_target = PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO - pnl_target_coefficient = _compute_pnl_target_coefficient( - params, ctx.pnl, pnl_target, PARAMS.RISK_REWARD_RATIO + + coefficient = _compute_efficiency_coefficient(params, ctx, ctx.pnl) + + self.assertFinite(coefficient, name="efficiency_coefficient") + self.assertGreater( + coefficient, 1.0, "Exit near min loss should reward with coefficient > 1.0" ) - efficiency_coefficient = _compute_efficiency_coefficient(params, ctx, ctx.pnl) - pnl_coefficient = pnl_target_coefficient * efficiency_coefficient - self.assertFinite(pnl_coefficient, name="pnl_coefficient") - self.assertAlmostEqualFloat(pnl_coefficient, 1.0, tolerance=TOLERANCE.GENERIC_EQ) def test_exit_reward_never_positive_for_loss_due_to_efficiency(self): """Exit reward should not become positive for a loss trade. diff --git a/ReforceXY/reward_space_analysis/tests/helpers/assertions.py b/ReforceXY/reward_space_analysis/tests/helpers/assertions.py index 3a124fd..9ba9398 100644 --- a/ReforceXY/reward_space_analysis/tests/helpers/assertions.py +++ b/ReforceXY/reward_space_analysis/tests/helpers/assertions.py @@ -658,12 +658,21 @@ def assert_exit_mode_mathematical_validation( params, context.pnl, pnl_target, risk_reward_ratio ) efficiency_coefficient = _compute_efficiency_coefficient(params, context, context.pnl) - pnl_coefficient = pnl_target_coefficient * efficiency_coefficient + observed_exit_factor = _get_exit_factor( - base_factor, context.pnl, pnl_target, duration_ratio, context, params, risk_reward_ratio + base_factor, + context.pnl, + pnl_target, + duration_ratio, + context, + params, + risk_reward_ratio, ) + # Isolate time attenuation by dividing out base_factor and the two semantic coefficients observed_half_life_factor = observed_exit_factor / ( - base_factor * max(pnl_coefficient, np.finfo(float).eps) + base_factor + * max(pnl_target_coefficient, np.finfo(float).eps) + * max(efficiency_coefficient, np.finfo(float).eps) ) expected_half_life_factor = 2 ** (-duration_ratio / params["exit_half_life"]) test_case.assertAlmostEqual( diff --git a/ReforceXY/reward_space_analysis/tests/helpers/configs.py b/ReforceXY/reward_space_analysis/tests/helpers/configs.py index 1b336ab..227bbfc 100644 --- a/ReforceXY/reward_space_analysis/tests/helpers/configs.py +++ b/ReforceXY/reward_space_analysis/tests/helpers/configs.py @@ -113,13 +113,17 @@ class ProgressiveScalingConfig: class ExitFactorConfig: """Configuration for exit factor validation. - Parameters specific to exit factor calculations, including attenuation - mode and plateau behavior. + Parameters specific to exit factor calculations, including coefficient + decomposition, attenuation mode and plateau behavior. + + The exit factor is computed as: + exit_factor = base_factor × time_attenuation × pnl_target × efficiency Attributes: base_factor: Base scaling factor pnl: Realized profit/loss - pnl_coefficient: PnL amplification coefficient + pnl_target_coefficient: PnL target amplification coefficient (typically 0.5-2.0) + efficiency_coefficient: Exit timing efficiency coefficient (typically 0.5-1.5) duration_ratio: Ratio of current to maximum duration attenuation_mode: Mode of attenuation ("linear", "power", etc.) plateau_enabled: Whether plateau behavior is active @@ -129,7 +133,8 @@ class ExitFactorConfig: base_factor: float pnl: float - pnl_coefficient: float + pnl_target_coefficient: float + efficiency_coefficient: float duration_ratio: float attenuation_mode: str plateau_enabled: bool = False diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index d0a2453..9c8ca33 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -2216,7 +2216,7 @@ class MyRLEnv(Base5ActionRLEnv): Canonical PBRS Formula ---------------------- - R'(s,a,s') = R(s,a,s') + γ·Φ(s') - Δ(s,a,s') + R'(s,a,s') = R(s,a,s') + Δ(s,a,s') where: Δ(s,a,s') = γ·Φ(s') - Φ(s) (PBRS shaping term) -- 2.43.0