From abb775b74dc0e08d67ce33798a8ac2b28221d3f2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Wed, 24 Dec 2025 23:16:34 +0100 Subject: [PATCH] refactor(ReforceXY): consolidate default params in test helpers MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- .../tests/api/test_api_helpers.py | 143 +++--------- .../tests/components/test_additives.py | 18 +- .../components/test_reward_components.py | 97 ++------- .../reward_space_analysis/tests/constants.py | 2 + .../tests/helpers/__init__.py | 25 +-- .../tests/helpers/assertions.py | 205 +++++++++++++++++- .../tests/helpers/configs.py | 29 ++- .../tests/helpers/test_internal_branches.py | 6 +- .../integration/test_reward_calculation.py | 16 +- .../tests/pbrs/test_pbrs.py | 42 +--- .../tests/robustness/test_robustness.py | 47 +--- .../test_feature_analysis_failures.py | 7 +- 12 files changed, 315 insertions(+), 322 deletions(-) diff --git a/ReforceXY/reward_space_analysis/tests/api/test_api_helpers.py b/ReforceXY/reward_space_analysis/tests/api/test_api_helpers.py index 625d776..c50a678 100644 --- a/ReforceXY/reward_space_analysis/tests/api/test_api_helpers.py +++ b/ReforceXY/reward_space_analysis/tests/api/test_api_helpers.py @@ -22,13 +22,12 @@ from reward_space_analysis import ( _get_str_param, _sample_action, build_argument_parser, - calculate_reward, parse_overrides, - simulate_samples, write_complete_statistical_analysis, ) -from ..constants import PARAMS, SEEDS, TOLERANCE +from ..constants import PARAMS, SCENARIOS, SEEDS, TOLERANCE +from ..helpers import calculate_reward_with_defaults, simulate_samples_with_defaults from ..test_base import RewardSpaceTestBase pytestmark = pytest.mark.api @@ -90,17 +89,11 @@ class TestAPIAndHelpers(RewardSpaceTestBase): def test_api_simulation_and_reward_smoke(self): """Test api simulation and reward smoke.""" - df = simulate_samples( - params=self.base_params(max_trade_duration_candles=40), - num_samples=20, + df = simulate_samples_with_defaults( + self.base_params(max_trade_duration_candles=40), + num_samples=SCENARIOS.SAMPLE_SIZE_TINY, seed=SEEDS.SMOKE_TEST, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, max_duration_ratio=1.5, - trading_mode="margin", - pnl_base_std=PARAMS.PNL_STD, - pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE, ) self.assertGreater(len(df), 0) any_exit = df[df["reward_exit"] != 0].head(1) @@ -115,44 +108,21 @@ class TestAPIAndHelpers(RewardSpaceTestBase): position=Positions.Long, action=Actions.Long_exit, ) - breakdown = calculate_reward( - ctx, - self.DEFAULT_PARAMS, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, - action_masking=True, - ) + breakdown = calculate_reward_with_defaults(ctx, self.DEFAULT_PARAMS) self.assertFinite(breakdown.total) def test_simulate_samples_trading_modes_spot_vs_margin(self): """simulate_samples coverage: spot should forbid shorts, margin should allow them.""" - df_spot = simulate_samples( - params=self.base_params(max_trade_duration_candles=100), - num_samples=80, - seed=SEEDS.BASE, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - max_duration_ratio=2.0, + df_spot = simulate_samples_with_defaults( + self.base_params(max_trade_duration_candles=100), + num_samples=SCENARIOS.SAMPLE_SIZE_SMALL, trading_mode="spot", - 
pnl_base_std=PARAMS.PNL_STD, - pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE, ) short_positions_spot = (df_spot["position"] == float(Positions.Short.value)).sum() self.assertEqual(short_positions_spot, 0, "Spot mode must not contain short positions") - df_margin = simulate_samples( - params=self.base_params(max_trade_duration_candles=100), - num_samples=80, - seed=SEEDS.BASE, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - max_duration_ratio=2.0, - trading_mode="margin", - pnl_base_std=PARAMS.PNL_STD, - pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE, + df_margin = simulate_samples_with_defaults( + self.base_params(max_trade_duration_candles=100), + num_samples=SCENARIOS.SAMPLE_SIZE_SMALL, ) for col in [ "pnl", @@ -174,17 +144,10 @@ class TestAPIAndHelpers(RewardSpaceTestBase): def test_simulate_samples_sampling_probabilities_are_bounded(self): """simulate_samples() exposes bounded sampling probabilities.""" - df = simulate_samples( - params=self.base_params(max_trade_duration_candles=40), - num_samples=200, + df = simulate_samples_with_defaults( + self.base_params(max_trade_duration_candles=40), seed=SEEDS.SMOKE_TEST, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, max_duration_ratio=1.5, - trading_mode="margin", - pnl_base_std=PARAMS.PNL_STD, - pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE, ) for col in ["sample_entry_prob", "sample_exit_prob", "sample_neutral_prob"]: @@ -197,46 +160,25 @@ class TestAPIAndHelpers(RewardSpaceTestBase): def test_to_bool(self): """Test _to_bool with various inputs.""" - df1 = simulate_samples( - params=self.base_params(action_masking="true", max_trade_duration_candles=50), + df1 = simulate_samples_with_defaults( + self.base_params(action_masking="true", max_trade_duration_candles=50), num_samples=10, - seed=SEEDS.BASE, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - max_duration_ratio=2.0, trading_mode="spot", - pnl_base_std=PARAMS.PNL_STD, - pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE, ) self.assertIsInstance(df1, pd.DataFrame) - df2 = simulate_samples( - params=self.base_params(action_masking="false", max_trade_duration_candles=50), + df2 = simulate_samples_with_defaults( + self.base_params(action_masking="false", max_trade_duration_candles=50), num_samples=10, - seed=SEEDS.BASE, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - max_duration_ratio=2.0, trading_mode="spot", - pnl_base_std=PARAMS.PNL_STD, - pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE, ) self.assertIsInstance(df2, pd.DataFrame) def test_short_allowed_via_simulation(self): """Test _is_short_allowed via different trading modes.""" - df_futures = simulate_samples( - params=self.base_params(max_trade_duration_candles=50), - num_samples=100, - seed=SEEDS.BASE, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - max_duration_ratio=2.0, + df_futures = simulate_samples_with_defaults( + self.base_params(max_trade_duration_candles=50), + num_samples=SCENARIOS.SAMPLE_SIZE_SMALL, trading_mode="futures", - pnl_base_std=PARAMS.PNL_STD, - pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE, ) short_positions = (df_futures["position"] == float(Positions.Short.value)).sum() self.assertGreater(short_positions, 0, "Futures mode should allow short positions") @@ -343,17 +285,8 @@ class 
TestAPIAndHelpers(RewardSpaceTestBase): def test_complete_statistical_analysis_writer(self): """Test write_complete_statistical_analysis function.""" - test_data = simulate_samples( - params=self.base_params(max_trade_duration_candles=100), - num_samples=200, - seed=SEEDS.BASE, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - max_duration_ratio=2.0, - trading_mode="margin", - pnl_base_std=PARAMS.PNL_STD, - pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE, + test_data = simulate_samples_with_defaults( + self.base_params(max_trade_duration_candles=100), ) with tempfile.TemporaryDirectory() as tmp_dir: output_path = Path(tmp_dir) @@ -393,15 +326,7 @@ class TestPrivateFunctions(RewardSpaceTestBase): position=position, action=action, ) - breakdown = calculate_reward( - context, - self.DEFAULT_PARAMS, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, - action_masking=True, - ) + breakdown = calculate_reward_with_defaults(context, self.DEFAULT_PARAMS) self.assertNotEqual( breakdown.exit_component, 0.0, @@ -422,14 +347,8 @@ class TestPrivateFunctions(RewardSpaceTestBase): position=Positions.Short, action=Actions.Long_exit, ) - breakdown = calculate_reward( - context, - self.DEFAULT_PARAMS, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, - action_masking=False, + breakdown = calculate_reward_with_defaults( + context, self.DEFAULT_PARAMS, action_masking=False ) self.assertLess(breakdown.invalid_penalty, 0, "Invalid action should have negative penalty") self.assertAlmostEqualFloat( @@ -459,15 +378,7 @@ class TestPrivateFunctions(RewardSpaceTestBase): position=Positions.Long, action=Actions.Long_exit, ) - breakdown = calculate_reward( - context, - params, - base_factor=10000000.0, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, - action_masking=True, - ) + breakdown = calculate_reward_with_defaults(context, params, base_factor=10000000.0) self.assertFinite(breakdown.exit_component, name="exit_component") diff --git a/ReforceXY/reward_space_analysis/tests/components/test_additives.py b/ReforceXY/reward_space_analysis/tests/components/test_additives.py index 9d4508d..a4d6650 100644 --- a/ReforceXY/reward_space_analysis/tests/components/test_additives.py +++ b/ReforceXY/reward_space_analysis/tests/components/test_additives.py @@ -10,7 +10,7 @@ import pytest from reward_space_analysis import compute_pbrs_components -from ..constants import PARAMS +from ..constants import PARAMS, TOLERANCE from ..test_base import RewardSpaceTestBase pytestmark = pytest.mark.components @@ -32,16 +32,16 @@ class TestAdditivesDeterministicContribution(RewardSpaceTestBase): **Setup:** - Base configuration: hold_potential enabled, additives disabled - Test configuration: entry_additive and exit_additive enabled - - Additive parameters: ratio=0.4, gain=1.0 for both entry/exit + - Additive parameters: ratio=PARAMS.ADDITIVE_RATIO_DEFAULT, gain=PARAMS.ADDITIVE_GAIN_DEFAULT for both entry/exit - Context: base_reward=0.05, pnl=0.01, duration_ratio=0.2 **Assertions:** - Total reward with additives > total reward without additives - - Shaping difference remains bounded: |s1 - s0| < 0.2 + - Shaping difference remains bounded: |s1 - s0| < TOLERANCE.SHAPING_BOUND_TOLERANCE - Both total and shaping rewards are finite **Tolerance rationale:** - - Custom bound 
0.2 for shaping delta: Additives should not cause + - Custom bound TOLERANCE.SHAPING_BOUND_TOLERANCE for shaping delta: Additives should not cause large shifts in shaping component, which maintains PBRS properties """ base = self.base_params( @@ -55,10 +55,10 @@ class TestAdditivesDeterministicContribution(RewardSpaceTestBase): { "entry_additive_enabled": True, "exit_additive_enabled": True, - "entry_additive_ratio": 0.4, - "exit_additive_ratio": 0.4, - "entry_additive_gain": 1.0, - "exit_additive_gain": 1.0, + "entry_additive_ratio": PARAMS.ADDITIVE_RATIO_DEFAULT, + "exit_additive_ratio": PARAMS.ADDITIVE_RATIO_DEFAULT, + "entry_additive_gain": PARAMS.ADDITIVE_GAIN_DEFAULT, + "exit_additive_gain": PARAMS.ADDITIVE_GAIN_DEFAULT, } ) base_reward = 0.05 @@ -88,7 +88,7 @@ class TestAdditivesDeterministicContribution(RewardSpaceTestBase): t1 = base_reward + s1 + _entry1 + _exit1 self.assertFinite(t1) self.assertFinite(s1) - self.assertLess(abs(s1 - s0), 0.2) + self.assertLess(abs(s1 - s0), TOLERANCE.SHAPING_BOUND_TOLERANCE) self.assertGreater(t1 - t0, 0.0, "Total reward should increase with additives present") diff --git a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py index 9f0c214..9275625 100644 --- a/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py +++ b/ReforceXY/reward_space_analysis/tests/components/test_reward_components.py @@ -15,7 +15,6 @@ from reward_space_analysis import ( _compute_pnl_target_coefficient, _get_exit_factor, _get_float_param, - calculate_reward, get_max_idle_duration_candles, ) @@ -29,6 +28,7 @@ from ..helpers import ( assert_hold_penalty_threshold_behavior, assert_progressive_scaling_behavior, assert_reward_calculation_scenarios, + calculate_reward_with_defaults, make_idle_penalty_test_contexts, ) from ..test_base import RewardSpaceTestBase @@ -72,15 +72,7 @@ class TestRewardComponents(RewardSpaceTestBase): position=Positions.Long, action=Actions.Neutral, ) - breakdown = calculate_reward( - context, - self.DEFAULT_PARAMS, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, - action_masking=True, - ) + breakdown = calculate_reward_with_defaults(context, self.DEFAULT_PARAMS) self.assertLess(breakdown.hold_penalty, 0, "Hold penalty should be negative") config = ValidationConfig( tolerance_strict=TOLERANCE.IDENTITY_STRICT, @@ -148,15 +140,7 @@ class TestRewardComponents(RewardSpaceTestBase): position=Positions.Long, action=Actions.Neutral, ) - breakdown = calculate_reward( - context, - params, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, - action_masking=True, - ) + breakdown = calculate_reward_with_defaults(context, params) penalties.append(breakdown.hold_penalty) assert_progressive_scaling_behavior(self, penalties, durations, "Hold penalty") @@ -450,14 +434,8 @@ class TestRewardComponents(RewardSpaceTestBase): position=Positions.Long, action=Actions.Long_exit, ) - breakdown = calculate_reward( - context, - params, - base_factor=1.0, - profit_aim=0.03, - risk_reward_ratio=1.0, - short_allowed=True, - action_masking=True, + breakdown = calculate_reward_with_defaults( + context, params, base_factor=1.0, profit_aim=0.03 ) self.assertLessEqual( breakdown.exit_component, @@ -474,7 +452,6 @@ class TestRewardComponents(RewardSpaceTestBase): """ params_small = 
self.base_params(max_idle_duration_candles=50) params_large = self.base_params(max_idle_duration_candles=200) - base_factor = PARAMS.BASE_FACTOR context = self.make_ctx( pnl=0.0, trade_duration=0, @@ -482,24 +459,8 @@ class TestRewardComponents(RewardSpaceTestBase): position=Positions.Neutral, action=Actions.Neutral, ) - small = calculate_reward( - context, - params_small, - base_factor, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, - action_masking=True, - ) - large = calculate_reward( - context, - params_large, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, - action_masking=True, - ) + small = calculate_reward_with_defaults(context, params_small) + large = calculate_reward_with_defaults(context, params_large) self.assertLess(small.idle_penalty, 0.0) self.assertLess(large.idle_penalty, 0.0) self.assertGreater(large.idle_penalty, small.idle_penalty) @@ -628,14 +589,8 @@ class TestRewardComponents(RewardSpaceTestBase): position=Positions.Long, action=Actions.Long_exit, ) - br = calculate_reward( - context, - params, - base_factor=1.0, - profit_aim=profit_aim, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, - action_masking=True, + br = calculate_reward_with_defaults( + context, params, base_factor=1.0, profit_aim=profit_aim ) ratio = br.exit_component / pnl if pnl != 0 else 0.0 ratios_observed.append(float(ratio)) @@ -706,14 +661,12 @@ class TestRewardComponents(RewardSpaceTestBase): results = [] for context, description in contexts_and_descriptions: - breakdown = calculate_reward( + breakdown = calculate_reward_with_defaults( context, params, base_factor=base_factor, profit_aim=profit_aim, risk_reward_ratio=risk_reward_ratio, - short_allowed=True, - action_masking=True, ) results.append((breakdown, context.idle_duration, description)) @@ -764,15 +717,7 @@ class TestRewardComponents(RewardSpaceTestBase): position=Positions.Long, action=Actions.Long_exit, ) - breakdown = calculate_reward( - context, - canonical_params, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, - action_masking=True, - ) + breakdown = calculate_reward_with_defaults(context, canonical_params) # Verify all PBRS fields are finite self.assertFinite(breakdown.base_reward, name="base_reward") @@ -823,24 +768,8 @@ class TestRewardComponents(RewardSpaceTestBase): ) params_rr.pop("risk_reward_ratio", None) - br_ratio = calculate_reward( - context, - params_ratio, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=1.0, - short_allowed=True, - action_masking=True, - ) - br_rr = calculate_reward( - context, - params_rr, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=1.0, - short_allowed=True, - action_masking=True, - ) + br_ratio = calculate_reward_with_defaults(context, params_ratio, risk_reward_ratio=1.0) + br_rr = calculate_reward_with_defaults(context, params_rr, risk_reward_ratio=1.0) self.assertAlmostEqualFloat( br_rr.total, diff --git a/ReforceXY/reward_space_analysis/tests/constants.py b/ReforceXY/reward_space_analysis/tests/constants.py index b44d7dd..3a62952 100644 --- a/ReforceXY/reward_space_analysis/tests/constants.py +++ b/ReforceXY/reward_space_analysis/tests/constants.py @@ -164,6 +164,7 @@ class TestSeeds: # Feature analysis failure seeds FEATURE_EMPTY: Seed for empty feature tests (17) + 
FEATURE_PRIME_7: Seed for feature test variant (7) FEATURE_PRIME_11: Seed for feature test variant (11) FEATURE_PRIME_13: Seed for feature test variant (13) FEATURE_PRIME_21: Seed for feature test variant (21) @@ -195,6 +196,7 @@ class TestSeeds: # Feature analysis failure seeds FEATURE_EMPTY: int = 17 + FEATURE_PRIME_7: int = 7 FEATURE_PRIME_11: int = 11 FEATURE_PRIME_13: int = 13 FEATURE_PRIME_21: int = 21 diff --git a/ReforceXY/reward_space_analysis/tests/helpers/__init__.py b/ReforceXY/reward_space_analysis/tests/helpers/__init__.py index 7fad62e..7160a87 100644 --- a/ReforceXY/reward_space_analysis/tests/helpers/__init__.py +++ b/ReforceXY/reward_space_analysis/tests/helpers/__init__.py @@ -6,11 +6,9 @@ capture helpers, centralizing test infrastructure and reducing duplication. from .assertions import ( assert_adjustment_reason_contains, - # Core numeric/trend assertions assert_almost_equal_list, assert_component_sum_integrity, assert_exit_factor_attenuation_modes, - # Exit factor invariance helpers assert_exit_factor_invariant_suite, assert_exit_factor_kernel_fallback, assert_exit_factor_plateau_behavior, @@ -23,33 +21,33 @@ from .assertions import ( assert_non_canonical_shaping_exceeds, assert_parameter_sensitivity_behavior, assert_pbrs_canonical_sum_within_tolerance, - # PBRS invariance/report helpers assert_pbrs_invariance_report_classification, assert_progressive_scaling_behavior, - # Relaxed validation aggregation assert_relaxed_multi_reason_aggregation, assert_reward_calculation_scenarios, assert_single_active_component, assert_single_active_component_with_additives, assert_trend, - # Validation batch builders/executors build_validation_case, + calculate_reward_with_defaults, execute_validation_batch, + get_exit_factor_with_defaults, make_idle_penalty_test_contexts, run_relaxed_validation_adjustment_cases, run_strict_validation_failure_cases, safe_float, + simulate_samples_with_defaults, ) from .configs import ( + DEFAULT_REWARD_CONFIG, + DEFAULT_SIMULATION_CONFIG, ContextFactory, ExitFactorConfig, ProgressiveScalingConfig, - # Configuration dataclasses RewardScenarioConfig, SimulationConfig, StatisticalTestConfig, ThresholdTestConfig, - # Type aliases ValidationCallback, ValidationConfig, WarningCaptureConfig, @@ -57,13 +55,11 @@ from .configs import ( from .warnings import ( assert_diagnostic_warning, assert_no_warnings, - # Warning capture utilities capture_warnings, validate_warning_content, ) __all__ = [ - # Core numeric/trend assertions "assert_monotonic_nonincreasing", "assert_monotonic_nonnegative", "assert_finite", @@ -82,22 +78,20 @@ __all__ = [ "assert_multi_parameter_sensitivity", "assert_hold_penalty_threshold_behavior", "safe_float", - # Validation batch builders/executors "build_validation_case", "execute_validation_batch", "assert_adjustment_reason_contains", "run_strict_validation_failure_cases", "run_relaxed_validation_adjustment_cases", - # Exit factor invariance helpers "assert_exit_factor_invariant_suite", "assert_exit_factor_kernel_fallback", - # Relaxed validation aggregation "assert_relaxed_multi_reason_aggregation", - # PBRS invariance/report helpers "assert_pbrs_invariance_report_classification", "assert_pbrs_canonical_sum_within_tolerance", "assert_non_canonical_shaping_exceeds", - # Configuration dataclasses + "calculate_reward_with_defaults", + "get_exit_factor_with_defaults", + "simulate_samples_with_defaults", "RewardScenarioConfig", "ValidationConfig", "ThresholdTestConfig", @@ -108,7 +102,8 @@ __all__ = [ "WarningCaptureConfig", 
"ValidationCallback", "ContextFactory", - # Warning capture utilities + "DEFAULT_REWARD_CONFIG", + "DEFAULT_SIMULATION_CONFIG", "capture_warnings", "assert_diagnostic_warning", "assert_no_warnings", diff --git a/ReforceXY/reward_space_analysis/tests/helpers/assertions.py b/ReforceXY/reward_space_analysis/tests/helpers/assertions.py index 9ba9398..101783c 100644 --- a/ReforceXY/reward_space_analysis/tests/helpers/assertions.py +++ b/ReforceXY/reward_space_analysis/tests/helpers/assertions.py @@ -16,8 +16,15 @@ from reward_space_analysis import ( calculate_reward, ) -from ..constants import TOLERANCE -from .configs import RewardScenarioConfig, ThresholdTestConfig, ValidationConfig +from ..constants import PARAMS, TOLERANCE +from .configs import ( + DEFAULT_REWARD_CONFIG, + DEFAULT_SIMULATION_CONFIG, + RewardScenarioConfig, + SimulationConfig, + ThresholdTestConfig, + ValidationConfig, +) def safe_float(value: Any, default: float = 0.0) -> float: @@ -1300,3 +1307,197 @@ def assert_exit_factor_plateau_behavior( plateau_factor_post - tolerance_strict, "Plateau pre-grace factor should be >= post-grace factor", ) + + +# ---------------- Wrapper functions with standard defaults ---------------- # + + +def calculate_reward_with_defaults( + context, + params: Dict[str, Any], + config: RewardScenarioConfig | None = None, + **overrides, +): + """Calculate reward with standard test defaults. + + Reduces boilerplate by providing sensible defaults for common parameters. + Override any parameter by passing it as a keyword argument. + + Args: + context: RewardContext for the calculation + params: Parameter dictionary for reward calculation + config: Optional RewardScenarioConfig (defaults to DEFAULT_REWARD_CONFIG) + **overrides: Keyword arguments to override config values. Supported keys: + - base_factor: Base scaling factor + - profit_aim: Base profit target + - risk_reward_ratio: Risk/reward ratio + - short_allowed: Whether short positions are permitted + - action_masking: Whether to apply action masking + - prev_potential: Previous potential for PBRS (passed through) + + Returns: + RewardBreakdown from calculate_reward() + + Example: + # Using all defaults + breakdown = calculate_reward_with_defaults(ctx, params) + + # Overriding specific parameters + breakdown = calculate_reward_with_defaults( + ctx, params, action_masking=False + ) + + # Using custom config + custom_config = RewardScenarioConfig(...) + breakdown = calculate_reward_with_defaults(ctx, params, config=custom_config) + """ + cfg = config or DEFAULT_REWARD_CONFIG + + # Extract config values with potential overrides + base_factor = overrides.pop("base_factor", cfg.base_factor) + profit_aim = overrides.pop("profit_aim", cfg.profit_aim) + risk_reward_ratio = overrides.pop("risk_reward_ratio", cfg.risk_reward_ratio) + short_allowed = overrides.pop("short_allowed", cfg.short_allowed) + action_masking = overrides.pop("action_masking", cfg.action_masking) + + return calculate_reward( + context, + params, + base_factor=base_factor, + profit_aim=profit_aim, + risk_reward_ratio=risk_reward_ratio, + short_allowed=short_allowed, + action_masking=action_masking, + **overrides, + ) + + +def get_exit_factor_with_defaults( + pnl: float, + duration_ratio: float, + context, + params: Dict[str, Any], + base_factor: float | None = None, + pnl_target: float | None = None, + risk_reward_ratio: float | None = None, +): + """Calculate exit factor with standard test defaults. + + Reduces boilerplate by providing sensible defaults for common parameters. 
+ This wrapper is particularly useful for tests that need to call _get_exit_factor + repeatedly with varying pnl and duration_ratio values. + + Args: + pnl: Realized profit/loss + duration_ratio: Ratio of current to maximum duration + context: RewardContext for efficiency coefficient calculation + params: Parameter dictionary + base_factor: Base scaling factor (defaults to PARAMS.BASE_FACTOR) + pnl_target: Target profit threshold (defaults to PARAMS.PROFIT_AIM * PARAMS.RISK_REWARD_RATIO) + risk_reward_ratio: Risk/reward ratio (defaults to PARAMS.RISK_REWARD_RATIO) + + Returns: + Exit factor value from _get_exit_factor() + + Example: + # Using all defaults + factor = get_exit_factor_with_defaults(0.05, 0.5, ctx, params) + + # Overriding specific parameters + factor = get_exit_factor_with_defaults( + 0.05, 0.5, ctx, params, base_factor=100.0 + ) + """ + if base_factor is None: + base_factor = PARAMS.BASE_FACTOR + if risk_reward_ratio is None: + risk_reward_ratio = PARAMS.RISK_REWARD_RATIO + if pnl_target is None: + pnl_target = PARAMS.PROFIT_AIM * risk_reward_ratio + + return _get_exit_factor( + base_factor, + pnl, + pnl_target, + duration_ratio, + context, + params, + risk_reward_ratio, + ) + + +def simulate_samples_with_defaults( + params: Dict[str, Any], + config: SimulationConfig | None = None, + base_factor: float | None = None, + profit_aim: float | None = None, + risk_reward_ratio: float | None = None, + **overrides, +): + """Simulate samples with standard test defaults. + + Reduces boilerplate by providing sensible defaults for simulation parameters. + Override any parameter by passing it as a keyword argument. + + Args: + params: Parameter dictionary for reward calculation + config: Optional SimulationConfig (defaults to DEFAULT_SIMULATION_CONFIG) + base_factor: Base scaling factor (defaults to PARAMS.BASE_FACTOR) + profit_aim: Base profit target (defaults to PARAMS.PROFIT_AIM) + risk_reward_ratio: Risk/reward ratio (defaults to PARAMS.RISK_REWARD_RATIO) + **overrides: Keyword arguments to override config values. Supported keys: + - num_samples: Number of samples to generate + - seed: Random seed for reproducibility + - max_duration_ratio: Maximum duration ratio + - trading_mode: Trading mode ("margin", "spot", etc.) 
+ - pnl_base_std: Base standard deviation for PnL generation + - pnl_duration_vol_scale: Volatility scaling factor + + Returns: + DataFrame from simulate_samples() + + Example: + # Using all defaults + df = simulate_samples_with_defaults(params) + + # Overriding specific parameters + df = simulate_samples_with_defaults(params, num_samples=500, seed=123) + + # Using custom config + custom_config = SimulationConfig(num_samples=1000, seed=42) + df = simulate_samples_with_defaults(params, config=custom_config) + """ + # Import here to avoid circular imports + from reward_space_analysis import simulate_samples + + cfg = config or DEFAULT_SIMULATION_CONFIG + + # Use config values with potential overrides + num_samples = overrides.pop("num_samples", cfg.num_samples) + seed = overrides.pop("seed", cfg.seed) + max_duration_ratio = overrides.pop("max_duration_ratio", cfg.max_duration_ratio) + trading_mode = overrides.pop("trading_mode", cfg.trading_mode) + pnl_base_std = overrides.pop("pnl_base_std", cfg.pnl_base_std) + pnl_duration_vol_scale = overrides.pop("pnl_duration_vol_scale", cfg.pnl_duration_vol_scale) + + # Use provided values or defaults for reward calculation params + if base_factor is None: + base_factor = PARAMS.BASE_FACTOR + if profit_aim is None: + profit_aim = PARAMS.PROFIT_AIM + if risk_reward_ratio is None: + risk_reward_ratio = PARAMS.RISK_REWARD_RATIO + + return simulate_samples( + params=params, + num_samples=num_samples, + seed=seed, + base_factor=base_factor, + profit_aim=profit_aim, + risk_reward_ratio=risk_reward_ratio, + max_duration_ratio=max_duration_ratio, + trading_mode=trading_mode, + pnl_base_std=pnl_base_std, + pnl_duration_vol_scale=pnl_duration_vol_scale, + **overrides, + ) diff --git a/ReforceXY/reward_space_analysis/tests/helpers/configs.py b/ReforceXY/reward_space_analysis/tests/helpers/configs.py index 227bbfc..12742dd 100644 --- a/ReforceXY/reward_space_analysis/tests/helpers/configs.py +++ b/ReforceXY/reward_space_analysis/tests/helpers/configs.py @@ -24,7 +24,7 @@ Usage: from dataclasses import dataclass from typing import Callable, Optional -from ..constants import SEEDS, STATISTICAL, TOLERANCE +from ..constants import PARAMS, SEEDS, STATISTICAL, TOLERANCE @dataclass @@ -210,6 +210,31 @@ ValidationCallback = Callable[[object, object, str, float], None] ContextFactory = Callable[..., object] +# Default config instances for common test scenarios +# These reduce boilerplate by providing pre-configured defaults + +DEFAULT_REWARD_CONFIG: RewardScenarioConfig = RewardScenarioConfig( + base_factor=PARAMS.BASE_FACTOR, + profit_aim=PARAMS.PROFIT_AIM, + risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, + tolerance_relaxed=TOLERANCE.IDENTITY_RELAXED, + short_allowed=True, + action_masking=True, +) +"""Default RewardScenarioConfig with standard test parameters.""" + + +DEFAULT_SIMULATION_CONFIG: SimulationConfig = SimulationConfig( + num_samples=200, + seed=SEEDS.BASE, + max_duration_ratio=2.0, + trading_mode="margin", + pnl_base_std=PARAMS.PNL_STD, + pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE, +) +"""Default SimulationConfig with standard test parameters.""" + + __all__ = [ "RewardScenarioConfig", "ValidationConfig", @@ -221,4 +246,6 @@ __all__ = [ "WarningCaptureConfig", "ValidationCallback", "ContextFactory", + "DEFAULT_REWARD_CONFIG", + "DEFAULT_SIMULATION_CONFIG", ] diff --git a/ReforceXY/reward_space_analysis/tests/helpers/test_internal_branches.py b/ReforceXY/reward_space_analysis/tests/helpers/test_internal_branches.py index 952bd4c..15e1211 100644 --- 
a/ReforceXY/reward_space_analysis/tests/helpers/test_internal_branches.py +++ b/ReforceXY/reward_space_analysis/tests/helpers/test_internal_branches.py @@ -7,10 +7,10 @@ from reward_space_analysis import ( Positions, RewardParams, _get_bool_param, - calculate_reward, ) from ..test_base import make_ctx +from . import calculate_reward_with_defaults def test_get_bool_param_none_and_invalid_literal(): @@ -69,14 +69,12 @@ def test_calculate_reward_unrealized_pnl_hold_path(): "unrealized_pnl": True, "pnl_factor_beta": 0.5, } - breakdown = calculate_reward( + breakdown = calculate_reward_with_defaults( context, params, base_factor=100.0, profit_aim=0.05, risk_reward_ratio=1.0, - short_allowed=True, - action_masking=True, prev_potential=np.nan, ) assert math.isfinite(breakdown.prev_potential) diff --git a/ReforceXY/reward_space_analysis/tests/integration/test_reward_calculation.py b/ReforceXY/reward_space_analysis/tests/integration/test_reward_calculation.py index 91c79c2..cf53611 100644 --- a/ReforceXY/reward_space_analysis/tests/integration/test_reward_calculation.py +++ b/ReforceXY/reward_space_analysis/tests/integration/test_reward_calculation.py @@ -14,10 +14,10 @@ import pytest from reward_space_analysis import ( Actions, Positions, - calculate_reward, ) from ..constants import PARAMS, TOLERANCE +from ..helpers import calculate_reward_with_defaults from ..test_base import RewardSpaceTestBase pytestmark = pytest.mark.integration @@ -94,13 +94,9 @@ class TestRewardCalculation(RewardSpaceTestBase): for name, ctx_kwargs, expected_component in scenarios: with self.subTest(scenario=name): ctx = self.make_ctx(**ctx_kwargs) - breakdown = calculate_reward( + breakdown = calculate_reward_with_defaults( ctx, self.DEFAULT_PARAMS, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, action_masking=expected_component != "invalid_penalty", ) @@ -160,23 +156,19 @@ class TestRewardCalculation(RewardSpaceTestBase): action=Actions.Short_exit, ) - br_long = calculate_reward( + br_long = calculate_reward_with_defaults( ctx_long, params, base_factor=base_factor, profit_aim=profit_aim, risk_reward_ratio=rr, - short_allowed=True, - action_masking=True, ) - br_short = calculate_reward( + br_short = calculate_reward_with_defaults( ctx_short, params, base_factor=base_factor, profit_aim=profit_aim, risk_reward_ratio=rr, - short_allowed=True, - action_masking=True, ) if pnl > 0: diff --git a/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py b/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py index e991c72..c861270 100644 --- a/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py +++ b/ReforceXY/reward_space_analysis/tests/pbrs/test_pbrs.py @@ -22,7 +22,6 @@ from reward_space_analysis import ( _compute_unrealized_pnl_estimate, _get_float_param, apply_potential_shaping, - calculate_reward, get_max_idle_duration_candles, simulate_samples, validate_reward_parameters, @@ -41,6 +40,7 @@ from ..helpers import ( assert_pbrs_invariance_report_classification, assert_relaxed_multi_reason_aggregation, build_validation_case, + calculate_reward_with_defaults, execute_validation_batch, ) from ..test_base import RewardSpaceTestBase @@ -343,16 +343,7 @@ class TestPBRS(RewardSpaceTestBase): ctx = self.make_ctx(position=Positions.Neutral, action=Actions.Neutral) prev_potential = 0.37 - breakdown = calculate_reward( - ctx, - params, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - 
short_allowed=True, - action_masking=True, - prev_potential=prev_potential, - ) + breakdown = calculate_reward_with_defaults(ctx, params, prev_potential=prev_potential) self.assertAlmostEqualFloat( breakdown.prev_potential, @@ -602,16 +593,7 @@ class TestPBRS(RewardSpaceTestBase): ctx = self.make_ctx( position=Positions.Neutral, action=action, pnl=0.0, trade_duration=0 ) - breakdown = calculate_reward( - ctx, - params, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, - action_masking=True, - prev_potential=0.0, - ) + breakdown = calculate_reward_with_defaults(ctx, params, prev_potential=0.0) self.assertTrue(np.isfinite(breakdown.next_potential)) # With any nonzero fees, immediate unrealized pnl should be negative. self.assertLess( @@ -758,14 +740,9 @@ class TestPBRS(RewardSpaceTestBase): trade_duration=trade_duration, ) - breakdown = calculate_reward( + breakdown = calculate_reward_with_defaults( ctx, {**params, "max_trade_duration_candles": max_trade_duration_candles}, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, - action_masking=True, prev_potential=0.0, ) @@ -990,15 +967,8 @@ class TestPBRS(RewardSpaceTestBase): self.assertNotEqual(prev_potential, 0.0) - breakdown = calculate_reward( - ctx, - params, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, - action_masking=False, - prev_potential=prev_potential, + breakdown = calculate_reward_with_defaults( + ctx, params, action_masking=False, prev_potential=prev_potential ) expected_shaping = params["potential_gamma"] * prev_potential - prev_potential diff --git a/ReforceXY/reward_space_analysis/tests/robustness/test_robustness.py b/ReforceXY/reward_space_analysis/tests/robustness/test_robustness.py index 34217ca..f7834f3 100644 --- a/ReforceXY/reward_space_analysis/tests/robustness/test_robustness.py +++ b/ReforceXY/reward_space_analysis/tests/robustness/test_robustness.py @@ -14,7 +14,6 @@ from reward_space_analysis import ( Positions, RewardContext, _get_exit_factor, - calculate_reward, simulate_samples, ) @@ -30,6 +29,7 @@ from ..helpers import ( assert_exit_factor_attenuation_modes, assert_exit_mode_mathematical_validation, assert_single_active_component_with_additives, + calculate_reward_with_defaults, capture_warnings, ) from ..test_base import RewardSpaceTestBase @@ -108,15 +108,7 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): potential_gamma=0.0, check_invariants=False, ) - br = calculate_reward( - ctx_obj, - params, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, - action_masking=True, - ) + br = calculate_reward_with_defaults(ctx_obj, params) # Relaxed tolerance: Accumulated floating-point errors across multiple # reward component calculations (entry, hold, exit additives, and penalties) assert_single_active_component_with_additives( @@ -222,24 +214,15 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): action=Actions.Long_exit, ) with capture_warnings() as caught: - baseline = calculate_reward( - context, - params, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO_HIGH, - short_allowed=True, - action_masking=True, + baseline = calculate_reward_with_defaults( + context, params, 
risk_reward_ratio=PARAMS.RISK_REWARD_RATIO_HIGH ) amplified_base_factor = PARAMS.BASE_FACTOR * 200.0 - amplified = calculate_reward( + amplified = calculate_reward_with_defaults( context, params, base_factor=amplified_base_factor, - profit_aim=PARAMS.PROFIT_AIM, risk_reward_ratio=PARAMS.RISK_REWARD_RATIO_HIGH, - short_allowed=True, - action_masking=True, ) self.assertGreater(baseline.exit_component, 0.0) self.assertGreater(amplified.exit_component, baseline.exit_component) @@ -356,15 +339,7 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): position=Positions.Long, action=Actions.Long_exit, ) - br = calculate_reward( - context, - extreme_params, - base_factor=10000.0, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, - action_masking=True, - ) + br = calculate_reward_with_defaults(context, extreme_params, base_factor=10000.0) self.assertFinite(br.total, name="breakdown.total") def test_exit_attenuation_modes_enumeration(self): @@ -399,15 +374,7 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): position=Positions.Long, action=Actions.Long_exit, ) - br = calculate_reward( - ctx, - test_params, - base_factor=PARAMS.BASE_FACTOR, - profit_aim=PARAMS.PROFIT_AIM, - risk_reward_ratio=PARAMS.RISK_REWARD_RATIO, - short_allowed=True, - action_masking=True, - ) + br = calculate_reward_with_defaults(ctx, test_params) self.assertFinite(br.exit_component, name="breakdown.exit_component") self.assertFinite(br.total, name="breakdown.total") diff --git a/ReforceXY/reward_space_analysis/tests/statistics/test_feature_analysis_failures.py b/ReforceXY/reward_space_analysis/tests/statistics/test_feature_analysis_failures.py index a27f233..8faa236 100644 --- a/ReforceXY/reward_space_analysis/tests/statistics/test_feature_analysis_failures.py +++ b/ReforceXY/reward_space_analysis/tests/statistics/test_feature_analysis_failures.py @@ -27,7 +27,7 @@ pytestmark = pytest.mark.statistics def _minimal_df(n: int = 30) -> pd.DataFrame: - rng = np.random.default_rng(42) + rng = np.random.default_rng(SEEDS.BASE) return pd.DataFrame( { "pnl": rng.normal(0, 1, n), @@ -100,7 +100,8 @@ def test_feature_analysis_single_feature_path(): - importance_mean is all NaN (stub path for single feature) - model is None """ - df = pd.DataFrame({"pnl": np.random.normal(0, 1, 25), "reward": np.random.normal(0, 1, 25)}) + rng = np.random.default_rng(SEEDS.FEATURE_PRIME_11) + df = pd.DataFrame({"pnl": rng.normal(0, 1, 25), "reward": rng.normal(0, 1, 25)}) importance_df, stats, partial_deps, model = _perform_feature_analysis( df, seed=SEEDS.FEATURE_PRIME_11, skip_partial_dependence=True ) @@ -123,7 +124,7 @@ def test_feature_analysis_nans_present_path(): - importance_mean is all NaN - model is None """ - rng = np.random.default_rng(9) + rng = np.random.default_rng(SEEDS.FEATURE_PRIME_7) df = pd.DataFrame( { "pnl": rng.normal(0, 1, 40), -- 2.43.0
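
Postscript for reviewers: the three new *_with_defaults wrappers share one precedence rule. An explicit keyword argument beats the optional config object, which beats the module-level default, and any keyword the wrapper does not recognize (prev_potential, for instance) is forwarded untouched to the wrapped function. The minimal sketch below restates that pattern in isolation so it can be run and inspected on its own; the RewardConfig field values and the stub calculate_reward are hypothetical stand-ins, not the real reward_space_analysis API.

    # Self-contained restatement of the wrapper pattern (illustrative only:
    # RewardConfig fields and the stub calculate_reward are stand-ins).
    from dataclasses import dataclass
    from typing import Any, Dict


    @dataclass
    class RewardConfig:
        # Stand-ins for PARAMS.BASE_FACTOR / PROFIT_AIM / RISK_REWARD_RATIO.
        base_factor: float = 100.0
        profit_aim: float = 0.03
        risk_reward_ratio: float = 1.0
        short_allowed: bool = True
        action_masking: bool = True


    DEFAULT_REWARD_CONFIG = RewardConfig()


    def calculate_reward(context: Any, params: Dict[str, Any], **kwargs: Any) -> Dict[str, Any]:
        """Stub standing in for reward_space_analysis.calculate_reward."""
        return {"context": context, "params": params, **kwargs}


    def calculate_reward_with_defaults(
        context: Any,
        params: Dict[str, Any],
        config: RewardConfig | None = None,
        **overrides: Any,
    ) -> Dict[str, Any]:
        cfg = config or DEFAULT_REWARD_CONFIG
        # pop() consumes the keys the wrapper knows about; whatever remains
        # (e.g. prev_potential) is forwarded untouched to the wrapped function.
        base_factor = overrides.pop("base_factor", cfg.base_factor)
        profit_aim = overrides.pop("profit_aim", cfg.profit_aim)
        risk_reward_ratio = overrides.pop("risk_reward_ratio", cfg.risk_reward_ratio)
        short_allowed = overrides.pop("short_allowed", cfg.short_allowed)
        action_masking = overrides.pop("action_masking", cfg.action_masking)
        return calculate_reward(
            context,
            params,
            base_factor=base_factor,
            profit_aim=profit_aim,
            risk_reward_ratio=risk_reward_ratio,
            short_allowed=short_allowed,
            action_masking=action_masking,
            **overrides,
        )


    if __name__ == "__main__":
        print(calculate_reward_with_defaults("ctx", {}))                        # all defaults
        print(calculate_reward_with_defaults("ctx", {}, action_masking=False))  # one override
        print(calculate_reward_with_defaults("ctx", {}, prev_potential=0.37))   # pass-through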
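
Two properties fall out of this pop-then-forward shape. Call sites that deviate in a single parameter stay one line long (action_masking=False above), and new pass-through arguments of the wrapped function, such as prev_potential used by the PBRS tests in this patch, require no wrapper change. The trade-off is that a misspelled override key is forwarded rather than rejected by the wrapper, so the wrapped function's own signature check is the only guard.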