_get_str_param,
_sample_action,
build_argument_parser,
- calculate_reward,
parse_overrides,
- simulate_samples,
write_complete_statistical_analysis,
)
-from ..constants import PARAMS, SEEDS, TOLERANCE
+from ..constants import PARAMS, SCENARIOS, SEEDS, TOLERANCE
+from ..helpers import calculate_reward_with_defaults, simulate_samples_with_defaults
from ..test_base import RewardSpaceTestBase
pytestmark = pytest.mark.api
def test_api_simulation_and_reward_smoke(self):
"""Test api simulation and reward smoke."""
- df = simulate_samples(
- params=self.base_params(max_trade_duration_candles=40),
- num_samples=20,
+ df = simulate_samples_with_defaults(
+ self.base_params(max_trade_duration_candles=40),
+ num_samples=SCENARIOS.SAMPLE_SIZE_TINY,
seed=SEEDS.SMOKE_TEST,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
max_duration_ratio=1.5,
- trading_mode="margin",
- pnl_base_std=PARAMS.PNL_STD,
- pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE,
)
self.assertGreater(len(df), 0)
any_exit = df[df["reward_exit"] != 0].head(1)
position=Positions.Long,
action=Actions.Long_exit,
)
- breakdown = calculate_reward(
- ctx,
- self.DEFAULT_PARAMS,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=True,
- )
+ breakdown = calculate_reward_with_defaults(ctx, self.DEFAULT_PARAMS)
self.assertFinite(breakdown.total)
def test_simulate_samples_trading_modes_spot_vs_margin(self):
"""simulate_samples coverage: spot should forbid shorts, margin should allow them."""
- df_spot = simulate_samples(
- params=self.base_params(max_trade_duration_candles=100),
- num_samples=80,
- seed=SEEDS.BASE,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- max_duration_ratio=2.0,
+ df_spot = simulate_samples_with_defaults(
+ self.base_params(max_trade_duration_candles=100),
+ num_samples=SCENARIOS.SAMPLE_SIZE_SMALL,
trading_mode="spot",
- pnl_base_std=PARAMS.PNL_STD,
- pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE,
)
short_positions_spot = (df_spot["position"] == float(Positions.Short.value)).sum()
self.assertEqual(short_positions_spot, 0, "Spot mode must not contain short positions")
- df_margin = simulate_samples(
- params=self.base_params(max_trade_duration_candles=100),
- num_samples=80,
- seed=SEEDS.BASE,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- max_duration_ratio=2.0,
- trading_mode="margin",
- pnl_base_std=PARAMS.PNL_STD,
- pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE,
+ df_margin = simulate_samples_with_defaults(
+ self.base_params(max_trade_duration_candles=100),
+ num_samples=SCENARIOS.SAMPLE_SIZE_SMALL,
)
for col in [
"pnl",
def test_simulate_samples_sampling_probabilities_are_bounded(self):
"""simulate_samples() exposes bounded sampling probabilities."""
- df = simulate_samples(
- params=self.base_params(max_trade_duration_candles=40),
- num_samples=200,
+ df = simulate_samples_with_defaults(
+ self.base_params(max_trade_duration_candles=40),
seed=SEEDS.SMOKE_TEST,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
max_duration_ratio=1.5,
- trading_mode="margin",
- pnl_base_std=PARAMS.PNL_STD,
- pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE,
)
for col in ["sample_entry_prob", "sample_exit_prob", "sample_neutral_prob"]:
def test_to_bool(self):
"""Test _to_bool with various inputs."""
- df1 = simulate_samples(
- params=self.base_params(action_masking="true", max_trade_duration_candles=50),
+ df1 = simulate_samples_with_defaults(
+ self.base_params(action_masking="true", max_trade_duration_candles=50),
num_samples=10,
- seed=SEEDS.BASE,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- max_duration_ratio=2.0,
trading_mode="spot",
- pnl_base_std=PARAMS.PNL_STD,
- pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE,
)
self.assertIsInstance(df1, pd.DataFrame)
- df2 = simulate_samples(
- params=self.base_params(action_masking="false", max_trade_duration_candles=50),
+ df2 = simulate_samples_with_defaults(
+ self.base_params(action_masking="false", max_trade_duration_candles=50),
num_samples=10,
- seed=SEEDS.BASE,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- max_duration_ratio=2.0,
trading_mode="spot",
- pnl_base_std=PARAMS.PNL_STD,
- pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE,
)
self.assertIsInstance(df2, pd.DataFrame)
def test_short_allowed_via_simulation(self):
"""Test _is_short_allowed via different trading modes."""
- df_futures = simulate_samples(
- params=self.base_params(max_trade_duration_candles=50),
- num_samples=100,
- seed=SEEDS.BASE,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- max_duration_ratio=2.0,
+ df_futures = simulate_samples_with_defaults(
+ self.base_params(max_trade_duration_candles=50),
+ num_samples=SCENARIOS.SAMPLE_SIZE_SMALL,
trading_mode="futures",
- pnl_base_std=PARAMS.PNL_STD,
- pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE,
)
short_positions = (df_futures["position"] == float(Positions.Short.value)).sum()
self.assertGreater(short_positions, 0, "Futures mode should allow short positions")
def test_complete_statistical_analysis_writer(self):
"""Test write_complete_statistical_analysis function."""
- test_data = simulate_samples(
- params=self.base_params(max_trade_duration_candles=100),
- num_samples=200,
- seed=SEEDS.BASE,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- max_duration_ratio=2.0,
- trading_mode="margin",
- pnl_base_std=PARAMS.PNL_STD,
- pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE,
+ test_data = simulate_samples_with_defaults(
+ self.base_params(max_trade_duration_candles=100),
)
with tempfile.TemporaryDirectory() as tmp_dir:
output_path = Path(tmp_dir)
position=position,
action=action,
)
- breakdown = calculate_reward(
- context,
- self.DEFAULT_PARAMS,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=True,
- )
+ breakdown = calculate_reward_with_defaults(context, self.DEFAULT_PARAMS)
self.assertNotEqual(
breakdown.exit_component,
0.0,
position=Positions.Short,
action=Actions.Long_exit,
)
- breakdown = calculate_reward(
- context,
- self.DEFAULT_PARAMS,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=False,
+ breakdown = calculate_reward_with_defaults(
+ context, self.DEFAULT_PARAMS, action_masking=False
)
self.assertLess(breakdown.invalid_penalty, 0, "Invalid action should have negative penalty")
self.assertAlmostEqualFloat(
position=Positions.Long,
action=Actions.Long_exit,
)
- breakdown = calculate_reward(
- context,
- params,
- base_factor=10000000.0,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=True,
- )
+ breakdown = calculate_reward_with_defaults(context, params, base_factor=10000000.0)
self.assertFinite(breakdown.exit_component, name="exit_component")
from reward_space_analysis import compute_pbrs_components
-from ..constants import PARAMS
+from ..constants import PARAMS, TOLERANCE
from ..test_base import RewardSpaceTestBase
pytestmark = pytest.mark.components
**Setup:**
- Base configuration: hold_potential enabled, additives disabled
- Test configuration: entry_additive and exit_additive enabled
- - Additive parameters: ratio=0.4, gain=1.0 for both entry/exit
+ - Additive parameters: ratio=PARAMS.ADDITIVE_RATIO_DEFAULT, gain=PARAMS.ADDITIVE_GAIN_DEFAULT for both entry/exit
- Context: base_reward=0.05, pnl=0.01, duration_ratio=0.2
**Assertions:**
- Total reward with additives > total reward without additives
- - Shaping difference remains bounded: |s1 - s0| < 0.2
+ - Shaping difference remains bounded: |s1 - s0| < TOLERANCE.SHAPING_BOUND_TOLERANCE
- Both total and shaping rewards are finite
**Tolerance rationale:**
- - Custom bound 0.2 for shaping delta: Additives should not cause
+ - Custom bound TOLERANCE.SHAPING_BOUND_TOLERANCE for shaping delta: Additives should not cause
      large shifts in the shaping component, preserving PBRS properties
"""
base = self.base_params(
{
"entry_additive_enabled": True,
"exit_additive_enabled": True,
- "entry_additive_ratio": 0.4,
- "exit_additive_ratio": 0.4,
- "entry_additive_gain": 1.0,
- "exit_additive_gain": 1.0,
+ "entry_additive_ratio": PARAMS.ADDITIVE_RATIO_DEFAULT,
+ "exit_additive_ratio": PARAMS.ADDITIVE_RATIO_DEFAULT,
+ "entry_additive_gain": PARAMS.ADDITIVE_GAIN_DEFAULT,
+ "exit_additive_gain": PARAMS.ADDITIVE_GAIN_DEFAULT,
}
)
base_reward = 0.05
t1 = base_reward + s1 + _entry1 + _exit1
self.assertFinite(t1)
self.assertFinite(s1)
- self.assertLess(abs(s1 - s0), 0.2)
+ self.assertLess(abs(s1 - s0), TOLERANCE.SHAPING_BOUND_TOLERANCE)
self.assertGreater(t1 - t0, 0.0, "Total reward should increase with additives present")
_compute_pnl_target_coefficient,
_get_exit_factor,
_get_float_param,
- calculate_reward,
get_max_idle_duration_candles,
)
assert_hold_penalty_threshold_behavior,
assert_progressive_scaling_behavior,
assert_reward_calculation_scenarios,
+ calculate_reward_with_defaults,
make_idle_penalty_test_contexts,
)
from ..test_base import RewardSpaceTestBase
position=Positions.Long,
action=Actions.Neutral,
)
- breakdown = calculate_reward(
- context,
- self.DEFAULT_PARAMS,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=True,
- )
+ breakdown = calculate_reward_with_defaults(context, self.DEFAULT_PARAMS)
self.assertLess(breakdown.hold_penalty, 0, "Hold penalty should be negative")
config = ValidationConfig(
tolerance_strict=TOLERANCE.IDENTITY_STRICT,
position=Positions.Long,
action=Actions.Neutral,
)
- breakdown = calculate_reward(
- context,
- params,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=True,
- )
+ breakdown = calculate_reward_with_defaults(context, params)
penalties.append(breakdown.hold_penalty)
assert_progressive_scaling_behavior(self, penalties, durations, "Hold penalty")
position=Positions.Long,
action=Actions.Long_exit,
)
- breakdown = calculate_reward(
- context,
- params,
- base_factor=1.0,
- profit_aim=0.03,
- risk_reward_ratio=1.0,
- short_allowed=True,
- action_masking=True,
+ breakdown = calculate_reward_with_defaults(
+ context, params, base_factor=1.0, profit_aim=0.03
)
self.assertLessEqual(
breakdown.exit_component,
"""
params_small = self.base_params(max_idle_duration_candles=50)
params_large = self.base_params(max_idle_duration_candles=200)
- base_factor = PARAMS.BASE_FACTOR
context = self.make_ctx(
pnl=0.0,
trade_duration=0,
position=Positions.Neutral,
action=Actions.Neutral,
)
- small = calculate_reward(
- context,
- params_small,
- base_factor,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=True,
- )
- large = calculate_reward(
- context,
- params_large,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=True,
- )
+ small = calculate_reward_with_defaults(context, params_small)
+ large = calculate_reward_with_defaults(context, params_large)
self.assertLess(small.idle_penalty, 0.0)
self.assertLess(large.idle_penalty, 0.0)
self.assertGreater(large.idle_penalty, small.idle_penalty)
position=Positions.Long,
action=Actions.Long_exit,
)
- br = calculate_reward(
- context,
- params,
- base_factor=1.0,
- profit_aim=profit_aim,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=True,
+ br = calculate_reward_with_defaults(
+ context, params, base_factor=1.0, profit_aim=profit_aim
)
ratio = br.exit_component / pnl if pnl != 0 else 0.0
ratios_observed.append(float(ratio))
results = []
for context, description in contexts_and_descriptions:
- breakdown = calculate_reward(
+ breakdown = calculate_reward_with_defaults(
context,
params,
base_factor=base_factor,
profit_aim=profit_aim,
risk_reward_ratio=risk_reward_ratio,
- short_allowed=True,
- action_masking=True,
)
results.append((breakdown, context.idle_duration, description))
position=Positions.Long,
action=Actions.Long_exit,
)
- breakdown = calculate_reward(
- context,
- canonical_params,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=True,
- )
+ breakdown = calculate_reward_with_defaults(context, canonical_params)
# Verify all PBRS fields are finite
self.assertFinite(breakdown.base_reward, name="base_reward")
)
params_rr.pop("risk_reward_ratio", None)
- br_ratio = calculate_reward(
- context,
- params_ratio,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=1.0,
- short_allowed=True,
- action_masking=True,
- )
- br_rr = calculate_reward(
- context,
- params_rr,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=1.0,
- short_allowed=True,
- action_masking=True,
- )
+ br_ratio = calculate_reward_with_defaults(context, params_ratio, risk_reward_ratio=1.0)
+ br_rr = calculate_reward_with_defaults(context, params_rr, risk_reward_ratio=1.0)
self.assertAlmostEqualFloat(
br_rr.total,
# Feature analysis failure seeds
FEATURE_EMPTY: Seed for empty feature tests (17)
+ FEATURE_PRIME_7: Seed for feature test variant (7)
FEATURE_PRIME_11: Seed for feature test variant (11)
FEATURE_PRIME_13: Seed for feature test variant (13)
FEATURE_PRIME_21: Seed for feature test variant (21)
# Feature analysis failure seeds
FEATURE_EMPTY: int = 17
+ FEATURE_PRIME_7: int = 7
FEATURE_PRIME_11: int = 11
FEATURE_PRIME_13: int = 13
FEATURE_PRIME_21: int = 21
from .assertions import (
assert_adjustment_reason_contains,
- # Core numeric/trend assertions
assert_almost_equal_list,
assert_component_sum_integrity,
assert_exit_factor_attenuation_modes,
- # Exit factor invariance helpers
assert_exit_factor_invariant_suite,
assert_exit_factor_kernel_fallback,
assert_exit_factor_plateau_behavior,
assert_non_canonical_shaping_exceeds,
assert_parameter_sensitivity_behavior,
assert_pbrs_canonical_sum_within_tolerance,
- # PBRS invariance/report helpers
assert_pbrs_invariance_report_classification,
assert_progressive_scaling_behavior,
- # Relaxed validation aggregation
assert_relaxed_multi_reason_aggregation,
assert_reward_calculation_scenarios,
assert_single_active_component,
assert_single_active_component_with_additives,
assert_trend,
- # Validation batch builders/executors
build_validation_case,
+ calculate_reward_with_defaults,
execute_validation_batch,
+ get_exit_factor_with_defaults,
make_idle_penalty_test_contexts,
run_relaxed_validation_adjustment_cases,
run_strict_validation_failure_cases,
safe_float,
+ simulate_samples_with_defaults,
)
from .configs import (
    ContextFactory,
+    DEFAULT_REWARD_CONFIG,
+    DEFAULT_SIMULATION_CONFIG,
ExitFactorConfig,
ProgressiveScalingConfig,
- # Configuration dataclasses
RewardScenarioConfig,
SimulationConfig,
StatisticalTestConfig,
ThresholdTestConfig,
- # Type aliases
ValidationCallback,
ValidationConfig,
WarningCaptureConfig,
from .warnings import (
assert_diagnostic_warning,
assert_no_warnings,
- # Warning capture utilities
capture_warnings,
validate_warning_content,
)
__all__ = [
- # Core numeric/trend assertions
"assert_monotonic_nonincreasing",
"assert_monotonic_nonnegative",
"assert_finite",
"assert_multi_parameter_sensitivity",
"assert_hold_penalty_threshold_behavior",
"safe_float",
- # Validation batch builders/executors
"build_validation_case",
"execute_validation_batch",
"assert_adjustment_reason_contains",
"run_strict_validation_failure_cases",
"run_relaxed_validation_adjustment_cases",
- # Exit factor invariance helpers
"assert_exit_factor_invariant_suite",
"assert_exit_factor_kernel_fallback",
- # Relaxed validation aggregation
"assert_relaxed_multi_reason_aggregation",
- # PBRS invariance/report helpers
"assert_pbrs_invariance_report_classification",
"assert_pbrs_canonical_sum_within_tolerance",
"assert_non_canonical_shaping_exceeds",
- # Configuration dataclasses
+ "calculate_reward_with_defaults",
+ "get_exit_factor_with_defaults",
+ "simulate_samples_with_defaults",
"RewardScenarioConfig",
"ValidationConfig",
"ThresholdTestConfig",
"WarningCaptureConfig",
"ValidationCallback",
"ContextFactory",
- # Warning capture utilities
+ "DEFAULT_REWARD_CONFIG",
+ "DEFAULT_SIMULATION_CONFIG",
"capture_warnings",
"assert_diagnostic_warning",
"assert_no_warnings",
calculate_reward,
)
-from ..constants import TOLERANCE
-from .configs import RewardScenarioConfig, ThresholdTestConfig, ValidationConfig
+from ..constants import PARAMS, TOLERANCE
+from .configs import (
+ DEFAULT_REWARD_CONFIG,
+ DEFAULT_SIMULATION_CONFIG,
+ RewardScenarioConfig,
+ SimulationConfig,
+ ThresholdTestConfig,
+ ValidationConfig,
+)
def safe_float(value: Any, default: float = 0.0) -> float:
plateau_factor_post - tolerance_strict,
"Plateau pre-grace factor should be >= post-grace factor",
)
+
+
+# ---------------- Wrapper functions with standard defaults ---------------- #
+
+
+def calculate_reward_with_defaults(
+ context,
+ params: Dict[str, Any],
+ config: RewardScenarioConfig | None = None,
+ **overrides,
+):
+ """Calculate reward with standard test defaults.
+
+ Reduces boilerplate by providing sensible defaults for common parameters.
+ Override any parameter by passing it as a keyword argument.
+
+ Args:
+ context: RewardContext for the calculation
+ params: Parameter dictionary for reward calculation
+ config: Optional RewardScenarioConfig (defaults to DEFAULT_REWARD_CONFIG)
+ **overrides: Keyword arguments to override config values. Supported keys:
+ - base_factor: Base scaling factor
+ - profit_aim: Base profit target
+ - risk_reward_ratio: Risk/reward ratio
+ - short_allowed: Whether short positions are permitted
+ - action_masking: Whether to apply action masking
+ - prev_potential: Previous potential for PBRS (passed through)
+
+ Returns:
+ RewardBreakdown from calculate_reward()
+
+ Example:
+ # Using all defaults
+ breakdown = calculate_reward_with_defaults(ctx, params)
+
+ # Overriding specific parameters
+ breakdown = calculate_reward_with_defaults(
+ ctx, params, action_masking=False
+ )
+
+ # Using custom config
+ custom_config = RewardScenarioConfig(...)
+ breakdown = calculate_reward_with_defaults(ctx, params, config=custom_config)
+ """
+ cfg = config or DEFAULT_REWARD_CONFIG
+
+ # Extract config values with potential overrides
+ base_factor = overrides.pop("base_factor", cfg.base_factor)
+ profit_aim = overrides.pop("profit_aim", cfg.profit_aim)
+ risk_reward_ratio = overrides.pop("risk_reward_ratio", cfg.risk_reward_ratio)
+ short_allowed = overrides.pop("short_allowed", cfg.short_allowed)
+ action_masking = overrides.pop("action_masking", cfg.action_masking)
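+    # Anything left in `overrides` (e.g. prev_potential) is forwarded to
+    # calculate_reward unchanged via **overrides below.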
+
+ return calculate_reward(
+ context,
+ params,
+ base_factor=base_factor,
+ profit_aim=profit_aim,
+ risk_reward_ratio=risk_reward_ratio,
+ short_allowed=short_allowed,
+ action_masking=action_masking,
+ **overrides,
+ )
+
+
+def get_exit_factor_with_defaults(
+ pnl: float,
+ duration_ratio: float,
+ context,
+ params: Dict[str, Any],
+ base_factor: float | None = None,
+ pnl_target: float | None = None,
+ risk_reward_ratio: float | None = None,
+):
+ """Calculate exit factor with standard test defaults.
+
+ Reduces boilerplate by providing sensible defaults for common parameters.
+ This wrapper is particularly useful for tests that need to call _get_exit_factor
+ repeatedly with varying pnl and duration_ratio values.
+
+ Args:
+ pnl: Realized profit/loss
+ duration_ratio: Ratio of current to maximum duration
+ context: RewardContext for efficiency coefficient calculation
+ params: Parameter dictionary
+ base_factor: Base scaling factor (defaults to PARAMS.BASE_FACTOR)
+        pnl_target: Target profit threshold (defaults to PARAMS.PROFIT_AIM * the effective risk_reward_ratio)
+ risk_reward_ratio: Risk/reward ratio (defaults to PARAMS.RISK_REWARD_RATIO)
+
+ Returns:
+ Exit factor value from _get_exit_factor()
+
+ Example:
+ # Using all defaults
+ factor = get_exit_factor_with_defaults(0.05, 0.5, ctx, params)
+
+ # Overriding specific parameters
+ factor = get_exit_factor_with_defaults(
+ 0.05, 0.5, ctx, params, base_factor=100.0
+ )
+ """
+ if base_factor is None:
+ base_factor = PARAMS.BASE_FACTOR
+ if risk_reward_ratio is None:
+ risk_reward_ratio = PARAMS.RISK_REWARD_RATIO
+ if pnl_target is None:
+ pnl_target = PARAMS.PROFIT_AIM * risk_reward_ratio
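+    # The derived default tracks the effective ratio: overriding
+    # risk_reward_ratio also shifts the default pnl_target.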
+
+ return _get_exit_factor(
+ base_factor,
+ pnl,
+ pnl_target,
+ duration_ratio,
+ context,
+ params,
+ risk_reward_ratio,
+ )
+
+
+def simulate_samples_with_defaults(
+ params: Dict[str, Any],
+ config: SimulationConfig | None = None,
+ base_factor: float | None = None,
+ profit_aim: float | None = None,
+ risk_reward_ratio: float | None = None,
+ **overrides,
+):
+ """Simulate samples with standard test defaults.
+
+ Reduces boilerplate by providing sensible defaults for simulation parameters.
+ Override any parameter by passing it as a keyword argument.
+
+ Args:
+ params: Parameter dictionary for reward calculation
+ config: Optional SimulationConfig (defaults to DEFAULT_SIMULATION_CONFIG)
+ base_factor: Base scaling factor (defaults to PARAMS.BASE_FACTOR)
+ profit_aim: Base profit target (defaults to PARAMS.PROFIT_AIM)
+ risk_reward_ratio: Risk/reward ratio (defaults to PARAMS.RISK_REWARD_RATIO)
+ **overrides: Keyword arguments to override config values. Supported keys:
+ - num_samples: Number of samples to generate
+ - seed: Random seed for reproducibility
+ - max_duration_ratio: Maximum duration ratio
+ - trading_mode: Trading mode ("margin", "spot", etc.)
+ - pnl_base_std: Base standard deviation for PnL generation
+ - pnl_duration_vol_scale: Volatility scaling factor
+
+ Returns:
+ DataFrame from simulate_samples()
+
+ Example:
+ # Using all defaults
+ df = simulate_samples_with_defaults(params)
+
+ # Overriding specific parameters
+ df = simulate_samples_with_defaults(params, num_samples=500, seed=123)
+
+ # Using custom config
+ custom_config = SimulationConfig(num_samples=1000, seed=42)
+ df = simulate_samples_with_defaults(params, config=custom_config)
+ """
+ # Import here to avoid circular imports
+ from reward_space_analysis import simulate_samples
+
+ cfg = config or DEFAULT_SIMULATION_CONFIG
+
+ # Use config values with potential overrides
+ num_samples = overrides.pop("num_samples", cfg.num_samples)
+ seed = overrides.pop("seed", cfg.seed)
+ max_duration_ratio = overrides.pop("max_duration_ratio", cfg.max_duration_ratio)
+ trading_mode = overrides.pop("trading_mode", cfg.trading_mode)
+ pnl_base_std = overrides.pop("pnl_base_std", cfg.pnl_base_std)
+ pnl_duration_vol_scale = overrides.pop("pnl_duration_vol_scale", cfg.pnl_duration_vol_scale)
+
+ # Use provided values or defaults for reward calculation params
+ if base_factor is None:
+ base_factor = PARAMS.BASE_FACTOR
+ if profit_aim is None:
+ profit_aim = PARAMS.PROFIT_AIM
+ if risk_reward_ratio is None:
+ risk_reward_ratio = PARAMS.RISK_REWARD_RATIO
+
+ return simulate_samples(
+ params=params,
+ num_samples=num_samples,
+ seed=seed,
+ base_factor=base_factor,
+ profit_aim=profit_aim,
+ risk_reward_ratio=risk_reward_ratio,
+ max_duration_ratio=max_duration_ratio,
+ trading_mode=trading_mode,
+ pnl_base_std=pnl_base_std,
+ pnl_duration_vol_scale=pnl_duration_vol_scale,
+ **overrides,
+ )
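+
+
+# Override precedence, as a minimal sketch (hypothetical values; it relies only
+# on the pop()-with-default pattern above):
+#
+#     cfg = SimulationConfig(num_samples=50, seed=123)
+#     df = simulate_samples_with_defaults(params, config=cfg, num_samples=10)
+#
+# The explicit kwarg (10) beats cfg.num_samples (50); DEFAULT_SIMULATION_CONFIG
+# applies only when config is None.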
from dataclasses import dataclass
from typing import Callable, Optional
-from ..constants import SEEDS, STATISTICAL, TOLERANCE
+from ..constants import PARAMS, SEEDS, STATISTICAL, TOLERANCE
@dataclass
ContextFactory = Callable[..., object]
+# Default config instances for common test scenarios
+# These reduce boilerplate by providing pre-configured defaults
+
+DEFAULT_REWARD_CONFIG: RewardScenarioConfig = RewardScenarioConfig(
+ base_factor=PARAMS.BASE_FACTOR,
+ profit_aim=PARAMS.PROFIT_AIM,
+ risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
+ tolerance_relaxed=TOLERANCE.IDENTITY_RELAXED,
+ short_allowed=True,
+ action_masking=True,
+)
+"""Default RewardScenarioConfig with standard test parameters."""
+
+
+DEFAULT_SIMULATION_CONFIG: SimulationConfig = SimulationConfig(
+ num_samples=200,
+ seed=SEEDS.BASE,
+ max_duration_ratio=2.0,
+ trading_mode="margin",
+ pnl_base_std=PARAMS.PNL_STD,
+ pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE,
+)
+"""Default SimulationConfig with standard test parameters."""
+
+
__all__ = [
"RewardScenarioConfig",
"ValidationConfig",
"WarningCaptureConfig",
"ValidationCallback",
"ContextFactory",
+ "DEFAULT_REWARD_CONFIG",
+ "DEFAULT_SIMULATION_CONFIG",
]
Positions,
RewardParams,
_get_bool_param,
- calculate_reward,
)
from ..test_base import make_ctx
+from . import calculate_reward_with_defaults
def test_get_bool_param_none_and_invalid_literal():
"unrealized_pnl": True,
"pnl_factor_beta": 0.5,
}
- breakdown = calculate_reward(
+ breakdown = calculate_reward_with_defaults(
context,
params,
base_factor=100.0,
profit_aim=0.05,
risk_reward_ratio=1.0,
- short_allowed=True,
- action_masking=True,
prev_potential=np.nan,
)
assert math.isfinite(breakdown.prev_potential)
from reward_space_analysis import (
Actions,
Positions,
- calculate_reward,
)
from ..constants import PARAMS, TOLERANCE
+from ..helpers import calculate_reward_with_defaults
from ..test_base import RewardSpaceTestBase
pytestmark = pytest.mark.integration
for name, ctx_kwargs, expected_component in scenarios:
with self.subTest(scenario=name):
ctx = self.make_ctx(**ctx_kwargs)
- breakdown = calculate_reward(
+ breakdown = calculate_reward_with_defaults(
ctx,
self.DEFAULT_PARAMS,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
action_masking=expected_component != "invalid_penalty",
)
action=Actions.Short_exit,
)
- br_long = calculate_reward(
+ br_long = calculate_reward_with_defaults(
ctx_long,
params,
base_factor=base_factor,
profit_aim=profit_aim,
risk_reward_ratio=rr,
- short_allowed=True,
- action_masking=True,
)
- br_short = calculate_reward(
+ br_short = calculate_reward_with_defaults(
ctx_short,
params,
base_factor=base_factor,
profit_aim=profit_aim,
risk_reward_ratio=rr,
- short_allowed=True,
- action_masking=True,
)
if pnl > 0:
_compute_unrealized_pnl_estimate,
_get_float_param,
apply_potential_shaping,
- calculate_reward,
get_max_idle_duration_candles,
simulate_samples,
validate_reward_parameters,
assert_pbrs_invariance_report_classification,
assert_relaxed_multi_reason_aggregation,
build_validation_case,
+ calculate_reward_with_defaults,
execute_validation_batch,
)
from ..test_base import RewardSpaceTestBase
ctx = self.make_ctx(position=Positions.Neutral, action=Actions.Neutral)
prev_potential = 0.37
- breakdown = calculate_reward(
- ctx,
- params,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=True,
- prev_potential=prev_potential,
- )
+ breakdown = calculate_reward_with_defaults(ctx, params, prev_potential=prev_potential)
self.assertAlmostEqualFloat(
breakdown.prev_potential,
ctx = self.make_ctx(
position=Positions.Neutral, action=action, pnl=0.0, trade_duration=0
)
- breakdown = calculate_reward(
- ctx,
- params,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=True,
- prev_potential=0.0,
- )
+ breakdown = calculate_reward_with_defaults(ctx, params, prev_potential=0.0)
self.assertTrue(np.isfinite(breakdown.next_potential))
# With any nonzero fees, immediate unrealized pnl should be negative.
self.assertLess(
trade_duration=trade_duration,
)
- breakdown = calculate_reward(
+ breakdown = calculate_reward_with_defaults(
ctx,
{**params, "max_trade_duration_candles": max_trade_duration_candles},
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=True,
prev_potential=0.0,
)
self.assertNotEqual(prev_potential, 0.0)
- breakdown = calculate_reward(
- ctx,
- params,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=False,
- prev_potential=prev_potential,
+ breakdown = calculate_reward_with_defaults(
+ ctx, params, action_masking=False, prev_potential=prev_potential
)
expected_shaping = params["potential_gamma"] * prev_potential - prev_potential
Positions,
RewardContext,
_get_exit_factor,
- calculate_reward,
simulate_samples,
)
assert_exit_factor_attenuation_modes,
assert_exit_mode_mathematical_validation,
assert_single_active_component_with_additives,
+ calculate_reward_with_defaults,
capture_warnings,
)
from ..test_base import RewardSpaceTestBase
potential_gamma=0.0,
check_invariants=False,
)
- br = calculate_reward(
- ctx_obj,
- params,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=True,
- )
+ br = calculate_reward_with_defaults(ctx_obj, params)
# Relaxed tolerance: Accumulated floating-point errors across multiple
# reward component calculations (entry, hold, exit additives, and penalties)
assert_single_active_component_with_additives(
action=Actions.Long_exit,
)
with capture_warnings() as caught:
- baseline = calculate_reward(
- context,
- params,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO_HIGH,
- short_allowed=True,
- action_masking=True,
+ baseline = calculate_reward_with_defaults(
+ context, params, risk_reward_ratio=PARAMS.RISK_REWARD_RATIO_HIGH
)
amplified_base_factor = PARAMS.BASE_FACTOR * 200.0
- amplified = calculate_reward(
+ amplified = calculate_reward_with_defaults(
context,
params,
base_factor=amplified_base_factor,
- profit_aim=PARAMS.PROFIT_AIM,
risk_reward_ratio=PARAMS.RISK_REWARD_RATIO_HIGH,
- short_allowed=True,
- action_masking=True,
)
self.assertGreater(baseline.exit_component, 0.0)
self.assertGreater(amplified.exit_component, baseline.exit_component)
position=Positions.Long,
action=Actions.Long_exit,
)
- br = calculate_reward(
- context,
- extreme_params,
- base_factor=10000.0,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=True,
- )
+ br = calculate_reward_with_defaults(context, extreme_params, base_factor=10000.0)
self.assertFinite(br.total, name="breakdown.total")
def test_exit_attenuation_modes_enumeration(self):
position=Positions.Long,
action=Actions.Long_exit,
)
- br = calculate_reward(
- ctx,
- test_params,
- base_factor=PARAMS.BASE_FACTOR,
- profit_aim=PARAMS.PROFIT_AIM,
- risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
- short_allowed=True,
- action_masking=True,
- )
+ br = calculate_reward_with_defaults(ctx, test_params)
self.assertFinite(br.exit_component, name="breakdown.exit_component")
self.assertFinite(br.total, name="breakdown.total")
def _minimal_df(n: int = 30) -> pd.DataFrame:
- rng = np.random.default_rng(42)
+ rng = np.random.default_rng(SEEDS.BASE)
return pd.DataFrame(
{
"pnl": rng.normal(0, 1, n),
- importance_mean is all NaN (stub path for single feature)
- model is None
"""
- df = pd.DataFrame({"pnl": np.random.normal(0, 1, 25), "reward": np.random.normal(0, 1, 25)})
+ rng = np.random.default_rng(SEEDS.FEATURE_PRIME_11)
+ df = pd.DataFrame({"pnl": rng.normal(0, 1, 25), "reward": rng.normal(0, 1, 25)})
importance_df, stats, partial_deps, model = _perform_feature_analysis(
df, seed=SEEDS.FEATURE_PRIME_11, skip_partial_dependence=True
)
- importance_mean is all NaN
- model is None
"""
- rng = np.random.default_rng(9)
+ rng = np.random.default_rng(SEEDS.FEATURE_PRIME_7)
df = pd.DataFrame(
{
"pnl": rng.normal(0, 1, 40),