]
markers = [
"components: component-level reward computations",
+ "transforms: mathematical transform functions",
"robustness: stress and edge-case behavior",
"api: public API surface and helpers",
"cli: command-line interface behaviors",
ATTENUATION_MODES: Tuple[str, ...] = ("sqrt", "linear", "power", "half_life")
ATTENUATION_MODES_WITH_LEGACY: Tuple[str, ...] = ATTENUATION_MODES + ("legacy",)
-# Centralized internal numeric guards & behavior toggles (single source of truth for internal tunables)
+# Centralized internal numeric guards & behavior toggles
INTERNAL_GUARDS: dict[str, float] = {
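+    # degenerate_ci_epsilon is (presumably) the widening applied to degenerate
+    # (constant-sample) bootstrap CIs so their width stays positive; the bootstrap
+    # tests bound the resulting width by STAT_TOL.CI_WIDTH_EPSILON.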
"degenerate_ci_epsilon": 1e-9,
"distribution_constant_fallback_moment": 0.0,
sanitized = dict(params)
adjustments: Dict[str, Dict[str, Any]] = {}
- # Normalize boolean-like parameters explicitly to avoid inconsistent types
+ # Boolean parameter coercion
_bool_keys = [
"check_invariants",
"hold_potential_enabled",
adjusted = original_numeric
reason_parts: List[str] = []
- # Record numeric coercion if type changed (e.g., from str/bool/None)
+ # Track type coercion
if not isinstance(original_val, (int, float)):
adjustments.setdefault(
key,
"validation_mode": "strict" if strict else "relaxed",
},
)
- # Update sanitized to numeric before clamping
sanitized[key] = original_numeric
# Bounds enforcement
max_unrealized_profit = 0.0
min_unrealized_profit = 0.0
else:
- # Unrealized profits should bracket the final PnL
- # Max represents peak profit during trade, min represents lowest point
+ # Unrealized profit bounds
span = abs(rng.gauss(0.0, 0.015))
- # Ensure max >= pnl >= min by construction
+ # max >= pnl >= min by construction
max_unrealized_profit = pnl + abs(rng.gauss(0.0, span))
min_unrealized_profit = pnl - abs(rng.gauss(0.0, span))
)
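+        # Canonical PBRS drift correction (cf. invariant 106 tests): subtract the
+        # mean residual shaping so "reward_shaping" sums to ~0 over invariant samples.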
drift = total_shaping / max(1, n_invariant)
df.loc[:, "reward_shaping"] = df["reward_shaping"] - drift
- # Attach resolved reward params for downstream consumers (e.g., report derivations)
df.attrs["reward_params"] = dict(params)
except Exception:
# Graceful fallback (no invariance enforcement on failure)
)
from ..helpers import (
+ RewardScenarioConfig,
+ ThresholdTestConfig,
+ ValidationConfig,
assert_component_sum_integrity,
assert_exit_factor_plateau_behavior,
assert_hold_penalty_threshold_behavior,
def test_hold_penalty_basic_calculation(self):
"""Test hold penalty calculation when trade_duration exceeds max_duration.
- Tests:
- - Hold penalty is negative when duration exceeds threshold
- - Component sum integrity maintained
-
- Expected behavior:
- - trade_duration > max_duration → hold_penalty < 0
- - Total reward equals sum of active components
+ Verifies:
+ - trade_duration > max_duration → hold_penalty < 0
+ - Total reward equals sum of active components
"""
context = self.make_ctx(
pnl=0.01,
action_masking=True,
)
self.assertLess(breakdown.hold_penalty, 0, "Hold penalty should be negative")
- assert_component_sum_integrity(
- self,
- breakdown,
- self.TOL_IDENTITY_RELAXED,
+ config = ValidationConfig(
+ tolerance_strict=self.TOL_IDENTITY_STRICT,
+ tolerance_relaxed=self.TOL_IDENTITY_RELAXED,
exclude_components=["idle_penalty", "exit_component", "invalid_penalty"],
component_description="hold + shaping/additives",
)
+ assert_component_sum_integrity(self, breakdown, config)
def test_hold_penalty_threshold_behavior(self):
"""Test hold penalty activation at max_duration threshold.
- Tests:
- - No penalty before max_duration
- - Penalty activation at and after max_duration
-
- Expected behavior:
- - duration < max_duration → hold_penalty = 0
- - duration >= max_duration → hold_penalty <= 0
+ Verifies:
+ - duration < max_duration → hold_penalty = 0
+ - duration >= max_duration → hold_penalty <= 0
"""
max_duration = 128
threshold_test_cases = [
action=Actions.Neutral,
)
+ config = ThresholdTestConfig(
+ max_duration=max_duration,
+ test_cases=threshold_test_cases,
+ tolerance=self.TOL_IDENTITY_RELAXED,
+ )
assert_hold_penalty_threshold_behavior(
self,
- threshold_test_cases,
- max_duration,
context_factory,
self.DEFAULT_PARAMS,
self.TEST_BASE_FACTOR,
self.TEST_PROFIT_TARGET,
1.0,
- self.TOL_IDENTITY_RELAXED,
+ config,
)
def test_hold_penalty_progressive_scaling(self):
"""Test hold penalty scales progressively with increasing duration.
- Tests:
- - Penalty magnitude increases monotonically with duration
- - Progressive scaling beyond max_duration threshold
-
- Expected behavior:
- - For d1 < d2 < d3: penalty(d1) >= penalty(d2) >= penalty(d3)
- - Penalties become more negative with longer durations
+ Verifies:
+ - For d1 < d2 < d3: penalty(d1) >= penalty(d2) >= penalty(d3)
+ - Progressive scaling beyond max_duration threshold
"""
+ from ..constants import SCENARIOS
+
params = self.base_params(max_trade_duration_candles=100)
- durations = [150, 200, 300]
+ durations = list(SCENARIOS.DURATION_SCENARIOS)
penalties = []
for duration in durations:
context = self.make_ctx(
def test_idle_penalty_calculation(self):
"""Test idle penalty calculation for neutral idle state.
- Tests:
- - Idle penalty is negative for idle duration > 0
- - Component sum integrity maintained
-
- Expected behavior:
- - idle_duration > 0 → idle_penalty < 0
- - Total reward equals sum of active components
+ Verifies:
+ - idle_duration > 0 → idle_penalty < 0
+ - Component sum integrity maintained
"""
context = self.make_ctx(
pnl=0.0,
def validate_idle_penalty(test_case, breakdown, description, tolerance):
test_case.assertLess(breakdown.idle_penalty, 0, "Idle penalty should be negative")
- assert_component_sum_integrity(
- test_case,
- breakdown,
- tolerance,
+ config = ValidationConfig(
+ tolerance_strict=test_case.TOL_IDENTITY_STRICT,
+ tolerance_relaxed=tolerance,
exclude_components=["hold_penalty", "exit_component", "invalid_penalty"],
component_description="idle + shaping/additives",
)
+ assert_component_sum_integrity(test_case, breakdown, config)
scenarios = [(context, self.DEFAULT_PARAMS, "idle_penalty_basic")]
+ config = RewardScenarioConfig(
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=1.0,
+ tolerance_relaxed=self.TOL_IDENTITY_RELAXED,
+ )
assert_reward_calculation_scenarios(
self,
scenarios,
- self.TEST_BASE_FACTOR,
- self.TEST_PROFIT_TARGET,
- 1.0,
+ config,
validate_idle_penalty,
- self.TOL_IDENTITY_RELAXED,
)
def test_efficiency_zero_policy(self):
"""Test efficiency zero policy produces expected PnL factor.
- Tests:
- - PnL factor calculation with efficiency weight = 0
- - Finite and positive factor values
-
- Expected behavior:
- - efficiency_weight = 0 → pnl_factor ≈ 1.0
- - Factor is finite and well-defined
+ Verifies:
+ - efficiency_weight = 0 → pnl_factor ≈ 1.0
+ - Factor is finite and positive
"""
ctx = self.make_ctx(
pnl=0.0,
def test_max_idle_duration_candles_logic(self):
"""Test max idle duration candles parameter affects penalty magnitude.
- Tests:
- - Smaller max_idle_duration → larger penalty magnitude
- - Larger max_idle_duration → smaller penalty magnitude
- - Both penalties are negative
-
- Expected behavior:
- - penalty(max=50) < penalty(max=200) < 0
+ Verifies:
+ - penalty(max=50) < penalty(max=200) < 0
+ - Smaller max → larger penalty magnitude
"""
params_small = self.base_params(max_idle_duration_candles=50)
params_large = self.base_params(max_idle_duration_candles=200)
Non-owning smoke test; ownership: robustness/test_robustness.py:35
- Tests:
- - Exit factor finiteness for linear and power modes
- - Plateau behavior with grace period
-
- Expected behavior:
- - All exit factors are finite and positive
- - Plateau mode attenuates after grace period
+ Verifies:
+ - Exit factors are finite and positive (linear, power modes)
+ - Plateau mode attenuates after grace period
"""
modes_to_test = ["linear", "power"]
for mode in modes_to_test:
def test_idle_penalty_zero_when_profit_target_zero(self):
"""Test idle penalty is zero when profit_target is zero.
- Tests:
- - profit_target = 0 → idle_penalty = 0
- - Total reward is zero in this configuration
-
- Expected behavior:
- - profit_target = 0 → idle_factor = 0 → idle_penalty = 0
- - No other components active for neutral idle state
+ Verifies:
+ - profit_target = 0 → idle_penalty = 0
+ - Total reward is zero in this configuration
"""
context = self.make_ctx(
pnl=0.0,
)
scenarios = [(context, self.DEFAULT_PARAMS, "profit_target_zero")]
+ config = RewardScenarioConfig(
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=0.0,
+ risk_reward_ratio=self.TEST_RR,
+ tolerance_relaxed=self.TOL_IDENTITY_RELAXED,
+ )
assert_reward_calculation_scenarios(
self,
scenarios,
- self.TEST_BASE_FACTOR,
- 0.0, # profit_target=0
- self.TEST_RR,
+ config,
validate_zero_penalty,
- self.TOL_IDENTITY_RELAXED,
)
def test_win_reward_factor_saturation(self):
"""Test PnL amplification factor saturates at asymptotic limit.
- Tests:
- - Amplification ratio increases monotonically with PnL
- - Saturation approaches (1 + win_reward_factor)
- - Mathematical formula validation
-
- Expected behavior:
- - As PnL → ∞: amplification → (1 + win_reward_factor)
- - Monotonic increase: ratio(PnL1) <= ratio(PnL2) for PnL1 < PnL2
- - Observed matches theoretical tanh-based formula
+ Verifies:
+ - Amplification ratio increases monotonically with PnL
+ - Saturation approaches (1 + win_reward_factor)
+ - Observed matches theoretical saturation behavior
"""
win_reward_factor = 3.0
beta = 0.5
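+        # Assumed amplification law behind this test (tanh-based, per the original
+        # docstring): amp(pnl) ≈ 1 + win_reward_factor * tanh(beta * pnl_ratio), so
+        # amp is monotone in pnl and saturates at 1 + win_reward_factor.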
def test_idle_penalty_fallback_and_proportionality(self):
"""Test idle penalty fallback and proportional scaling behavior.
- Tests:
- - Fallback to max_trade_duration when max_idle_duration is None
- - Proportional scaling with idle duration (2:1 ratio validation)
- - Mathematical validation of penalty formula
-
- Expected behavior:
- - max_idle_duration = None → use max_trade_duration as fallback
- - penalty(duration=40) ≈ 2 × penalty(duration=20)
- - Formula: penalty ∝ (duration/max)^power × scale
+ Verifies:
+ - max_idle_duration = None → use max_trade_duration as fallback
+ - penalty(duration=40) ≈ 2 × penalty(duration=20)
+ - Proportional scaling with idle duration
"""
params = self.base_params(max_idle_duration_candles=None, max_trade_duration_candles=100)
base_factor = 90.0
profit_target = self.TEST_PROFIT_TARGET
risk_reward_ratio = 1.0
- # Generate test contexts using helper
base_context_kwargs = {
"pnl": 0.0,
"trade_duration": 0,
self.make_ctx, idle_scenarios, base_context_kwargs
)
- # Calculate all rewards
results = []
for context, description in contexts_and_descriptions:
breakdown = calculate_reward(
)
results.append((breakdown, context.idle_duration, description))
- # Validate proportional scaling
br_a, br_b, br_mid = [r[0] for r in results]
self.assertLess(br_a.idle_penalty, 0.0)
self.assertLess(br_b.idle_penalty, 0.0)
self.assertLess(br_mid.idle_penalty, 0.0)
- # Check 2:1 ratio between 40 and 20 idle duration
ratio = br_b.idle_penalty / br_a.idle_penalty if br_a.idle_penalty != 0 else None
self.assertIsNotNone(ratio)
if ratio is not None:
self.assertAlmostEqualFloat(abs(ratio), 2.0, tolerance=0.2)
- # Mathematical validation for mid-duration case
idle_penalty_scale = _get_float_param(params, "idle_penalty_scale", 0.5)
idle_penalty_power = _get_float_param(params, "idle_penalty_power", 1.025)
factor = _get_float_param(params, "base_factor", float(base_factor))
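+        # Expected proportionality (from the documented formula): idle_penalty ∝
+        # -(idle_duration / max_idle) ** idle_penalty_power * idle_penalty_scale,
+        # scaled by the idle factor derived from `factor` and the profit target.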
plateau and attenuation functions.
Attributes:
- EPS_SMALL: Small epsilon for tight continuity checks (1e-08)
- EPS_LARGE: Larger epsilon for coarser continuity tests (5e-05)
+ EPS_SMALL: Small epsilon for tight continuity checks (1e-06)
+ EPS_LARGE: Larger epsilon for coarser continuity tests (1e-05)
"""
- EPS_SMALL: float = 1e-08
- EPS_LARGE: float = 5e-05
+ EPS_SMALL: float = 1e-06
+ EPS_LARGE: float = 1e-05
@dataclass(frozen=True)
ratio bounds and power mode constraints.
Attributes:
- SCALING_RATIO_MIN: Minimum expected scaling ratio for continuity (1.5)
- SCALING_RATIO_MAX: Maximum expected scaling ratio for continuity (3.5)
+ SCALING_RATIO_MIN: Minimum expected scaling ratio for continuity (5.0)
+ SCALING_RATIO_MAX: Maximum expected scaling ratio for continuity (15.0)
MIN_POWER_TAU: Minimum valid tau value for power mode (1e-15)
"""
- SCALING_RATIO_MIN: float = 1.5
- SCALING_RATIO_MAX: float = 3.5
+ SCALING_RATIO_MIN: float = 5.0
+ SCALING_RATIO_MAX: float = 15.0
MIN_POWER_TAU: float = 1e-15
EPS_BASE: float = 1e-10
+@dataclass(frozen=True)
+class TestScenarios:
+ """Test scenario parameters and sample sizes.
+
+ Standard values for test scenarios to ensure consistency across the test
+ suite and avoid magic numbers in test implementations.
+
+ Attributes:
+ DURATION_SHORT: Short duration scenario (150)
+ DURATION_MEDIUM: Medium duration scenario (200)
+ DURATION_LONG: Long duration scenario (300)
+ DURATION_SCENARIOS: Standard duration test sequence
+ SAMPLE_SIZE_SMALL: Small sample size for quick tests (100)
+ SAMPLE_SIZE_MEDIUM: Medium sample size for standard tests (400)
+ SAMPLE_SIZE_LARGE: Large sample size for statistical power (800)
+ DEFAULT_SAMPLE_SIZE: Default for most tests (400)
+ PBRS_SIMULATION_STEPS: Number of steps for PBRS simulation tests (500)
+ NULL_HYPOTHESIS_SAMPLE_SIZE: Sample size for null hypothesis tests (400)
+ BOOTSTRAP_MINIMAL_ITERATIONS: Minimal bootstrap iterations for quick tests (25)
+ BOOTSTRAP_STANDARD_ITERATIONS: Standard bootstrap iterations (100)
+ HETEROSCEDASTICITY_MIN_EXITS: Minimum exits for heteroscedasticity validation (50)
+ CORRELATION_TEST_MIN_SIZE: Minimum sample size for correlation tests (200)
+ MONTE_CARLO_ITERATIONS: Monte Carlo simulation iterations (160)
+ """
+
+ DURATION_SHORT: int = 150
+ DURATION_MEDIUM: int = 200
+ DURATION_LONG: int = 300
+ DURATION_SCENARIOS: tuple[int, ...] = (150, 200, 300)
+
+ SAMPLE_SIZE_SMALL: int = 100
+ SAMPLE_SIZE_MEDIUM: int = 400
+ SAMPLE_SIZE_LARGE: int = 800
+ DEFAULT_SAMPLE_SIZE: int = 400
+
+ # Specialized test scenario sizes
+ PBRS_SIMULATION_STEPS: int = 500
+ NULL_HYPOTHESIS_SAMPLE_SIZE: int = 400
+ BOOTSTRAP_MINIMAL_ITERATIONS: int = 25
+ BOOTSTRAP_STANDARD_ITERATIONS: int = 100
+ HETEROSCEDASTICITY_MIN_EXITS: int = 50
+ CORRELATION_TEST_MIN_SIZE: int = 200
+ MONTE_CARLO_ITERATIONS: int = 160
+
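+# Illustrative usage (mirroring the call sites in this change):
+#   from ..constants import SCENARIOS
+#   durations = list(SCENARIOS.DURATION_SCENARIOS)
+#   df = simulate_samples(..., num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE, ...)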
+
+@dataclass(frozen=True)
+class StatisticalTolerances:
+ """Tolerances for statistical metrics and distribution tests.
+
+ These tolerances are used for statistical hypothesis testing, distribution
+ comparison metrics, and other statistical validation operations.
+
+ Attributes:
+ DISTRIBUTION_SHIFT: Tolerance for distribution shift metrics (5e-4)
+ KS_STATISTIC_IDENTITY: KS statistic threshold for identical distributions (5e-3)
+ CORRELATION_SIGNIFICANCE: Minimum correlation for significance (0.1)
+        VARIANCE_RATIO_THRESHOLD: Minimum long-to-short duration PnL variance ratio for heteroscedasticity (0.8)
+        CI_WIDTH_EPSILON: Maximum CI width after degenerate-distribution widening (3e-9)
+ """
+
+ DISTRIBUTION_SHIFT: float = 5e-4
+ KS_STATISTIC_IDENTITY: float = 5e-3
+ CORRELATION_SIGNIFICANCE: float = 0.1
+ VARIANCE_RATIO_THRESHOLD: float = 0.8
+ CI_WIDTH_EPSILON: float = 3e-9
+
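+# Illustrative usage (mirroring the statistics tests in this change):
+#   from ..constants import STAT_TOL
+#   self.assertLess(abs(ks_stat), STAT_TOL.KS_STATISTIC_IDENTITY)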
+
# Global singleton instances for easy import
TOLERANCE: Final[ToleranceConfig] = ToleranceConfig()
CONTINUITY: Final[ContinuityConfig] = ContinuityConfig()
STATISTICAL: Final[StatisticalConfig] = StatisticalConfig()
SEEDS: Final[TestSeeds] = TestSeeds()
PARAMS: Final[TestParameters] = TestParameters()
+SCENARIOS: Final[TestScenarios] = TestScenarios()
+STAT_TOL: Final[StatisticalTolerances] = StatisticalTolerances()
__all__ = [
"StatisticalConfig",
"TestSeeds",
"TestParameters",
+ "TestScenarios",
+ "StatisticalTolerances",
"TOLERANCE",
"CONTINUITY",
"EXIT_FACTOR",
"STATISTICAL",
"SEEDS",
"PARAMS",
+ "SCENARIOS",
+ "STAT_TOL",
]
calculate_reward,
)
+from .configs import RewardScenarioConfig, ThresholdTestConfig, ValidationConfig
+
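+# The `configs` module is not shown in this diff; from the call sites below it is
+# assumed to expose frozen dataclasses with at least these fields:
+#   ValidationConfig(tolerance_strict, tolerance_relaxed, exclude_components, component_description)
+#   RewardScenarioConfig(base_factor, profit_target, risk_reward_ratio, tolerance_relaxed,
+#                        short_allowed=True, action_masking=True)
+#   ThresholdTestConfig(max_duration, test_cases, tolerance)
+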
def safe_float(value: Any, default: float = 0.0) -> float:
"""Coerce value to float safely for test parameter handling.
def assert_component_sum_integrity(
test_case,
breakdown,
- tolerance_relaxed,
- exclude_components=None,
- component_description="components",
+ config: ValidationConfig,
):
"""Assert that reward component sum matches total within tolerance.
Validates the mathematical integrity of reward component decomposition by
ensuring the sum of individual components equals the reported total.
+ Uses ValidationConfig to simplify parameter passing.
Args:
test_case: Test case instance with assertion methods
breakdown: Reward breakdown object with component attributes
- tolerance_relaxed: Numerical tolerance for sum validation
- exclude_components: List of component names to exclude from sum (default: None)
- component_description: Human-readable description for error messages
+ config: ValidationConfig with tolerance and exclusion settings
Components checked (if not excluded):
- hold_penalty
- exit_additive
Example:
- assert_component_sum_integrity(
- self, breakdown, 1e-09,
+ config = ValidationConfig(
+ tolerance_strict=1e-12,
+ tolerance_relaxed=1e-09,
exclude_components=["reward_shaping"],
component_description="core components"
)
+ assert_component_sum_integrity(self, breakdown, config)
"""
- if exclude_components is None:
- exclude_components = []
+ exclude_components = config.exclude_components or []
component_sum = 0.0
if "hold_penalty" not in exclude_components:
component_sum += breakdown.hold_penalty
test_case.assertAlmostEqual(
breakdown.total,
component_sum,
- delta=tolerance_relaxed,
- msg=f"Total should equal sum of {component_description}",
+ delta=config.tolerance_relaxed,
+ msg=f"Total should equal sum of {config.component_description}",
)
def assert_reward_calculation_scenarios(
test_case,
scenarios: List[Tuple[Any, Dict[str, Any], str]],
- base_factor: float,
- profit_target: float,
- risk_reward_ratio: float,
+ config: RewardScenarioConfig,
validation_fn,
- tolerance_relaxed: float,
):
"""Execute and validate multiple reward calculation scenarios.
Runs a batch of reward calculations with different contexts and parameters,
- applying a custom validation function to each result. Reduces test boilerplate
- for scenario-based testing.
+ applying a custom validation function to each result. Uses RewardScenarioConfig
+ to simplify parameter passing and improve maintainability.
Args:
test_case: Test case instance with assertion methods
scenarios: List of (context, params, description) tuples defining test cases
- base_factor: Base scaling factor for reward calculations
- profit_target: Target profit threshold
- risk_reward_ratio: Risk/reward ratio for position sizing
+ config: RewardScenarioConfig with all calculation parameters
validation_fn: Callback function (test_case, breakdown, description, tolerance) -> None
- tolerance_relaxed: Numerical tolerance passed to validation function
Example:
+ config = RewardScenarioConfig(
+ base_factor=90.0,
+ profit_target=0.06,
+ risk_reward_ratio=1.0,
+ tolerance_relaxed=1e-09
+ )
scenarios = [
(idle_context, {}, "idle scenario"),
(exit_context, {"exit_additive": 5.0}, "profitable exit"),
]
assert_reward_calculation_scenarios(
- self, scenarios, 90.0, 0.06, 1.0, my_validation_fn, 1e-09
+ self, scenarios, config, my_validation_fn
)
"""
for context, params, description in scenarios:
breakdown = calculate_reward(
context,
params,
- base_factor=base_factor,
- profit_target=profit_target,
- risk_reward_ratio=risk_reward_ratio,
- short_allowed=True,
- action_masking=True,
+ base_factor=config.base_factor,
+ profit_target=config.profit_target,
+ risk_reward_ratio=config.risk_reward_ratio,
+ short_allowed=config.short_allowed,
+ action_masking=config.action_masking,
)
- validation_fn(test_case, breakdown, description, tolerance_relaxed)
+ validation_fn(test_case, breakdown, description, config.tolerance_relaxed)
def assert_parameter_sensitivity_behavior(
parameter_variations: List[Dict[str, Any]],
base_context,
base_params: Dict[str, Any],
- base_factor: float,
- profit_target: float,
- risk_reward_ratio: float,
component_name: str,
expected_trend: str,
- tolerance_relaxed: float,
+ config: RewardScenarioConfig,
):
"""Validate that a component responds predictably to parameter changes.
Tests component sensitivity by applying parameter variations and verifying
the component value follows the expected trend (increasing, decreasing, or constant).
+ Uses RewardScenarioConfig to simplify parameter passing.
Args:
test_case: Test case instance with assertion methods
parameter_variations: List of parameter dicts to merge with base_params
base_context: Context object for reward calculation
base_params: Base parameter dictionary
- base_factor: Base scaling factor
- profit_target: Target profit threshold
- risk_reward_ratio: Risk/reward ratio
component_name: Name of component to track (e.g., "exit_component")
expected_trend: Expected trend: "increasing", "decreasing", or "constant"
- tolerance_relaxed: Numerical tolerance for trend validation
+ config: RewardScenarioConfig with calculation parameters
Example:
+ config = RewardScenarioConfig(
+ base_factor=90.0,
+ profit_target=0.06,
+ risk_reward_ratio=1.0,
+ tolerance_relaxed=1e-09
+ )
variations = [
{"exit_additive": 0.0},
{"exit_additive": 5.0},
{"exit_additive": 10.0},
]
assert_parameter_sensitivity_behavior(
- self, variations, ctx, params, 90.0, 0.06, 1.0,
- "exit_component", "increasing", 1e-09
+ self, variations, ctx, params, "exit_component", "increasing", config
)
"""
from reward_space_analysis import calculate_reward
breakdown = calculate_reward(
base_context,
params,
- base_factor=base_factor,
- profit_target=profit_target,
- risk_reward_ratio=risk_reward_ratio,
- short_allowed=True,
- action_masking=True,
+ base_factor=config.base_factor,
+ profit_target=config.profit_target,
+ risk_reward_ratio=config.risk_reward_ratio,
+ short_allowed=config.short_allowed,
+ action_masking=config.action_masking,
)
component_value = getattr(breakdown, component_name)
results.append(component_value)
for i in range(1, len(results)):
test_case.assertGreaterEqual(
results[i],
- results[i - 1] - tolerance_relaxed,
+ results[i - 1] - config.tolerance_relaxed,
f"{component_name} should increase with parameter variations",
)
elif expected_trend == "decreasing":
for i in range(1, len(results)):
test_case.assertLessEqual(
results[i],
- results[i - 1] + tolerance_relaxed,
+ results[i - 1] + config.tolerance_relaxed,
f"{component_name} should decrease with parameter variations",
)
elif expected_trend == "constant":
test_case.assertAlmostEqual(
result,
baseline,
- delta=tolerance_relaxed,
+ delta=config.tolerance_relaxed,
msg=f"{component_name} should remain constant with parameter variations",
)
parameter_test_cases: List[Tuple[float, float, str]],
context_factory_fn,
base_params: Dict[str, Any],
- base_factor: float,
- tolerance_relaxed: float,
+ config: RewardScenarioConfig,
):
"""Validate reward behavior across multiple parameter combinations.
Tests reward calculation with various profit_target and risk_reward_ratio
combinations, ensuring consistent behavior including edge cases like
- zero profit_target.
+ zero profit_target. Uses RewardScenarioConfig to simplify parameter passing.
Args:
test_case: Test case instance with assertion methods
parameter_test_cases: List of (profit_target, risk_reward_ratio, description) tuples
context_factory_fn: Factory function for creating context objects
base_params: Base parameter dictionary
- base_factor: Base scaling factor
- tolerance_relaxed: Numerical tolerance for assertions
+ config: RewardScenarioConfig with base calculation parameters
Example:
+ config = RewardScenarioConfig(
+ base_factor=90.0,
+ profit_target=0.06,
+ risk_reward_ratio=1.0,
+ tolerance_relaxed=1e-09
+ )
test_cases = [
(0.0, 1.0, "zero profit target"),
(0.06, 1.0, "standard parameters"),
(0.06, 2.0, "high risk/reward ratio"),
]
assert_multi_parameter_sensitivity(
- self, test_cases, make_context, params, 90.0, 1e-09
+ self, test_cases, make_context, params, config
)
"""
for profit_target, risk_reward_ratio, description in parameter_test_cases:
breakdown = calculate_reward(
idle_context,
base_params,
- base_factor=base_factor,
+ base_factor=config.base_factor,
profit_target=profit_target,
risk_reward_ratio=risk_reward_ratio,
- short_allowed=True,
- action_masking=True,
+ short_allowed=config.short_allowed,
+ action_masking=config.action_masking,
)
if profit_target == 0.0:
test_case.assertEqual(breakdown.idle_penalty, 0.0)
exit_breakdown = calculate_reward(
exit_context,
base_params,
- base_factor=base_factor,
+ base_factor=config.base_factor,
profit_target=profit_target,
risk_reward_ratio=risk_reward_ratio,
- short_allowed=True,
- action_masking=True,
+ short_allowed=config.short_allowed,
+ action_masking=config.action_masking,
)
test_case.assertNotEqual(exit_breakdown.exit_component, 0.0)
def assert_hold_penalty_threshold_behavior(
test_case,
- duration_test_cases: Sequence[Tuple[int, str]],
- max_duration: int,
context_factory_fn,
params: Dict[str, Any],
base_factor: float,
profit_target: float,
risk_reward_ratio: float,
- tolerance_relaxed: float,
+ config: ThresholdTestConfig,
):
"""Validate hold penalty activation at max_duration threshold.
Tests that hold penalty is zero before max_duration, then becomes
- negative (penalty) at and after the threshold. Critical for verifying
- threshold-based penalty logic.
+ negative (penalty) at and after the threshold. Uses ThresholdTestConfig
+ to simplify parameter passing.
Args:
test_case: Test case instance with assertion methods
- duration_test_cases: List of (trade_duration, description) tuples to test
- max_duration: Maximum duration threshold for penalty activation
context_factory_fn: Factory function for creating context objects
params: Parameter dictionary
base_factor: Base scaling factor
profit_target: Target profit threshold
risk_reward_ratio: Risk/reward ratio
- tolerance_relaxed: Numerical tolerance for assertions
+ config: ThresholdTestConfig with threshold settings
Example:
- test_cases = [
- (50, "below threshold"),
- (100, "at threshold"),
- (150, "above threshold"),
- ]
+ config = ThresholdTestConfig(
+ max_duration=100,
+ test_cases=[
+ (50, "below threshold"),
+ (100, "at threshold"),
+ (150, "above threshold"),
+ ],
+ tolerance=1e-09
+ )
assert_hold_penalty_threshold_behavior(
- self, test_cases, 100, make_context, params, 90.0, 0.06, 1.0, 1e-09
+ self, make_context, params, 90.0, 0.06, 1.0, config
)
"""
- for trade_duration, description in duration_test_cases:
+ for trade_duration, description in config.test_cases:
with test_case.subTest(duration=trade_duration, desc=description):
context = context_factory_fn(trade_duration=trade_duration)
breakdown = calculate_reward(
short_allowed=True,
action_masking=True,
)
- duration_ratio = trade_duration / max_duration
+ duration_ratio = trade_duration / config.max_duration
if duration_ratio < 1.0:
test_case.assertEqual(breakdown.hold_penalty, 0.0)
elif duration_ratio == 1.0:
from reward_space_analysis import PBRS_INVARIANCE_TOL, write_complete_statistical_analysis
+from ..constants import SCENARIOS
from ..test_base import RewardSpaceTestBase
real_df=real_df,
adjust_method="none",
strict_diagnostics=False,
- bootstrap_resamples=200, # keep test fast
+ bootstrap_resamples=SCENARIOS.BOOTSTRAP_STANDARD_ITERATIONS, # keep test fast
skip_partial_dependence=kwargs.get("skip_partial_dependence", False),
skip_feature_analysis=kwargs.get("skip_feature_analysis", False),
)
# ---------------- Potential transform mechanics ---------------- #
def test_pbrs_progressive_release_decay_clamped(self):
- """progressive_release decay>1 clamps -> Φ'=0 & Δ=-Φ_prev."""
+ """Verifies progressive_release mode with decay>1 clamps potential to zero."""
params = self.DEFAULT_PARAMS.copy()
params.update(
{
)
def test_pbrs_spike_cancel_invariance(self):
- """spike_cancel terminal shaping ≈0 (Φ' inversion yields cancellation)."""
+ """Verifies spike_cancel mode produces near-zero terminal shaping."""
params = self.DEFAULT_PARAMS.copy()
params.update(
{
def test_canonical_invariance_flag_and_sum(self):
"""Canonical mode + no additives -> invariant flags True and Σ shaping ≈ 0."""
+ from ..constants import SCENARIOS
+
params = self.base_params(
exit_potential_mode="canonical",
entry_additive_enabled=False,
)
df = simulate_samples(
params={**params, "max_trade_duration_candles": 100},
- num_samples=400,
+ num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE,
seed=self.SEED,
base_factor=self.TEST_BASE_FACTOR,
profit_target=self.TEST_PROFIT_TARGET,
def test_non_canonical_flag_false_and_sum_nonzero(self):
"""Non-canonical mode -> invariant flags False and Σ shaping significantly non-zero."""
+ from ..constants import SCENARIOS
+
params = self.base_params(
exit_potential_mode="progressive_release",
exit_potential_decay=0.25,
)
df = simulate_samples(
params={**params, "max_trade_duration_candles": 100},
- num_samples=400,
+ num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE,
seed=self.SEED,
base_factor=self.TEST_BASE_FACTOR,
profit_target=self.TEST_PROFIT_TARGET,
# ---------------- Additives and canonical path mechanics ---------------- #
def test_additive_components_disabled_return_zero(self):
- """Entry/exit additives return zero when disabled."""
+ """Verifies entry/exit additives return zero when disabled."""
params_entry = {"entry_additive_enabled": False, "entry_additive_scale": 1.0}
val_entry = _compute_entry_additive(0.5, 0.3, params_entry)
self.assertEqual(float(val_entry), 0.0)
self.assertEqual(float(val_exit), 0.0)
def test_exit_potential_canonical(self):
- """Canonical exit resets potential; additives auto-disabled."""
+ """Verifies canonical exit resets potential and auto-disables additives."""
params = self.base_params(
exit_potential_mode="canonical",
hold_potential_enabled=True,
self.assertTrue(np.isfinite(total))
def test_pbrs_invariance_internal_flag_set(self):
- """Canonical path sets _pbrs_invariance_applied once; second call idempotent."""
+ """Verifies canonical path sets _pbrs_invariance_applied flag (idempotent)."""
params = self.base_params(
exit_potential_mode="canonical",
hold_potential_enabled=True,
)
def test_progressive_release_negative_decay_clamped(self):
- """Negative decay clamps: next potential equals last potential (no release)."""
+ """Verifies negative decay clamping: next potential equals last potential."""
params = self.base_params(
exit_potential_mode="progressive_release",
exit_potential_decay=-0.75,
self.assertPlacesEqual(total, shaping, places=12)
def test_potential_gamma_nan_fallback(self):
- """potential_gamma=NaN falls back to default value (indirect comparison)."""
+ """Verifies potential_gamma=NaN fallback to default value."""
base_params_dict = self.base_params()
default_gamma = base_params_dict.get("potential_gamma", 0.95)
params_nan = self.base_params(potential_gamma=np.nan, hold_potential_enabled=True)
def test_validate_reward_parameters_batch_and_relaxed_aggregation(self):
"""Batch validate strict failures + relaxed multi-reason aggregation via helpers."""
- # Build strict failure cases
strict_failures = [
build_validation_case({"potential_gamma": -0.2}, strict=True, expect_error=True),
build_validation_case({"hold_potential_scale": -5.0}, strict=True, expect_error=True),
]
- # Success default (strict) case
success_case = build_validation_case({}, strict=True, expect_error=False)
- # Relaxed multi-reason aggregation case
relaxed_case = build_validation_case(
{
"potential_gamma": "not-a-number",
"derived_default",
],
)
- # Execute batch (strict successes + failures + relaxed case)
execute_validation_batch(
self,
[success_case] + strict_failures + [relaxed_case],
validate_reward_parameters,
)
- # Explicit aggregation assertions for relaxed case using helper
params_relaxed = DEFAULT_MODEL_REWARD_PARAMETERS.copy()
params_relaxed.update(
{
# Owns invariant: pbrs-canonical-drift-correction-106
def test_pbrs_106_canonical_drift_correction_zero_sum(self):
"""Invariant 106: canonical mode enforces near zero-sum shaping (drift correction)."""
+ from ..constants import SCENARIOS
+
params = self.base_params(
exit_potential_mode="canonical",
hold_potential_enabled=True,
)
df = simulate_samples(
params={**params, "max_trade_duration_candles": 140},
- num_samples=500,
+            num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE,  # 400 (reduced from the original 500)
seed=913,
base_factor=self.TEST_BASE_FACTOR,
profit_target=self.TEST_PROFIT_TARGET,
# Owns invariant (comparison path): pbrs-canonical-drift-correction-106
def test_pbrs_106_canonical_drift_correction_uniform_offset(self):
"""Canonical drift correction reduces Σ shaping below tolerance vs non-canonical."""
+ from ..constants import SCENARIOS
+
params_can = self.base_params(
exit_potential_mode="canonical",
hold_potential_enabled=True,
)
df_can = simulate_samples(
params={**params_can, "max_trade_duration_candles": 120},
- num_samples=400,
+ num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE,
seed=777,
base_factor=self.TEST_BASE_FACTOR,
profit_target=self.TEST_PROFIT_TARGET,
)
df_non = simulate_samples(
params={**params_non, "max_trade_duration_candles": 120},
- num_samples=400,
+ num_samples=SCENARIOS.DEFAULT_SAMPLE_SIZE,
seed=777,
base_factor=self.TEST_BASE_FACTOR,
profit_target=self.TEST_PROFIT_TARGET,
def test_report_cumulative_invariance_aggregation(self):
"""Canonical telescoping term: small per-step mean drift, bounded increments."""
+ from ..constants import SCENARIOS
+
params = self.base_params(
hold_potential_enabled=True,
entry_additive_enabled=False,
telescoping_sum = 0.0
max_abs_step = 0.0
steps = 0
- for _ in range(500):
+ for _ in range(SCENARIOS.PBRS_SIMULATION_STEPS):
is_exit = rng.uniform() < 0.1
current_pnl = float(rng.normal(0, 0.05))
current_dur = float(rng.uniform(0, 1))
def test_report_explicit_non_invariance_progressive_release(self):
"""progressive_release cumulative shaping non-zero (release leak)."""
+ from ..constants import SCENARIOS
+
params = self.base_params(
hold_potential_enabled=True,
entry_additive_enabled=False,
rng = np.random.default_rng(321)
last_potential = 0.0
shaping_sum = 0.0
- for _ in range(160):
+ for _ in range(SCENARIOS.MONTE_CARLO_ITERATIONS):
is_exit = rng.uniform() < 0.15
next_pnl = 0.0 if is_exit else float(rng.normal(0, 0.07))
next_dur = 0.0 if is_exit else float(rng.uniform(0, 1))
from reward_space_analysis import PBRS_INVARIANCE_TOL
+ from ..constants import SCENARIOS
+
small_vals = [1.0e-7, -2.0e-7, 3.0e-7] # sum = 2.0e-7 < tolerance
total_shaping = float(sum(small_vals))
self.assertLess(
seed=self.SEED,
skip_feature_analysis=True,
skip_partial_dependence=True,
- bootstrap_resamples=25,
+ bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS,
)
report_path = out_dir / "statistical_analysis.md"
self.assertTrue(report_path.exists(), "Report file missing for canonical near-zero test")
from reward_space_analysis import PBRS_INVARIANCE_TOL
+ from ..constants import SCENARIOS
+
shaping_vals = [1.2e-4, 1.3e-4, 8.0e-5, -2.0e-5, 1.4e-4] # sum = 4.5e-4 (> tol)
total_shaping = sum(shaping_vals)
self.assertGreater(abs(total_shaping), PBRS_INVARIANCE_TOL)
seed=self.SEED,
skip_feature_analysis=True,
skip_partial_dependence=True,
- bootstrap_resamples=50,
+ bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS * 2,
)
report_path = out_dir / "statistical_analysis.md"
self.assertTrue(report_path.exists(), "Report file missing for canonical warning test")
"""Full report: Non-canonical classification aggregates mode + additives reasons."""
import pandas as pd
+ from ..constants import SCENARIOS
+
shaping_vals = [0.02, -0.005, 0.007]
entry_add_vals = [0.003, 0.0, 0.004]
exit_add_vals = [0.001, 0.002, 0.0]
seed=self.SEED,
skip_feature_analysis=True,
skip_partial_dependence=True,
- bootstrap_resamples=25,
+ bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS,
)
report_path = out_dir / "statistical_analysis.md"
self.assertTrue(
from reward_space_analysis import PBRS_INVARIANCE_TOL
+ from ..constants import SCENARIOS
+
shaping_vals = [0.002, -0.0005, 0.0012]
total_shaping = sum(shaping_vals)
self.assertGreater(abs(total_shaping), PBRS_INVARIANCE_TOL)
seed=self.SEED,
skip_feature_analysis=True,
skip_partial_dependence=True,
- bootstrap_resamples=25,
+ bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS,
)
report_path = out_dir / "statistical_analysis.md"
self.assertTrue(
out_dir = self.output_path / "pbrs_absence_and_shift_placeholder"
import reward_space_analysis as rsa
+ from ..constants import SCENARIOS
+
original_compute_summary_stats = rsa._compute_summary_stats
def _minimal_summary_stats(_df):
seed=self.SEED,
skip_feature_analysis=True,
skip_partial_dependence=True,
- bootstrap_resamples=10,
+ bootstrap_resamples=SCENARIOS.BOOTSTRAP_MINIMAL_ITERATIONS // 2,
)
finally:
rsa._compute_summary_stats = original_compute_summary_stats
validate_reward_parameters,
)
-from ..helpers import run_strict_validation_failure_cases
+from ..helpers import (
+ assert_exit_factor_invariant_suite,
+ run_relaxed_validation_adjustment_cases,
+ run_strict_validation_failure_cases,
+)
class _PyTestAdapter(unittest.TestCase):
run_strict_validation_failure_cases(adapter, failure_params, validate_reward_parameters)
-from ..helpers import run_relaxed_validation_adjustment_cases
-
-
@pytest.mark.robustness
def test_validate_reward_parameters_relaxed_adjustment_batch():
"""Batch relaxed validation adjustment scenarios using shared helper."""
assert penalty == 0.0
-from ..helpers import assert_exit_factor_invariant_suite
-
-
@pytest.mark.robustness
def test_exit_factor_invariant_suite_grouped():
"""Grouped exit factor invariant scenarios using shared helper."""
import math
import unittest
-import warnings
import numpy as np
import pytest
Actions,
Positions,
RewardContext,
- RewardDiagnosticsWarning,
_get_exit_factor,
calculate_reward,
simulate_samples,
)
from ..helpers import (
+ assert_diagnostic_warning,
assert_exit_factor_attenuation_modes,
assert_exit_mode_mathematical_validation,
assert_single_active_component_with_additives,
+ capture_warnings,
)
from ..test_base import RewardSpaceTestBase
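+# Assumed helper semantics (replacing the inline warnings.catch_warnings blocks removed
+# below): capture_warnings() records warnings with simplefilter("always");
+# assert_diagnostic_warning(substrings) asserts that a RewardDiagnosticsWarning whose
+# message contains the given substrings is emitted inside the block.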
position=Positions.Long,
action=Actions.Long_exit,
)
- with warnings.catch_warnings(record=True) as caught:
- warnings.simplefilter("always")
+ with capture_warnings() as caught:
baseline = calculate_reward(
context,
params,
pnl = 0.05
pnl_factor = 1.0
duration_ratio = 0.8
- with warnings.catch_warnings(record=True) as caught:
- warnings.simplefilter("always", RewardDiagnosticsWarning)
+ with assert_diagnostic_warning(["Unknown exit_attenuation_mode"]):
f_unknown = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params)
linear_params = self.base_params(exit_attenuation_mode="linear", exit_plateau=False)
f_linear = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, linear_params)
tolerance=self.TOL_IDENTITY_RELAXED,
msg=f"Fallback linear mismatch unknown={f_unknown} linear={f_linear}",
)
- diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)]
- self.assertTrue(
- diag_warnings, "No RewardDiagnosticsWarning emitted for unknown mode fallback"
- )
- self.assertTrue(
- any("Unknown exit_attenuation_mode" in str(w.message) for w in diag_warnings),
- "Fallback warning message content mismatch",
- )
# Owns invariant: robustness-negative-grace-clamp-103
def test_robustness_103_negative_plateau_grace_clamped(self):
pnl = 0.03
pnl_factor = 1.0
duration_ratio = 0.5
- with warnings.catch_warnings(record=True) as caught:
- warnings.simplefilter("always", RewardDiagnosticsWarning)
+ with assert_diagnostic_warning(["exit_plateau_grace < 0"]):
f_neg = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params)
# Reference with grace=0.0 (since negative should clamp)
ref_params = self.base_params(
tolerance=self.TOL_IDENTITY_RELAXED,
msg=f"Negative grace clamp mismatch f_neg={f_neg} f_ref={f_ref}",
)
- diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)]
- self.assertTrue(diag_warnings, "No RewardDiagnosticsWarning for negative grace")
- self.assertTrue(
- any("exit_plateau_grace < 0" in str(w.message) for w in diag_warnings),
- "Warning content missing for negative grace clamp",
- )
# Owns invariant: robustness-invalid-power-tau-104
def test_robustness_104_invalid_power_tau_fallback_alpha_one(self):
params = self.base_params(
exit_attenuation_mode="power", exit_power_tau=tau, exit_plateau=False
)
- with warnings.catch_warnings(record=True) as caught:
- warnings.simplefilter("always", RewardDiagnosticsWarning)
+ with assert_diagnostic_warning(["exit_power_tau"]):
f0 = _get_exit_factor(base_factor, pnl, pnl_factor, 0.0, params)
f1 = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params)
- diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)]
- self.assertTrue(diag_warnings, f"No RewardDiagnosticsWarning for invalid tau={tau}")
- self.assertTrue(any("exit_power_tau" in str(w.message) for w in diag_warnings))
ratio = f1 / max(f0, self.TOL_NUMERIC_GUARD)
self.assertAlmostEqual(
ratio,
near_zero_values = [1e-15, 1e-12, 5e-14]
for hl in near_zero_values:
params = self.base_params(exit_attenuation_mode="half_life", exit_half_life=hl)
- with warnings.catch_warnings(record=True) as caught:
- warnings.simplefilter("always", RewardDiagnosticsWarning)
+ with assert_diagnostic_warning(["exit_half_life", "close to 0"]):
_ = _get_exit_factor(base_factor, pnl, pnl_factor, 0.0, params)
fdr = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params)
- diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)]
- self.assertTrue(
- diag_warnings, f"No RewardDiagnosticsWarning for near-zero half-life hl={hl}"
- )
- self.assertTrue(
- any(
- "exit_half_life" in str(w.message) and "close to 0" in str(w.message)
- for w in diag_warnings
- )
- )
self.assertAlmostEqualFloat(
fdr,
1.0 * pnl_factor, # Kernel returns 1.0 then * pnl_factor
"""Statistical tests, distribution metrics, and bootstrap validation."""
import unittest
-import warnings
import numpy as np
import pandas as pd
statistical_hypothesis_tests,
)
+from ..helpers import assert_diagnostic_warning
from ..test_base import RewardSpaceTestBase
pytestmark = pytest.mark.statistics
float(metrics[p_key]), 1.0, places=12, msg=f"Expected 1.0 for {p_key}"
)
- def _make_idle_variance_df(self, n: int = 100) -> pd.DataFrame:
- """Synthetic dataframe focusing on idle_duration ↔ reward_idle correlation."""
- self.seed_all(self.SEED)
- idle_duration = np.random.exponential(10, n)
- reward_idle = -0.01 * idle_duration + np.random.normal(0, 0.001, n)
- return pd.DataFrame(
- {
- "idle_duration": idle_duration,
- "reward_idle": reward_idle,
- "position": np.random.choice([0.0, 0.5, 1.0], n),
- "reward": np.random.normal(0, 1, n),
- "pnl": np.random.normal(0, self.TEST_PNL_STD, n),
- "trade_duration": np.random.exponential(20, n),
- }
- )
-
def test_statistics_distribution_shift_metrics(self):
"""KL/JS/Wasserstein metrics."""
df1 = self._make_idle_variance_df(100)
f"Metric {name} expected ≈ 0 on identical distributions (got {val})",
)
elif name.endswith("_ks_statistic"):
+ from ..constants import STAT_TOL
+
self.assertLess(
abs(val),
- 0.005,
+ STAT_TOL.KS_STATISTIC_IDENTITY,
f"KS statistic should be near 0 on identical distributions (got {val})",
)
def test_stats_variance_vs_duration_spearman_sign(self):
"""trade_duration up => pnl variance up (rank corr >0)."""
+ from ..constants import SCENARIOS, STAT_TOL
+
rng = np.random.default_rng(99)
n = 250
- trade_duration = np.linspace(1, 300, n)
+ trade_duration = np.linspace(1, SCENARIOS.DURATION_LONG, n)
pnl = rng.normal(0, 1 + trade_duration / 400.0, n)
ranks_dur = pd.Series(trade_duration).rank().to_numpy()
ranks_var = pd.Series(np.abs(pnl)).rank().to_numpy()
rho = np.corrcoef(ranks_dur, ranks_var)[0, 1]
self.assertFinite(rho, name="spearman_rho")
- self.assertGreater(rho, 0.1)
+ self.assertGreater(rho, STAT_TOL.CORRELATION_SIGNIFICANCE)
def test_stats_scaling_invariance_distribution_metrics(self):
"""Equal scaling keeps KL/JS ≈0."""
- df1 = self._shift_scale_df(400)
+ from ..constants import SCENARIOS, STAT_TOL
+
+ df1 = self._shift_scale_df(SCENARIOS.DEFAULT_SAMPLE_SIZE)
scale = 3.5
df2 = df1.copy()
df2["pnl"] *= scale
if k.endswith("_kl_divergence") or k.endswith("_js_distance"):
self.assertLess(
abs(v),
- 0.0005,
+ STAT_TOL.DISTRIBUTION_SHIFT,
f"Expected near-zero divergence after equal scaling (k={k}, v={v})",
)
def test_stats_bh_correction_null_false_positive_rate(self):
"""Null: low BH discovery rate."""
+ from ..constants import SCENARIOS
+
rng = np.random.default_rng(1234)
- n = 400
+ n = SCENARIOS.NULL_HYPOTHESIS_SAMPLE_SIZE
df = pd.DataFrame(
{
"pnl": rng.normal(0, 1, n),
def test_stats_heteroscedasticity_pnl_validation(self):
"""PnL variance increases with trade duration (heteroscedasticity)."""
+ from ..constants import SCENARIOS
+
df = simulate_samples(
params=self.base_params(max_trade_duration_candles=100),
- num_samples=1000,
+            num_samples=SCENARIOS.SAMPLE_SIZE_LARGE + 200,  # 1000
seed=self.SEED_HETEROSCEDASTICITY,
base_factor=self.TEST_BASE_FACTOR,
profit_target=self.TEST_PROFIT_TARGET,
pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
)
exit_data = df[df["reward_exit"] != 0].copy()
- if len(exit_data) < 50:
+ if len(exit_data) < SCENARIOS.HETEROSCEDASTICITY_MIN_EXITS:
self.skipTest("Insufficient exit actions for heteroscedasticity test")
exit_data["duration_bin"] = pd.cut(
exit_data["duration_ratio"], bins=4, labels=["Q1", "Q2", "Q3", "Q4"]
)
variance_by_bin = exit_data.groupby("duration_bin")["pnl"].var().dropna()
if "Q1" in variance_by_bin.index and "Q4" in variance_by_bin.index:
+ from ..constants import STAT_TOL
+
self.assertGreater(
variance_by_bin["Q4"],
- variance_by_bin["Q1"] * 0.8,
+ variance_by_bin["Q1"] * STAT_TOL.VARIANCE_RATIO_THRESHOLD,
"PnL heteroscedasticity: variance should increase with duration",
)
def test_stats_benjamini_hochberg_adjustment(self):
"""BH adjustment adds p_value_adj & significant_adj with valid bounds."""
+ from ..constants import SCENARIOS
+
df = simulate_samples(
params=self.base_params(max_trade_duration_candles=100),
- num_samples=600,
+            num_samples=SCENARIOS.SAMPLE_SIZE_LARGE - 200,  # 600
seed=self.SEED_HETEROSCEDASTICITY,
base_factor=self.TEST_BASE_FACTOR,
profit_target=self.TEST_PROFIT_TARGET,
def test_stats_bootstrap_shrinkage_with_sample_size(self):
"""Bootstrap CI half-width decreases with larger sample (~1/sqrt(n) heuristic)."""
- small = self._shift_scale_df(80)
- large = self._shift_scale_df(800)
+ from ..constants import SCENARIOS
+
+ small = self._shift_scale_df(SCENARIOS.SAMPLE_SIZE_SMALL - 20)
+ large = self._shift_scale_df(SCENARIOS.SAMPLE_SIZE_LARGE)
res_small = bootstrap_confidence_intervals(small, ["reward"], n_bootstrap=400)
res_large = bootstrap_confidence_intervals(large, ["reward"], n_bootstrap=400)
_, lo_s, hi_s = list(res_small.values())[0]
"""Invariant 113 (non-strict): constant distribution CI widened with warning (positive epsilon width)."""
df = self._const_df(80)
- with warnings.catch_warnings(record=True) as caught:
- warnings.simplefilter("always", RewardDiagnosticsWarning)
+ with assert_diagnostic_warning(
+ ["degenerate", "bootstrap", "CI"],
+ warning_category=RewardDiagnosticsWarning,
+ strict_mode=False,
+ ):
res = bootstrap_confidence_intervals(
df,
["reward", "pnl"],
confidence_level=0.95,
strict_diagnostics=False,
)
- diag_warnings = [w for w in caught if issubclass(w.category, RewardDiagnosticsWarning)]
- self.assertTrue(
- diag_warnings,
- "Expected RewardDiagnosticsWarning for degenerate bootstrap CI widening",
- )
for _metric, (mean, lo, hi) in res.items():
self.assertLess(
lo,
)
width = hi - lo
self.assertGreater(width, 0.0)
- self.assertLessEqual(width, 3e-09, "Width should be small epsilon range (<=3e-9)")
+ from ..constants import STAT_TOL
+
+ self.assertLessEqual(
+ width, STAT_TOL.CI_WIDTH_EPSILON, "Width should be small epsilon range"
+ )
# Mean should be centered (approx) within widened bounds
self.assertGreaterEqual(mean, lo)
self.assertLessEqual(mean, hi)
apply_potential_shaping,
)
+from .constants import (
+ CONTINUITY,
+ EXIT_FACTOR,
+ PBRS,
+ TOLERANCE,
+)
+
# Global constants
PBRS_INTEGRATION_PARAMS = [
"potential_gamma",
"""Clean up temporary files."""
shutil.rmtree(self.temp_dir, ignore_errors=True)
- PBRS_TERMINAL_TOL = 1e-12
- PBRS_MAX_ABS_SHAPING = 5.0
+ # ===============================================
+ # Constants imported from tests.constants module
+ # ===============================================
+
+ # Tolerance constants
+ TOL_IDENTITY_STRICT = TOLERANCE.IDENTITY_STRICT
+ TOL_IDENTITY_RELAXED = TOLERANCE.IDENTITY_RELAXED
+ TOL_GENERIC_EQ = TOLERANCE.GENERIC_EQ
+ TOL_NUMERIC_GUARD = TOLERANCE.NUMERIC_GUARD
+ TOL_NEGLIGIBLE = TOLERANCE.NEGLIGIBLE
+ TOL_RELATIVE = TOLERANCE.RELATIVE
+ TOL_DISTRIB_SHAPE = TOLERANCE.DISTRIB_SHAPE
+
+ # PBRS constants
+ PBRS_TERMINAL_TOL = PBRS.TERMINAL_TOL
+ PBRS_MAX_ABS_SHAPING = PBRS.MAX_ABS_SHAPING
+
+ # Continuity constants
+ CONTINUITY_EPS_SMALL = CONTINUITY.EPS_SMALL
+ CONTINUITY_EPS_LARGE = CONTINUITY.EPS_LARGE
+
+ # Exit factor constants
+ MIN_EXIT_POWER_TAU = EXIT_FACTOR.MIN_POWER_TAU
+
+ # Test-specific constants (not in constants.py)
PBRS_TERMINAL_PROB = 0.08
PBRS_SWEEP_ITER = 120
- EPS_BASE = 1e-12
- TOL_NUMERIC_GUARD = EPS_BASE
- TOL_IDENTITY_STRICT = EPS_BASE
- TOL_IDENTITY_RELAXED = 1e-09
- TOL_GENERIC_EQ = 1e-06
- TOL_NEGLIGIBLE = 1e-08
- MIN_EXIT_POWER_TAU = 1e-06
- TOL_DISTRIB_SHAPE = 0.05
+ EPS_BASE = TOLERANCE.IDENTITY_STRICT # Alias for backward compatibility
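+    # sqrt(ln 2): theoretical upper bound of the Jensen-Shannon distance (natural-log
+    # base), attained for fully disjoint distributions.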
JS_DISTANCE_UPPER_BOUND = math.sqrt(math.log(2.0))
- TOL_RELATIVE = 1e-09
- CONTINUITY_EPS_SMALL = 0.0001
- CONTINUITY_EPS_LARGE = 0.001
def make_ctx(
self,
"idle_duration": rng.exponential(10, n),
}
)
+
+ def _make_idle_variance_df(self, n: int = 100) -> pd.DataFrame:
+ """Synthetic dataframe focusing on idle_duration ↔ reward_idle correlation."""
+ self.seed_all(self.SEED)
+ idle_duration = np.random.exponential(10, n)
+ reward_idle = -0.01 * idle_duration + np.random.normal(0, 0.001, n)
+ return pd.DataFrame(
+ {
+ "idle_duration": idle_duration,
+ "reward_idle": reward_idle,
+ "position": np.random.choice([0.0, 0.5, 1.0], n),
+ "reward": np.random.normal(0, 1, n),
+ "pnl": np.random.normal(0, self.TEST_PNL_STD, n),
+ "trade_duration": np.random.exponential(20, n),
+ }
+ )