# Mathematical constants pre-computed for performance
_LOG_2 = math.log(2.0)
+
DEFAULT_IDLE_DURATION_MULTIPLIER = 4
-# Tolerance for PBRS invariance classification (canonical if |Σ shaping| < PBRS_INVARIANCE_TOL)
+# Tolerance for PBRS invariance classification.
+#
+# When `reward_invariance_correction` is available (reward_shaping - reward_pbrs_delta),
+# canonical PBRS should satisfy max|correction| < PBRS_INVARIANCE_TOL.
+#
+# When that diagnostic column is not available (e.g., reporting from partial datasets),
+# we fall back to the weaker heuristic |Σ shaping| < PBRS_INVARIANCE_TOL.
PBRS_INVARIANCE_TOL: float = 1e-6
# Default discount factor γ for potential-based reward shaping
POTENTIAL_GAMMA_DEFAULT: float = 0.95
pnl_target = float(profit_aim * risk_reward_ratio)
- idle_factor = factor * pnl_target / 4.0
+ idle_factor = factor * (profit_aim / 4.0)
hold_factor = idle_factor
max_trade_duration_candles = _get_int_param(
return breakdown
+def _duration_hazard_probability(
+    *,
+    duration_ratio: float,
+    base_probability: float,
+    overtime_multiplier: float,
+    max_probability: float,
+) -> float:
+    """Compute a bounded hazard probability keyed on a duration ratio.
+
+    Behavior:
+    - duration_ratio <= 1 -> returns base_probability
+    - duration_ratio > 1 -> increases linearly with overtime
+    - duration_ratio NaN/inf -> falls back to base_probability (clipped)
+
+    All results are clipped to [0.0, max_probability].
+
+    Notes
+    -----
+    This is used for:
+    - exit probability when holding past max trade duration
+    - entry probability when idling past max idle duration
+    """
+
+    # Non-finite ratios (e.g. from a degenerate duration cap) degrade
+    # gracefully to the clipped base rate instead of propagating NaN weights.
+    if not np.isfinite(duration_ratio):
+        return float(np.clip(base_probability, 0.0, max_probability))
+
+    # Only the fraction of the budget exceeded ("overtime") raises the hazard.
+    overtime = max(0.0, float(duration_ratio - 1.0))
+    probability = base_probability * (1.0 + overtime_multiplier * overtime)
+    return float(np.clip(probability, 0.0, max_probability))
+
+
+# Sampling heuristics for the synthetic episode generator.
+# Base per-step entry probability when shorting is allowed (margin/futures).
+_SAMPLE_ENTRY_PROBABILITY_MARGIN = 0.4
+# Base per-step entry probability in long-only (spot) mode.
+_SAMPLE_ENTRY_PROBABILITY_SPOT = 0.3
+# Linear hazard growth per unit of overtime (duration_ratio - 1).
+_SAMPLE_DURATION_HAZARD_OVERTIME_MULTIPLIER = 4.0
+# Hard cap applied to any hazard probability.
+_SAMPLE_DURATION_HAZARD_MAX_PROBABILITY = 0.9
+# Clipping bounds for the base exit probability (~1 / max_trade_duration).
+_SAMPLE_EXIT_PROBABILITY_MIN = 0.002
+_SAMPLE_EXIT_PROBABILITY_MAX = 0.2
+
+
+def _sampling_probabilities(
+    position: Positions,
+    *,
+    short_allowed: bool,
+    trade_duration: int,
+    max_trade_duration_candles: int,
+    idle_duration: int,
+    max_idle_duration_candles: int,
+) -> tuple[float, float, float]:
+    """Return (entry_prob, exit_prob, neutral_prob) for the current state.
+
+    Probabilities that do not apply to the current position are NaN so
+    downstream diagnostics can filter them with dropna():
+    - Neutral position -> (entry_prob, NaN, neutral_prob)
+    - Long/Short position -> (NaN, exit_prob, NaN)
+    """
+    if position == Positions.Neutral:
+        base_entry_prob = (
+            _SAMPLE_ENTRY_PROBABILITY_MARGIN if short_allowed else _SAMPLE_ENTRY_PROBABILITY_SPOT
+        )
+        # max(1, ...) guards against a zero/negative idle duration cap.
+        idle_ratio = idle_duration / max(1, int(max_idle_duration_candles))
+        entry_prob = _duration_hazard_probability(
+            duration_ratio=idle_ratio,
+            base_probability=base_entry_prob,
+            overtime_multiplier=_SAMPLE_DURATION_HAZARD_OVERTIME_MULTIPLIER,
+            max_probability=_SAMPLE_DURATION_HAZARD_MAX_PROBABILITY,
+        )
+        neutral_prob = max(0.0, 1.0 - entry_prob)
+        return float(entry_prob), float("nan"), float(neutral_prob)
+
+    duration_ratio = _compute_duration_ratio(trade_duration, max_trade_duration_candles)
+
+    # Base hazard scales inversely with the duration cap, clipped to sane bounds.
+    base_exit_prob = 1.0 / max(1, int(max_trade_duration_candles))
+    base_exit_prob = float(
+        np.clip(base_exit_prob, _SAMPLE_EXIT_PROBABILITY_MIN, _SAMPLE_EXIT_PROBABILITY_MAX)
+    )
+
+    exit_prob = _duration_hazard_probability(
+        duration_ratio=duration_ratio,
+        base_probability=base_exit_prob,
+        overtime_multiplier=_SAMPLE_DURATION_HAZARD_OVERTIME_MULTIPLIER,
+        max_probability=_SAMPLE_DURATION_HAZARD_MAX_PROBABILITY,
+    )
+    return float("nan"), float(exit_prob), float("nan")
+
+
def _sample_action(
    position: Positions,
    rng: random.Random,
    *,
    short_allowed: bool,
-) -> Actions:
+    trade_duration: int,
+    max_trade_duration_candles: int,
+    idle_duration: int,
+    max_idle_duration_candles: int,
+) -> tuple[Actions, float, float, float]:
+    """Sample one synthetic action from the duration-aware hazard model.
+
+    Returns (action, entry_prob, exit_prob, neutral_prob). Probabilities not
+    applicable to the current position are NaN (see _sampling_probabilities).
+    """
+    entry_prob, exit_prob, neutral_prob = _sampling_probabilities(
+        position,
+        short_allowed=short_allowed,
+        trade_duration=trade_duration,
+        max_trade_duration_candles=max_trade_duration_candles,
+        idle_duration=idle_duration,
+        max_idle_duration_candles=max_idle_duration_candles,
+    )
+
    if position == Positions.Neutral:
        if short_allowed:
            choices = [Actions.Neutral, Actions.Long_enter, Actions.Short_enter]
-            weights = [0.6, 0.2, 0.2]
+            # Entry mass is split evenly between long and short entries.
+            weights = [neutral_prob, entry_prob * 0.5, entry_prob * 0.5]
        else:
            choices = [Actions.Neutral, Actions.Long_enter]
-            weights = [0.7, 0.3]
-    elif position == Positions.Long:
+            weights = [neutral_prob, entry_prob]
+        action = rng.choices(choices, weights=weights, k=1)[0]
+        return action, entry_prob, exit_prob, neutral_prob
+
+    if position == Positions.Long:
        choices = [Actions.Neutral, Actions.Long_exit]
-        weights = [0.55, 0.45]
    else:  # Positions.Short
        choices = [Actions.Neutral, Actions.Short_exit]
-        weights = [0.55, 0.45]
-    return rng.choices(choices, weights=weights, k=1)[0]
+
+    # In-position states only choose between holding and exiting.
+    weights = [1.0 - exit_prob, exit_prob]
+    action = rng.choices(choices, weights=weights, k=1)[0]
+    return action, entry_prob, exit_prob, neutral_prob
def parse_overrides(overrides: Iterable[str]) -> RewardParams:
max_unrealized_profit = 0.0
min_unrealized_profit = 0.0
- action = _sample_action(position, rng, short_allowed=short_allowed)
+ action, sample_entry_prob, sample_exit_prob, sample_neutral_prob = _sample_action(
+ position,
+ rng,
+ short_allowed=short_allowed,
+ trade_duration=trade_duration,
+ max_trade_duration_candles=max_trade_duration_candles,
+ idle_duration=idle_duration,
+ max_idle_duration_candles=max_idle_duration_candles,
+ )
context = RewardContext(
pnl=pnl,
"idle_ratio": idle_ratio,
"position": float(context.position.value),
"action": int(context.action.value),
+ # Sampling diagnostics
+ "sample_entry_prob": sample_entry_prob,
+ "sample_exit_prob": sample_exit_prob,
+ "sample_neutral_prob": sample_neutral_prob,
"reward": breakdown.total,
"reward_invalid": breakdown.invalid_penalty,
"reward_idle": breakdown.idle_penalty,
exit_additive_enabled_raw,
)
- # True invariance requires canonical mode AND no effective additives.
+ # True PBRS invariance classification:
+ # - Canonical requires canonical mode AND no effective additives.
+ # - When `reward_invariance_correction` is present, we use it as the primary
+ # diagnostic (reward_shaping - reward_pbrs_delta).
+ # - Otherwise, we fall back to the weaker heuristic |Σ shaping| ≈ 0.
is_theoretically_invariant = exit_potential_mode == "canonical" and not (
entry_additive_effective or exit_additive_effective
)
+
+ has_inv_correction = "reward_invariance_correction" in df.columns
+ max_abs_inv_correction: float | None
+ if has_inv_correction:
+ max_abs_inv_correction = float(df["reward_invariance_correction"].abs().max())
+ correction_near_zero = max_abs_inv_correction < PBRS_INVARIANCE_TOL
+ else:
+ max_abs_inv_correction = None
+ correction_near_zero = None
shaping_near_zero = abs(total_shaping) < PBRS_INVARIANCE_TOL
suppression_note = ""
# Prepare invariance summary markdown block
if is_theoretically_invariant:
- if shaping_near_zero:
+ if correction_near_zero is True:
invariance_status = "✅ Canonical"
invariance_note = (
- "Theoretical invariance preserved (canonical mode, no additives, Σ≈0)."
+ "Theoretical invariance preserved (canonical mode, no additives, max|correction|≈0)."
+ suppression_note
)
- else:
+ elif correction_near_zero is False:
invariance_status = "⚠️ Canonical (with warning)"
invariance_note = (
- f"Canonical mode but unexpected shaping sum = {total_shaping:.6f}."
- + suppression_note
+ "Canonical mode but invariance correction is non-zero"
+ f" (max|correction|={max_abs_inv_correction:.6e})." + suppression_note
)
+ else:
+ # Fallback: without invariance correction, use Σ shaping as a heuristic.
+ if shaping_near_zero:
+ invariance_status = "✅ Canonical"
+ invariance_note = (
+ "Theoretical invariance preserved (canonical mode, no additives, Σ≈0)."
+ + suppression_note
+ )
+ else:
+ invariance_status = "⚠️ Canonical (with warning)"
+ invariance_note = (
+ "Canonical mode but Σ shaping is non-zero"
+ f" (Σ={total_shaping:.6f}; correction column unavailable)."
+ + suppression_note
+ )
else:
invariance_status = "❌ Non-canonical"
reasons = []
else:
f.write("6. **Distribution Shift** - Not performed (no real episodes provided)\n")
if "reward_shaping" in df.columns:
- _total_shaping = df["reward_shaping"].sum()
- _canonical = abs(_total_shaping) < PBRS_INVARIANCE_TOL
- f.write(
- "7. **PBRS Invariance** - "
- + (
+ _total_shaping = float(df["reward_shaping"].sum())
+ if "reward_invariance_correction" in df.columns:
+ _max_abs_corr = float(df["reward_invariance_correction"].abs().max())
+ _canonical = _max_abs_corr < PBRS_INVARIANCE_TOL
+ _pbrs_summary = (
+ "Canonical (max|correction| ≈ 0)"
+ if _canonical
+ else f"Canonical (with warning; max|correction|={_max_abs_corr:.6e})"
+ )
+ else:
+ _canonical = abs(_total_shaping) < PBRS_INVARIANCE_TOL
+ _pbrs_summary = (
"Canonical (Σ shaping ≈ 0)"
if _canonical
- else f"Non-canonical (Σ shaping = {_total_shaping:.6f})"
+ else f"Canonical (with warning; Σ shaping={_total_shaping:.6f})"
)
- + "\n"
- )
+
+ f.write("7. **PBRS Invariance** - " + _pbrs_summary + "\n")
f.write("\n")
f.write("**Generated Files:**\n")
f.write("- `reward_samples.csv` - Raw synthetic samples\n")
"""Tests for public API and helper functions."""
import math
+import random
import tempfile
import unittest
from pathlib import Path
_get_float_param,
_get_int_param,
_get_str_param,
+ _sample_action,
build_argument_parser,
calculate_reward,
parse_overrides,
class TestAPIAndHelpers(RewardSpaceTestBase):
"""Public API + helper utility tests."""
+    def test_sample_action_idle_hazard_increases_entry_rate(self):
+        """_sample_action() increases entry probability past idle cap.
+
+        This guards the synthetic simulator against unrealistically long neutral streaks.
+        The test is statistical but deterministic via fixed RNG seeds.
+        """
+
+        max_idle_duration_candles = 20
+        max_trade_duration_candles = 100
+
+        def sample_entry_rate(*, idle_duration: int, short_allowed: bool) -> float:
+            rng = random.Random(SEEDS.REPRODUCIBILITY)
+            draws = 2000
+            entries = 0
+            for _ in range(draws):
+                # _sample_action returns (action, entry_prob, exit_prob,
+                # neutral_prob): unpack the action instead of comparing the
+                # whole tuple (which would never match an Actions member and
+                # would leave the entry count stuck at zero).
+                action, _entry_prob, _exit_prob, _neutral_prob = _sample_action(
+                    Positions.Neutral,
+                    rng,
+                    short_allowed=short_allowed,
+                    trade_duration=0,
+                    max_trade_duration_candles=max_trade_duration_candles,
+                    idle_duration=idle_duration,
+                    max_idle_duration_candles=max_idle_duration_candles,
+                )
+                if action in (Actions.Long_enter, Actions.Short_enter):
+                    entries += 1
+            return entries / draws
+
+        low_idle_rate = sample_entry_rate(idle_duration=0, short_allowed=True)
+        high_idle_rate = sample_entry_rate(idle_duration=60, short_allowed=True)
+
+        self.assertGreater(
+            high_idle_rate,
+            low_idle_rate,
+            "Entry rate should increase after exceeding max idle duration",
+        )
+
+        low_idle_rate_spot = sample_entry_rate(idle_duration=0, short_allowed=False)
+        high_idle_rate_spot = sample_entry_rate(idle_duration=60, short_allowed=False)
+        self.assertGreater(high_idle_rate_spot, low_idle_rate_spot)
+
def test_parse_overrides(self):
"""Test parse overrides."""
overrides = ["alpha=1.5", "mode=linear", "limit=42"]
"idle_duration",
"position",
"action",
+ "sample_entry_prob",
+ "sample_exit_prob",
+ "sample_neutral_prob",
"reward",
"reward_invalid",
"reward_idle",
]:
self.assertIn(col, df_margin.columns)
+    def test_simulate_samples_sampling_probabilities_are_bounded(self):
+        """simulate_samples() exposes bounded sampling probabilities."""
+
+        df = simulate_samples(
+            params=self.base_params(max_trade_duration_candles=40),
+            num_samples=200,
+            seed=SEEDS.SMOKE_TEST,
+            base_factor=PARAMS.BASE_FACTOR,
+            profit_aim=PARAMS.PROFIT_AIM,
+            risk_reward_ratio=PARAMS.RISK_REWARD_RATIO,
+            max_duration_ratio=1.5,
+            trading_mode="margin",
+            pnl_base_std=PARAMS.PNL_STD,
+            pnl_duration_vol_scale=PARAMS.PNL_DUR_VOL_SCALE,
+        )
+
+        for col in ["sample_entry_prob", "sample_exit_prob", "sample_neutral_prob"]:
+            self.assertIn(col, df.columns)
+
+        # NaN marks "not applicable" probabilities; dropna() keeps the rest.
+        values = (
+            df[["sample_entry_prob", "sample_exit_prob", "sample_neutral_prob"]].stack().dropna()
+        )
+        # 0.9 mirrors _SAMPLE_DURATION_HAZARD_MAX_PROBABILITY (the hazard cap);
+        # update both together if the cap ever changes.
+        self.assertTrue(((values >= 0.0) & (values <= 0.9)).all())
+
def test_to_bool(self):
"""Test _to_bool with various inputs."""
df1 = simulate_samples(
def assert_pbrs_canonical_sum_within_tolerance(test_case, total_shaping: float, tolerance: float):
-    """Validate cumulative PBRS shaping satisfies canonical bound.
+    """Validate cumulative shaping is small.

-    For canonical PBRS, the cumulative reward shaping across a trajectory
-    must be near zero (within tolerance). This is a core PBRS invariant.
+    In canonical PBRS, the per-step shaping corresponds to a telescoping term.
+    Over a full, closed episode it may cancel, but across many partial trajectories
+    or with resets/discounting it does not need to be exactly zero.
+
+    This helper remains as a *diagnostic* check for constructed test cases that
+    intentionally enforce small cumulative shaping.
+
+    The bound is strict: the assertion requires |total_shaping| < tolerance.

    Args:
-        test_case: Test case instance with assertion methods
-        total_shaping: Total cumulative reward shaping value
-        tolerance: Maximum allowed absolute deviation from zero
+        test_case: Test case instance with assertion methods.
+        total_shaping: Total cumulative shaping value.
+        tolerance: Maximum allowed absolute deviation from zero.

    Example:
-        assert_pbrs_canonical_sum_within_tolerance(self, 5e-10, 1e-09)
+        assert_pbrs_canonical_sum_within_tolerance(self, 5e-10, 1e-9)
    """
    test_case.assertLess(abs(total_shaping), tolerance)
def assert_non_canonical_shaping_exceeds(
    test_case, total_shaping: float, tolerance_multiple: float
):
-    """Validate non-canonical PBRS shaping exceeds threshold.
+    """Validate non-trivial shaping magnitude.

-    For non-canonical PBRS (e.g., with additives), the cumulative shaping
-    should exceed a scaled tolerance threshold, indicating violation of
-    the canonical PBRS invariant.
+    In non-canonical PBRS modes or when additives are effective, the shaping
+    trajectory is expected to deviate from the pure telescoping term more often.

-    Args:
-        test_case: Test case instance with assertion methods
-        total_shaping: Total cumulative reward shaping value
-        tolerance_multiple: Threshold value (typically scaled tolerance)
+    Note: cumulative shaping being large is not a strict correctness proof; it is
+    a useful smoke-signal for test fixtures that intentionally construct such cases.
+
+    The bound is strict: the assertion requires |total_shaping| > tolerance_multiple.

-    Example:
-        # Expect shaping to exceed 10x tolerance for non-canonical case
-        assert_non_canonical_shaping_exceeds(self, 0.05, 1e-08)
+    Args:
+        test_case: Test case instance with assertion methods.
+        total_shaping: Total cumulative shaping value.
+        tolerance_multiple: Threshold value for the given test fixture.
    """
    test_case.assertGreater(abs(total_shaping), tolerance_multiple)
PBRS_INVARIANCE_TOL,
f"Total shaping {total_shaping} exceeds invariance tolerance",
)
+ inv_corr_vals = [1.0e-7, -1.0e-7, 2.0e-7]
+ max_abs_corr = float(np.max(np.abs(inv_corr_vals)))
+ self.assertLess(max_abs_corr, PBRS_INVARIANCE_TOL)
+
n = len(small_vals)
df = pd.DataFrame(
{
"reward_shaping": small_vals,
"reward_entry_additive": [0.0] * n,
"reward_exit_additive": [0.0] * n,
+ "reward_invariance_correction": inv_corr_vals,
"reward_invalid": np.zeros(n),
"duration_ratio": np.random.uniform(0.2, 1.0, n),
"idle_ratio": np.zeros(n),
self.assertAlmostEqual(
abs(total_shaping), val_abs, places=TOLERANCE.DECIMAL_PLACES_STRICT
)
+ self.assertIn("max|correction|≈0", content)
# Non-owning smoke; ownership: robustness/test_robustness.py:35 (robustness-decomposition-integrity-101)
@pytest.mark.smoke
small_vals = [1.0e-7, -2.0e-7, 3.0e-7] # sum = 2.0e-7 < tolerance
total_shaping = float(sum(small_vals))
self.assertLess(abs(total_shaping), PBRS_INVARIANCE_TOL)
+ inv_corr_vals = [1.0e-7, -1.0e-7, 2.0e-7]
+ max_abs_corr = float(np.max(np.abs(inv_corr_vals)))
+ self.assertLess(max_abs_corr, PBRS_INVARIANCE_TOL)
+
n = len(small_vals)
df = pd.DataFrame(
{
"reward_shaping": small_vals,
"reward_entry_additive": [0.0] * n,
"reward_exit_additive": [0.0] * n,
+ "reward_invariance_correction": inv_corr_vals,
"reward_invalid": np.zeros(n),
"duration_ratio": np.random.uniform(0.2, 1.0, n),
"idle_ratio": np.zeros(n),
self.assertIn("| Exit Additive Effective | False |", content)
def test_pbrs_canonical_warning_report(self):
- """Canonical mode + no additives but |Σ shaping| > tolerance -> warning classification."""
+ """Canonical mode + no additives but max|invariance_correction| > tolerance -> warning."""
- shaping_vals = [1.2e-4, 1.3e-4, 8.0e-5, -2.0e-5, 1.4e-4] # sum = 4.5e-4 (> tol)
- total_shaping = sum(shaping_vals)
+ shaping_vals = [1.2e-4, 1.3e-4, 8.0e-5, -2.0e-5, 1.4e-4] # Σ not near 0
+ total_shaping = float(sum(shaping_vals))
self.assertGreater(abs(total_shaping), PBRS_INVARIANCE_TOL)
+
+ inv_corr_vals = [1.0e-4, -2.0e-4, 1.5e-4, -1.2e-4, 7.0e-5]
+ max_abs_corr = float(np.max(np.abs(inv_corr_vals)))
+ self.assertGreater(max_abs_corr, PBRS_INVARIANCE_TOL)
+
n = len(shaping_vals)
df = pd.DataFrame(
{
"reward_shaping": shaping_vals,
"reward_entry_additive": [0.0] * n,
"reward_exit_additive": [0.0] * n,
+ "reward_invariance_correction": inv_corr_vals,
"reward_invalid": np.zeros(n),
"duration_ratio": np.random.uniform(0.2, 1.2, n),
"idle_ratio": np.zeros(n),
assert_pbrs_invariance_report_classification(
self, content, "Canonical (with warning)", expect_additives=False
)
- expected_sum_fragment = f"{total_shaping:.6f}"
- self.assertIn(expected_sum_fragment, content)
+ expected_corr_fragment = f"{max_abs_corr:.6e}"
+ self.assertIn(expected_corr_fragment, content)
# Non-owning smoke; ownership: robustness/test_robustness.py:35 (robustness-decomposition-integrity-101)
@pytest.mark.smoke