- Extensibility: modular helpers (sampling, reward calculation, statistics, reporting).
"""
+# ---------------------------------------------------------------------------
+# Module Layout
+# ---------------------------------------------------------------------------
+# Actual order in this module (arranged for conceptual proximity):
+# 1. Imports & global constants
+# 2. Enums & type aliases
+# 3. Default parameter definitions & validation utilities
+# 4. Core generic helpers (parsing, coercion, safety)
+# 5. Dataclasses (RewardContext, RewardBreakdown)
+# 6. Reward computation primitives (penalties, exit factor, pnl factor, calculate_reward)
+# 7. Simulation utilities (sampling, invariants)
+# 8. Statistical / analytical helpers (summary stats, binned stats, tests, diagnostics)
+# 9. PBRS transforms, helpers & implementation (potential shaping logic)
+# 10. CLI construction & reporting (argument parser, report writer, report generator)
+# 11. main() entry point
+
from __future__ import annotations
import argparse
Neutral = 0.5
+# Pre-computed mathematical constant (avoids repeated math.log calls)
+_LOG_2 = math.log(2.0)
+# Max idle duration fallback = this multiplier * max trade duration
+DEFAULT_IDLE_DURATION_MULTIPLIER = 4
+
# Tolerance for PBRS invariance classification (canonical if |Σ shaping| < PBRS_INVARIANCE_TOL)
PBRS_INVARIANCE_TOL: float = 1e-6
# Default discount factor γ for potential-based reward shaping
"bootstrap_min_recommended": 200,
}
-
-class RewardDiagnosticsWarning(RuntimeWarning):
- """Warning category for reward space diagnostic graceful fallbacks.
-
- Enables selective filtering in tests or CLI harness while leaving other
- runtime warnings unaffected.
- """
-
- pass
-
-
-def _to_bool(value: Any) -> bool:
- if isinstance(value, bool):
- return value
- if isinstance(value, (int, float)):
- return bool(int(value))
- if value is None:
- return False
- text = str(value).strip().lower()
- if text in {"true", "1", "yes", "y", "on"}:
- return True
- if text in {"false", "0", "no", "n", "off"}:
- return False
- return bool(text)
-
-
-def _get_float_param(
- params: RewardParams, key: str, default: RewardParamValue
-) -> float:
- """Extract float parameter with type safety and default fallback."""
- value = params.get(key, default)
- # None -> NaN
- if value is None:
- return np.nan
- # Bool: treat explicitly (avoid surprising True->1.0 unless intentional)
- if isinstance(value, bool):
- return float(int(value))
- # Numeric
- if isinstance(value, (int, float)):
- try:
- fval = float(value)
- except (ValueError, TypeError):
- return np.nan
- return fval if np.isfinite(fval) else np.nan
- # String parsing
- if isinstance(value, str):
- stripped = value.strip()
- if stripped == "":
- return np.nan
- try:
- fval = float(stripped)
- except ValueError:
- return np.nan
- return fval if np.isfinite(fval) else np.nan
- # Unsupported type
- return np.nan
-
-
-def _compute_duration_ratio(trade_duration: int, max_trade_duration: int) -> float:
- """Compute duration ratio with safe division."""
- return trade_duration / max(1, max_trade_duration)
-
-
-def _is_short_allowed(trading_mode: str) -> bool:
- mode = trading_mode.lower()
- if mode in {"margin", "futures"}:
- return True
- if mode == "spot":
- return False
- raise ValueError("Unsupported trading mode. Expected one of: spot, margin, futures")
-
-
-# Mathematical constants pre-computed for performance
-_LOG_2 = math.log(2.0)
-DEFAULT_IDLE_DURATION_MULTIPLIER = 4
-
-RewardParamValue = Union[float, str, bool, None]
-RewardParams = Dict[str, RewardParamValue]
-
-
-# Internal safe fallback helper for numeric failures (centralizes semantics)
-def _fail_safely(reason: str) -> float:
- """Return 0.0 on recoverable numeric failure (reason available for future debug hooks)."""
- # NOTE: presently silent to preserve legacy behavior; hook logging here if needed.
- _ = reason
- return 0.0
-
-
# PBRS constants
ALLOWED_TRANSFORMS = {
"tanh",
"idle_penalty_power": 1.025,
# Fallback: DEFAULT_IDLE_DURATION_MULTIPLIER * max_trade_duration_candles
"max_idle_duration_candles": None,
- # Hold keys (env defaults)
+ # Hold penalty (env defaults)
"hold_penalty_scale": 0.25,
"hold_penalty_power": 1.025,
- # Exit attenuation configuration (env default)
+ # Exit attenuation (env default)
"exit_attenuation_mode": "linear",
"exit_plateau": True,
"exit_plateau_grace": 1.0,
"exit_linear_slope": 1.0,
"exit_power_tau": 0.5,
"exit_half_life": 0.5,
- # Efficiency keys (env defaults)
+ # Efficiency factor (env defaults)
"efficiency_weight": 1.0,
"efficiency_center": 0.5,
- # Profit factor params (env defaults)
+ # Profit factor (env defaults)
"win_reward_factor": 2.0,
"pnl_factor_beta": 0.5,
- # Invariant / safety controls (env defaults)
+ # Invariant / safety (env defaults)
"check_invariants": True,
"exit_factor_threshold": 10000.0,
# === PBRS PARAMETERS ===
"exit_additive_gain": {"min": 0.0},
}
+RewardParamValue = Union[float, str, bool, None]
+RewardParams = Dict[str, RewardParamValue]
+
+
+class RewardDiagnosticsWarning(RuntimeWarning):
+ """Warning category for reward space diagnostic graceful fallbacks.
+
+ Enables selective filtering in tests or CLI harness while leaving other
+ runtime warnings unaffected.
+ """
+
+ pass
+
+
+def _to_bool(value: Any) -> bool:
+ if isinstance(value, bool):
+ return value
+ if isinstance(value, (int, float)):
+ return bool(int(value))
+ if value is None:
+ return False
+ text = str(value).strip().lower()
+ if text in {"true", "1", "yes", "y", "on"}:
+ return True
+ if text in {"false", "0", "no", "n", "off"}:
+ return False
+ return bool(text)
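+
+# Illustrative note on the fallback above: unrecognized non-empty strings
+# coerce truthy via bool(text), e.g. _to_bool("maybe") -> True, while
+# _to_bool("") -> False.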
+
+
+def _get_bool_param(params: RewardParams, key: str, default: bool) -> bool:
+ """Extract boolean parameter with type safety."""
+ value = params.get(key, default)
+ try:
+ return _to_bool(value)
+ except Exception:
+ return bool(default)
+
+
+def _get_float_param(
+ params: RewardParams, key: str, default: RewardParamValue
+) -> float:
+ """Extract float parameter with type safety and default fallback."""
+ value = params.get(key, default)
+ # None -> NaN
+ if value is None:
+ return np.nan
+ # Bool: treat explicitly (avoid surprising True->1.0 unless intentional)
+ if isinstance(value, bool):
+ return float(int(value))
+ # Numeric
+ if isinstance(value, (int, float)):
+ try:
+ fval = float(value)
+ except (ValueError, TypeError):
+ return np.nan
+ return fval if np.isfinite(fval) else np.nan
+ # String parsing
+ if isinstance(value, str):
+ stripped = value.strip()
+ if stripped == "":
+ return np.nan
+ try:
+ fval = float(stripped)
+ except ValueError:
+ return np.nan
+ return fval if np.isfinite(fval) else np.nan
+ # Unsupported type
+ return np.nan
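+
+# Illustrative coercions per the rules above:
+#   _get_float_param({"x": " 1.5 "}, "x", 0.0) -> 1.5
+#   _get_float_param({"x": float("inf")}, "x", 0.0) -> nan (non-finite rejected)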
+
+
+def _get_int_param(params: RewardParams, key: str, default: RewardParamValue) -> int:
+ """Extract integer parameter with robust coercion.
+
+ Behavior:
+ - Accept bool/int/float/str numeric representations.
+ - Non-finite floats -> fallback to default coerced to int (or 0).
+ - Strings: strip then parse float/int; on failure fallback.
+ - None -> fallback.
+    - Final coercion goes through Python's int(); values stay arbitrary-precision (no implicit 64-bit clamp).
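+
+    Examples (illustrative):
+    >>> _get_int_param({"k": "3.5"}, "k", 0)
+    3
+    >>> _get_int_param({"k": float("nan")}, "k", 7)
+    7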
+ """
+ value = params.get(key, default)
+ if value is None:
+ return int(default) if isinstance(default, (int, float)) else 0
+ if isinstance(value, bool):
+ return int(value)
+ if isinstance(value, int):
+ return value
+ if isinstance(value, float):
+ if not np.isfinite(value):
+ return int(default) if isinstance(default, (int, float)) else 0
+ try:
+ return int(value)
+ except (OverflowError, ValueError):
+ return int(default) if isinstance(default, (int, float)) else 0
+ if isinstance(value, str):
+ stripped = value.strip()
+ if stripped == "":
+ return int(default) if isinstance(default, (int, float)) else 0
+ try:
+ if any(ch in stripped for ch in (".", "e", "E")):
+ fval = float(stripped)
+ if not np.isfinite(fval):
+ return int(default) if isinstance(default, (int, float)) else 0
+ return int(fval)
+ return int(stripped)
+ except (ValueError, OverflowError):
+ return int(default) if isinstance(default, (int, float)) else 0
+ # Unsupported type
+ return int(default) if isinstance(default, (int, float)) else 0
+
+
+def _get_str_param(params: RewardParams, key: str, default: str) -> str:
+ """Extract string parameter with type safety."""
+ value = params.get(key, default)
+ if isinstance(value, str):
+ return value
+ return default
+
+
+def _compute_duration_ratio(trade_duration: int, max_trade_duration: int) -> float:
+ """Compute duration ratio with safe division."""
+ return trade_duration / max(1, max_trade_duration)
+
+
+def _is_short_allowed(trading_mode: str) -> bool:
+ mode = trading_mode.lower()
+ if mode in {"margin", "futures"}:
+ return True
+ if mode == "spot":
+ return False
+ raise ValueError("Unsupported trading mode. Expected one of: spot, margin, futures")
+
+
+# Internal safe fallback helper for numeric failures (centralizes semantics)
+def _fail_safely(reason: str) -> float:
+ """Return 0.0 on recoverable numeric failure (reason available for future debug hooks)."""
+ # NOTE: presently silent to preserve legacy behavior; hook logging here if needed.
+ _ = reason
+ return 0.0
+
def validate_reward_parameters(
params: RewardParams,
if "exit_attenuation_mode" not in params:
return
- exit_attenuation_mode = _get_str_param(params, "exit_attenuation_mode", "linear")
+ exit_attenuation_mode = _get_str_param(
+ params,
+ "exit_attenuation_mode",
+ str(DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_attenuation_mode", "linear")),
+ )
if exit_attenuation_mode not in ATTENUATION_MODES_WITH_LEGACY:
params["exit_attenuation_mode"] = "linear"
if duration_ratio < 0.0:
duration_ratio = 0.0
- exit_attenuation_mode = _get_str_param(params, "exit_attenuation_mode", "linear")
- exit_plateau = _get_bool_param(params, "exit_plateau", True)
+ exit_attenuation_mode = _get_str_param(
+ params,
+ "exit_attenuation_mode",
+ str(DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_attenuation_mode", "linear")),
+ )
+ exit_plateau = _get_bool_param(
+ params,
+ "exit_plateau",
+ bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_plateau", True)),
+ )
- exit_plateau_grace = _get_float_param(params, "exit_plateau_grace", 1.0)
+ exit_plateau_grace = _get_float_param(
+ params,
+ "exit_plateau_grace",
+ DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_plateau_grace", 1.0),
+ )
if exit_plateau_grace < 0.0:
exit_plateau_grace = 1.0
- exit_linear_slope = _get_float_param(params, "exit_linear_slope", 1.0)
+ exit_linear_slope = _get_float_param(
+ params,
+ "exit_linear_slope",
+ DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_linear_slope", 1.0),
+ )
if exit_linear_slope < 0.0:
exit_linear_slope = 1.0
return f / math.pow(1.0 + dr, alpha)
def _half_life_kernel(f: float, dr: float) -> float:
- hl = _get_float_param(params, "exit_half_life", 0.5)
+ hl = _get_float_param(
+ params,
+ "exit_half_life",
+ DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_half_life", 0.5),
+ )
if hl <= 0.0:
hl = 0.5
return f * math.pow(2.0, -dr / hl)
base_factor *= pnl_factor
# Invariant & safety checks
- if _get_bool_param(params, "check_invariants", True):
+ if _get_bool_param(
+ params,
+ "check_invariants",
+ bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("check_invariants", True)),
+ ):
if not np.isfinite(base_factor):
return _fail_safely("non_finite_exit_factor_after_kernel")
if base_factor < 0.0 and pnl >= 0.0:
pnl_ratio = pnl / profit_target
pnl_target_factor = 1.0
if abs(pnl_ratio) > 1.0:
- # Environment uses tanh(beta * (abs(ratio)-1)) for magnitude beyond target.
base_pnl_target_factor = math.tanh(pnl_factor_beta * (abs(pnl_ratio) - 1.0))
if pnl_ratio > 1.0:
pnl_target_factor = 1.0 + win_reward_factor * base_pnl_target_factor
"idle_penalty_power",
DEFAULT_MODEL_REWARD_PARAMETERS.get("idle_penalty_power", 1.025),
)
- max_trade_duration_candles = params.get("max_trade_duration_candles")
- try:
- if max_trade_duration_candles is not None:
- max_trade_duration_candles = int(max_trade_duration_candles)
- else:
- max_trade_duration_candles = int(context.max_trade_duration)
- except (TypeError, ValueError):
+ max_trade_duration_candles = _get_int_param(
+ params, "max_trade_duration_candles", context.max_trade_duration
+ )
+ if max_trade_duration_candles <= 0:
max_trade_duration_candles = int(context.max_trade_duration)
- max_idle_duration_candles = params.get("max_idle_duration_candles")
- if max_idle_duration_candles is None:
- max_idle_duration = (
+ max_idle_duration_candles = _get_int_param(
+ params,
+ "max_idle_duration_candles",
+ DEFAULT_IDLE_DURATION_MULTIPLIER * max_trade_duration_candles,
+ )
+ if max_idle_duration_candles <= 0:
+ max_idle_duration_candles = (
DEFAULT_IDLE_DURATION_MULTIPLIER * max_trade_duration_candles
)
- else:
- try:
- max_idle_duration = int(max_idle_duration_candles)
- except (TypeError, ValueError):
- max_idle_duration = (
- DEFAULT_IDLE_DURATION_MULTIPLIER * max_trade_duration_candles
- )
+ max_idle_duration = max_idle_duration_candles
idle_duration_ratio = context.idle_duration / max(1, max_idle_duration)
return -idle_factor * idle_penalty_scale * idle_duration_ratio**idle_penalty_power
short_allowed=short_allowed,
)
if not is_valid and not action_masking:
- breakdown.invalid_penalty = _get_float_param(params, "invalid_action", -2.0)
+ breakdown.invalid_penalty = _get_float_param(
+ params,
+ "invalid_action",
+ DEFAULT_MODEL_REWARD_PARAMETERS.get("invalid_action", -2.0),
+ )
breakdown.total = breakdown.invalid_penalty
return breakdown
# Apply PBRS if any PBRS parameters are enabled
pbrs_enabled = (
- _get_bool_param(params, "hold_potential_enabled", True)
- or _get_bool_param(params, "entry_additive_enabled", False)
- or _get_bool_param(params, "exit_additive_enabled", False)
+ _get_bool_param(
+ params,
+ "hold_potential_enabled",
+ bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("hold_potential_enabled", True)),
+ )
+ or _get_bool_param(
+ params,
+ "entry_additive_enabled",
+ bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("entry_additive_enabled", False)),
+ )
+ or _get_bool_param(
+ params,
+ "exit_additive_enabled",
+ bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_additive_enabled", False)),
+ )
)
if pbrs_enabled:
if position == Positions.Neutral:
trade_duration = 0
- max_idle_duration_candles = params.get("max_idle_duration_candles")
- try:
- if max_idle_duration_candles is not None:
- max_idle_duration_candles = int(max_idle_duration_candles)
- else:
- max_idle_duration_candles = int(
- max_trade_duration * max_duration_ratio
- )
- except (TypeError, ValueError):
+ max_idle_duration_candles = _get_int_param(
+ params,
+ "max_idle_duration_candles",
+ int(max_trade_duration * max_duration_ratio),
+ )
+ if max_idle_duration_candles <= 0:
max_idle_duration_candles = int(max_trade_duration * max_duration_ratio)
idle_duration = int(rng.uniform(0, max_idle_duration_candles))
synthetic_df: pd.DataFrame,
real_df: pd.DataFrame,
) -> Dict[str, float]:
- """Compute KL, JS, Wasserstein divergences between synthetic and real distributions."""
+ """Compute distribution shift metrics between synthetic and real samples.
+
+ Metrics:
+ - KL divergence (Kullback-Leibler, direction: synthetic || real) using discretized histograms.
+ - JS distance (square root of Jensen-Shannon divergence; bounded in [0,1]).
+ - 1D Wasserstein (Earth Mover's distance) per feature.
+ - Kolmogorov-Smirnov statistic & p-value.
+
+ Notes
+ -----
+ - Features with fewer than 10 non-NaN observations in either dataset are skipped.
+ - Degenerate (constant) distributions short-circuit to zero shift metrics with KS p=1.
+ - Histogram bin count is fixed (50) for comparability; adaptive binning could be explored.
+ """
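+    # Sketch of the per-feature computation (assumed; the full loop body is
+    # elided in this hunk), using SciPy primitives on shared 50-bin histograms:
+    #   scipy.stats.entropy(p_syn, p_real)                    # KL(synthetic || real)
+    #   scipy.spatial.distance.jensenshannon(p_syn, p_real)   # JS distance
+    #   scipy.stats.wasserstein_distance(x_syn, x_real)
+    #   scipy.stats.ks_2samp(x_syn, x_real)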
metrics = {}
continuous_features = ["pnl", "trade_duration", "idle_duration"]
f"Bootstrap CI for {metric}: non-positive width {width:.6f}"
)
# Graceful mode: expand interval symmetrically
- epsilon = (
- INTERNAL_GUARDS.get("degenerate_ci_epsilon", 1e-9)
- if width == 0
- else abs(width) * 1e-6
- )
+ if width == 0:
+ epsilon = INTERNAL_GUARDS["degenerate_ci_epsilon"]
+ else:
+ epsilon = abs(width) * 1e-6
center = mean
# Adjust only if current bounds are identical; otherwise enforce ordering minimally.
if ci_low == ci_high:
raise AssertionError(f"Q-Q R^2 {key} must be in [0,1], got {value}")
-def build_argument_parser() -> argparse.ArgumentParser:
- parser = argparse.ArgumentParser(
- description="Synthetic stress-test of the ReforceXY reward shaping logic."
- )
- parser.add_argument(
- "--skip-feature-analysis",
- action="store_true",
- help="Skip feature importance and model-based analysis for all scenarios.",
- )
- parser.add_argument(
- "--num_samples",
- type=int,
- default=20000,
- help="Number of synthetic state/action samples to generate (default: 20000).",
- )
- parser.add_argument(
- "--seed",
- type=int,
- default=42,
- help="Random seed for reproducibility (default: 42).",
- )
- parser.add_argument(
- "--skip_partial_dependence",
- action="store_true",
- help="Skip partial dependence computation to speed up analysis.",
- )
- parser.add_argument(
- "--stats_seed",
- type=int,
- default=None,
- help="Optional separate seed for statistical analyses (default: same as --seed).",
- )
- parser.add_argument(
- "--max_trade_duration",
- type=int,
- default=128,
- help="Configured trade timeout in candles (default: 128).",
- )
- parser.add_argument(
- "--base_factor",
- type=float,
- default=100.0,
- help="Base reward factor used inside the environment (default: 100).",
- )
- parser.add_argument(
- "--profit_target",
- type=float,
- default=0.03,
- help="Target profit threshold (default: 0.03).",
- )
- parser.add_argument(
- "--risk_reward_ratio",
- type=float,
- default=1.0,
- help="Risk reward ratio multiplier (default: 1.0).",
- )
- parser.add_argument(
- "--max_duration_ratio",
- type=float,
- default=2.5,
- help="Multiple of max duration used when sampling trade/idle durations.",
- )
- parser.add_argument(
- "--pnl_base_std",
- type=float,
- default=0.02,
- help="Base standard deviation for synthetic PnL generation (pre-scaling).",
- )
- parser.add_argument(
- "--pnl_duration_vol_scale",
- type=float,
- default=0.5,
- help="Scaling factor of additional PnL volatility proportional to trade duration ratio.",
- )
- parser.add_argument(
- "--trading_mode",
- type=str.lower,
- choices=["spot", "margin", "futures"],
- default="spot",
- help=("Trading mode to simulate (spot disables shorts). Default: spot."),
- )
- parser.add_argument(
- "--action_masking",
- type=str,
- choices=["true", "false", "1", "0", "yes", "no"],
- default="true",
- help="Enable action masking simulation (default: true).",
- )
- parser.add_argument(
- "--output",
- type=Path,
- default=Path("reward_space_outputs"),
- help="Output directory for artifacts (default: reward_space_outputs).",
- )
- parser.add_argument(
- "--params",
- nargs="*",
- default=[],
- metavar="KEY=VALUE",
- help="Override reward parameters, e.g. hold_penalty_scale=0.5",
- )
- # Dynamically add CLI options for all tunables
- add_tunable_cli_args(parser)
- parser.add_argument(
- "--real_episodes",
- type=Path,
- default=None,
- help="Path to real episodes pickle for distribution shift analysis (optional).",
- )
- parser.add_argument(
- "--pvalue_adjust",
- type=str.lower,
- choices=["none", "benjamini_hochberg"],
- default="none",
- help="Multiple testing correction method for hypothesis tests (default: none).",
- )
- parser.add_argument(
- "--strict_diagnostics",
- action="store_true",
- help=(
- "Enable fail-fast mode for statistical diagnostics: raise on zero-width bootstrap CIs or undefined "
- "skewness/kurtosis/Anderson/Q-Q metrics produced by constant distributions instead of applying graceful replacements."
- ),
- )
- parser.add_argument(
- "--bootstrap_resamples",
- type=int,
- default=10000,
- metavar="N",
- help=(
- "Number of bootstrap resamples for confidence intervals (default: 10000). "
- "Lower this (e.g. 200-1000) for faster smoke tests; increase for more stable CI width estimates."
- ),
- )
- return parser
+"""PBRS (Potential-Based Reward Shaping) transforms & helpers are defined below.
+They are lifted earlier in the file (before CLI parser / reporting) to keep all
+reward computation primitives and shaping logic grouped, reducing cognitive
+distance when auditing reward correctness.
+"""
-def write_complete_statistical_analysis(
- df: pd.DataFrame,
- output_dir: Path,
- max_trade_duration: int,
- profit_target: float,
- seed: int,
- real_df: Optional[pd.DataFrame] = None,
- *,
- adjust_method: str = "none",
- stats_seed: Optional[int] = None,
- strict_diagnostics: bool = False,
- bootstrap_resamples: int = 10000,
- skip_partial_dependence: bool = False,
- skip_feature_analysis: bool = False,
-) -> None:
- """Generate a single comprehensive statistical analysis report with enhanced tests."""
- output_dir.mkdir(parents=True, exist_ok=True)
- report_path = output_dir / "statistical_analysis.md"
+# === PBRS TRANSFORM FUNCTIONS ===
- # Helpers: consistent Markdown table renderers
- def _fmt_val(v: Any, ndigits: int = 6) -> str:
- try:
- if isinstance(v, (int, np.integer)):
- return f"{int(v)}"
- if isinstance(v, (float, np.floating)):
- if np.isnan(v):
- return "NaN"
- return f"{float(v):.{ndigits}f}"
- return str(v)
- except Exception:
- return str(v)
- def _series_to_md(
- series: pd.Series, value_name: str = "value", ndigits: int = 6
- ) -> str:
- lines = [f"| Metric | {value_name} |", "|--------|-----------|"]
- for k, v in series.items():
- lines.append(f"| {k} | {_fmt_val(v, ndigits)} |")
- return "\n".join(lines) + "\n\n"
+def _apply_transform_tanh(value: float) -> float:
+ """tanh: tanh(x) in (-1, 1)."""
+ return float(math.tanh(value))
- def _df_to_md(df: pd.DataFrame, index_name: str = "index", ndigits: int = 6) -> str:
- if df is None or df.empty:
- return "_No data._\n\n"
- # Prepare header
- cols = list(df.columns)
- header = "| " + index_name + " | " + " | ".join(cols) + " |\n"
- sep = "|" + "-" * (len(index_name) + 2)
- for c in cols:
- sep += "|" + "-" * (len(str(c)) + 2)
- sep += "|\n"
- # Rows
- rows = []
- for idx, row in df.iterrows():
- vals = [_fmt_val(row[c], ndigits) for c in cols]
- rows.append("| " + str(idx) + " | " + " | ".join(vals) + " |")
- return header + sep + "\n".join(rows) + "\n\n"
- # Compute all statistics
- summary_stats = _compute_summary_stats(df)
- relationship_stats = _compute_relationship_stats(df, max_trade_duration)
- representativity_stats = _compute_representativity_stats(
- df, profit_target, max_trade_duration
- )
+def _apply_transform_softsign(value: float) -> float:
+ """softsign: x / (1 + |x|) in (-1, 1)."""
+ x = value
+ return float(x / (1.0 + abs(x)))
- # Model analysis: skip if requested or not enough samples
- importance_df = None
- analysis_stats = None
- partial_deps = {}
- if skip_feature_analysis or len(df) < 4:
- print("Skipping feature analysis: flag set or insufficient samples (<4).")
- else:
- importance_df, analysis_stats, partial_deps, _model = _perform_feature_analysis(
- df, seed, skip_partial_dependence=skip_partial_dependence
- )
- # Save feature importance CSV
- importance_df.to_csv(output_dir / "feature_importance.csv", index=False)
- # Save partial dependence CSVs
- if not skip_partial_dependence:
- for feature, pd_df in partial_deps.items():
- pd_df.to_csv(
- output_dir / f"partial_dependence_{feature}.csv",
- index=False,
- )
- # Enhanced statistics
- test_seed = (
- stats_seed
- if isinstance(stats_seed, int)
- else (seed if isinstance(seed, int) else 42)
- )
- hypothesis_tests = statistical_hypothesis_tests(
- df, adjust_method=adjust_method, seed=test_seed
- )
- metrics_for_ci = [
- "reward_total",
- "reward_idle",
- "reward_hold",
- "reward_exit",
- "pnl",
- ]
- bootstrap_ci = bootstrap_confidence_intervals(
- df,
- metrics_for_ci,
- n_bootstrap=int(bootstrap_resamples),
- seed=test_seed,
- strict_diagnostics=strict_diagnostics,
- )
- dist_diagnostics = distribution_diagnostics(
- df, seed=test_seed, strict_diagnostics=strict_diagnostics
- )
+def _apply_transform_arctan(value: float) -> float:
+ """arctan: (2/pi) * arctan(x) in (-1, 1)."""
+ return float((2.0 / math.pi) * math.atan(value))
- distribution_shift = None
- if real_df is not None:
- distribution_shift = compute_distribution_shift_metrics(df, real_df)
- # Write comprehensive report
- with report_path.open("w", encoding="utf-8") as f:
- # Header
- f.write("# Reward Space Analysis Report\n\n")
- f.write("### Run Configuration\n\n")
- f.write("| Key | Value |\n")
- f.write("|-----|-------|\n")
- f.write(f"| Generated | {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')} |\n")
- f.write(f"| Total Samples | {len(df):,} |\n")
- f.write(f"| Random Seed | {seed} |\n")
- f.write(f"| Max Trade Duration | {max_trade_duration} |\n")
- # Blank separator to visually group core simulation vs PBRS parameters
- f.write("| | |\n")
- # Extra core PBRS parameters exposed in run configuration if present
- _rp = (
- df.attrs.get("reward_params")
- if isinstance(df.attrs.get("reward_params"), dict)
- else {}
- )
- exit_mode = _rp.get("exit_potential_mode", "canonical")
- potential_gamma = _rp.get(
- "potential_gamma",
- DEFAULT_MODEL_REWARD_PARAMETERS.get(
- "potential_gamma", POTENTIAL_GAMMA_DEFAULT
- ),
- )
- f.write(f"| exit_potential_mode | {exit_mode} |\n")
- f.write(f"| potential_gamma | {potential_gamma} |\n")
- # Blank separator before overrides block
- f.write("| | |\n")
-
- overrides_pairs = []
- if _rp:
- for k, default_v in DEFAULT_MODEL_REWARD_PARAMETERS.items():
- if k in ("exit_potential_mode", "potential_gamma"):
- continue # already printed explicitly
- try:
- if k in _rp and _rp[k] != default_v:
- overrides_pairs.append(f"{k}={_rp[k]}")
- except Exception:
- continue
- if overrides_pairs:
- f.write(f"| Overrides | {', '.join(sorted(overrides_pairs))} |\n")
+def _apply_transform_sigmoid(value: float) -> float:
+ """sigmoid: 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) in (-1, 1)."""
+ x = value
+ try:
+ if x >= 0:
+ exp_neg_x = math.exp(-x)
+ sigma_x = 1.0 / (1.0 + exp_neg_x)
else:
- f.write("| Overrides | (none) |\n")
- f.write("\n")
+ exp_x = math.exp(x)
+ sigma_x = exp_x / (exp_x + 1.0)
+ return 2.0 * sigma_x - 1.0
+ except OverflowError:
+ return 1.0 if x > 0 else -1.0
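+
+# Note: the two-branch σ(x) above is the standard numerically stable form;
+# exp() is only ever called with a non-positive argument, so it cannot
+# overflow (the except clause is defensive).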
- f.write("---\n\n")
- # Section 1: Global Statistics
- f.write("## 1. Global Statistics\n\n")
+def _apply_transform_asinh(value: float) -> float:
+    """asinh-style squash: x / sqrt(1 + x^2) = tanh(asinh(x)), in (-1, 1)."""
+ return float(value / math.hypot(1.0, value))
- f.write("### 1.1 Reward Distribution\n\n")
- f.write(
- _series_to_md(
- summary_stats["global_stats"], value_name="reward_total", ndigits=6
- )
- )
- f.write("### 1.2 Reward Statistics by Action\n\n")
- action_df = summary_stats["action_summary"].copy()
- # Cast index to int for cleaner display (0,1,2,3,4)
- action_df.index = action_df.index.astype(int)
- if action_df.index.name is None:
- action_df.index.name = "action"
- f.write(_df_to_md(action_df, index_name=action_df.index.name, ndigits=6))
+def _apply_transform_clip(value: float) -> float:
+ """clip: clip(x, -1, 1) in [-1, 1]."""
+ return float(np.clip(value, -1.0, 1.0))
- f.write("### 1.3 Component Activation Rates\n\n")
- f.write("Percentage of samples where each reward component is non-zero:\n\n")
- comp_share = summary_stats["component_share"].copy()
- formatted_rows = [
- "| Component | Activation Rate |",
- "|-----------|----------------|",
- ]
- # Enforce deterministic logical ordering of components if present
- preferred_order = [
- "reward_invalid",
- "reward_idle",
- "reward_hold",
- "reward_exit",
- "reward_shaping",
- "reward_entry_additive",
- "reward_exit_additive",
- ]
- for comp in preferred_order:
- if comp in comp_share.index:
- val = comp_share.loc[comp]
- formatted_rows.append(f"| {comp} | {val:.1%} |")
- f.write("\n".join(formatted_rows) + "\n\n")
- f.write("### 1.4 Component Value Ranges\n\n")
- bounds_df = summary_stats["component_bounds"].copy()
- if bounds_df.index.name is None:
- bounds_df.index.name = "component"
- f.write(_df_to_md(bounds_df, index_name=bounds_df.index.name, ndigits=6))
+def apply_transform(transform_name: str, value: float, **kwargs: Any) -> float:
+    """Apply a named transform; unknown names fall back to tanh with a warning."""
+ transforms = {
+ "tanh": _apply_transform_tanh,
+ "softsign": _apply_transform_softsign,
+ "arctan": _apply_transform_arctan,
+ "sigmoid": _apply_transform_sigmoid,
+ "asinh": _apply_transform_asinh,
+ "clip": _apply_transform_clip,
+ }
- # Section 2: Representativity Analysis
- f.write("---\n\n")
- f.write("## 2. Sample Representativity\n\n")
- f.write(
- "This section evaluates whether the synthetic samples adequately represent "
+ if transform_name not in transforms:
+ warnings.warn(
+ f"Unknown potential transform '{transform_name}'; falling back to tanh",
+ RewardDiagnosticsWarning,
+ stacklevel=2,
)
- f.write("the full reward space across different market scenarios.\n\n")
+ return _apply_transform_tanh(value)
+ return transforms[transform_name](value, **kwargs)
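+
+# Illustrative usage (values exact by construction):
+#   apply_transform("softsign", 1.0) -> 0.5
+#   apply_transform("clip", 3.0)     -> 1.0
+#   apply_transform("unknown", 1.0)  -> tanh(1.0), with RewardDiagnosticsWarning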
- f.write("### 2.1 Position Distribution\n\n")
- f.write(
- _series_to_md(
- representativity_stats["pos_counts"], value_name="count", ndigits=0
- )
- )
- f.write("### 2.2 Action Distribution\n\n")
- f.write(
- _series_to_md(
- representativity_stats["act_counts"], value_name="count", ndigits=0
- )
- )
+# === PBRS HELPER FUNCTIONS ===
- f.write("### 2.3 Critical Regime Coverage\n\n")
- f.write("| Regime | Coverage |\n")
- f.write("|--------|----------|\n")
- f.write(
- f"| PnL > target | {representativity_stats['pnl_above_target']:.1%} |\n"
- )
- f.write(
- f"| PnL near target (±20%) | {representativity_stats['pnl_near_target']:.1%} |\n"
- )
- f.write(
- f"| Duration overage (>1.0) | {representativity_stats['duration_overage_share']:.1%} |\n"
- )
- f.write(
- f"| Extreme PnL (\\|pnl\\|≥0.14) | {representativity_stats['pnl_extreme']:.1%} |\n"
- )
- f.write("\n")
- f.write("### 2.4 Component Activation Rates\n\n")
- f.write("| Component | Activation Rate |\n")
- f.write("|-----------|----------------|\n")
- f.write(f"| Idle penalty | {representativity_stats['idle_activated']:.1%} |\n")
- f.write(f"| Hold penalty | {representativity_stats['hold_activated']:.1%} |\n")
- f.write(f"| Exit reward | {representativity_stats['exit_activated']:.1%} |\n")
- f.write("\n")
+def _get_potential_gamma(params: RewardParams) -> float:
+ """Return validated potential_gamma.
- # Section 3: Reward Component Relationships
- f.write("---\n\n")
- f.write("## 3. Reward Component Analysis\n\n")
- f.write(
- "Analysis of how reward components behave under different conditions.\n\n"
+ Process:
+    - NaN (missing or unparsable) -> POTENTIAL_GAMMA_DEFAULT, with warning.
+    - Outside [0,1] -> clamped, with a warning that includes the original value.
+    - The returned float is guaranteed to lie in [0,1].
+ """
+ gamma = _get_float_param(params, "potential_gamma", np.nan)
+ if not np.isfinite(gamma):
+ warnings.warn(
+ f"potential_gamma not specified or invalid; defaulting to {POTENTIAL_GAMMA_DEFAULT}",
+ RewardDiagnosticsWarning,
+ stacklevel=2,
)
+ return POTENTIAL_GAMMA_DEFAULT
+ if gamma < 0.0 or gamma > 1.0:
+ original = gamma
+ gamma = float(np.clip(gamma, 0.0, 1.0))
+ warnings.warn(
+ f"potential_gamma={original} outside [0,1]; clamped to {gamma}",
+ RewardDiagnosticsWarning,
+ stacklevel=2,
+ )
+ return gamma
+ return float(gamma)
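+
+# Illustrative: {"potential_gamma": 1.5} -> 1.0 (clamped, with warning);
+# key absent -> POTENTIAL_GAMMA_DEFAULT (with warning).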
- f.write("### 3.1 Idle Penalty vs Duration\n\n")
- if relationship_stats["idle_stats"].empty:
- f.write("_No idle samples present._\n\n")
- else:
- idle_df = relationship_stats["idle_stats"].copy()
- if idle_df.index.name is None:
- idle_df.index.name = "bin"
- f.write(_df_to_md(idle_df, index_name=idle_df.index.name, ndigits=6))
-
- f.write("### 3.2 Hold Penalty vs Trade Duration\n\n")
- if relationship_stats["hold_stats"].empty:
- f.write("_No hold samples present._\n\n")
- else:
- hold_df = relationship_stats["hold_stats"].copy()
- if hold_df.index.name is None:
- hold_df.index.name = "bin"
- f.write(_df_to_md(hold_df, index_name=hold_df.index.name, ndigits=6))
- f.write("### 3.3 Exit Reward vs PnL\n\n")
- if relationship_stats["exit_stats"].empty:
- f.write("_No exit samples present._\n\n")
- else:
- exit_df = relationship_stats["exit_stats"].copy()
- if exit_df.index.name is None:
- exit_df.index.name = "bin"
- f.write(_df_to_md(exit_df, index_name=exit_df.index.name, ndigits=6))
+# === PBRS IMPLEMENTATION ===
- f.write("### 3.4 Correlation Matrix\n\n")
- f.write("Pearson correlation coefficients between key metrics:\n\n")
- corr_df = relationship_stats["correlation"].copy()
- if corr_df.index.name is None:
- corr_df.index.name = "feature"
- f.write(_df_to_md(corr_df, index_name=corr_df.index.name, ndigits=4))
- _dropped = relationship_stats.get("correlation_dropped") or []
- if _dropped:
- f.write(
- "\n_Constant features removed (no variance): "
- + ", ".join(_dropped)
- + "._\n\n"
- )
- # Section 3.5: PBRS Analysis
- f.write("### 3.5 PBRS (Potential-Based Reward Shaping) Analysis\n\n")
+def _compute_hold_potential(
+ pnl: float, duration_ratio: float, params: RewardParams
+) -> float:
+ """Compute PBRS hold potential Φ(s)."""
+ if not _get_bool_param(
+ params,
+ "hold_potential_enabled",
+ bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("hold_potential_enabled", True)),
+ ):
+ return _fail_safely("hold_potential_disabled")
+ return _compute_bi_component(
+ kind="hold_potential",
+ pnl=pnl,
+ duration_ratio=duration_ratio,
+ params=params,
+ scale_key="hold_potential_scale",
+ gain_key="hold_potential_gain",
+ transform_pnl_key="hold_potential_transform_pnl",
+ transform_dur_key="hold_potential_transform_duration",
+ non_finite_key="non_finite_hold_potential",
+ )
- # Check if PBRS components are present in the data
- pbrs_components = [
- "reward_shaping",
- "reward_entry_additive",
- "reward_exit_additive",
- ]
- pbrs_present = all(col in df.columns for col in pbrs_components)
- if pbrs_present:
- # PBRS activation rates
- pbrs_activation = {}
- for comp in pbrs_components:
- pbrs_activation[comp.replace("reward_", "")] = (df[comp] != 0).mean()
-
- f.write("**PBRS Component Activation Rates:**\n\n")
- f.write("| Component | Activation Rate | Description |\n")
- f.write("|-----------|-----------------|-------------|\n")
- f.write(
- f"| Shaping (Φ) | {pbrs_activation['shaping']:.1%} | Potential-based reward shaping |\n"
- )
- f.write(
- f"| Entry Additive | {pbrs_activation['entry_additive']:.1%} | Non-PBRS entry reward |\n"
- )
- f.write(
- f"| Exit Additive | {pbrs_activation['exit_additive']:.1%} | Non-PBRS exit reward |\n"
- )
- f.write("\n")
-
- # PBRS statistics
- f.write("**PBRS Component Statistics:**\n\n")
- pbrs_stats = df[pbrs_components].describe(
- percentiles=[0.1, 0.25, 0.5, 0.75, 0.9]
- )
- pbrs_stats_df = pbrs_stats.round(
- 6
- ).T # Transpose to make it DataFrame-compatible
- pbrs_stats_df.index.name = "component"
- f.write(_df_to_md(pbrs_stats_df, index_name="component", ndigits=6))
+def _compute_entry_additive(
+ pnl: float, duration_ratio: float, params: RewardParams
+) -> float:
+ if not _get_bool_param(
+ params,
+ "entry_additive_enabled",
+ bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("entry_additive_enabled", False)),
+ ):
+ return _fail_safely("entry_additive_disabled")
+ return _compute_bi_component(
+ kind="entry_additive",
+ pnl=pnl,
+ duration_ratio=duration_ratio,
+ params=params,
+ scale_key="entry_additive_scale",
+ gain_key="entry_additive_gain",
+ transform_pnl_key="entry_additive_transform_pnl",
+ transform_dur_key="entry_additive_transform_duration",
+ non_finite_key="non_finite_entry_additive",
+ )
- # PBRS invariance check
- total_shaping = df["reward_shaping"].sum()
- entry_add_total = df.get("reward_entry_additive", pd.Series([0])).sum()
- exit_add_total = df.get("reward_exit_additive", pd.Series([0])).sum()
- # Get configuration for proper invariance assessment
- reward_params = (
- df.attrs.get("reward_params", {}) if hasattr(df, "attrs") else {}
- )
- exit_potential_mode = reward_params.get("exit_potential_mode", "canonical")
- entry_additive_enabled = reward_params.get("entry_additive_enabled", False)
- exit_additive_enabled = reward_params.get("exit_additive_enabled", False)
+def _compute_exit_additive(
+ pnl: float, duration_ratio: float, params: RewardParams
+) -> float:
+ if not _get_bool_param(
+ params,
+ "exit_additive_enabled",
+ bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_additive_enabled", False)),
+ ):
+ return _fail_safely("exit_additive_disabled")
+ return _compute_bi_component(
+ kind="exit_additive",
+ pnl=pnl,
+ duration_ratio=duration_ratio,
+ params=params,
+ scale_key="exit_additive_scale",
+ gain_key="exit_additive_gain",
+ transform_pnl_key="exit_additive_transform_pnl",
+ transform_dur_key="exit_additive_transform_duration",
+ non_finite_key="non_finite_exit_additive",
+ )
- # True invariance requires canonical mode AND no additives
- is_theoretically_invariant = exit_potential_mode == "canonical" and not (
- entry_additive_enabled or exit_additive_enabled
- )
- shaping_near_zero = abs(total_shaping) < PBRS_INVARIANCE_TOL
- # Prepare invariance summary markdown block
- if is_theoretically_invariant:
- if shaping_near_zero:
- invariance_status = "✅ Canonical"
- invariance_note = "Theoretical invariance preserved (canonical mode, no additives, Σ≈0)"
- else:
- invariance_status = "⚠️ Canonical (with warning)"
- invariance_note = f"Canonical mode but unexpected shaping sum = {total_shaping:.6f}"
- else:
- invariance_status = "❌ Non-canonical"
- reasons = []
- if exit_potential_mode != "canonical":
- reasons.append(f"exit_potential_mode='{exit_potential_mode}'")
- if entry_additive_enabled or exit_additive_enabled:
- additive_types = []
- if entry_additive_enabled:
- additive_types.append("entry")
- if exit_additive_enabled:
- additive_types.append("exit")
- reasons.append(f"additives={additive_types}")
- invariance_note = f"Modified for flexibility: {', '.join(reasons)}"
+def _compute_exit_potential(last_potential: float, params: RewardParams) -> float:
+ """Compute next potential Φ(s') for closing/exit transitions.
- # Summarize PBRS invariance
- f.write("**PBRS Invariance Summary:**\n\n")
- f.write("| Field | Value |\n")
- f.write("|-------|-------|\n")
- f.write(f"| Invariance Status | {invariance_status} |\n")
- f.write(f"| Analysis Note | {invariance_note} |\n")
- f.write(f"| Exit Potential Mode | {exit_potential_mode} |\n")
- f.write(f"| Entry Additive Enabled | {entry_additive_enabled} |\n")
- f.write(f"| Exit Additive Enabled | {exit_additive_enabled} |\n")
- f.write(f"| Σ Shaping Reward | {total_shaping:.6f} |\n")
- f.write(f"| Abs Σ Shaping Reward | {abs(total_shaping):.6e} |\n")
- f.write(f"| Σ Entry Additive | {entry_add_total:.6f} |\n")
- f.write(f"| Σ Exit Additive | {exit_add_total:.6f} |\n\n")
+ Semantics:
+ - canonical: Φ' = 0.0 (preserves invariance, disables additives)
+ - non-canonical: Φ' = 0.0 (allows additives, breaks invariance)
+ - progressive_release: Φ' = Φ * (1 - decay) with decay clamped to [0,1]
+    - spike_cancel: Φ' = Φ / γ if γ > 0 else Φ (makes γΦ' - Φ = 0, cancelling the terminal spike)
+ - retain_previous: Φ' = Φ
- else:
- f.write("_PBRS components not present in this analysis._\n\n")
+ Invalid modes fall back to canonical. Any non-finite resulting potential is
+ coerced to 0.0.
+ """
+ mode = _get_str_param(
+ params,
+ "exit_potential_mode",
+ str(DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_potential_mode", "canonical")),
+ )
+    if mode in ("canonical", "non-canonical"):
+ return _fail_safely("canonical_exit_potential")
- # Section 4: Feature Importance Analysis
- f.write("---\n\n")
- f.write("## 4. Feature Importance\n\n")
- if skip_feature_analysis or len(df) < 4:
- reason = []
- if skip_feature_analysis:
- reason.append("flag --skip-feature-analysis set")
- if len(df) < 4:
- reason.append("insufficient samples <4")
- reason_str = "; ".join(reason) if reason else "skipped"
- f.write(f"_Skipped ({reason_str})._\n\n")
- if skip_partial_dependence:
- f.write(
- "_Note: --skip_partial_dependence is redundant when feature analysis is skipped._\n\n"
- )
- else:
- f.write(
- "Machine learning analysis to identify which features most influence total reward.\n\n"
+ if mode == "progressive_release":
+ decay = _get_float_param(
+ params,
+ "exit_potential_decay",
+ DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_potential_decay", 0.5),
+ )
+ if not np.isfinite(decay):
+ warnings.warn(
+ "exit_potential_decay invalid (NaN/inf); defaulting to 0.5",
+ RewardDiagnosticsWarning,
+ stacklevel=2,
)
- f.write("**Model:** Random Forest Regressor (400 trees) \n")
- f.write(f"**R² Score:** {analysis_stats['r2_score']:.4f}\n\n")
-
- f.write("### 4.1 Top 10 Features by Importance\n\n")
- top_imp = importance_df.head(10).copy().reset_index(drop=True)
- # Render as markdown without index column
- header = "| feature | importance_mean | importance_std |\n"
- sep = "|---------|------------------|----------------|\n"
- rows = []
- for _, r in top_imp.iterrows():
- rows.append(
- f"| {r['feature']} | {_fmt_val(r['importance_mean'], 6)} | {_fmt_val(r['importance_std'], 6)} |"
- )
- f.write(header + sep + "\n".join(rows) + "\n\n")
- f.write("**Exported Data:**\n")
- f.write("- Full feature importance: `feature_importance.csv`\n")
- if not skip_partial_dependence:
- f.write("- Partial dependence plots: `partial_dependence_*.csv`\n\n")
- else:
- f.write(
- "- Partial dependence plots: (skipped via --skip_partial_dependence)\n\n"
- )
-
- # Section 5: Statistical Validation
- if hypothesis_tests:
- f.write("---\n\n")
- f.write("## 5. Statistical Validation\n\n")
- f.write(
- "Rigorous statistical tests to validate reward behavior and relationships.\n\n"
+ decay = 0.5
+ if decay < 0.0:
+ warnings.warn(
+ f"exit_potential_decay={decay} < 0; clamped to 0.0",
+ RewardDiagnosticsWarning,
+ stacklevel=2,
+ )
+ decay = 0.0
+ if decay > 1.0:
+ warnings.warn(
+ f"exit_potential_decay={decay} > 1; clamped to 1.0",
+ RewardDiagnosticsWarning,
+ stacklevel=2,
)
+ decay = 1.0
+ next_potential = last_potential * (1.0 - decay)
+ elif mode == "spike_cancel":
+ gamma = _get_potential_gamma(params)
+ if gamma <= 0.0 or not np.isfinite(gamma):
+ next_potential = last_potential
+ else:
+ next_potential = last_potential / gamma
+ elif mode == "retain_previous":
+ next_potential = last_potential
+ else:
+ next_potential = _fail_safely("invalid_exit_potential_mode")
- f.write("### 5.1 Hypothesis Tests\n\n")
+ if not np.isfinite(next_potential):
+ next_potential = _fail_safely("non_finite_next_exit_potential")
+ return float(next_potential)
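+
+# Worked examples: progressive_release with Φ=0.8, decay=0.25 -> Φ' = 0.6;
+# spike_cancel with Φ=0.8, γ=0.95 -> Φ' = 0.8/0.95 ≈ 0.8421, making the
+# terminal shaping term γΦ' - Φ ≈ 0 (spike neutralized).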
- if "idle_correlation" in hypothesis_tests:
- h = hypothesis_tests["idle_correlation"]
- f.write("#### 5.1.1 Idle Duration → Idle Penalty Correlation\n\n")
- f.write(f"**Test Method:** {h['test']}\n\n")
- f.write(f"- Spearman ρ: **{h['rho']:.4f}**\n")
- f.write(f"- p-value: {h['p_value']:.4g}\n")
- if "p_value_adj" in h:
- f.write(
- f"- p-value (adj BH): {h['p_value_adj']:.4g} -> {'✅ Yes' if h['significant_adj'] else '❌ No'} (α=0.05)\n"
- )
- f.write(f"- 95% CI: [{h['ci_95'][0]:.4f}, {h['ci_95'][1]:.4f}]\n")
- f.write(f"- Sample size: {h['n_samples']:,}\n")
- f.write(
- f"- Significant (α=0.05): {'✅ Yes' if h['significant'] else '❌ No'}\n"
- )
- f.write(f"- **Interpretation:** {h['interpretation']}\n\n")
- if "position_reward_difference" in hypothesis_tests:
- h = hypothesis_tests["position_reward_difference"]
- f.write("#### 5.1.2 Position-Based Reward Differences\n\n")
- f.write(f"**Test Method:** {h['test']}\n\n")
- f.write(f"- H-statistic: **{h['statistic']:.4f}**\n")
- f.write(f"- p-value: {h['p_value']:.4g}\n")
- if "p_value_adj" in h:
- f.write(
- f"- p-value (adj BH): {h['p_value_adj']:.4g} -> {'✅ Yes' if h['significant_adj'] else '❌ No'} (α=0.05)\n"
- )
- f.write(f"- Effect size (ε²): {h['effect_size_epsilon_sq']:.4f}\n")
- f.write(f"- Number of groups: {h['n_groups']}\n")
- f.write(
- f"- Significant (α=0.05): {'✅ Yes' if h['significant'] else '❌ No'}\n"
- )
- f.write(f"- **Interpretation:** {h['interpretation']} effect\n\n")
+def apply_potential_shaping(
+ base_reward: float,
+ current_pnl: float,
+ current_duration_ratio: float,
+ next_pnl: float,
+ next_duration_ratio: float,
+ is_terminal: bool,
+ last_potential: float,
+ params: RewardParams,
+) -> tuple[float, float, float]:
+ """
+    Apply potential-based reward shaping (PBRS) following Ng et al. (1999).
- if "pnl_sign_reward_difference" in hypothesis_tests:
- h = hypothesis_tests["pnl_sign_reward_difference"]
- f.write("#### 5.1.3 Positive vs Negative PnL Comparison\n\n")
- f.write(f"**Test Method:** {h['test']}\n\n")
- f.write(f"- U-statistic: **{h['statistic']:.4f}**\n")
- f.write(f"- p-value: {h['p_value']:.4g}\n")
- if "p_value_adj" in h:
- f.write(
- f"- p-value (adj BH): {h['p_value_adj']:.4g} -> {'✅ Yes' if h['significant_adj'] else '❌ No'} (α=0.05)\n"
- )
- f.write(f"- Median (PnL+): {h['median_pnl_positive']:.4f}\n")
- f.write(f"- Median (PnL-): {h['median_pnl_negative']:.4f}\n")
- f.write(
- f"- Significant (α=0.05): {'✅ Yes' if h['significant'] else '❌ No'}\n\n"
- )
+ Implements: R'(s,a,s') = R_base(s,a,s') + γΦ(s') - Φ(s)
+ """
+ params = _enforce_pbrs_invariance(params)
+ gamma = _get_potential_gamma(params)
+ current_potential = _compute_hold_potential(
+ current_pnl, current_duration_ratio, params
+ )
+ if is_terminal:
+ next_potential = _compute_exit_potential(last_potential, params)
+ else:
+ next_potential = _compute_hold_potential(next_pnl, next_duration_ratio, params)
+ shaping_reward = gamma * next_potential - current_potential
+ entry_additive = _compute_entry_additive(
+ current_pnl, current_duration_ratio, params
+ )
+ exit_additive = (
+ _compute_exit_additive(next_pnl, next_duration_ratio, params)
+ if is_terminal
+ else 0.0
+ )
+ total_reward = base_reward + shaping_reward + entry_additive + exit_additive
+ if not np.isfinite(total_reward):
+ return float(base_reward), 0.0, 0.0
+ if np.isclose(shaping_reward, 0.0):
+ shaping_reward = 0.0
+ return float(total_reward), float(shaping_reward), float(next_potential)
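+
+# Sanity note: each step contributes γΦ(s') - Φ(s). With Φ(s_0) = 0 and a
+# canonical exit (Φ_terminal = 0), the episode sum telescopes to
+# (γ - 1) · Σ(intermediate Φ), which vanishes as γ -> 1; this is the quantity
+# the PBRS_INVARIANCE_TOL classification tests against.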
- # Bootstrap CI
- if bootstrap_ci:
- f.write("### 5.2 Confidence Intervals\n\n")
- f.write(
- "Bootstrap confidence intervals (95%, 10,000 resamples) for key metrics:\n\n"
- )
- f.write("| Metric | Mean | 95% CI Lower | 95% CI Upper | Width |\n")
- f.write("|--------|------|--------------|--------------|-------|\n")
- for metric, (mean, ci_low, ci_high) in bootstrap_ci.items():
- width = ci_high - ci_low
- f.write(
- f"| {metric} | {mean:.6f} | {ci_low:.6f} | {ci_high:.6f} | {width:.6f} |\n"
- )
- f.write("\n")
- # Distribution diagnostics
- if dist_diagnostics:
- f.write("### 5.3 Distribution Normality Tests\n\n")
- f.write("Statistical tests for normality of key distributions:\n\n")
- for col in ["reward_total", "pnl", "trade_duration"]:
- if f"{col}_mean" in dist_diagnostics:
- f.write(f"#### {col.replace('_', ' ').title()}\n\n")
- f.write("| Metric | Value |\n")
- f.write("|--------|-------|\n")
- f.write(f"| Mean | {dist_diagnostics[f'{col}_mean']:.4f} |\n")
- f.write(f"| Std Dev | {dist_diagnostics[f'{col}_std']:.4f} |\n")
- f.write(
- f"| Skewness | {dist_diagnostics[f'{col}_skewness']:.4f} |\n"
- )
- f.write(
- f"| Kurtosis | {dist_diagnostics[f'{col}_kurtosis']:.4f} |\n"
- )
- if f"{col}_shapiro_pval" in dist_diagnostics:
- is_normal = (
- "✅ Yes"
- if dist_diagnostics[f"{col}_is_normal_shapiro"]
- else "❌ No"
- )
- f.write(
- f"| Normal? (Shapiro-Wilk) | {is_normal} (p={dist_diagnostics[f'{col}_shapiro_pval']:.4e}) |\n"
- )
- if f"{col}_qq_r_squared" in dist_diagnostics:
- f.write(
- f"| Q-Q Plot R² | {dist_diagnostics[f'{col}_qq_r_squared']:.4f} |\n"
- )
- f.write("\n")
+def _enforce_pbrs_invariance(params: RewardParams) -> RewardParams:
+ """Enforce PBRS invariance by auto-disabling additives in canonical mode."""
+ mode = _get_str_param(
+ params,
+ "exit_potential_mode",
+ str(DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_potential_mode", "canonical")),
+ )
+ if mode != "canonical":
+ return params
+ if params.get("_pbrs_invariance_applied"):
+ return params
+ entry_enabled = _get_bool_param(
+ params,
+ "entry_additive_enabled",
+ bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("entry_additive_enabled", False)),
+ )
+ exit_enabled = _get_bool_param(
+ params,
+ "exit_additive_enabled",
+ bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_additive_enabled", False)),
+ )
+ if entry_enabled:
+ warnings.warn(
+ "Disabling entry additive to preserve PBRS invariance (canonical mode).",
+ RewardDiagnosticsWarning,
+ stacklevel=2,
+ )
+ params["entry_additive_enabled"] = False
+ if exit_enabled:
+ warnings.warn(
+ "Disabling exit additive to preserve PBRS invariance (canonical mode).",
+ RewardDiagnosticsWarning,
+ stacklevel=2,
+ )
+ params["exit_additive_enabled"] = False
+ params["_pbrs_invariance_applied"] = True
+ return params
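+
+# e.g. {"exit_potential_mode": "canonical", "entry_additive_enabled": True}
+# -> entry additive disabled (with warning) and "_pbrs_invariance_applied"
+# set, making repeated enforcement idempotent.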
- # Distribution shift (if real data provided)
- if distribution_shift:
- f.write("### 5.4 Distribution Shift Analysis\n\n")
- f.write("Comparison between synthetic and real data distributions:\n\n")
- f.write(
- "| Feature | KL Div | JS Dist | Wasserstein | KS Stat | KS p-value |\n"
- )
- f.write(
- "|---------|--------|---------|-------------|---------|------------|\n"
- )
- features = ["pnl", "trade_duration", "idle_duration"]
- for feature in features:
- kl = distribution_shift.get(
- f"{feature}_kl_divergence", float("nan")
- )
- js = distribution_shift.get(f"{feature}_js_distance", float("nan"))
- ws = distribution_shift.get(f"{feature}_wasserstein", float("nan"))
- ks_stat = distribution_shift.get(
- f"{feature}_ks_statistic", float("nan")
- )
- ks_p = distribution_shift.get(f"{feature}_ks_pvalue", float("nan"))
+def _compute_bi_component(
+ kind: str,
+ pnl: float,
+ duration_ratio: float,
+ params: RewardParams,
+ scale_key: str,
+ gain_key: str,
+ transform_pnl_key: str,
+ transform_dur_key: str,
+ non_finite_key: str,
+) -> float:
+ """Generic helper for (pnl, duration) bi-component transforms."""
+ scale = _get_float_param(params, scale_key, 1.0)
+ gain = _get_float_param(params, gain_key, 1.0)
+ transform_pnl = _get_str_param(params, transform_pnl_key, "tanh")
+ transform_duration = _get_str_param(params, transform_dur_key, "tanh")
+ t_pnl = apply_transform(transform_pnl, gain * pnl)
+ t_dur = apply_transform(transform_duration, gain * duration_ratio)
+ value = scale * 0.5 * (t_pnl + t_dur)
+ if not np.isfinite(value):
+ return _fail_safely(non_finite_key)
+ return float(value)
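+
+# Worked example (defaults assumed: scale=1, gain=1, tanh transforms):
+# pnl=0.02, duration_ratio=0.5 -> 0.5 * (tanh(0.02) + tanh(0.5)) ≈ 0.241.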
- f.write(
- f"| {feature} | {kl:.4f} | {js:.4f} | {ws:.4f} | {ks_stat:.4f} | {ks_p:.4g} |\n"
- )
- f.write("\n")
- f.write("**Interpretation Guide:**\n\n")
- f.write("| Metric | Threshold | Meaning |\n")
- f.write("|--------|-----------|--------|\n")
- f.write("| KL Divergence | < 0.3 | ✅ Yes: Good representativeness |\n")
- f.write("| JS Distance | < 0.2 | ✅ Yes: Similar distributions |\n")
- f.write(
- "| KS p-value | > 0.05 | ✅ Yes: No significant difference |\n\n"
- )
- else:
- # Placeholder keeps numbering stable and explicit
- f.write("### 5.4 Distribution Shift Analysis\n\n")
- f.write("_Not performed (no real episodes provided)._\n\n")
- # Footer
- f.write("---\n\n")
- f.write("## Summary\n\n")
- f.write("This comprehensive report includes:\n\n")
- f.write(
- "1. **Global Statistics** - Overall reward distributions and component activation\n"
- )
- f.write(
- "2. **Sample Representativity** - Coverage of critical market scenarios\n"
- )
- f.write(
- "3. **Component Analysis** - Relationships between rewards and conditions (including PBRS)\n"
- )
- if skip_feature_analysis or len(df) < 4:
- f.write(
- "4. **Feature Importance** - (skipped) Machine learning analysis of key drivers\n"
- )
- else:
- f.write(
- "4. **Feature Importance** - Machine learning analysis of key drivers\n"
- )
- f.write(
- "5. **Statistical Validation** - Hypothesis tests and confidence intervals\n"
- )
- if distribution_shift:
- f.write("6. **Distribution Shift** - Comparison with real trading data\n")
- else:
- f.write(
- "6. **Distribution Shift** - Not performed (no real episodes provided)\n"
- )
- if "reward_shaping" in df.columns:
- _total_shaping = df["reward_shaping"].sum()
- _canonical = abs(_total_shaping) < 1e-6
- f.write(
- "7. **PBRS Invariance** - "
- + (
- "Canonical (Σ shaping ≈ 0)"
- if _canonical
- else f"Non-canonical (Σ shaping = {_total_shaping:.6f})"
- )
- + "\n"
- )
- f.write("\n")
- f.write("**Generated Files:**\n")
- f.write("- `reward_samples.csv` - Raw synthetic samples\n")
- if not skip_feature_analysis and len(df) >= 4:
- f.write(
- "- `feature_importance.csv` - Complete feature importance rankings\n"
- )
- f.write(
- "- `partial_dependence_*.csv` - Partial dependence data for visualization\n"
- )
+def build_argument_parser() -> argparse.ArgumentParser:
+ parser = argparse.ArgumentParser(
+ description="Synthetic stress-test of the ReforceXY reward shaping logic."
+ )
+ parser.add_argument(
+ "--skip-feature-analysis",
+ action="store_true",
+ help="Skip feature importance and model-based analysis for all scenarios.",
+ )
+ parser.add_argument(
+ "--num_samples",
+ type=int,
+ default=20000,
+ help="Number of synthetic state/action samples to generate (default: 20000).",
+ )
+ parser.add_argument(
+ "--seed",
+ type=int,
+ default=42,
+ help="Random seed for reproducibility (default: 42).",
+ )
+ parser.add_argument(
+ "--skip_partial_dependence",
+ action="store_true",
+ help="Skip partial dependence computation to speed up analysis.",
+ )
+ parser.add_argument(
+ "--stats_seed",
+ type=int,
+ default=None,
+ help="Optional separate seed for statistical analyses (default: same as --seed).",
+ )
+ parser.add_argument(
+ "--max_trade_duration",
+ type=int,
+ default=128,
+ help="Configured trade timeout in candles (default: 128).",
+ )
+ parser.add_argument(
+ "--base_factor",
+ type=float,
+ default=100.0,
+ help="Base reward factor used inside the environment (default: 100).",
+ )
+ parser.add_argument(
+ "--profit_target",
+ type=float,
+ default=0.03,
+ help="Target profit threshold (default: 0.03).",
+ )
+ parser.add_argument(
+ "--risk_reward_ratio",
+ type=float,
+ default=1.0,
+ help="Risk reward ratio multiplier (default: 1.0).",
+ )
+ parser.add_argument(
+ "--max_duration_ratio",
+ type=float,
+ default=2.5,
+ help="Multiple of max duration used when sampling trade/idle durations.",
+ )
+ parser.add_argument(
+ "--pnl_base_std",
+ type=float,
+ default=0.02,
+ help="Base standard deviation for synthetic PnL generation (pre-scaling).",
+ )
+ parser.add_argument(
+ "--pnl_duration_vol_scale",
+ type=float,
+ default=0.5,
+ help="Scaling factor of additional PnL volatility proportional to trade duration ratio.",
+ )
+ parser.add_argument(
+ "--trading_mode",
+ type=str.lower,
+ choices=["spot", "margin", "futures"],
+ default="spot",
+ help=("Trading mode to simulate (spot disables shorts). Default: spot."),
+ )
+ parser.add_argument(
+ "--action_masking",
+ type=str,
+ choices=["true", "false", "1", "0", "yes", "no"],
+ default="true",
+ help="Enable action masking simulation (default: true).",
+ )
+ parser.add_argument(
+ "--output",
+ type=Path,
+ default=Path("reward_space_outputs"),
+ help="Output directory for artifacts (default: reward_space_outputs).",
+ )
+ parser.add_argument(
+ "--params",
+ nargs="*",
+ default=[],
+ metavar="KEY=VALUE",
+ help="Override reward parameters, e.g. hold_penalty_scale=0.5",
+ )
+ # Dynamically add CLI options for all tunables
+ add_tunable_cli_args(parser)
+ parser.add_argument(
+ "--real_episodes",
+ type=Path,
+ default=None,
+ help="Path to real episodes pickle for distribution shift analysis (optional).",
+ )
+ parser.add_argument(
+ "--pvalue_adjust",
+ type=str.lower,
+ choices=["none", "benjamini_hochberg"],
+ default="none",
+ help="Multiple testing correction method for hypothesis tests (default: none).",
+ )
+ parser.add_argument(
+ "--strict_diagnostics",
+ action="store_true",
+ help=(
+ "Enable fail-fast mode for statistical diagnostics: raise on zero-width bootstrap CIs or undefined "
+ "skewness/kurtosis/Anderson/Q-Q metrics produced by constant distributions instead of applying graceful replacements."
+ ),
+ )
+ parser.add_argument(
+ "--bootstrap_resamples",
+ type=int,
+ default=10000,
+ metavar="N",
+ help=(
+ "Number of bootstrap resamples for confidence intervals (default: 10000). "
+ "Lower this (e.g. 200-1000) for faster smoke tests; increase for more stable CI width estimates."
+ ),
+ )
+ return parser
+
+def write_complete_statistical_analysis(
+ df: pd.DataFrame,
+ output_dir: Path,
+ max_trade_duration: int,
+ profit_target: float,
+ seed: int,
+ real_df: Optional[pd.DataFrame] = None,
+ *,
+ adjust_method: str = "none",
+ stats_seed: Optional[int] = None,
+ strict_diagnostics: bool = False,
+ bootstrap_resamples: int = 10000,
+ skip_partial_dependence: bool = False,
+ skip_feature_analysis: bool = False,
+) -> None:
+ """Generate a single comprehensive statistical analysis report with enhanced tests."""
+ output_dir.mkdir(parents=True, exist_ok=True)
+ report_path = output_dir / "statistical_analysis.md"
-def main() -> None:
- parser = build_argument_parser()
- args = parser.parse_args()
+ # Helpers: consistent Markdown table renderers
+ def _fmt_val(v: Any, ndigits: int = 6) -> str:
+ try:
+ if isinstance(v, (int, np.integer)):
+ return f"{int(v)}"
+ if isinstance(v, (float, np.floating)):
+ if np.isnan(v):
+ return "NaN"
+ return f"{float(v):.{ndigits}f}"
+ return str(v)
+ except Exception:
+ return str(v)
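+ # Examples (following the rules above): _fmt_val(5) -> "5";
+ # _fmt_val(3.14159, 3) -> "3.142"; _fmt_val(float("nan")) -> "NaN".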
- params = dict(DEFAULT_MODEL_REWARD_PARAMETERS)
- # Merge CLI tunables first (only those explicitly provided)
- _tunable_keys = set(DEFAULT_MODEL_REWARD_PARAMETERS.keys())
- for key, value in vars(args).items():
- if key in _tunable_keys and value is not None:
- params[key] = value
- # Then apply --params KEY=VALUE overrides (highest precedence)
- params.update(parse_overrides(args.params))
+ def _series_to_md(
+ series: pd.Series, value_name: str = "value", ndigits: int = 6
+ ) -> str:
+ lines = [f"| Metric | {value_name} |", "|--------|-----------|"]
+ for k, v in series.items():
+ lines.append(f"| {k} | {_fmt_val(v, ndigits)} |")
+ return "\n".join(lines) + "\n\n"
- # Early parameter validation (moved before simulation for alignment with docs)
- params_validated, adjustments = validate_reward_parameters(params)
- params = params_validated
- if adjustments:
- # Compact adjustments summary (param: original->adjusted [reason])
- adj_lines = [
- f" - {k}: {v['original']} -> {v['adjusted']} ({v['reason']})"
- for k, v in adjustments.items()
- ]
- print("Parameter adjustments applied:\n" + "\n".join(adj_lines))
- # Normalize attenuation mode
- _normalize_and_validate_mode(params)
+ def _df_to_md(df: pd.DataFrame, index_name: str = "index", ndigits: int = 6) -> str:
+ if df is None or df.empty:
+ return "_No data._\n\n"
+ # Prepare header
+ cols = list(df.columns)
+ header = "| " + index_name + " | " + " | ".join(cols) + " |\n"
+ sep = "|" + "-" * (len(index_name) + 2)
+ for c in cols:
+ sep += "|" + "-" * (len(str(c)) + 2)
+ sep += "|\n"
+ # Rows
+ rows = []
+ for idx, row in df.iterrows():
+ vals = [_fmt_val(row[c], ndigits) for c in cols]
+ rows.append("| " + str(idx) + " | " + " | ".join(vals) + " |")
+ return header + sep + "\n".join(rows) + "\n\n"
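+ # Example: _df_to_md(pd.DataFrame({"a": [1]}, index=["x"])) renders
+ # | index | a |
+ # |-------|---|
+ # | x | 1 |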
- base_factor = _get_float_param(params, "base_factor", float(args.base_factor))
- profit_target = _get_float_param(params, "profit_target", float(args.profit_target))
- risk_reward_ratio = _get_float_param(
- params, "risk_reward_ratio", float(args.risk_reward_ratio)
+ # Compute all statistics
+ summary_stats = _compute_summary_stats(df)
+ relationship_stats = _compute_relationship_stats(df, max_trade_duration)
+ representativity_stats = _compute_representativity_stats(
+ df, profit_target, max_trade_duration
)
- cli_action_masking = _to_bool(args.action_masking)
- if "action_masking" in params:
- params["action_masking"] = _to_bool(params["action_masking"])
+ # Model analysis: skip if requested or not enough samples
+ importance_df = None
+ analysis_stats = None
+ partial_deps = {}
+ if skip_feature_analysis or len(df) < 4:
+ print("Skipping feature analysis: flag set or insufficient samples (<4).")
else:
- params["action_masking"] = cli_action_masking
-
- # Deterministic seeds cascade
- random.seed(args.seed)
- np.random.seed(args.seed)
+ importance_df, analysis_stats, partial_deps, _model = _perform_feature_analysis(
+ df, seed, skip_partial_dependence=skip_partial_dependence
+ )
+ # Save feature importance CSV
+ importance_df.to_csv(output_dir / "feature_importance.csv", index=False)
+ # Save partial dependence CSVs
+ if not skip_partial_dependence:
+ for feature, pd_df in partial_deps.items():
+ pd_df.to_csv(
+ output_dir / f"partial_dependence_{feature}.csv",
+ index=False,
+ )
- df = simulate_samples(
- num_samples=args.num_samples,
- seed=args.seed,
- params=params,
- max_trade_duration=args.max_trade_duration,
- base_factor=base_factor,
- profit_target=profit_target,
- risk_reward_ratio=risk_reward_ratio,
- max_duration_ratio=args.max_duration_ratio,
- trading_mode=args.trading_mode,
- pnl_base_std=args.pnl_base_std,
- pnl_duration_vol_scale=args.pnl_duration_vol_scale,
+ # Enhanced statistics
+ test_seed = (
+ stats_seed
+ if isinstance(stats_seed, int)
+ else (seed if isinstance(seed, int) else 42)
+ )
+ hypothesis_tests = statistical_hypothesis_tests(
+ df, adjust_method=adjust_method, seed=test_seed
)
- # Post-simulation critical NaN validation (non-PBRS structural columns)
- critical_cols = [
- "pnl",
- "trade_duration",
- "idle_duration",
- "position",
- "action",
+ metrics_for_ci = [
"reward_total",
- "reward_invalid",
"reward_idle",
"reward_hold",
"reward_exit",
+ "pnl",
]
- nan_issues = {
- c: int(df[c].isna().sum())
- for c in critical_cols
- if c in df.columns and df[c].isna().any()
- }
- if nan_issues:
- raise AssertionError(
- "NaN values detected in critical simulated columns: "
- + ", ".join(f"{k}={v}" for k, v in nan_issues.items())
- )
- # Attach simulation parameters for downstream manifest
- df.attrs["simulation_params"] = {
- "num_samples": args.num_samples,
- "seed": args.seed,
- "max_trade_duration": args.max_trade_duration,
- "base_factor": base_factor,
- "profit_target": profit_target,
- "risk_reward_ratio": risk_reward_ratio,
- "max_duration_ratio": args.max_duration_ratio,
- "trading_mode": args.trading_mode,
- "action_masking": params.get("action_masking", True),
- "pnl_base_std": args.pnl_base_std,
- "pnl_duration_vol_scale": args.pnl_duration_vol_scale,
- }
- # Attach resolved reward parameters for inline overrides rendering in report
- df.attrs["reward_params"] = dict(params)
-
- args.output.mkdir(parents=True, exist_ok=True)
- csv_path = args.output / "reward_samples.csv"
- df.to_csv(csv_path, index=False)
- sample_output_message = f"Samples saved to {csv_path}"
-
- # Load real episodes if provided
- real_df = None
- if args.real_episodes and args.real_episodes.exists():
- print(f"Loading real episodes from {args.real_episodes}...")
- real_df = load_real_episodes(args.real_episodes)
-
- # Generate consolidated statistical analysis report (with enhanced tests)
- print("Generating complete statistical analysis...")
-
- write_complete_statistical_analysis(
+ bootstrap_ci = bootstrap_confidence_intervals(
df,
- args.output,
- max_trade_duration=args.max_trade_duration,
- profit_target=float(profit_target * risk_reward_ratio),
- seed=args.seed,
- real_df=real_df,
- adjust_method=args.pvalue_adjust,
- stats_seed=args.stats_seed
- if getattr(args, "stats_seed", None) is not None
- else None,
- strict_diagnostics=bool(getattr(args, "strict_diagnostics", False)),
- bootstrap_resamples=getattr(args, "bootstrap_resamples", 10000),
- skip_partial_dependence=bool(getattr(args, "skip_partial_dependence", False)),
- skip_feature_analysis=bool(getattr(args, "skip_feature_analysis", False)),
+ metrics_for_ci,
+ n_bootstrap=int(bootstrap_resamples),
+ seed=test_seed,
+ strict_diagnostics=strict_diagnostics,
)
- print(
- f"Complete statistical analysis saved to: {args.output / 'statistical_analysis.md'}"
+ dist_diagnostics = distribution_diagnostics(
+ df, seed=test_seed, strict_diagnostics=strict_diagnostics
)
- # Generate manifest summarizing key metrics
- try:
- manifest_path = args.output / "manifest.json"
- resolved_reward_params = dict(params) # already validated/normalized upstream
- manifest = {
- "generated_at": pd.Timestamp.now().isoformat(),
- "num_samples": int(len(df)),
- "seed": int(args.seed),
- "max_trade_duration": int(args.max_trade_duration),
- "profit_target_effective": float(profit_target * risk_reward_ratio),
- "pvalue_adjust_method": args.pvalue_adjust,
- "parameter_adjustments": adjustments,
- "reward_params": resolved_reward_params,
- }
- sim_params = df.attrs.get("simulation_params", {})
- if isinstance(sim_params, dict) and sim_params:
- import hashlib as _hashlib
- import json as _json
- # Compose hash source from ALL simulation params and ALL resolved reward params for full reproducibility.
- _hash_source = {
- **{f"sim::{k}": sim_params[k] for k in sorted(sim_params)},
- **{
- f"reward::{k}": resolved_reward_params[k]
- for k in sorted(resolved_reward_params)
- },
- }
- serialized = _json.dumps(_hash_source, sort_keys=True)
- manifest["params_hash"] = _hashlib.sha256(
- serialized.encode("utf-8")
- ).hexdigest()
- manifest["simulation_params"] = sim_params
- with manifest_path.open("w", encoding="utf-8") as mh:
- import json as _json
+ distribution_shift = None
+ if real_df is not None:
+ distribution_shift = compute_distribution_shift_metrics(df, real_df)
- _json.dump(manifest, mh, indent=2)
- print(f"Manifest written to: {manifest_path}")
- except Exception as e:
- print(f"Manifest generation failed: {e}")
+ # Write comprehensive report
+ with report_path.open("w", encoding="utf-8") as f:
+ # Header
+ f.write("# Reward Space Analysis Report\n\n")
+ f.write("### Run Configuration\n\n")
+ f.write("| Key | Value |\n")
+ f.write("|-----|-------|\n")
+ f.write(f"| Generated | {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')} |\n")
+ f.write(f"| Total Samples | {len(df):,} |\n")
+ f.write(f"| Random Seed | {seed} |\n")
+ f.write(f"| Max Trade Duration | {max_trade_duration} |\n")
+ # Blank separator to visually group core simulation vs PBRS parameters
+ f.write("| | |\n")
+ # Extra core PBRS parameters exposed in run configuration if present
+ _rp = (
+ df.attrs.get("reward_params")
+ if isinstance(df.attrs.get("reward_params"), dict)
+ else {}
+ )
+ exit_mode = _get_str_param(
+ _rp,
+ "exit_potential_mode",
+ DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_potential_mode", "canonical"),
+ )
+ potential_gamma = _rp.get(
+ "potential_gamma",
+ DEFAULT_MODEL_REWARD_PARAMETERS.get(
+ "potential_gamma", POTENTIAL_GAMMA_DEFAULT
+ ),
+ )
+ f.write(f"| exit_potential_mode | {exit_mode} |\n")
+ f.write(f"| potential_gamma | {potential_gamma} |\n")
+ # Blank separator before overrides block
+ f.write("| | |\n")
- print(f"Generated {len(df):,} synthetic samples.")
- print(sample_output_message)
- print(f"Artifacts saved to: {args.output.resolve()}")
+ overrides_pairs = []
+ if _rp:
+ for k, default_v in DEFAULT_MODEL_REWARD_PARAMETERS.items():
+ if k in ("exit_potential_mode", "potential_gamma"):
+ continue # already printed explicitly
+ try:
+ if k in _rp and _rp[k] != default_v:
+ overrides_pairs.append(f"{k}={_rp[k]}")
+ except Exception:
+ continue
+ if overrides_pairs:
+ f.write(f"| Overrides | {', '.join(sorted(overrides_pairs))} |\n")
+ else:
+ f.write("| Overrides | (none) |\n")
+ f.write("\n")
+ f.write("---\n\n")
-# === PBRS TRANSFORM FUNCTIONS ===
+ # Section 1: Global Statistics
+ f.write("## 1. Global Statistics\n\n")
+ f.write("### 1.1 Reward Distribution\n\n")
+ f.write(
+ _series_to_md(
+ summary_stats["global_stats"], value_name="reward_total", ndigits=6
+ )
+ )
-def _apply_transform_tanh(value: float) -> float:
- """tanh: tanh(x) in (-1, 1)."""
- return float(math.tanh(value))
+ f.write("### 1.2 Reward Statistics by Action\n\n")
+ action_df = summary_stats["action_summary"].copy()
+ # Cast index to int for cleaner display (0,1,2,3,4)
+ action_df.index = action_df.index.astype(int)
+ if action_df.index.name is None:
+ action_df.index.name = "action"
+ f.write(_df_to_md(action_df, index_name=action_df.index.name, ndigits=6))
+
+ f.write("### 1.3 Component Activation Rates\n\n")
+ f.write("Percentage of samples where each reward component is non-zero:\n\n")
+ comp_share = summary_stats["component_share"].copy()
+ formatted_rows = [
+ "| Component | Activation Rate |",
+ "|-----------|----------------|",
+ ]
+ # Enforce deterministic logical ordering of components if present
+ preferred_order = [
+ "reward_invalid",
+ "reward_idle",
+ "reward_hold",
+ "reward_exit",
+ "reward_shaping",
+ "reward_entry_additive",
+ "reward_exit_additive",
+ ]
+ for comp in preferred_order:
+ if comp in comp_share.index:
+ val = comp_share.loc[comp]
+ formatted_rows.append(f"| {comp} | {val:.1%} |")
+ f.write("\n".join(formatted_rows) + "\n\n")
+
+ f.write("### 1.4 Component Value Ranges\n\n")
+ bounds_df = summary_stats["component_bounds"].copy()
+ if bounds_df.index.name is None:
+ bounds_df.index.name = "component"
+ f.write(_df_to_md(bounds_df, index_name=bounds_df.index.name, ndigits=6))
+ # Section 2: Representativity Analysis
+ f.write("---\n\n")
+ f.write("## 2. Sample Representativity\n\n")
+ f.write(
+ "This section evaluates whether the synthetic samples adequately represent "
+ )
+ f.write("the full reward space across different market scenarios.\n\n")
-def _apply_transform_softsign(value: float) -> float:
- """softsign: x / (1 + |x|) in (-1, 1)."""
- x = value
- return float(x / (1.0 + abs(x)))
+ f.write("### 2.1 Position Distribution\n\n")
+ f.write(
+ _series_to_md(
+ representativity_stats["pos_counts"], value_name="count", ndigits=0
+ )
+ )
+ f.write("### 2.2 Action Distribution\n\n")
+ f.write(
+ _series_to_md(
+ representativity_stats["act_counts"], value_name="count", ndigits=0
+ )
+ )
-def _apply_transform_arctan(value: float) -> float:
- """arctan: (2/pi) * arctan(x) in (-1, 1)."""
- return float((2.0 / math.pi) * math.atan(value))
+ f.write("### 2.3 Critical Regime Coverage\n\n")
+ f.write("| Regime | Coverage |\n")
+ f.write("|--------|----------|\n")
+ f.write(
+ f"| PnL > target | {representativity_stats['pnl_above_target']:.1%} |\n"
+ )
+ f.write(
+ f"| PnL near target (±20%) | {representativity_stats['pnl_near_target']:.1%} |\n"
+ )
+ f.write(
+ f"| Duration overage (>1.0) | {representativity_stats['duration_overage_share']:.1%} |\n"
+ )
+ f.write(
+ f"| Extreme PnL (\\|pnl\\|≥0.14) | {representativity_stats['pnl_extreme']:.1%} |\n"
+ )
+ f.write("\n")
+
+ f.write("### 2.4 Component Activation Rates\n\n")
+ f.write("| Component | Activation Rate |\n")
+ f.write("|-----------|----------------|\n")
+ f.write(f"| Idle penalty | {representativity_stats['idle_activated']:.1%} |\n")
+ f.write(f"| Hold penalty | {representativity_stats['hold_activated']:.1%} |\n")
+ f.write(f"| Exit reward | {representativity_stats['exit_activated']:.1%} |\n")
+ f.write("\n")
+ # Section 3: Reward Component Relationships
+ f.write("---\n\n")
+ f.write("## 3. Reward Component Analysis\n\n")
+ f.write(
+ "Analysis of how reward components behave under different conditions.\n\n"
+ )
-def _apply_transform_sigmoid(value: float) -> float:
- """sigmoid: 2σ(x) - 1, σ(x) = 1/(1 + e^(-x)) in (-1, 1)."""
- x = value
- try:
- if x >= 0:
- exp_neg_x = math.exp(-x)
- sigma_x = 1.0 / (1.0 + exp_neg_x)
+ f.write("### 3.1 Idle Penalty vs Duration\n\n")
+ if relationship_stats["idle_stats"].empty:
+ f.write("_No idle samples present._\n\n")
else:
- exp_x = math.exp(x)
- sigma_x = exp_x / (exp_x + 1.0)
- return 2.0 * sigma_x - 1.0
- except OverflowError:
- return 1.0 if x > 0 else -1.0
-
-
-def _apply_transform_asinh(value: float) -> float:
- """asinh: x / sqrt(1 + x^2) in (-1, 1)."""
- return float(value / math.hypot(1.0, value))
+ idle_df = relationship_stats["idle_stats"].copy()
+ if idle_df.index.name is None:
+ idle_df.index.name = "bin"
+ f.write(_df_to_md(idle_df, index_name=idle_df.index.name, ndigits=6))
+ f.write("### 3.2 Hold Penalty vs Trade Duration\n\n")
+ if relationship_stats["hold_stats"].empty:
+ f.write("_No hold samples present._\n\n")
+ else:
+ hold_df = relationship_stats["hold_stats"].copy()
+ if hold_df.index.name is None:
+ hold_df.index.name = "bin"
+ f.write(_df_to_md(hold_df, index_name=hold_df.index.name, ndigits=6))
-def _apply_transform_clip(value: float) -> float:
- """clip: clip(x, -1, 1) in [-1, 1]."""
- return float(np.clip(value, -1.0, 1.0))
+ f.write("### 3.3 Exit Reward vs PnL\n\n")
+ if relationship_stats["exit_stats"].empty:
+ f.write("_No exit samples present._\n\n")
+ else:
+ exit_df = relationship_stats["exit_stats"].copy()
+ if exit_df.index.name is None:
+ exit_df.index.name = "bin"
+ f.write(_df_to_md(exit_df, index_name=exit_df.index.name, ndigits=6))
+ f.write("### 3.4 Correlation Matrix\n\n")
+ f.write("Pearson correlation coefficients between key metrics:\n\n")
+ corr_df = relationship_stats["correlation"].copy()
+ if corr_df.index.name is None:
+ corr_df.index.name = "feature"
+ f.write(_df_to_md(corr_df, index_name=corr_df.index.name, ndigits=4))
+ _dropped = relationship_stats.get("correlation_dropped") or []
+ if _dropped:
+ f.write(
+ "\n_Constant features removed (no variance): "
+ + ", ".join(_dropped)
+ + "._\n\n"
+ )
-def apply_transform(transform_name: str, value: float, **kwargs: Any) -> float:
- """Apply named transform; unknown names fallback to tanh with warning."""
- transforms = {
- "tanh": _apply_transform_tanh,
- "softsign": _apply_transform_softsign,
- "arctan": _apply_transform_arctan,
- "sigmoid": _apply_transform_sigmoid,
- "asinh": _apply_transform_asinh,
- "clip": _apply_transform_clip,
- }
+ # Section 3.5: PBRS Analysis
+ f.write("### 3.5 PBRS (Potential-Based Reward Shaping) Analysis\n\n")
- if transform_name not in transforms:
- warnings.warn(
- f"Unknown potential transform '{transform_name}'; falling back to tanh",
- RewardDiagnosticsWarning,
- stacklevel=2,
- )
- return _apply_transform_tanh(value)
- return transforms[transform_name](value, **kwargs)
+ # Check if PBRS components are present in the data
+ pbrs_components = [
+ "reward_shaping",
+ "reward_entry_additive",
+ "reward_exit_additive",
+ ]
+ pbrs_present = all(col in df.columns for col in pbrs_components)
+ if pbrs_present:
+ # PBRS activation rates
+ pbrs_activation = {}
+ for comp in pbrs_components:
+ pbrs_activation[comp.replace("reward_", "")] = (df[comp] != 0).mean()
-# === PBRS HELPER FUNCTIONS ===
+ f.write("**PBRS Component Activation Rates:**\n\n")
+ f.write("| Component | Activation Rate | Description |\n")
+ f.write("|-----------|-----------------|-------------|\n")
+ f.write(
+ f"| Shaping (Φ) | {pbrs_activation['shaping']:.1%} | Potential-based reward shaping |\n"
+ )
+ f.write(
+ f"| Entry Additive | {pbrs_activation['entry_additive']:.1%} | Non-PBRS entry reward |\n"
+ )
+ f.write(
+ f"| Exit Additive | {pbrs_activation['exit_additive']:.1%} | Non-PBRS exit reward |\n"
+ )
+ f.write("\n")
+ # PBRS statistics
+ f.write("**PBRS Component Statistics:**\n\n")
+ pbrs_stats = df[pbrs_components].describe(
+ percentiles=[0.1, 0.25, 0.5, 0.75, 0.9]
+ )
+ pbrs_stats_df = pbrs_stats.round(
+ 6
+ ).T # Transpose to make it DataFrame-compatible
+ pbrs_stats_df.index.name = "component"
+ f.write(_df_to_md(pbrs_stats_df, index_name="component", ndigits=6))
-def _get_potential_gamma(params: RewardParams) -> float:
- """Return validated potential_gamma.
+ # PBRS invariance check
+ total_shaping = df["reward_shaping"].sum()
+ entry_add_total = df.get("reward_entry_additive", pd.Series([0])).sum()
+ exit_add_total = df.get("reward_exit_additive", pd.Series([0])).sum()
- Process:
- - If NaN -> default POTENTIAL_GAMMA_DEFAULT with warning (missing or unparsable).
- - If outside [0,1] -> clamp + warning including original value.
- - Guarantee returned float ∈ [0,1].
- """
- gamma = _get_float_param(params, "potential_gamma", np.nan)
- if not np.isfinite(gamma):
- warnings.warn(
- f"potential_gamma not specified or invalid; defaulting to {POTENTIAL_GAMMA_DEFAULT}",
- RewardDiagnosticsWarning,
- stacklevel=2,
- )
- return POTENTIAL_GAMMA_DEFAULT
- if gamma < 0.0 or gamma > 1.0:
- original = gamma
- gamma = float(np.clip(gamma, 0.0, 1.0))
- warnings.warn(
- f"potential_gamma={original} outside [0,1]; clamped to {gamma}",
- RewardDiagnosticsWarning,
- stacklevel=2,
- )
- return gamma
- return float(gamma)
+ # Get configuration for proper invariance assessment
+ reward_params = (
+ df.attrs.get("reward_params", {}) if hasattr(df, "attrs") else {}
+ )
+ exit_potential_mode = _get_str_param(
+ reward_params, "exit_potential_mode", "canonical"
+ )
+ entry_additive_enabled = _get_bool_param(
+ reward_params, "entry_additive_enabled", False
+ )
+ exit_additive_enabled = _get_bool_param(
+ reward_params, "exit_additive_enabled", False
+ )
+ # True invariance requires canonical mode AND no additives
+ is_theoretically_invariant = exit_potential_mode == "canonical" and not (
+ entry_additive_enabled or exit_additive_enabled
+ )
+ shaping_near_zero = abs(total_shaping) < PBRS_INVARIANCE_TOL
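+ # Classification sketch (mirrors the branches below):
+ #   canonical mode, no additives, |Σ shaping| < tol  -> ✅ Canonical
+ #   canonical mode, no additives, |Σ shaping| >= tol -> ⚠️ warning
+ #   anything else                                    -> ❌ Non-canonical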
-def _get_str_param(params: RewardParams, key: str, default: str) -> str:
- """Extract string parameter with type safety."""
- value = params.get(key, default)
- if isinstance(value, str):
- return value
- return default
+ # Prepare invariance summary markdown block
+ if is_theoretically_invariant:
+ if shaping_near_zero:
+ invariance_status = "✅ Canonical"
+ invariance_note = "Theoretical invariance preserved (canonical mode, no additives, Σ≈0)"
+ else:
+ invariance_status = "⚠️ Canonical (with warning)"
+ invariance_note = f"Canonical mode but unexpected shaping sum = {total_shaping:.6f}"
+ else:
+ invariance_status = "❌ Non-canonical"
+ reasons = []
+ if exit_potential_mode != "canonical":
+ reasons.append(f"exit_potential_mode='{exit_potential_mode}'")
+ if entry_additive_enabled or exit_additive_enabled:
+ additive_types = []
+ if entry_additive_enabled:
+ additive_types.append("entry")
+ if exit_additive_enabled:
+ additive_types.append("exit")
+ reasons.append(f"additives={additive_types}")
+ invariance_note = f"Modified for flexibility: {', '.join(reasons)}"
+ # Summarize PBRS invariance
+ f.write("**PBRS Invariance Summary:**\n\n")
+ f.write("| Field | Value |\n")
+ f.write("|-------|-------|\n")
+ f.write(f"| Invariance Status | {invariance_status} |\n")
+ f.write(f"| Analysis Note | {invariance_note} |\n")
+ f.write(f"| Exit Potential Mode | {exit_potential_mode} |\n")
+ f.write(f"| Entry Additive Enabled | {entry_additive_enabled} |\n")
+ f.write(f"| Exit Additive Enabled | {exit_additive_enabled} |\n")
+ f.write(f"| Σ Shaping Reward | {total_shaping:.6f} |\n")
+ f.write(f"| Abs Σ Shaping Reward | {abs(total_shaping):.6e} |\n")
+ f.write(f"| Σ Entry Additive | {entry_add_total:.6f} |\n")
+ f.write(f"| Σ Exit Additive | {exit_add_total:.6f} |\n\n")
-def _get_bool_param(params: RewardParams, key: str, default: bool) -> bool:
- """Extract boolean parameter with type safety."""
- value = params.get(key, default)
- try:
- return _to_bool(value)
- except Exception:
- return bool(default)
+ else:
+ f.write("_PBRS components not present in this analysis._\n\n")
+ # Section 4: Feature Importance Analysis
+ f.write("---\n\n")
+ f.write("## 4. Feature Importance\n\n")
+ if skip_feature_analysis or len(df) < 4:
+ reason = []
+ if skip_feature_analysis:
+ reason.append("flag --skip-feature-analysis set")
+ if len(df) < 4:
+ reason.append("insufficient samples <4")
+ reason_str = "; ".join(reason) if reason else "skipped"
+ f.write(f"_Skipped ({reason_str})._\n\n")
+ if skip_partial_dependence:
+ f.write(
+ "_Note: --skip_partial_dependence is redundant when feature analysis is skipped._\n\n"
+ )
+ else:
+ f.write(
+ "Machine learning analysis to identify which features most influence total reward.\n\n"
+ )
+ f.write("**Model:** Random Forest Regressor (400 trees) \n")
+ f.write(f"**R² Score:** {analysis_stats['r2_score']:.4f}\n\n")
-# === PBRS IMPLEMENTATION ===
+ f.write("### 4.1 Top 10 Features by Importance\n\n")
+ top_imp = importance_df.head(10).copy().reset_index(drop=True)
+ # Render as markdown without index column
+ header = "| feature | importance_mean | importance_std |\n"
+ sep = "|---------|------------------|----------------|\n"
+ rows = []
+ for _, r in top_imp.iterrows():
+ rows.append(
+ f"| {r['feature']} | {_fmt_val(r['importance_mean'], 6)} | {_fmt_val(r['importance_std'], 6)} |"
+ )
+ f.write(header + sep + "\n".join(rows) + "\n\n")
+ f.write("**Exported Data:**\n")
+ f.write("- Full feature importance: `feature_importance.csv`\n")
+ if not skip_partial_dependence:
+ f.write("- Partial dependence plots: `partial_dependence_*.csv`\n\n")
+ else:
+ f.write(
+ "- Partial dependence plots: (skipped via --skip_partial_dependence)\n\n"
+ )
+ # Section 5: Statistical Validation
+ if hypothesis_tests:
+ f.write("---\n\n")
+ f.write("## 5. Statistical Validation\n\n")
+ f.write(
+ "Rigorous statistical tests to validate reward behavior and relationships.\n\n"
+ )
-def _compute_hold_potential(
- pnl: float, duration_ratio: float, params: RewardParams
-) -> float:
- """Compute PBRS hold potential Φ(s)."""
- if not _get_bool_param(params, "hold_potential_enabled", True):
- return _fail_safely("hold_potential_disabled")
- return _compute_bi_component(
- kind="hold_potential",
- pnl=pnl,
- duration_ratio=duration_ratio,
- params=params,
- scale_key="hold_potential_scale",
- gain_key="hold_potential_gain",
- transform_pnl_key="hold_potential_transform_pnl",
- transform_dur_key="hold_potential_transform_duration",
- non_finite_key="non_finite_hold_potential",
- )
+ f.write("### 5.1 Hypothesis Tests\n\n")
+ if "idle_correlation" in hypothesis_tests:
+ h = hypothesis_tests["idle_correlation"]
+ f.write("#### 5.1.1 Idle Duration → Idle Penalty Correlation\n\n")
+ f.write(f"**Test Method:** {h['test']}\n\n")
+ f.write(f"- Spearman ρ: **{h['rho']:.4f}**\n")
+ f.write(f"- p-value: {h['p_value']:.4g}\n")
+ if "p_value_adj" in h:
+ f.write(
+ f"- p-value (adj BH): {h['p_value_adj']:.4g} -> {'✅ Yes' if h['significant_adj'] else '❌ No'} (α=0.05)\n"
+ )
+ f.write(f"- 95% CI: [{h['ci_95'][0]:.4f}, {h['ci_95'][1]:.4f}]\n")
+ f.write(f"- Sample size: {h['n_samples']:,}\n")
+ f.write(
+ f"- Significant (α=0.05): {'✅ Yes' if h['significant'] else '❌ No'}\n"
+ )
+ f.write(f"- **Interpretation:** {h['interpretation']}\n\n")
-def _compute_entry_additive(
- pnl: float, duration_ratio: float, params: RewardParams
-) -> float:
- if not _get_bool_param(params, "entry_additive_enabled", False):
- return _fail_safely("entry_additive_disabled")
- return _compute_bi_component(
- kind="entry_additive",
- pnl=pnl,
- duration_ratio=duration_ratio,
- params=params,
- scale_key="entry_additive_scale",
- gain_key="entry_additive_gain",
- transform_pnl_key="entry_additive_transform_pnl",
- transform_dur_key="entry_additive_transform_duration",
- non_finite_key="non_finite_entry_additive",
- )
+ if "position_reward_difference" in hypothesis_tests:
+ h = hypothesis_tests["position_reward_difference"]
+ f.write("#### 5.1.2 Position-Based Reward Differences\n\n")
+ f.write(f"**Test Method:** {h['test']}\n\n")
+ f.write(f"- H-statistic: **{h['statistic']:.4f}**\n")
+ f.write(f"- p-value: {h['p_value']:.4g}\n")
+ if "p_value_adj" in h:
+ f.write(
+ f"- p-value (adj BH): {h['p_value_adj']:.4g} -> {'✅ Yes' if h['significant_adj'] else '❌ No'} (α=0.05)\n"
+ )
+ f.write(f"- Effect size (ε²): {h['effect_size_epsilon_sq']:.4f}\n")
+ f.write(f"- Number of groups: {h['n_groups']}\n")
+ f.write(
+ f"- Significant (α=0.05): {'✅ Yes' if h['significant'] else '❌ No'}\n"
+ )
+ f.write(f"- **Interpretation:** {h['interpretation']} effect\n\n")
+ if "pnl_sign_reward_difference" in hypothesis_tests:
+ h = hypothesis_tests["pnl_sign_reward_difference"]
+ f.write("#### 5.1.3 Positive vs Negative PnL Comparison\n\n")
+ f.write(f"**Test Method:** {h['test']}\n\n")
+ f.write(f"- U-statistic: **{h['statistic']:.4f}**\n")
+ f.write(f"- p-value: {h['p_value']:.4g}\n")
+ if "p_value_adj" in h:
+ f.write(
+ f"- p-value (adj BH): {h['p_value_adj']:.4g} -> {'✅ Yes' if h['significant_adj'] else '❌ No'} (α=0.05)\n"
+ )
+ f.write(f"- Median (PnL+): {h['median_pnl_positive']:.4f}\n")
+ f.write(f"- Median (PnL-): {h['median_pnl_negative']:.4f}\n")
+ f.write(
+ f"- Significant (α=0.05): {'✅ Yes' if h['significant'] else '❌ No'}\n\n"
+ )
-def _compute_exit_additive(
- pnl: float, duration_ratio: float, params: RewardParams
-) -> float:
- if not _get_bool_param(params, "exit_additive_enabled", False):
- return _fail_safely("exit_additive_disabled")
- return _compute_bi_component(
- kind="exit_additive",
- pnl=pnl,
- duration_ratio=duration_ratio,
- params=params,
- scale_key="exit_additive_scale",
- gain_key="exit_additive_gain",
- transform_pnl_key="exit_additive_transform_pnl",
- transform_dur_key="exit_additive_transform_duration",
- non_finite_key="non_finite_exit_additive",
- )
+ # Bootstrap CI
+ if bootstrap_ci:
+ f.write("### 5.2 Confidence Intervals\n\n")
+ f.write(
+ f"Bootstrap confidence intervals (95%, {bootstrap_resamples:,} resamples) for key metrics:\n\n"
+ )
+ f.write("| Metric | Mean | 95% CI Lower | 95% CI Upper | Width |\n")
+ f.write("|--------|------|--------------|--------------|-------|\n")
+ for metric, (mean, ci_low, ci_high) in bootstrap_ci.items():
+ width = ci_high - ci_low
+ f.write(
+ f"| {metric} | {mean:.6f} | {ci_low:.6f} | {ci_high:.6f} | {width:.6f} |\n"
+ )
+ f.write("\n")
+ # Distribution diagnostics
+ if dist_diagnostics:
+ f.write("### 5.3 Distribution Normality Tests\n\n")
+ f.write("Statistical tests for normality of key distributions:\n\n")
+ for col in ["reward_total", "pnl", "trade_duration"]:
+ if f"{col}_mean" in dist_diagnostics:
+ f.write(f"#### {col.replace('_', ' ').title()}\n\n")
+ f.write("| Metric | Value |\n")
+ f.write("|--------|-------|\n")
+ f.write(f"| Mean | {dist_diagnostics[f'{col}_mean']:.4f} |\n")
+ f.write(f"| Std Dev | {dist_diagnostics[f'{col}_std']:.4f} |\n")
+ f.write(
+ f"| Skewness | {dist_diagnostics[f'{col}_skewness']:.4f} |\n"
+ )
+ f.write(
+ f"| Kurtosis | {dist_diagnostics[f'{col}_kurtosis']:.4f} |\n"
+ )
+ if f"{col}_shapiro_pval" in dist_diagnostics:
+ is_normal = (
+ "✅ Yes"
+ if dist_diagnostics[f"{col}_is_normal_shapiro"]
+ else "❌ No"
+ )
+ f.write(
+ f"| Normal? (Shapiro-Wilk) | {is_normal} (p={dist_diagnostics[f'{col}_shapiro_pval']:.4e}) |\n"
+ )
+ if f"{col}_qq_r_squared" in dist_diagnostics:
+ f.write(
+ f"| Q-Q Plot R² | {dist_diagnostics[f'{col}_qq_r_squared']:.4f} |\n"
+ )
+ f.write("\n")
-def _compute_exit_potential(last_potential: float, params: RewardParams) -> float:
- """Compute next potential Φ(s') for closing/exit transitions.
+ # Distribution shift (if real data provided)
+ if distribution_shift:
+ f.write("### 5.4 Distribution Shift Analysis\n\n")
+ f.write("Comparison between synthetic and real data distributions:\n\n")
+ f.write(
+ "| Feature | KL Div | JS Dist | Wasserstein | KS Stat | KS p-value |\n"
+ )
+ f.write(
+ "|---------|--------|---------|-------------|---------|------------|\n"
+ )
- Semantics:
- - canonical: Φ' = 0.0 (preserves invariance, disables additives)
- - non-canonical: Φ' = 0.0 (allows additives, breaks invariance)
- - progressive_release: Φ' = Φ * (1 - decay) with decay clamped to [0,1]
- - spike_cancel: Φ' = Φ / γ (neutralizes shaping spike ≈ 0 net effect) if γ>0 else Φ
- - retain_previous: Φ' = Φ
+ features = ["pnl", "trade_duration", "idle_duration"]
+ for feature in features:
+ kl = distribution_shift.get(
+ f"{feature}_kl_divergence", float("nan")
+ )
+ js = distribution_shift.get(f"{feature}_js_distance", float("nan"))
+ ws = distribution_shift.get(f"{feature}_wasserstein", float("nan"))
+ ks_stat = distribution_shift.get(
+ f"{feature}_ks_statistic", float("nan")
+ )
+ ks_p = distribution_shift.get(f"{feature}_ks_pvalue", float("nan"))
- Invalid modes fall back to canonical. Any non-finite resulting potential is
- coerced to 0.0.
- """
- mode = _get_str_param(params, "exit_potential_mode", "canonical")
- if mode == "canonical" or mode == "non-canonical":
- return _fail_safely("canonical_exit_potential")
+ f.write(
+ f"| {feature} | {kl:.4f} | {js:.4f} | {ws:.4f} | {ks_stat:.4f} | {ks_p:.4g} |\n"
+ )
+ f.write("\n")
+ f.write("**Interpretation Guide:**\n\n")
+ f.write("| Metric | Threshold | Meaning |\n")
+ f.write("|--------|-----------|--------|\n")
+ f.write("| KL Divergence | < 0.3 | ✅ Yes: Good representativeness |\n")
+ f.write("| JS Distance | < 0.2 | ✅ Yes: Similar distributions |\n")
+ f.write(
+ "| KS p-value | > 0.05 | ✅ Yes: No significant difference |\n\n"
+ )
+ else:
+ # Placeholder keeps numbering stable and explicit
+ f.write("### 5.4 Distribution Shift Analysis\n\n")
+ f.write("_Not performed (no real episodes provided)._\n\n")
- if mode == "progressive_release":
- decay = _get_float_param(params, "exit_potential_decay", 0.5)
- if not np.isfinite(decay):
- warnings.warn(
- "exit_potential_decay invalid (NaN/inf); defaulting to 0.5",
- RewardDiagnosticsWarning,
- stacklevel=2,
+ # Footer
+ f.write("---\n\n")
+ f.write("## Summary\n\n")
+ f.write("This comprehensive report includes:\n\n")
+ f.write(
+ "1. **Global Statistics** - Overall reward distributions and component activation\n"
+ )
+ f.write(
+ "2. **Sample Representativity** - Coverage of critical market scenarios\n"
+ )
+ f.write(
+ "3. **Component Analysis** - Relationships between rewards and conditions (including PBRS)\n"
+ )
+ if skip_feature_analysis or len(df) < 4:
+ f.write(
+ "4. **Feature Importance** - (skipped) Machine learning analysis of key drivers\n"
)
- decay = 0.5
- if decay < 0.0:
- warnings.warn(
- f"exit_potential_decay={decay} < 0; clamped to 0.0",
- RewardDiagnosticsWarning,
- stacklevel=2,
+ else:
+ f.write(
+ "4. **Feature Importance** - Machine learning analysis of key drivers\n"
)
- decay = 0.0
- if decay > 1.0:
- warnings.warn(
- f"exit_potential_decay={decay} > 1; clamped to 1.0",
- RewardDiagnosticsWarning,
- stacklevel=2,
+ f.write(
+ "5. **Statistical Validation** - Hypothesis tests and confidence intervals\n"
+ )
+ if distribution_shift:
+ f.write("6. **Distribution Shift** - Comparison with real trading data\n")
+ else:
+ f.write(
+ "6. **Distribution Shift** - Not performed (no real episodes provided)\n"
+ )
+ if "reward_shaping" in df.columns:
+ _total_shaping = df["reward_shaping"].sum()
+ _canonical = abs(_total_shaping) < PBRS_INVARIANCE_TOL
+ f.write(
+ "7. **PBRS Invariance** - "
+ + (
+ "Canonical (Σ shaping ≈ 0)"
+ if _canonical
+ else f"Non-canonical (Σ shaping = {_total_shaping:.6f})"
+ )
+ + "\n"
+ )
+ f.write("\n")
+ f.write("**Generated Files:**\n")
+ f.write("- `reward_samples.csv` - Raw synthetic samples\n")
+ if not skip_feature_analysis and len(df) >= 4:
+ f.write(
+ "- `feature_importance.csv` - Complete feature importance rankings\n"
+ )
+ if not skip_partial_dependence:
+ f.write(
+ "- `partial_dependence_*.csv` - Partial dependence data for visualization\n"
+ )
- decay = 1.0
- next_potential = last_potential * (1.0 - decay)
- elif mode == "spike_cancel":
- gamma = _get_potential_gamma(params)
- if gamma <= 0.0 or not np.isfinite(gamma):
- next_potential = last_potential
- else:
- next_potential = last_potential / gamma
- elif mode == "retain_previous":
- next_potential = last_potential
- else:
- next_potential = _fail_safely("invalid_exit_potential_mode")
-
- if not np.isfinite(next_potential):
- next_potential = _fail_safely("non_finite_next_exit_potential")
- return float(next_potential)
-
-def apply_potential_shaping(
- base_reward: float,
- current_pnl: float,
- current_duration_ratio: float,
- next_pnl: float,
- next_duration_ratio: float,
- is_terminal: bool,
- last_potential: float,
- params: RewardParams,
-) -> tuple[float, float, float]:
- """
- Apply PBRS potential-based reward shaping following Ng et al. (1999).
- Implements: R'(s,a,s') = R_base(s,a,s') + γΦ(s') - Φ(s)
+def main() -> None:
+ parser = build_argument_parser()
+ args = parser.parse_args()
- This function computes the complete PBRS transformation including:
- - Hold potential: Φ(s) based on current state features
- - Closing potential: Φ(s') with mode-specific terminal handling
- - Entry/exit potentials: Non-PBRS additive components
- - Gamma discounting: Standard RL discount factor
- - Invariance guarantees: Optimal policy preservation
-
- Theory:
- PBRS maintains optimal policy invariance by ensuring that the potential-based
- shaping reward is the difference of a potential function. Terminal states
- require special handling to preserve this property.
-
- Args:
- base_reward: Base environment reward R_base(s,a,s')
- current_pnl: Current state PnL ratio
- current_duration_ratio: Current state duration ratio
- next_pnl: Next state PnL ratio
- next_duration_ratio: Next state duration ratio
- is_terminal: Whether next state is terminal
- last_potential: Previous potential for closing mode calculations
- params: Reward parameters containing PBRS configuration
-
- Returns:
- tuple[total_reward, shaping_reward, next_potential]:
- - total_reward: R_base + R_shaping + additives
- - shaping_reward: Pure PBRS component γΦ(s') - Φ(s)
- - next_potential: Φ(s') for next iteration
-
- Raises:
- ValueError: If gamma is invalid or numerical issues detected
- """
- # Enforce PBRS invariance (auto-disable additives in canonical mode)
- params = _enforce_pbrs_invariance(params)
+ params = dict(DEFAULT_MODEL_REWARD_PARAMETERS)
+ # Merge CLI tunables first (only those explicitly provided)
+ _tunable_keys = set(DEFAULT_MODEL_REWARD_PARAMETERS.keys())
+ for key, value in vars(args).items():
+ if key in _tunable_keys and value is not None:
+ params[key] = value
+ # Then apply --params KEY=VALUE overrides (highest precedence)
+ params.update(parse_overrides(args.params))
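+ # Precedence sketch: defaults {hold_penalty_scale: 1.0} <- CLI tunable
+ # --hold_penalty_scale 0.7 <- --params hold_penalty_scale=0.5 => 0.5 wins.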
- # Resolve gamma
- gamma = _get_potential_gamma(params)
+ # Early parameter validation (moved before simulation for alignment with docs)
+ params_validated, adjustments = validate_reward_parameters(params)
+ params = params_validated
+ if adjustments:
+ # Compact adjustments summary (param: original->adjusted [reason])
+ adj_lines = [
+ f" - {k}: {v['original']} -> {v['adjusted']} ({v['reason']})"
+ for k, v in adjustments.items()
+ ]
+ print("Parameter adjustments applied:\n" + "\n".join(adj_lines))
+ # Normalize attenuation mode
+ _normalize_and_validate_mode(params)
- # Compute current potential Φ(s)
- current_potential = _compute_hold_potential(
- current_pnl, current_duration_ratio, params
+ base_factor = _get_float_param(params, "base_factor", float(args.base_factor))
+ profit_target = _get_float_param(params, "profit_target", float(args.profit_target))
+ risk_reward_ratio = _get_float_param(
+ params, "risk_reward_ratio", float(args.risk_reward_ratio)
)
- # Compute next potential Φ(s')
- if is_terminal:
- next_potential = _compute_exit_potential(last_potential, params)
+ cli_action_masking = _to_bool(args.action_masking)
+ if "action_masking" in params:
+ params["action_masking"] = _to_bool(params["action_masking"])
else:
- next_potential = _compute_hold_potential(next_pnl, next_duration_ratio, params)
+ params["action_masking"] = cli_action_masking
- # PBRS shaping reward: γΦ(s') - Φ(s)
- shaping_reward = gamma * next_potential - current_potential
+ # Deterministic seeds cascade
+ random.seed(args.seed)
+ np.random.seed(args.seed)
- # Compute additive components (non-PBRS)
- entry_additive = _compute_entry_additive(
- current_pnl, current_duration_ratio, params
- )
- exit_additive = (
- _compute_exit_additive(next_pnl, next_duration_ratio, params)
- if is_terminal
- else 0.0
+ df = simulate_samples(
+ num_samples=args.num_samples,
+ seed=args.seed,
+ params=params,
+ max_trade_duration=args.max_trade_duration,
+ base_factor=base_factor,
+ profit_target=profit_target,
+ risk_reward_ratio=risk_reward_ratio,
+ max_duration_ratio=args.max_duration_ratio,
+ trading_mode=args.trading_mode,
+ pnl_base_std=args.pnl_base_std,
+ pnl_duration_vol_scale=args.pnl_duration_vol_scale,
)
+ # Post-simulation critical NaN validation (non-PBRS structural columns)
+ critical_cols = [
+ "pnl",
+ "trade_duration",
+ "idle_duration",
+ "position",
+ "action",
+ "reward_total",
+ "reward_invalid",
+ "reward_idle",
+ "reward_hold",
+ "reward_exit",
+ ]
+ nan_issues = {
+ c: int(df[c].isna().sum())
+ for c in critical_cols
+ if c in df.columns and df[c].isna().any()
+ }
+ if nan_issues:
+ raise AssertionError(
+ "NaN values detected in critical simulated columns: "
+ + ", ".join(f"{k}={v}" for k, v in nan_issues.items())
+ )
+ # Attach simulation parameters for downstream manifest
+ df.attrs["simulation_params"] = {
+ "num_samples": args.num_samples,
+ "seed": args.seed,
+ "max_trade_duration": args.max_trade_duration,
+ "base_factor": base_factor,
+ "profit_target": profit_target,
+ "risk_reward_ratio": risk_reward_ratio,
+ "max_duration_ratio": args.max_duration_ratio,
+ "trading_mode": args.trading_mode,
+ "action_masking": _get_bool_param(params, "action_masking", True),
+ "pnl_base_std": args.pnl_base_std,
+ "pnl_duration_vol_scale": args.pnl_duration_vol_scale,
+ }
+ # Attach resolved reward parameters for inline overrides rendering in report
+ df.attrs["reward_params"] = dict(params)
- # Total reward
- total_reward = base_reward + shaping_reward + entry_additive + exit_additive
-
- # Numerical validation & normalization of tiny shaping
- if not np.isfinite(total_reward):
- return float(base_reward), 0.0, 0.0
- if np.isclose(shaping_reward, 0.0):
- shaping_reward = 0.0
- return float(total_reward), float(shaping_reward), float(next_potential)
+ args.output.mkdir(parents=True, exist_ok=True)
+ csv_path = args.output / "reward_samples.csv"
+ df.to_csv(csv_path, index=False)
+ sample_output_message = f"Samples saved to {csv_path}"
+ # Load real episodes if provided
+ real_df = None
+ if args.real_episodes and args.real_episodes.exists():
+ print(f"Loading real episodes from {args.real_episodes}...")
+ real_df = load_real_episodes(args.real_episodes)
-def _enforce_pbrs_invariance(params: RewardParams) -> RewardParams:
- """Enforce PBRS invariance by auto-disabling additives in canonical mode."""
- mode = _get_str_param(params, "exit_potential_mode", "canonical")
- if mode != "canonical":
- return params
- if params.get("_pbrs_invariance_applied"):
- return params
+ # Generate consolidated statistical analysis report (with enhanced tests)
+ print("Generating complete statistical analysis...")
- entry_enabled = _get_bool_param(params, "entry_additive_enabled", False)
- exit_enabled = _get_bool_param(params, "exit_additive_enabled", False)
- if entry_enabled:
- warnings.warn(
- "Disabling entry additive to preserve PBRS invariance (canonical mode).",
- RewardDiagnosticsWarning,
- stacklevel=2,
- )
- params["entry_additive_enabled"] = False
- if exit_enabled:
- warnings.warn(
- "Disabling exit additive to preserve PBRS invariance (canonical mode).",
- RewardDiagnosticsWarning,
- stacklevel=2,
- )
- params["exit_additive_enabled"] = False
- params["_pbrs_invariance_applied"] = True
- return params
+ write_complete_statistical_analysis(
+ df,
+ args.output,
+ max_trade_duration=args.max_trade_duration,
+ profit_target=float(profit_target * risk_reward_ratio),
+ seed=args.seed,
+ real_df=real_df,
+ adjust_method=args.pvalue_adjust,
+ stats_seed=getattr(args, "stats_seed", None),
+ strict_diagnostics=bool(getattr(args, "strict_diagnostics", False)),
+ bootstrap_resamples=getattr(args, "bootstrap_resamples", 10000),
+ skip_partial_dependence=bool(getattr(args, "skip_partial_dependence", False)),
+ skip_feature_analysis=bool(getattr(args, "skip_feature_analysis", False)),
+ )
+ print(
+ f"Complete statistical analysis saved to: {args.output / 'statistical_analysis.md'}"
+ )
+ # Generate manifest summarizing key metrics
+ try:
+ import hashlib as _hashlib
+ import json as _json
+
+ manifest_path = args.output / "manifest.json"
+ resolved_reward_params = dict(params) # already validated/normalized upstream
+ manifest = {
+ "generated_at": pd.Timestamp.now().isoformat(),
+ "num_samples": int(len(df)),
+ "seed": int(args.seed),
+ "max_trade_duration": int(args.max_trade_duration),
+ "profit_target_effective": float(profit_target * risk_reward_ratio),
+ "pvalue_adjust_method": args.pvalue_adjust,
+ "parameter_adjustments": adjustments,
+ "reward_params": resolved_reward_params,
+ }
+ sim_params = df.attrs.get("simulation_params", {})
+ if isinstance(sim_params, dict) and sim_params:
+ # Compose hash source from ALL simulation params and ALL resolved reward params for full reproducibility.
+ _hash_source = {
+ **{f"sim::{k}": sim_params[k] for k in sorted(sim_params)},
+ **{
+ f"reward::{k}": resolved_reward_params[k]
+ for k in sorted(resolved_reward_params)
+ },
+ }
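+ # e.g. {"reward::base_factor": 100.0, "sim::seed": 42, ...}; sort_keys
+ # below makes the SHA-256 digest independent of key insertion order.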
+ serialized = _json.dumps(_hash_source, sort_keys=True)
+ manifest["params_hash"] = _hashlib.sha256(
+ serialized.encode("utf-8")
+ ).hexdigest()
+ manifest["simulation_params"] = sim_params
+ with manifest_path.open("w", encoding="utf-8") as mh:
-def _compute_bi_component(
- kind: str,
- pnl: float,
- duration_ratio: float,
- params: RewardParams,
- scale_key: str,
- gain_key: str,
- transform_pnl_key: str,
- transform_dur_key: str,
- non_finite_key: str,
-) -> float:
- """Generic helper for (pnl, duration) bi-component transforms."""
- scale = _get_float_param(params, scale_key, 1.0)
- gain = _get_float_param(params, gain_key, 1.0)
- transform_pnl = _get_str_param(params, transform_pnl_key, "tanh")
- transform_duration = _get_str_param(params, transform_dur_key, "tanh")
+ _json.dump(manifest, mh, indent=2)
+ print(f"Manifest written to: {manifest_path}")
+ except Exception as e:
+ print(f"Manifest generation failed: {e}")
- t_pnl = apply_transform(transform_pnl, gain * pnl)
- t_dur = apply_transform(transform_duration, gain * duration_ratio)
- value = scale * 0.5 * (t_pnl + t_dur)
- if not np.isfinite(value):
- return _fail_safely(non_finite_key)
- return float(value)
+ print(f"Generated {len(df):,} synthetic samples.")
+ print(sample_output_message)
+ print(f"Artifacts saved to: {args.output.resolve()}")
if __name__ == "__main__":
import json
import math
import pickle
+import random
import re
import shutil
import subprocess
import unittest
import warnings
from pathlib import Path
-from typing import Iterable, Optional, Sequence, Union
+from typing import Any, Dict, Iterable, Optional, Sequence, Union
import numpy as np
import pandas as pd
class RewardSpaceTestBase(unittest.TestCase):
- """Base class with common test utilities.
-
- Central tolerance policy (avoid per-test commentary):
- - Generic numeric equality: 1e-6
- - Component decomposition / identity: 1e-9
- - Monotonic attenuation allowance: +1e-9 drift
- """
+ """Base class with common test utilities."""
@classmethod
def setUpClass(cls):
self.temp_dir = tempfile.mkdtemp()
self.output_path = Path(self.temp_dir)
+ # PBRS structural test constants
+ PBRS_TERMINAL_TOL = 1e-12
+ PBRS_MAX_ABS_SHAPING = 5.0
+ PBRS_TERMINAL_PROB = 0.08
+ PBRS_SWEEP_ITER = 120
+
+ # Generic numeric tolerances (distinct from PBRS structural constants)
+ EPS_BASE = (
+ 1e-12 # Base epsilon for strict identity & numeric guards (single source)
+ )
+ TOL_NUMERIC_GUARD = EPS_BASE # Division-by-zero guards / min denominators (alias)
+ TOL_IDENTITY_STRICT = EPS_BASE # Strict component identity (alias of EPS_BASE)
+ TOL_IDENTITY_RELAXED = 1e-9 # Looser identity when cumulative fp drift acceptable
+ TOL_GENERIC_EQ = 1e-6 # Generic numeric equality (previous literal)
+ TOL_NEGLIGIBLE = 1e-8 # Negligible statistical or shaping effects
+ MIN_EXIT_POWER_TAU = (
+ 1e-6 # Lower bound for exit_power_tau parameter (validation semantics)
+ )
+ # Distribution shape invariance (skewness / excess kurtosis) tolerance under scaling
+ TOL_DISTRIB_SHAPE = 5e-2
+ # Theoretical upper bound for Jensen-Shannon distance: sqrt(log 2)
+ JS_DISTANCE_UPPER_BOUND = math.sqrt(math.log(2.0))
+ # Relative tolerance (scale-aware) and continuity perturbation magnitudes
+ TOL_RELATIVE = 1e-9 # For relative comparisons scaled by magnitude
+ CONTINUITY_EPS_SMALL = 1e-4 # Small epsilon step for continuity probing
+ CONTINUITY_EPS_LARGE = 1e-3 # Larger epsilon step for ratio scaling tests
+
+ def _canonical_sweep(
+ self,
+ params: dict,
+ *,
+ iterations: int | None = None,
+ terminal_prob: float | None = None,
+ seed: int = 123,
+ ) -> tuple[list[float], list[float]]:
+ """Run a lightweight canonical invariance sweep.
+
+ Returns (terminal_next_potentials, shaping_values).
+ """
+ iters = iterations or self.PBRS_SWEEP_ITER
+ term_p = terminal_prob or self.PBRS_TERMINAL_PROB
+ rng = np.random.default_rng(seed)
+ last_potential = 0.0
+ terminal_next: list[float] = []
+ shaping_vals: list[float] = []
+ current_pnl = 0.0
+ current_dur = 0.0
+ for _ in range(iters):
+ is_terminal = rng.uniform() < term_p
+ next_pnl = 0.0 if is_terminal else float(rng.normal(0, 0.2))
+ inc = rng.uniform(0, 0.12)
+ next_dur = 0.0 if is_terminal else float(min(1.0, current_dur + inc))
+ _tot, shap_val, next_pot = apply_potential_shaping(
+ base_reward=0.0,
+ current_pnl=current_pnl,
+ current_duration_ratio=current_dur,
+ next_pnl=next_pnl,
+ next_duration_ratio=next_dur,
+ is_terminal=is_terminal,
+ last_potential=last_potential,
+ params=params,
+ )
+ shaping_vals.append(shap_val)
+ if is_terminal:
+ terminal_next.append(next_pot)
+ last_potential = 0.0
+ current_pnl = 0.0
+ current_dur = 0.0
+ else:
+ last_potential = next_pot
+ current_pnl = next_pnl
+ current_dur = next_dur
+ return terminal_next, shaping_vals
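+
+ # Usage sketch (assumes the default exit_potential_mode stays canonical,
+ # in which terminal potentials collapse to 0):
+ #   terminal_next, _ = self._canonical_sweep(dict(DEFAULT_MODEL_REWARD_PARAMETERS))
+ #   assert all(abs(p) <= self.PBRS_TERMINAL_TOL for p in terminal_next)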
+
+ def make_stats_df(
+ self,
+ *,
+ n: int,
+ reward_total_mean: float = 0.0,
+ reward_total_std: float = 1.0,
+ pnl_mean: float = 0.01,
+ pnl_std: float | None = None,
+ trade_duration_dist: str = "uniform",
+ idle_pattern: str = "mixed",
+ seed: int | None = None,
+ ) -> pd.DataFrame:
+ """Generate a synthetic statistical DataFrame.
+
+ Parameters
+ ----------
+ n : int
+ Row count.
+ reward_total_mean, reward_total_std : float
+ Normal parameters for reward_total.
+ pnl_mean : float
+ Mean PnL.
+ pnl_std : float | None
+ PnL std (defaults to TEST_PNL_STD when None).
+ trade_duration_dist : {'uniform','exponential'}
+ Distribution family for trade_duration.
+ idle_pattern : {'mixed','all_nonzero','all_zero'}
+ mixed: ~40% idle>0 (U(1,60)); all_nonzero: all idle>0 (U(5,60)); all_zero: idle=0.
+ seed : int | None
+ RNG seed.
+
+ Returns
+ -------
+ pd.DataFrame with columns: reward_total, reward_idle, reward_hold, reward_exit,
+ pnl, trade_duration, idle_duration, position. Guarantees: no NaN; reward_idle==0 where idle_duration==0.
+ """
+ if seed is not None:
+ np.random.seed(seed)
+ pnl_std_eff = self.TEST_PNL_STD if pnl_std is None else pnl_std
+ reward_total = np.random.normal(reward_total_mean, reward_total_std, n)
+ pnl = np.random.normal(pnl_mean, pnl_std_eff, n)
+ if trade_duration_dist == "exponential":
+ trade_duration = np.random.exponential(20, n)
+ else:
+ trade_duration = np.random.uniform(5, 150, n)
+ # Idle duration pattern
+ if idle_pattern == "mixed":
+ mask = np.random.rand(n) < 0.4
+ idle_duration = np.where(mask, np.random.uniform(1, 60, n), 0.0)
+ elif idle_pattern == "all_zero":
+ idle_duration = np.zeros(n)
+ else: # all_nonzero
+ idle_duration = np.random.uniform(5, 60, n)
+ # Component rewards (basic synthetic shaping consistent with sign expectations)
+ reward_idle = np.where(idle_duration > 0, np.random.normal(-1, 0.3, n), 0.0)
+ reward_hold = np.random.normal(-0.5, 0.2, n)
+ reward_exit = np.random.normal(0.8, 0.6, n)
+ position = np.random.choice([0.0, 0.5, 1.0], n)
+ return pd.DataFrame(
+ {
+ "reward_total": reward_total,
+ "reward_idle": reward_idle,
+ "reward_hold": reward_hold,
+ "reward_exit": reward_exit,
+ "pnl": pnl,
+ "trade_duration": trade_duration,
+ "idle_duration": idle_duration,
+ "position": position,
+ }
+ )
+
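+ # Example: the generator's documented invariant is directly checkable.
+ #   df = self.make_stats_df(n=500, idle_pattern="all_zero", seed=42)
+ #   assert (df.loc[df["idle_duration"] == 0, "reward_idle"] == 0).all()
+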
def tearDown(self):
"""Clean up temporary files."""
shutil.rmtree(self.temp_dir, ignore_errors=True)
self,
first: Union[float, int],
second: Union[float, int],
- tolerance: float = 1e-6,
+ tolerance: Optional[float] = None,
+ rtol: Optional[float] = None,
msg: Union[str, None] = None,
) -> None:
- """Absolute tolerance compare with explicit failure and finite check."""
+ """Compare floats with absolute and optional relative tolerance.
+
+ Precedence:
+ 1. Absolute: |a - b| <= tolerance
+ 2. Relative: |a - b| <= rtol * max(|a|, |b|) (only when rtol is given)
+ Both may be supplied; the assertion fails only if every supplied criterion fails.
+ """
self.assertFinite(first, name="a")
self.assertFinite(second, name="b")
+ if tolerance is None:
+ tolerance = self.TOL_GENERIC_EQ
diff = abs(first - second)
- if diff > tolerance:
- self.fail(
- msg
- or f"Difference {diff} exceeds tolerance {tolerance} (a={first}, b={second})"
- )
+ # Absolute criterion
+ if diff <= tolerance:
+ return
+ # Relative criterion (optional)
+ if rtol is not None:
+ scale = max(abs(first), abs(second), 1e-15)
+ if diff <= rtol * scale:
+ return
+ self.fail(
+ msg
+ or f"Difference {diff} exceeds tolerance {tolerance}"
+ + (f" and relative tolerance {rtol}" if rtol is not None else "")
+ + f" (a={first}, b={second})"
+ )
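+
+ # Examples: absolute criterion alone, then relative rescue at large scale.
+ #   self.assertAlmostEqualFloat(1.0, 1.0 + 5e-7)             # within default 1e-6
+ #   self.assertAlmostEqualFloat(1e9, 1e9 + 10.0, rtol=1e-7)  # 10 <= 1e-7 * 1e9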
- # --- Statistical bounds helpers (factorize redundancy) ---
def assertPValue(self, value: Union[float, int], msg: str = "") -> None:
"""Assert a p-value is finite and within [0,1]."""
self.assertFinite(value, name="p-value")
self.assertGreaterEqual(value, 0.0, msg or f"p-value < 0: {value}")
self.assertLessEqual(value, 1.0, msg or f"p-value > 1: {value}")
+ def assertPlacesEqual(
+ self,
+ a: Union[float, int],
+ b: Union[float, int],
+ places: int,
+ msg: str | None = None,
+ ) -> None:
+ """Bridge for legacy places-based approximate equality.
+
+ Converts decimal places to an absolute tolerance 10**-places and delegates to
+ assertAlmostEqualFloat to keep a single numeric comparison implementation.
+ """
+ tol = 10.0 ** (-places)
+ self.assertAlmostEqualFloat(a, b, tolerance=tol, msg=msg)
+
def assertDistanceMetric(
self,
value: Union[float, int],
self.assertGreater(value, low, f"{name} <= {low}")
self.assertLess(value, high, f"{name} >= {high}")
+ def assertNearZero(
+ self,
+ value: Union[float, int],
+ *,
+ atol: Optional[float] = None,
+ msg: Optional[str] = None,
+ ) -> None:
+ """Assert a scalar is numerically near zero within absolute tolerance.
+
+ Defaults to the relaxed identity tolerance (TOL_IDENTITY_RELAXED), suited to PBRS invariance style checks.
+ """
+ self.assertFinite(value, name="value")
+ tol = atol if atol is not None else self.TOL_IDENTITY_RELAXED
+ if abs(float(value)) > tol:
+ self.fail(msg or f"Value {value} not near zero (tol={tol})")
+
+ def assertSymmetric(
+ self,
+ func,
+ a,
+ b,
+ *,
+ atol: Optional[float] = None,
+ rtol: Optional[float] = None,
+ msg: Optional[str] = None,
+ ) -> None:
+ """Assert function(func, a, b) == function(func, b, a) within tolerance.
+
+ Intended for symmetric distance metrics (e.g., JS distance).
+ """
+ va = func(a, b)
+ vb = func(b, a)
+ self.assertAlmostEqualFloat(va, vb, tolerance=atol, rtol=rtol, msg=msg)
+
+ @staticmethod
+ def seed_all(seed: int = 123) -> None:
+ """Seed all RNGs used (numpy & random)."""
+ np.random.seed(seed)
+ random.seed(seed)
+
+ # Central tolerance mapping (single source for potential future dynamic use)
+ TOLS = {
+ "strict": EPS_BASE,
+ "relaxed": 1e-9,
+ "generic": 1e-6,
+ }
+
class TestIntegration(RewardSpaceTestBase):
- """Integration tests for CLI and file outputs."""
+ """CLI + file output integration tests."""
def test_cli_execution_produces_expected_files(self):
- """Test that CLI execution produces all expected output files."""
+ """CLI produces expected files."""
cmd = [
sys.executable,
"reward_space_analysis.py",
cmd, capture_output=True, text=True, cwd=Path(__file__).parent
)
- # Should execute successfully
+ # Exit 0
self.assertEqual(result.returncode, 0, f"CLI failed: {result.stderr}")
- # Should produce expected files (single source of truth list)
+ # Expected files
expected_files = [
"reward_samples.csv",
"feature_importance.csv",
self.assertTrue(file_path.exists(), f"Missing expected file: {filename}")
def test_manifest_structure_and_reproducibility(self):
- """Test manifest structure and reproducibility with same seed."""
+ """Manifest structure + reproducibility (same seed)."""
# First run
cmd1 = [
sys.executable,
)
-class TestStatisticalCoherence(RewardSpaceTestBase):
- """Statistical coherence validation tests."""
+class TestStatistics(RewardSpaceTestBase):
+ """Statistical tests: metrics, diagnostics, bootstrap, correlations."""
def _make_test_dataframe(self, n: int = 100) -> pd.DataFrame:
- """Generate test dataframe for statistical validation."""
+ """Build synthetic dataframe."""
np.random.seed(self.SEED)
-
- # Create correlated data for Spearman test
idle_duration = np.random.exponential(10, n)
reward_idle = -0.01 * idle_duration + np.random.normal(0, 0.001, n)
-
return pd.DataFrame(
{
"idle_duration": idle_duration,
}
)
- def test_distribution_shift_metrics(self):
- """Test KL divergence, JS distance, Wasserstein distance calculations."""
+ def test_stats_distribution_shift_metrics(self):
+ """KL/JS/Wasserstein metrics."""
df1 = self._make_test_dataframe(100)
df2 = self._make_test_dataframe(100)
- # Add slight shift to second dataset
+ # Shift second dataset
df2["reward_total"] += 0.1
metrics = compute_distribution_shift_metrics(df1, df2)
- # Should contain expected metrics for pnl (continuous feature)
+ # Expected pnl metrics
expected_keys = {
"pnl_kl_divergence",
"pnl_js_distance",
# Values should be finite and reasonable
for metric_name, value in metrics.items():
if "pnl" in metric_name:
- # All metrics must be finite; selected metrics must be non-negative
+ # Metrics finite; distances non-negative
if any(
suffix in metric_name
for suffix in [
else:
self.assertFinite(value, name=metric_name)
- def test_distribution_shift_identity_null_metrics(self):
- """Identical distributions should yield (near) zero shift metrics."""
+ def test_stats_distribution_shift_identity_null_metrics(self):
+ """Identity distributions -> near-zero shift metrics."""
df = self._make_test_dataframe(180)
metrics_id = compute_distribution_shift_metrics(df, df.copy())
for name, val in metrics_id.items():
if name.endswith(("_kl_divergence", "_js_distance", "_wasserstein")):
self.assertLess(
abs(val),
- 1e-6,
+ self.TOL_GENERIC_EQ,
f"Metric {name} expected ≈ 0 on identical distributions (got {val})",
)
elif name.endswith("_ks_statistic"):
f"KS statistic should be near 0 on identical distributions (got {val})",
)
- def test_hypothesis_testing(self):
- """Test statistical hypothesis tests."""
+ def test_stats_hypothesis_testing(self):
+ """Light correlation sanity check."""
df = self._make_test_dataframe(200)
- # Test only if we have enough data - simple correlation validation
+ # Only if enough samples
if len(df) > 30:
idle_data = df[df["idle_duration"] > 0]
if len(idle_data) > 10:
- # Simple correlation check: idle duration should correlate negatively with idle reward
+ # Negative correlation expected
idle_dur = idle_data["idle_duration"].to_numpy()
idle_rew = idle_data["reward_idle"].to_numpy()
- # Basic validation that data makes sense
+ # Sanity shape
self.assertTrue(
len(idle_dur) == len(idle_rew),
"Idle duration and reward arrays should have same length",
"Idle durations should be non-negative",
)
- # Idle rewards should generally be negative (penalty for hold)
+ # Majority negative
negative_rewards = (idle_rew < 0).sum()
total_rewards = len(idle_rew)
negative_ratio = negative_rewards / total_rewards
"Most idle rewards should be negative (penalties)",
)
- def test_distribution_diagnostics(self):
- """Test distribution normality diagnostics."""
+ def test_stats_distribution_diagnostics(self):
+ """Distribution diagnostics."""
df = self._make_test_dataframe(100)
diagnostics = distribution_diagnostics(df)
- # Should contain diagnostics for key columns (flattened format)
+ # Expect keys
expected_prefixes = ["reward_total_", "pnl_"]
for prefix in expected_prefixes:
matching_keys = [
len(matching_keys), 0, f"Should have diagnostics for {prefix}"
)
- # Check for basic statistical measures
+ # Basic moments
expected_suffixes = ["mean", "std", "skewness", "kurtosis"]
for suffix in expected_suffixes:
key = f"{prefix}{suffix}"
if key in diagnostics:
self.assertFinite(diagnostics[key], name=key)
+ def test_statistical_functions(self):
+ """Smoke test statistical_hypothesis_tests on synthetic data (API integration)."""
+ base = self.make_stats_df(n=200, seed=self.SEED, idle_pattern="mixed")
+ # .loc label slicing is inclusive: rows 0..149 get zeroed component rewards.
+ base.loc[:149, ["reward_idle", "reward_hold", "reward_exit"]] = 0.0
+ results = statistical_hypothesis_tests(base)
+ self.assertIsInstance(results, dict)
-class TestRewardAlignment(RewardSpaceTestBase):
- """Test reward calculation alignment with environment."""
-
- def test_basic_reward_calculation(self):
- """Test basic reward calculation consistency."""
- context = make_ctx(
- pnl=self.TEST_PROFIT_TARGET,
- trade_duration=10,
- max_trade_duration=100,
- max_unrealized_profit=0.025,
- min_unrealized_profit=0.015,
- position=Positions.Long,
- action=Actions.Long_exit,
+ # Helper data generators
+ def _const_df(self, n: int = 64) -> pd.DataFrame:
+ return pd.DataFrame(
+ {
+ "reward_total": np.ones(n) * 0.5,
+ "pnl": np.zeros(n),
+ "trade_duration": np.ones(n) * 10,
+ "idle_duration": np.ones(n) * 3,
+ }
)
- breakdown = calculate_reward(
- context,
- self.DEFAULT_PARAMS,
- base_factor=self.TEST_BASE_FACTOR,
- profit_target=0.06, # Scenario-specific larger target kept explicit
- risk_reward_ratio=self.TEST_RR_HIGH,
- short_allowed=True,
- action_masking=True,
+ def _shift_scale_df(
+ self, n: int = 256, shift: float = 0.0, scale: float = 1.0
+ ) -> pd.DataFrame:
+ rng = np.random.default_rng(123)
+ base = rng.normal(0, 1, n)
+ return pd.DataFrame(
+ {
+ "reward_total": shift + scale * base,
+ "pnl": shift + scale * base * 0.2,
+ "trade_duration": rng.exponential(20, n),
+ "idle_duration": rng.exponential(10, n),
+ }
)
- # Should return valid breakdown
- self.assertIsInstance(breakdown.total, (int, float))
- self.assertFinite(breakdown.total, name="breakdown.total")
-
- # Exit reward should be positive for profitable trade
- self.assertGreater(
- breakdown.exit_component, 0, "Profitable exit should have positive reward"
+ def test_stats_constant_distribution_bootstrap_and_diagnostics(self):
+ """Bootstrap on constant columns (degenerate)."""
+ df = self._const_df(80)
+ res = bootstrap_confidence_intervals(
+ df, ["reward_total", "pnl"], n_bootstrap=200, confidence_level=0.95
)
+ for k, (mean, lo, hi) in res.items(): # tuple: mean, low, high
+ self.assertAlmostEqualFloat(mean, lo, tolerance=2e-9)
+ self.assertAlmostEqualFloat(mean, hi, tolerance=2e-9)
+ self.assertLessEqual(hi - lo, 2e-9)
- def test_efficiency_zero_policy(self):
- """Ensure pnl == 0 with max_unrealized_profit == 0 does not get boosted.
-
- This verifies the policy: near-zero pnl -> no efficiency modulation.
- """
+ def test_stats_js_distance_symmetry_violin(self):
+ """JS distance symmetry d(P,Q)==d(Q,P)."""
+ df1 = self._shift_scale_df(300, shift=0.0)
+ df2 = self._shift_scale_df(300, shift=0.3)
+ metrics = compute_distribution_shift_metrics(df1, df2)
+ js_key = next((k for k in metrics if k.endswith("pnl_js_distance")), None)
+ if js_key is None:
+ self.skipTest("JS distance key not present in metrics output")
+ metrics_swapped = compute_distribution_shift_metrics(df2, df1)
+ js_key_swapped = next(
+ (k for k in metrics_swapped if k.endswith("pnl_js_distance")), None
+ )
+ self.assertIsNotNone(js_key_swapped)
+ self.assertAlmostEqualFloat(
+ metrics[js_key],
+ metrics_swapped[js_key_swapped],
+ tolerance=self.TOL_IDENTITY_STRICT,
+ rtol=self.TOL_RELATIVE,
+ )
+
+ def test_stats_js_distance_symmetry_helper(self):
+ """Symmetry helper assertion for JS distance."""
+ rng = np.random.default_rng(777)
+ p_raw = rng.uniform(0.0, 1.0, size=400)
+ q_raw = rng.uniform(0.0, 1.0, size=400)
+ p = p_raw / p_raw.sum()
+ q = q_raw / q_raw.sum()
+
+ def _kl(a: np.ndarray, b: np.ndarray) -> float:
+ # Assumes overlapping support; terms where either mass is 0 are dropped.
+ mask = (a > 0) & (b > 0)
+ return float(np.sum(a[mask] * np.log(a[mask] / b[mask])))
+
+ def js_distance(a: np.ndarray, b: np.ndarray) -> float:
+ # sqrt of the JS divergence (natural log), bounded by sqrt(ln 2) ≈ 0.8326.
+ m = 0.5 * (a + b)
+ js_div = 0.5 * _kl(a, m) + 0.5 * _kl(b, m)
+ return math.sqrt(max(js_div, 0.0))
+
+ self.assertSymmetric(
+ js_distance, p, q, atol=self.TOL_IDENTITY_STRICT, rtol=self.TOL_RELATIVE
+ )
+ self.assertLessEqual(
+ js_distance(p, q), self.JS_DISTANCE_UPPER_BOUND + self.TOL_IDENTITY_STRICT
+ )
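+
+ # Worked bound check (comment sketch): for disjoint point masses P=(1,0),
+ # Q=(0,1), M=(0.5,0.5) gives KL(P||M) = KL(Q||M) = ln 2, so the JS divergence
+ # equals ln 2 and js_distance attains sqrt(ln 2) ≈ 0.8326, the value
+ # JS_DISTANCE_UPPER_BOUND is assumed to encode.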
+
+ def test_stats_bootstrap_shrinkage_with_sample_size(self):
+ """Bootstrap CI shrinks ~1/sqrt(n)."""
+ small = self._shift_scale_df(80)
+ large = self._shift_scale_df(800)
+ res_small = bootstrap_confidence_intervals(
+ small, ["reward_total"], n_bootstrap=400
+ )
+ res_large = bootstrap_confidence_intervals(
+ large, ["reward_total"], n_bootstrap=400
+ )
+ (_, lo_s, hi_s) = list(res_small.values())[0]
+ (_, lo_l, hi_l) = list(res_large.values())[0]
+ hw_small = (hi_s - lo_s) / 2.0
+ hw_large = (hi_l - lo_l) / 2.0
+ # ~1/sqrt(n) scaling predicts sqrt(80/800) ≈ 0.32; 0.55 leaves bootstrap noise margin.
+ self.assertLess(hw_large, hw_small * 0.55)
+
+ def test_stats_variance_vs_duration_spearman_sign(self):
+ """trade_duration up => pnl variance up (rank corr >0)."""
+ rng = np.random.default_rng(99)
+ n = 250
+ trade_duration = np.linspace(1, 300, n)
+ pnl = rng.normal(0, 1 + trade_duration / 400.0, n)
+ df = pd.DataFrame(
+ {"trade_duration": trade_duration, "pnl": pnl, "reward_total": pnl}
+ )
+ ranks_dur = pd.Series(trade_duration).rank().to_numpy()
+ ranks_var = pd.Series(np.abs(pnl)).rank().to_numpy()
+ # Pearson on ranks equals Spearman's rho (ties are measure-zero for normal draws).
+ rho = np.corrcoef(ranks_dur, ranks_var)[0, 1]
+ self.assertGreater(rho, 0.1)
+
+ def test_stats_scaling_invariance_distribution_metrics(self):
+ """Equal scaling keeps KL/JS ≈0."""
+ df1 = self._shift_scale_df(400)
+ scale = 3.5
+ df2 = df1.copy()
+ df2["pnl"] *= scale
+ df1["pnl"] *= scale
+ metrics = compute_distribution_shift_metrics(df1, df2)
+ for k, v in metrics.items():
+ if k.endswith("_kl_divergence") or k.endswith("_js_distance"):
+ self.assertLess(
+ abs(v),
+ 5e-4,
+ f"Expected near-zero divergence after equal scaling (k={k}, v={v})",
+ )
- # Build context where pnl == 0.0 and max_unrealized_profit == pnl
- ctx = make_ctx(
- pnl=0.0,
- trade_duration=1,
- max_trade_duration=100,
- max_unrealized_profit=0.0,
- min_unrealized_profit=-0.02,
- position=Positions.Long,
- action=Actions.Long_exit,
+ def test_stats_mean_decomposition_consistency(self):
+ """Batch mean additivity."""
+ df_a = self._shift_scale_df(120)
+ df_b = self._shift_scale_df(180, shift=0.2)
+ m_concat = pd.concat([df_a["pnl"], df_b["pnl"]]).mean()
+ m_weighted = (
+ df_a["pnl"].mean() * len(df_a) + df_b["pnl"].mean() * len(df_b)
+ ) / (len(df_a) + len(df_b))
+ self.assertAlmostEqualFloat(
+ m_concat,
+ m_weighted,
+ tolerance=self.TOL_IDENTITY_STRICT,
+ rtol=self.TOL_RELATIVE,
)
- params = self.DEFAULT_PARAMS.copy()
- profit_target = self.TEST_PROFIT_TARGET * self.TEST_RR
-
- pnl_factor = _get_pnl_factor(params, ctx, profit_target, self.TEST_RR)
- # Expect no efficiency modulation: factor should be >= 0 and close to 1.0
- self.assertFinite(pnl_factor, name="pnl_factor")
- self.assertAlmostEqualFloat(pnl_factor, 1.0, tolerance=1e-6)
-
- def test_max_idle_duration_candles_logic(self):
- """Idle penalty scaling test with explicit max_idle_duration_candles."""
- params_small = self.DEFAULT_PARAMS.copy()
- params_large = self.DEFAULT_PARAMS.copy()
- # Activate explicit max idle durations
- params_small["max_idle_duration_candles"] = 50
- params_large["max_idle_duration_candles"] = 200
-
- base_factor = self.TEST_BASE_FACTOR
- idle_duration = 40 # below large threshold, near small threshold
- context = make_ctx(
- pnl=0.0,
- trade_duration=0,
- idle_duration=idle_duration,
- max_trade_duration=128,
- max_unrealized_profit=0.0,
- min_unrealized_profit=0.0,
- position=Positions.Neutral,
- action=Actions.Neutral,
+ def test_stats_ks_statistic_bounds(self):
+ """KS in [0,1]."""
+ df1 = self._shift_scale_df(150)
+ df2 = self._shift_scale_df(150, shift=0.4)
+ metrics = compute_distribution_shift_metrics(df1, df2)
+ for k, v in metrics.items():
+ if k.endswith("_ks_statistic"):
+ self.assertWithin(v, 0.0, 1.0, name=k)
+
+ def test_stats_bh_correction_null_false_positive_rate(self):
+ """Null: low BH discovery rate."""
+ rng = np.random.default_rng(1234)
+ n = 400
+ df = pd.DataFrame(
+ {
+ "pnl": rng.normal(0, 1, n),
+ "reward_total": rng.normal(0, 1, n),
+ "idle_duration": rng.exponential(5, n),
+ }
)
+ df["reward_idle"] = rng.normal(0, 1, n) * 1e-3
+ df["position"] = rng.choice([0.0, 1.0], size=n)
+ df["action"] = rng.choice([0.0, 2.0], size=n)
+ tests = statistical_hypothesis_tests(df)
+ flags: list[bool] = []
+ for v in tests.values():
+ if isinstance(v, dict):
+ if "significant_adj" in v:
+ flags.append(bool(v["significant_adj"]))
+ elif "significant" in v:
+ flags.append(bool(v["significant"]))
+ if flags:
+ rate = sum(flags) / len(flags)
+ self.assertLess(
+ rate, 0.15, f"BH null FP rate too high under null: {rate:.3f}"
+ )
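+ # Under a true null, BH targets FDR <= alpha (0.05); the 0.15 ceiling leaves
+ # headroom for the small number of tests and simulation noise.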
- breakdown_small = calculate_reward(
- context,
- params_small,
- base_factor,
- profit_target=self.TEST_PROFIT_TARGET,
- risk_reward_ratio=self.TEST_RR,
- short_allowed=True,
- action_masking=True,
- )
- breakdown_large = calculate_reward(
- context,
- params_large,
- base_factor=self.TEST_BASE_FACTOR,
- profit_target=0.06,
- risk_reward_ratio=self.TEST_RR,
- short_allowed=True,
- action_masking=True,
+ def test_stats_half_life_monotonic_series(self):
+ """Smoothed exponential decay monotonic."""
+ x = np.arange(0, 80)
+ y = np.exp(-x / 15.0)
+ rng = np.random.default_rng(5)
+ y_noisy = y + rng.normal(0, 1e-4, len(y))
+ window = 5
+ # "valid" moving average (len - window + 1 points) damps noise while preserving the decay trend.
+ y_smooth = np.convolve(y_noisy, np.ones(window) / window, mode="valid")
+ self.assertMonotonic(y_smooth, non_increasing=True, tolerance=1e-5)
+
+ def test_stats_bootstrap_confidence_intervals_basic(self):
+ """Bootstrap CI calculation (basic)."""
+ test_data = self.make_stats_df(n=100, seed=self.SEED)
+ results = bootstrap_confidence_intervals(
+ test_data,
+ ["reward_total", "pnl"],
+ n_bootstrap=100,
)
+ for metric, (mean, ci_low, ci_high) in results.items():
+ self.assertFinite(mean, name=f"mean[{metric}]")
+ self.assertFinite(ci_low, name=f"ci_low[{metric}]")
+ self.assertFinite(ci_high, name=f"ci_high[{metric}]")
+ self.assertLess(ci_low, ci_high)
- self.assertLess(breakdown_small.idle_penalty, 0.0)
- self.assertLess(breakdown_large.idle_penalty, 0.0)
- # Because denominator is larger, absolute penalty (negative) should be smaller in magnitude
- self.assertGreater(
- breakdown_large.idle_penalty,
- breakdown_small.idle_penalty,
- f"Expected less severe penalty with larger max_idle_duration_candles (large={breakdown_large.idle_penalty}, small={breakdown_small.idle_penalty})",
- )
+ def test_stats_hypothesis_seed_reproducibility(self):
+ """Seed reproducibility for statistical_hypothesis_tests + bootstrap."""
+ df = self.make_stats_df(n=300, seed=123, idle_pattern="mixed")
+ r1 = statistical_hypothesis_tests(df, seed=777)
+ r2 = statistical_hypothesis_tests(df, seed=777)
+ self.assertEqual(set(r1.keys()), set(r2.keys()))
+ for k in r1:
+ for field in ("p_value", "significant"):
+ v1 = r1[k][field]
+ v2 = r2[k][field]
+ if (
+ isinstance(v1, float)
+ and isinstance(v2, float)
+ and (np.isnan(v1) and np.isnan(v2))
+ ):
+ continue
+ self.assertEqual(v1, v2, f"Mismatch for {k}:{field}")
+ # Bootstrap reproducibility
+ metrics = ["reward_total", "pnl"]
+ ci_a = bootstrap_confidence_intervals(df, metrics, n_bootstrap=150, seed=2024)
+ ci_b = bootstrap_confidence_intervals(df, metrics, n_bootstrap=150, seed=2024)
+ self.assertEqual(ci_a, ci_b)
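+ # Bitwise-equal CI tuples are expected: resampling is driven solely by the seed.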
- def test_pbrs_progressive_release_decay_clamped(self):
- """progressive_release with decay>1 must clamp to 1 so Φ' = 0 and Δ = -Φ_prev."""
- params = self.DEFAULT_PARAMS.copy()
- params.update(
+ def test_stats_distribution_metrics_mathematical_bounds(self):
+ """Mathematical bounds and validity of distribution shift metrics."""
+ np.random.seed(self.SEED)
+ df1 = pd.DataFrame(
{
- "potential_gamma": DEFAULT_MODEL_REWARD_PARAMETERS["potential_gamma"],
- "exit_potential_mode": "progressive_release",
- "exit_potential_decay": 5.0, # should clamp to 1.0
- "hold_potential_enabled": True,
- "entry_additive_enabled": False,
- "exit_additive_enabled": False,
+ "pnl": np.random.normal(0, self.TEST_PNL_STD, 500),
+ "trade_duration": np.random.exponential(30, 500),
+ "idle_duration": np.random.gamma(2, 5, 500),
}
)
- current_pnl = 0.02
- current_dur = 0.5
- prev_potential = _compute_hold_potential(current_pnl, current_dur, params)
- total_reward, shaping_reward, next_potential = apply_potential_shaping(
- base_reward=0.0,
- current_pnl=current_pnl,
- current_duration_ratio=current_dur,
- next_pnl=0.02,
- next_duration_ratio=0.6,
- is_terminal=True,
- last_potential=prev_potential,
- params=params,
- )
- self.assertAlmostEqualFloat(next_potential, 0.0, tolerance=1e-9)
- self.assertAlmostEqualFloat(shaping_reward, -prev_potential, tolerance=1e-9)
- self.assertAlmostEqualFloat(total_reward, shaping_reward, tolerance=1e-9)
-
- def test_pbrs_spike_cancel_invariance(self):
- """spike_cancel terminal shaping should be ≈ 0 (γ*(Φ/γ) - Φ)."""
- params = self.DEFAULT_PARAMS.copy()
- params.update(
+ df2 = pd.DataFrame(
{
- "potential_gamma": 0.9,
- "exit_potential_mode": "spike_cancel",
- "hold_potential_enabled": True,
- "entry_additive_enabled": False,
- "exit_additive_enabled": False,
+ "pnl": np.random.normal(0.01, 0.025, 500),
+ "trade_duration": np.random.exponential(35, 500),
+ "idle_duration": np.random.gamma(2.5, 6, 500),
}
)
- current_pnl = 0.015
- current_dur = 0.4
- prev_potential = _compute_hold_potential(current_pnl, current_dur, params)
- # Use helper accessor to avoid union type issues
- gamma = _get_float_param(
- params,
- "potential_gamma",
- DEFAULT_MODEL_REWARD_PARAMETERS.get("potential_gamma", 0.95),
+ metrics = compute_distribution_shift_metrics(df1, df2)
+ for feature in ["pnl", "trade_duration", "idle_duration"]:
+ for suffix, upper in [
+ ("kl_divergence", None),
+ ("js_distance", 1.0),
+ ("wasserstein", None),
+ ("ks_statistic", 1.0),
+ ]:
+ key = f"{feature}_{suffix}"
+ if key in metrics:
+ if upper is None:
+ self.assertDistanceMetric(metrics[key], name=key)
+ else:
+ self.assertDistanceMetric(metrics[key], upper=upper, name=key)
+ p_key = f"{feature}_ks_pvalue"
+ if p_key in metrics:
+ self.assertPValue(metrics[p_key])
+
+ def test_stats_heteroscedasticity_pnl_validation(self):
+ """PnL variance increases with trade duration (heteroscedasticity)."""
+ df = simulate_samples(
+ num_samples=1000,
+ seed=123,
+ params=self.DEFAULT_PARAMS,
+ max_trade_duration=100,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
+ max_duration_ratio=2.0,
+ trading_mode="margin",
+ pnl_base_std=self.TEST_PNL_STD,
+ pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
)
- expected_next = (
- prev_potential / gamma if gamma not in (0.0, None) else prev_potential
+ exit_data = df[df["reward_exit"] != 0].copy()
+ if len(exit_data) < 50:
+ self.skipTest("Insufficient exit actions for heteroscedasticity test")
+ exit_data["duration_bin"] = pd.cut(
+ exit_data["duration_ratio"], bins=4, labels=["Q1", "Q2", "Q3", "Q4"]
)
- total_reward, shaping_reward, next_potential = apply_potential_shaping(
- base_reward=0.0,
- current_pnl=current_pnl,
- current_duration_ratio=current_dur,
- next_pnl=0.016,
- next_duration_ratio=0.45,
- is_terminal=True,
- last_potential=prev_potential,
- params=params,
- )
- self.assertAlmostEqualFloat(next_potential, expected_next, tolerance=1e-9)
- self.assertAlmostEqualFloat(shaping_reward, 0.0, tolerance=1e-9)
- self.assertAlmostEqualFloat(total_reward, 0.0, tolerance=1e-9)
-
- def test_idle_penalty_fallback_and_proportionality(self):
- """Fallback & proportionality validation.
+ variance_by_bin = exit_data.groupby("duration_bin")["pnl"].var().dropna()
+ if "Q1" in variance_by_bin.index and "Q4" in variance_by_bin.index:
+ self.assertGreater(
+ variance_by_bin["Q4"],
+ variance_by_bin["Q1"] * 0.8,
+ "PnL heteroscedasticity: variance should increase with duration",
+ )
- Semantics:
- - When max_idle_duration_candles is unset, fallback must be 2 * max_trade_duration.
- - Idle penalty scales ~ linearly with idle_duration (power=1), so doubling idle_duration doubles penalty magnitude.
- - We also infer the implicit denominator from a mid-range idle duration (>1x and <2x trade duration) to ensure the
- 2x fallback.
- """
- params = self.DEFAULT_PARAMS.copy()
- params["max_idle_duration_candles"] = None
- base_factor = 90.0
- profit_target = self.TEST_PROFIT_TARGET
- risk_reward_ratio = 1.0
+ def test_stats_statistical_functions_bounds_validation(self):
+ """All statistical functions respect bounds."""
+ df = self.make_stats_df(n=300, seed=self.SEED, idle_pattern="all_nonzero")
+ diagnostics = distribution_diagnostics(df)
+ for col in ["reward_total", "pnl", "trade_duration", "idle_duration"]:
+ if f"{col}_skewness" in diagnostics:
+ self.assertFinite(
+ diagnostics[f"{col}_skewness"], name=f"skewness[{col}]"
+ )
+ if f"{col}_kurtosis" in diagnostics:
+ self.assertFinite(
+ diagnostics[f"{col}_kurtosis"], name=f"kurtosis[{col}]"
+ )
+ if f"{col}_shapiro_pval" in diagnostics:
+ self.assertPValue(
+ diagnostics[f"{col}_shapiro_pval"],
+ msg=f"Shapiro p-value bounds for {col}",
+ )
+ hypothesis_results = statistical_hypothesis_tests(df, seed=42)
+ for test_name, result in hypothesis_results.items():
+ if "p_value" in result:
+ self.assertPValue(
+ result["p_value"], msg=f"p-value bounds for {test_name}"
+ )
+ if "effect_size_epsilon_sq" in result:
+ eps2 = result["effect_size_epsilon_sq"]
+ self.assertFinite(eps2, name=f"epsilon_sq[{test_name}]")
+ self.assertGreaterEqual(eps2, 0.0)
+ if "effect_size_rank_biserial" in result:
+ rb = result["effect_size_rank_biserial"]
+ self.assertFinite(rb, name=f"rank_biserial[{test_name}]")
+ self.assertWithin(rb, -1.0, 1.0, name="rank_biserial")
+ if "rho" in result and result["rho"] is not None:
+ rho = result["rho"]
+ self.assertFinite(rho, name=f"rho[{test_name}]")
+ self.assertWithin(rho, -1.0, 1.0, name="rho")
- # Two contexts with different idle durations
- ctx_a = make_ctx(
- pnl=0.0,
- trade_duration=0,
- idle_duration=20,
+ def test_stats_benjamini_hochberg_adjustment(self):
+ """BH adjustment adds p_value_adj & significant_adj with valid bounds."""
+ df = simulate_samples(
+ num_samples=600,
+ seed=123,
+ params=self.DEFAULT_PARAMS,
max_trade_duration=100,
- max_unrealized_profit=0.0,
- min_unrealized_profit=0.0,
- position=Positions.Neutral,
- action=Actions.Neutral,
- )
- ctx_b = dataclasses.replace(ctx_a, idle_duration=40)
-
- br_a = calculate_reward(
- ctx_a,
- params,
- base_factor=base_factor,
- profit_target=profit_target,
- risk_reward_ratio=risk_reward_ratio,
- short_allowed=True,
- action_masking=True,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
+ max_duration_ratio=2.0,
+ trading_mode="margin",
+ pnl_base_std=self.TEST_PNL_STD,
+ pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
)
- br_b = calculate_reward(
- ctx_b,
- params,
- base_factor=base_factor,
- profit_target=profit_target,
- risk_reward_ratio=risk_reward_ratio,
- short_allowed=True,
- action_masking=True,
+ results_adj = statistical_hypothesis_tests(
+ df, adjust_method="benjamini_hochberg", seed=777
)
+ self.assertGreater(len(results_adj), 0)
+ for name, res in results_adj.items():
+ self.assertIn("p_value", res)
+ self.assertIn("p_value_adj", res)
+ self.assertIn("significant_adj", res)
+ p_raw = res["p_value"]
+ p_adj = res["p_value_adj"]
+ self.assertPValue(p_raw)
+ self.assertPValue(p_adj)
+ self.assertGreaterEqual(p_adj, p_raw - self.TOL_IDENTITY_STRICT)
+ alpha = 0.05
+ self.assertEqual(res["significant_adj"], bool(p_adj < alpha))
+ if "effect_size_epsilon_sq" in res:
+ eff = res["effect_size_epsilon_sq"]
+ self.assertFinite(eff)
+ self.assertGreaterEqual(eff, 0)
+ if "effect_size_rank_biserial" in res:
+ rb = res["effect_size_rank_biserial"]
+ self.assertFinite(rb)
+ self.assertWithin(rb, -1, 1, name="rank_biserial")
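+ # Why p_adj >= p_raw holds: BH sets p_adj_(i) = min_{j>=i} (m/j) * p_(j), and
+ # each candidate (m/j) * p_(j) >= p_(j) >= p_(i) for ascending p-values, so
+ # adjustment can only raise a raw p-value (up to the identity tolerance).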
- self.assertLess(br_a.idle_penalty, 0.0)
- self.assertLess(br_b.idle_penalty, 0.0)
-
- # Ratio of penalties should be close to 2 (linear power=1 scaling) allowing small numerical tolerance
- ratio = (
- br_b.idle_penalty / br_a.idle_penalty if br_a.idle_penalty != 0 else None
- )
- self.assertIsNotNone(ratio, "Idle penalty A should not be zero")
- # Both penalties are negative, ratio should be ~ (40/20) = 2, hence ratio ~ 2
- self.assertAlmostEqualFloat(
- abs(ratio),
- 2.0,
- tolerance=0.2,
- msg=f"Idle penalty proportionality mismatch (ratio={ratio})",
- )
- # Additional mid-range inference check (idle_duration between 1x and 2x trade duration)
- ctx_mid = dataclasses.replace(
- ctx_a, idle_duration=120, max_trade_duration=100
- ) # Adjusted context for mid-range check
- br_mid = calculate_reward(
- ctx_mid,
- params,
- base_factor=base_factor,
- profit_target=profit_target,
- risk_reward_ratio=risk_reward_ratio,
- short_allowed=True,
- action_masking=True,
- )
- self.assertLess(br_mid.idle_penalty, 0.0)
- idle_penalty_scale = _get_float_param(params, "idle_penalty_scale", 0.5)
- idle_penalty_power = _get_float_param(params, "idle_penalty_power", 1.025)
- # Internal factor may come from params (overrides provided base_factor argument)
- factor = _get_float_param(params, "base_factor", float(base_factor))
- idle_factor = factor * (profit_target * risk_reward_ratio) / 4.0
- observed_ratio = abs(br_mid.idle_penalty) / (idle_factor * idle_penalty_scale)
- if observed_ratio > 0:
- implied_D = 120 / (observed_ratio ** (1 / idle_penalty_power))
- self.assertAlmostEqualFloat(
- implied_D,
- 400.0,
- tolerance=20.0,
- msg=f"Fallback denominator mismatch (implied={implied_D}, expected≈400, factor={factor})",
- )
- def test_exit_factor_threshold_warning_non_capping(self):
- """Ensure exit_factor_threshold does not cap the exit factor (warning-only semantics).
+class TestRewardComponents(RewardSpaceTestBase):
+ """Core reward component tests."""
- We approximate by computing two rewards: a baseline and an amplified one (via larger base_factor & pnl) and
- ensure the amplified reward scales proportionally beyond the threshold rather than flattening.
- """
- params = self.DEFAULT_PARAMS.copy()
- # Remove base_factor from params so that the function uses the provided argument (makes scaling observable)
- params.pop("base_factor", None)
- exit_factor_threshold = _get_float_param(
- params, "exit_factor_threshold", 10_000.0
- )
+ def test_reward_calculation_scenarios_basic(self):
+ """Reward calculation scenarios: expected components become non-zero."""
+ test_cases = [
+ (Positions.Neutral, Actions.Neutral, "idle_penalty"),
+ (Positions.Long, Actions.Long_exit, "exit_component"),
+ (Positions.Short, Actions.Short_exit, "exit_component"),
+ ]
+ for position, action, expected_type in test_cases:
+ with self.subTest(position=position, action=action):
+ context = RewardContext(
+ pnl=0.02 if expected_type == "exit_component" else 0.0,
+ trade_duration=50 if position != Positions.Neutral else 0,
+ idle_duration=10 if position == Positions.Neutral else 0,
+ max_trade_duration=100,
+ max_unrealized_profit=0.03,
+ min_unrealized_profit=-0.01,
+ position=position,
+ action=action,
+ )
+ breakdown = calculate_reward(
+ context,
+ self.DEFAULT_PARAMS,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=1.0,
+ short_allowed=True,
+ action_masking=True,
+ )
+ if expected_type == "idle_penalty":
+ self.assertNotEqual(breakdown.idle_penalty, 0.0)
+ elif expected_type == "exit_component":
+ self.assertNotEqual(breakdown.exit_component, 0.0)
+ self.assertFinite(breakdown.total, name="breakdown.total")
- context = RewardContext(
- pnl=0.08, # above typical profit_target * RR
+ def test_basic_reward_calculation(self):
+ """Profitable long exit: finite total, positive exit component."""
+ context = make_ctx(
+ pnl=self.TEST_PROFIT_TARGET,
trade_duration=10,
- idle_duration=0,
max_trade_duration=100,
- max_unrealized_profit=0.09,
- min_unrealized_profit=0.0,
+ max_unrealized_profit=0.025,
+ min_unrealized_profit=0.015,
position=Positions.Long,
action=Actions.Long_exit,
)
-
- # Baseline with moderate base_factor
- baseline = calculate_reward(
+ br = calculate_reward(
context,
- params,
+ self.DEFAULT_PARAMS,
base_factor=self.TEST_BASE_FACTOR,
- profit_target=self.TEST_PROFIT_TARGET,
+ profit_target=0.06,
risk_reward_ratio=self.TEST_RR_HIGH,
short_allowed=True,
action_masking=True,
)
+ self.assertFinite(br.total, name="total")
+ self.assertGreater(br.exit_component, 0)
+
+ def test_efficiency_zero_policy(self):
+ """pnl == 0 with max_unrealized_profit == 0 gets no efficiency modulation."""
+ ctx = make_ctx(
+ pnl=0.0,
+ trade_duration=1,
+ max_trade_duration=100,
+ max_unrealized_profit=0.0,
+ min_unrealized_profit=-0.02,
+ position=Positions.Long,
+ action=Actions.Long_exit,
+ )
+ params = self.DEFAULT_PARAMS.copy()
+ profit_target = self.TEST_PROFIT_TARGET * self.TEST_RR
+ pnl_factor = _get_pnl_factor(params, ctx, profit_target, self.TEST_RR)
+ self.assertFinite(pnl_factor, name="pnl_factor")
+ self.assertAlmostEqualFloat(pnl_factor, 1.0, tolerance=self.TOL_GENERIC_EQ)
- # Amplified: choose a much larger base_factor (ensure > threshold relative scale)
- amplified_base_factor = max(
- self.TEST_BASE_FACTOR * 50,
- exit_factor_threshold * self.TEST_RR_HIGH / max(context.pnl, 1e-9),
+ def test_max_idle_duration_candles_logic(self):
+ """Larger max_idle_duration_candles yields a less severe idle penalty."""
+ params_small = self.DEFAULT_PARAMS.copy()
+ params_large = self.DEFAULT_PARAMS.copy()
+ params_small["max_idle_duration_candles"] = 50
+ params_large["max_idle_duration_candles"] = 200
+ base_factor = self.TEST_BASE_FACTOR
+ context = make_ctx(
+ pnl=0.0,
+ trade_duration=0,
+ idle_duration=40,
+ max_trade_duration=128,
+ max_unrealized_profit=0.0,
+ min_unrealized_profit=0.0,
+ position=Positions.Neutral,
+ action=Actions.Neutral,
)
- amplified = calculate_reward(
+ small = calculate_reward(
context,
- params,
- base_factor=amplified_base_factor,
+ params_small,
+ base_factor,
profit_target=self.TEST_PROFIT_TARGET,
- risk_reward_ratio=self.TEST_RR_HIGH,
+ risk_reward_ratio=self.TEST_RR,
short_allowed=True,
action_masking=True,
)
-
- scale_observed = (
- amplified.exit_component / baseline.exit_component
- if baseline.exit_component
- else 0.0
- )
- self.assertGreater(baseline.exit_component, 0.0)
- self.assertGreater(amplified.exit_component, baseline.exit_component)
- # Expect at least ~10x increase (safe margin) to confirm absence of clipping
- self.assertGreater(
- scale_observed,
- 10.0,
- f"Amplified reward did not scale sufficiently (factor={scale_observed:.2f}, amplified_base_factor={amplified_base_factor})",
+ large = calculate_reward(
+ context,
+ params_large,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET, # match the small-threshold call for a fair comparison
+ risk_reward_ratio=self.TEST_RR,
+ short_allowed=True,
+ action_masking=True,
)
+ self.assertLess(small.idle_penalty, 0.0)
+ self.assertLess(large.idle_penalty, 0.0)
+ # Both are negative; the larger denominator shrinks the magnitude, so large > small.
+ self.assertGreater(large.idle_penalty, small.idle_penalty)
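+ # Reference relation (implementation's idle branch): idle_penalty ≈
+ # -idle_factor * idle_penalty_scale
+ # * (idle_duration / max_idle_duration_candles) ** idle_penalty_power,
+ # with idle_factor = base_factor * (profit_target * risk_reward_ratio) / 4.
+ # idle_duration=40 gives ratio 0.8 (max_idle=50) vs 0.2 (max_idle=200), so
+ # the larger threshold strictly shrinks the penalty magnitude.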
def test_exit_factor_calculation(self):
"""Test exit factor calculation consistency across core modes + plateau variant.
self.assertGreater(plateau_factor_post, 0)
self.assertGreaterEqual(
plateau_factor_pre,
- plateau_factor_post - 1e-12,
+ plateau_factor_post - self.TOL_IDENTITY_STRICT,
"Plateau pre-grace factor should be >= post-grace factor",
)
- def test_negative_slope_sanitization(self):
- """Negative slopes for linear must be sanitized to positive default (1.0)."""
- base_factor = self.TEST_BASE_FACTOR
- pnl = 0.04
- pnl_factor = 1.0
- duration_ratio_linear = 1.2 # any positive ratio
- duration_ratio_plateau = 1.3 # > grace so slope matters
-
- # Linear mode: slope -5.0 should behave like slope=1.0 (sanitized)
- params_lin_neg = self.DEFAULT_PARAMS.copy()
- params_lin_neg.update(
- {"exit_attenuation_mode": "linear", "exit_linear_slope": -5.0}
- )
- params_lin_pos = self.DEFAULT_PARAMS.copy()
- params_lin_pos.update(
- {"exit_attenuation_mode": "linear", "exit_linear_slope": 1.0}
- )
- val_lin_neg = _get_exit_factor(
- base_factor, pnl, pnl_factor, duration_ratio_linear, params_lin_neg
- )
- val_lin_pos = _get_exit_factor(
- base_factor, pnl, pnl_factor, duration_ratio_linear, params_lin_pos
- )
- self.assertAlmostEqualFloat(
- val_lin_neg,
- val_lin_pos,
- tolerance=1e-9,
- msg="Negative linear slope not sanitized to default behavior",
- )
-
- # Plateau+linear: negative slope sanitized similarly
- params_pl_neg = self.DEFAULT_PARAMS.copy()
- params_pl_neg.update(
- {
- "exit_attenuation_mode": "linear",
- "exit_plateau": True,
- "exit_plateau_grace": 1.0,
- "exit_linear_slope": -3.0,
- }
- )
- params_pl_pos = self.DEFAULT_PARAMS.copy()
- params_pl_pos.update(
- {
- "exit_attenuation_mode": "linear",
- "exit_plateau": True,
- "exit_plateau_grace": 1.0,
- "exit_linear_slope": 1.0,
- }
- )
- val_pl_neg = _get_exit_factor(
- base_factor, pnl, pnl_factor, duration_ratio_plateau, params_pl_neg
- )
- val_pl_pos = _get_exit_factor(
- base_factor, pnl, pnl_factor, duration_ratio_plateau, params_pl_pos
- )
- self.assertAlmostEqualFloat(
- val_pl_neg,
- val_pl_pos,
- tolerance=1e-9,
- msg="Negative plateau+linear slope not sanitized to default behavior",
- )
-
def test_idle_penalty_zero_when_profit_target_zero(self):
"""If profit_target=0 → idle_factor=0 → idle penalty must be exactly 0 for neutral idle state."""
context = make_ctx(
br.total, 0.0, "Total reward should be zero in this configuration"
)
- def test_power_mode_alpha_formula(self):
- """Validate power mode: factor ≈ base_factor / (1+r)^alpha where alpha=-log(tau)/log(2)."""
- tau = 0.5
- r = 1.2
- alpha = -math.log(tau) / math.log(2.0)
- base_factor = self.TEST_BASE_FACTOR
- pnl = self.TEST_PROFIT_TARGET
- pnl_factor = 1.0 # isolate attenuation
- params = self.DEFAULT_PARAMS.copy()
- params.update(
- {
- "exit_attenuation_mode": "power",
- "exit_power_tau": tau,
- "exit_plateau": False,
- }
- )
- observed = _get_exit_factor(base_factor, pnl, pnl_factor, r, params)
- expected = base_factor / (1.0 + r) ** alpha
- self.assertAlmostEqualFloat(
- observed,
- expected,
- tolerance=1e-9,
- msg=f"Power mode attenuation mismatch (obs={observed}, exp={expected}, alpha={alpha})",
- )
-
def test_win_reward_factor_saturation(self):
"""Saturation test: pnl amplification factor should monotonically approach (1 + win_reward_factor)."""
win_reward_factor = 3.0 # asymptote = 4.0
self.assertMonotonic(
ratios_observed,
non_decreasing=True,
- tolerance=1e-12,
+ tolerance=self.TOL_IDENTITY_STRICT,
name="pnl_amplification_ratio",
)
)
def test_scale_invariance_and_decomposition(self):
- """Core reward components scale ~ linearly with base_factor; total = core + shaping + additives.
-
- Contract:
- For each non-zero core component C: C(base_factor * k) ≈ k * C(base_factor).
- Decomposition uses total = (exit + idle + hold + invalid + shaping + entry_additive + exit_additive).
- """
+ """Components scale ~ linearly with base_factor; total equals sum(core + shaping + additives)."""
params = self.DEFAULT_PARAMS.copy()
# Remove internal base_factor so the explicit argument is used
params.pop("base_factor", None)
),
]
- tol_scale = 1e-9
+ tol_scale = self.TOL_RELATIVE
for ctx in contexts:
br1 = calculate_reward(
ctx,
# Magnitudes should be close (tolerance scaled)
self.assertLess(
abs(abs(br_long.exit_component) - abs(br_short.exit_component)),
- 1e-9 * max(1.0, abs(br_long.exit_component)),
+ self.TOL_RELATIVE * max(1.0, abs(br_long.exit_component)),
f"Long/Short asymmetry pnl={pnl}: long={br_long.exit_component}, short={br_short.exit_component}",
)
-class TestPublicAPI(RewardSpaceTestBase):
- """Test public API functions and interfaces."""
+class TestAPIAndHelpers(RewardSpaceTestBase):
+ """Public API + helper utility tests."""
+ # --- Public API ---
def test_parse_overrides(self):
- """Test parse_overrides function."""
- # Test valid overrides
- overrides = ["key1=1.5", "key2=hello", "key3=42"]
+ overrides = ["alpha=1.5", "mode=linear", "limit=42"]
result = parse_overrides(overrides)
-
- self.assertEqual(result["key1"], 1.5)
- self.assertEqual(result["key2"], "hello")
- self.assertEqual(result["key3"], 42.0)
-
- # Test invalid format
+ self.assertEqual(result["alpha"], 1.5)
+ self.assertEqual(result["mode"], "linear")
+ self.assertEqual(result["limit"], 42.0)
with self.assertRaises(ValueError):
- parse_overrides(["invalid_format"])
+ parse_overrides(["badpair"]) # missing '='
- def test_bootstrap_confidence_intervals(self):
- """Test bootstrap confidence intervals calculation."""
- # Create test data
- np.random.seed(self.SEED)
- test_data = pd.DataFrame(
- {
- "reward_total": np.random.normal(0, 1, 100),
- "pnl": np.random.normal(0.01, self.TEST_PNL_STD, 100),
- }
- )
-
- results = bootstrap_confidence_intervals(
- test_data,
- ["reward_total", "pnl"],
- n_bootstrap=100, # Small for speed
- )
-
- self.assertIn("reward_total", results)
- self.assertIn("pnl", results)
-
- for metric, (mean, ci_low, ci_high) in results.items():
- self.assertFinite(mean, name=f"mean[{metric}]")
- self.assertFinite(ci_low, name=f"ci_low[{metric}]")
- self.assertFinite(ci_high, name=f"ci_high[{metric}]")
- self.assertLess(
- ci_low, ci_high, f"CI bounds for {metric} should be ordered"
- )
-
- def test_statistical_hypothesis_tests_seed_reproducibility(self):
- """Ensure statistical_hypothesis_tests + bootstrap CIs are reproducible with stats_seed."""
-
- np.random.seed(123)
- # Create idle_duration with variability throughout to avoid constant Spearman warnings
- idle_base = np.random.uniform(3, 40, 300)
- idle_mask = np.random.rand(300) < 0.4
- idle_duration = (
- idle_base * idle_mask
- ) # some zeros but not fully constant subset
- df = pd.DataFrame(
- {
- "reward_total": np.random.normal(0, 1, 300),
- "reward_idle": np.where(idle_mask, np.random.normal(-1, 0.3, 300), 0.0),
- "reward_hold": np.where(
- ~idle_mask, np.random.normal(-0.5, 0.2, 300), 0.0
- ),
- "reward_exit": np.random.normal(0.8, 0.6, 300),
- "pnl": np.random.normal(0.01, self.TEST_PNL_STD, 300),
- "trade_duration": np.random.uniform(5, 150, 300),
- "idle_duration": idle_duration,
- "position": np.random.choice([0.0, 0.5, 1.0], 300),
- }
- )
-
- # Two runs with same seed
- r1 = statistical_hypothesis_tests(df, seed=777)
- r2 = statistical_hypothesis_tests(df, seed=777)
- self.assertEqual(set(r1.keys()), set(r2.keys()))
- for k in r1:
- for field in ("p_value", "significant"):
- v1 = r1[k][field]
- v2 = r2[k][field]
- if (
- isinstance(v1, float)
- and isinstance(v2, float)
- and (np.isnan(v1) and np.isnan(v2))
- ):
- continue
- self.assertEqual(v1, v2, f"Mismatch for {k}:{field}")
-
- # Different seed may change bootstrap CI of rho if present
- r3 = statistical_hypothesis_tests(df, seed=888)
- if "idle_correlation" in r1 and "idle_correlation" in r3:
- ci1 = r1["idle_correlation"]["ci_95"]
- ci3 = r3["idle_correlation"]["ci_95"]
- # Not guaranteed different, but often; only assert shape
- self.assertEqual(len(ci1), 2)
- self.assertEqual(len(ci3), 2)
-
- # Test bootstrap reproducibility path
- metrics = ["reward_total", "pnl"]
- ci_a = bootstrap_confidence_intervals(df, metrics, n_bootstrap=250, seed=2024)
- ci_b = bootstrap_confidence_intervals(df, metrics, n_bootstrap=250, seed=2024)
- self.assertEqual(ci_a, ci_b)
-
-
-class TestStatisticalValidation(RewardSpaceTestBase):
- """Test statistical validation and mathematical bounds."""
-
- def test_pnl_invariant_validation(self):
- """Test critical PnL invariant: only exit actions should have non-zero PnL."""
- # Generate samples and verify PnL invariant
- df = simulate_samples(
- num_samples=200,
- seed=42,
- params=self.DEFAULT_PARAMS,
- max_trade_duration=50,
- base_factor=self.TEST_BASE_FACTOR,
- profit_target=self.TEST_PROFIT_TARGET,
- risk_reward_ratio=self.TEST_RR,
- max_duration_ratio=2.0,
- trading_mode="margin",
- pnl_base_std=self.TEST_PNL_STD,
- pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
- )
-
- # Critical invariant: Total PnL must equal sum of exit PnL
- total_pnl = df["pnl"].sum()
- exit_mask = df["reward_exit"] != 0
- exit_pnl_sum = df.loc[exit_mask, "pnl"].sum()
-
- self.assertAlmostEqual(
- total_pnl,
- exit_pnl_sum,
- places=10,
- msg="PnL invariant violation: total PnL != sum of exit PnL",
- )
-
- # Only exit actions (2.0, 4.0) should have non-zero PnL
- non_zero_pnl_actions = set(df[df["pnl"] != 0]["action"].unique())
- expected_exit_actions = {2.0, 4.0} # Long_exit, Short_exit
-
- self.assertTrue(
- non_zero_pnl_actions.issubset(expected_exit_actions),
- f"Non-exit actions have PnL: {non_zero_pnl_actions - expected_exit_actions}",
- )
-
- # No actions should have zero PnL but non-zero exit reward
- invalid_combinations = df[(df["pnl"] == 0) & (df["reward_exit"] != 0)]
- self.assertEqual(
- len(invalid_combinations),
- 0,
- "Found actions with zero PnL but non-zero exit reward",
- )
-
- def test_distribution_metrics_mathematical_bounds(self):
- """Test mathematical bounds and validity of distribution shift metrics."""
- # Create two different distributions for testing
- np.random.seed(self.SEED)
- df1 = pd.DataFrame(
- {
- "pnl": np.random.normal(0, self.TEST_PNL_STD, 500),
- "trade_duration": np.random.exponential(30, 500),
- "idle_duration": np.random.gamma(2, 5, 500),
- }
- )
-
- df2 = pd.DataFrame(
- {
- "pnl": np.random.normal(0.01, 0.025, 500),
- "trade_duration": np.random.exponential(35, 500),
- "idle_duration": np.random.gamma(2.5, 6, 500),
- }
- )
-
- metrics = compute_distribution_shift_metrics(df1, df2)
-
- # Validate mathematical bounds for each feature
- for feature in ["pnl", "trade_duration", "idle_duration"]:
- # KL divergence must be >= 0
- kl_key = f"{feature}_kl_divergence"
- if kl_key in metrics:
- self.assertDistanceMetric(metrics[kl_key], name=kl_key)
-
- # JS distance must be in [0, 1]
- js_key = f"{feature}_js_distance"
- if js_key in metrics:
- self.assertDistanceMetric(metrics[js_key], upper=1.0, name=js_key)
-
- # Wasserstein must be >= 0
- ws_key = f"{feature}_wasserstein"
- if ws_key in metrics:
- self.assertDistanceMetric(metrics[ws_key], name=ws_key)
-
- # KS statistic must be in [0, 1]
- ks_stat_key = f"{feature}_ks_statistic"
- if ks_stat_key in metrics:
- self.assertDistanceMetric(
- metrics[ks_stat_key], upper=1.0, name=ks_stat_key
- )
-
- # KS p-value must be in [0, 1]
- ks_p_key = f"{feature}_ks_pvalue"
- if ks_p_key in metrics:
- self.assertPValue(
- metrics[ks_p_key], msg=f"KS p-value out of bounds for {feature}"
- )
-
- def test_heteroscedasticity_pnl_validation(self):
- """Test that PnL variance increases with trade duration (heteroscedasticity)."""
- # Generate larger sample for statistical power
- df = simulate_samples(
- num_samples=1000,
- seed=123,
- params=self.DEFAULT_PARAMS,
- max_trade_duration=100,
- base_factor=self.TEST_BASE_FACTOR,
- profit_target=self.TEST_PROFIT_TARGET,
- risk_reward_ratio=self.TEST_RR,
- max_duration_ratio=2.0,
- trading_mode="margin",
- pnl_base_std=self.TEST_PNL_STD,
- pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
- )
-
- # Filter to exit actions only (where PnL is meaningful)
- exit_data = df[df["reward_exit"] != 0].copy()
-
- if len(exit_data) < 50:
- self.skipTest("Insufficient exit actions for heteroscedasticity test")
-
- # Create duration bins and test variance pattern
- exit_data["duration_bin"] = pd.cut(
- exit_data["duration_ratio"], bins=4, labels=["Q1", "Q2", "Q3", "Q4"]
- )
-
- variance_by_bin = exit_data.groupby("duration_bin")["pnl"].var().dropna()
-
- # Should see increasing variance with duration (not always strict monotonic due to sampling)
- # Test that Q4 variance > Q1 variance (should hold with heteroscedastic model)
- if "Q1" in variance_by_bin.index and "Q4" in variance_by_bin.index:
- self.assertGreater(
- variance_by_bin["Q4"],
- variance_by_bin["Q1"] * 0.8, # Allow some tolerance
- "PnL heteroscedasticity: variance should increase with duration",
- )
-
- def test_exit_factor_mathematical_formulas(self):
- """Test mathematical correctness of exit factor calculations."""
-
- # Test context with known values
- context = RewardContext(
- pnl=0.05,
- trade_duration=50,
- idle_duration=0,
- max_trade_duration=100,
- max_unrealized_profit=0.06,
- min_unrealized_profit=0.04,
- position=Positions.Long,
- action=Actions.Long_exit,
- )
-
- params = self.DEFAULT_PARAMS.copy()
- duration_ratio = 50 / 100 # 0.5
-
- # Test power mode with known tau
- params["exit_attenuation_mode"] = "power"
- params["exit_power_tau"] = 0.5
- params["exit_plateau"] = False
-
- reward_power = calculate_reward(
- context,
- params,
- self.TEST_BASE_FACTOR,
- self.TEST_PROFIT_TARGET,
- self.TEST_RR,
- short_allowed=True,
- action_masking=True,
- )
-
- # Mathematical validation: alpha = -ln(tau) = -ln(0.5) ≈ 0.693
- # Analytical alpha value (unused directly now, kept for clarity): -ln(0.5) ≈ 0.693
-
- # The reward should be attenuated by this factor
- self.assertGreater(
- reward_power.exit_component, 0, "Power mode should generate positive reward"
- )
-
- # Test half_life mode
- params["exit_attenuation_mode"] = "half_life"
- params["exit_half_life"] = 0.5
-
- reward_half_life = calculate_reward(
- context,
- params,
- self.TEST_BASE_FACTOR,
- self.TEST_PROFIT_TARGET,
- self.TEST_RR,
- short_allowed=True,
- action_masking=True,
- )
-
- # Mathematical validation: 2^(-duration_ratio/half_life) = 2^(-0.5/0.5) = 0.5
- expected_half_life_factor = 2 ** (-duration_ratio / 0.5)
- self.assertAlmostEqual(expected_half_life_factor, 0.5, places=6)
-
- # Test that different modes produce different results (mathematical diversity)
- params["exit_attenuation_mode"] = "linear"
- params["exit_linear_slope"] = 1.0
-
- reward_linear = calculate_reward(
- context,
- params,
- self.TEST_BASE_FACTOR,
- self.TEST_PROFIT_TARGET,
- self.TEST_RR,
- short_allowed=True,
- action_masking=True,
- )
-
- # All modes should produce positive rewards but different values
- self.assertGreater(reward_power.exit_component, 0)
- self.assertGreater(reward_half_life.exit_component, 0)
- self.assertGreater(reward_linear.exit_component, 0)
-
- # They should be different (mathematical distinctness)
- rewards = [
- reward_power.exit_component,
- reward_half_life.exit_component,
- reward_linear.exit_component,
- ]
- unique_rewards = set(f"{r:.6f}" for r in rewards)
- self.assertGreater(
- len(unique_rewards),
- 1,
- "Different exit factor modes should produce different rewards",
- )
-
- def test_statistical_functions_bounds_validation(self):
- """Test that all statistical functions respect mathematical bounds."""
- # Create test data with known statistical properties
- np.random.seed(self.SEED)
- df = pd.DataFrame(
- {
- "reward_total": np.random.normal(0, 1, 300),
- "reward_idle": np.random.normal(-1, 0.5, 300),
- "reward_hold": np.random.normal(-0.5, 0.3, 300),
- "reward_exit": np.random.normal(1, 0.8, 300),
- "pnl": np.random.normal(0.01, 0.02, 300),
- "trade_duration": np.random.uniform(5, 150, 300),
- "idle_duration": np.random.uniform(0, 100, 300),
- "position": np.random.choice([0.0, 0.5, 1.0], 300),
- }
- )
-
- # Test distribution diagnostics bounds
- diagnostics = distribution_diagnostics(df)
-
- for col in ["reward_total", "pnl", "trade_duration", "idle_duration"]:
- # Skewness can be any real number, but should be finite
- if f"{col}_skewness" in diagnostics:
- skew = diagnostics[f"{col}_skewness"]
- self.assertFinite(skew, name=f"skewness[{col}]")
-
- # Kurtosis should be finite (can be negative for platykurtic distributions)
- if f"{col}_kurtosis" in diagnostics:
- kurt = diagnostics[f"{col}_kurtosis"]
- self.assertFinite(kurt, name=f"kurtosis[{col}]")
-
- # Shapiro p-value must be in [0, 1]
- if f"{col}_shapiro_pval" in diagnostics:
- self.assertPValue(
- diagnostics[f"{col}_shapiro_pval"],
- msg=f"Shapiro p-value bounds for {col}",
- )
-
- # Test hypothesis tests results bounds
- hypothesis_results = statistical_hypothesis_tests(df, seed=42)
-
- for test_name, result in hypothesis_results.items():
- # All p-values must be in [0, 1]
- if "p_value" in result:
- self.assertPValue(
- result["p_value"], msg=f"p-value bounds for {test_name}"
- )
- # Effect size epsilon squared (ANOVA/Kruskal) must be finite and >= 0
- if "effect_size_epsilon_sq" in result:
- eps2 = result["effect_size_epsilon_sq"]
- self.assertFinite(eps2, name=f"epsilon_sq[{test_name}]")
- self.assertGreaterEqual(
- eps2, 0.0, f"Effect size epsilon^2 for {test_name} must be >= 0"
- )
- # Rank-biserial correlation (Mann-Whitney) must be finite in [-1, 1]
- if "effect_size_rank_biserial" in result:
- rb = result["effect_size_rank_biserial"]
- self.assertFinite(rb, name=f"rank_biserial[{test_name}]")
- self.assertGreaterEqual(
- rb, -1.0, f"Rank-biserial correlation for {test_name} must be >= -1"
- )
- self.assertLessEqual(
- rb, 1.0, f"Rank-biserial correlation for {test_name} must be <= 1"
- )
- # Generic correlation effect size (Spearman/Pearson) if present
- if "rho" in result:
- rho = result["rho"]
- if rho is not None:
- self.assertFinite(rho, name=f"rho[{test_name}]")
- self.assertGreaterEqual(
- rho, -1.0, f"Correlation rho for {test_name} must be >= -1"
- )
- self.assertLessEqual(
- rho, 1.0, f"Correlation rho for {test_name} must be <= 1"
- )
-
- def test_benjamini_hochberg_adjustment(self):
- """Benjamini-Hochberg adjustment adds p_value_adj & significant_adj fields with valid bounds."""
-
- # Use simulation to trigger multiple tests
+ def test_api_simulation_and_reward_smoke(self):
+ """Smoke: simulate_samples output feeds calculate_reward end-to-end."""
df = simulate_samples(
- num_samples=1000,
- seed=123,
+ num_samples=20,
+ seed=7,
params=self.DEFAULT_PARAMS,
- max_trade_duration=100,
+ max_trade_duration=40,
base_factor=self.TEST_BASE_FACTOR,
profit_target=self.TEST_PROFIT_TARGET,
risk_reward_ratio=self.TEST_RR,
- max_duration_ratio=2.0,
+ max_duration_ratio=1.5,
trading_mode="margin",
pnl_base_std=self.TEST_PNL_STD,
pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
)
-
- results_adj = statistical_hypothesis_tests(
- df, adjust_method="benjamini_hochberg", seed=777
- )
- # At least one test should have run (idle or kruskal etc.)
- self.assertGreater(len(results_adj), 0, "No hypothesis tests executed")
- for name, res in results_adj.items():
- self.assertIn("p_value", res, f"Missing p_value in {name}")
- self.assertIn("p_value_adj", res, f"Missing p_value_adj in {name}")
- self.assertIn("significant_adj", res, f"Missing significant_adj in {name}")
- p_raw = res["p_value"]
- p_adj = res["p_value_adj"]
- self.assertPValue(p_raw, msg=f"Raw p-value out of bounds ({p_raw})")
- self.assertPValue(p_adj, msg=f"Adjusted p-value out of bounds ({p_adj})")
- # BH should not reduce p-value (non-decreasing) after monotonic enforcement
- self.assertGreaterEqual(
- p_adj,
- p_raw - 1e-12,
- f"Adjusted p-value {p_adj} is smaller than raw {p_raw}",
+ self.assertGreater(len(df), 0)
+ any_exit = df[df["reward_exit"] != 0].head(1)
+ if not any_exit.empty:
+ row = any_exit.iloc[0]
+ ctx = RewardContext(
+ pnl=float(row["pnl"]),
+ trade_duration=int(row["trade_duration"]),
+ idle_duration=int(row["idle_duration"]),
+ max_trade_duration=40,
+ max_unrealized_profit=float(row["pnl"]) + 0.01,
+ min_unrealized_profit=float(row["pnl"]) - 0.01,
+ position=Positions.Long,
+ action=Actions.Long_exit,
)
- # Consistency of significance flags
- alpha = 0.05
- self.assertEqual(
- res["significant_adj"],
- bool(p_adj < alpha),
- f"significant_adj inconsistent for {name}",
+ breakdown = calculate_reward(
+ ctx,
+ self.DEFAULT_PARAMS,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
+ short_allowed=True,
+ action_masking=True,
)
- # Optional: if effect sizes present, basic bounds
- if "effect_size_epsilon_sq" in res:
- eff = res["effect_size_epsilon_sq"]
- self.assertFinite(eff, name=f"effect_size[{name}]")
- self.assertGreaterEqual(eff, 0, f"ε² should be >=0 for {name}")
- if "effect_size_rank_biserial" in res:
- rb = res["effect_size_rank_biserial"]
- self.assertFinite(rb, name=f"rank_biserial[{name}]")
- self.assertGreaterEqual(rb, -1, f"Rank-biserial lower bound {name}")
- self.assertLessEqual(rb, 1, f"Rank-biserial upper bound {name}")
+ self.assertFinite(breakdown.total)
- def test_simulate_samples_with_different_modes(self):
- """Test simulate_samples with different trading modes."""
- # Test spot mode (no shorts)
+ def test_simulate_samples_trading_modes_spot_vs_margin(self):
+ """simulate_samples coverage: spot should forbid shorts, margin should allow them."""
df_spot = simulate_samples(
- num_samples=100,
+ num_samples=80,
seed=42,
params=self.DEFAULT_PARAMS,
max_trade_duration=100,
pnl_base_std=self.TEST_PNL_STD,
pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
)
-
- # Should not have any short positions
- short_positions = (df_spot["position"] == float(Positions.Short.value)).sum()
+ short_positions_spot = (
+ df_spot["position"] == float(Positions.Short.value)
+ ).sum()
self.assertEqual(
- short_positions, 0, "Spot mode should not have short positions"
+ short_positions_spot, 0, "Spot mode must not contain short positions"
)
-
- # Test margin mode (shorts allowed)
df_margin = simulate_samples(
- num_samples=100,
+ num_samples=80,
seed=42,
params=self.DEFAULT_PARAMS,
max_trade_duration=100,
pnl_base_std=self.TEST_PNL_STD,
pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
)
-
- # Should have required columns
- required_columns = [
+ for col in [
"pnl",
"trade_duration",
"idle_duration",
"reward_idle",
"reward_hold",
"reward_exit",
- ]
- for col in required_columns:
- self.assertIn(col, df_margin.columns, f"Column {col} should be present")
+ ]:
+ self.assertIn(col, df_margin.columns)
- def test_reward_calculation(self):
- """Test reward calculation scenarios."""
- # Test different reward scenarios
- test_cases = [
- # (position, action, expected_reward_type)
- (Positions.Neutral, Actions.Neutral, "idle_penalty"),
- (Positions.Long, Actions.Long_exit, "exit_component"),
- (Positions.Short, Actions.Short_exit, "exit_component"),
- ]
-
- for position, action, expected_type in test_cases:
- with self.subTest(position=position, action=action):
- context = RewardContext(
- pnl=0.02 if expected_type == "exit_component" else 0.0,
- trade_duration=50 if position != Positions.Neutral else 0,
- idle_duration=10 if position == Positions.Neutral else 0,
- max_trade_duration=100,
- max_unrealized_profit=0.03,
- min_unrealized_profit=-0.01,
- position=position,
- action=action,
- )
-
- breakdown = calculate_reward(
- context,
- self.DEFAULT_PARAMS,
- base_factor=self.TEST_BASE_FACTOR,
- profit_target=self.TEST_PROFIT_TARGET,
- risk_reward_ratio=1.0,
- short_allowed=True,
- action_masking=True,
- )
-
- # Check that the appropriate component is non-zero
- if expected_type == "idle_penalty":
- self.assertNotEqual(
- breakdown.idle_penalty,
- 0.0,
- f"Expected idle penalty for {position}/{action}",
- )
- elif expected_type == "exit_component":
- self.assertNotEqual(
- breakdown.exit_component,
- 0.0,
- f"Expected exit component for {position}/{action}",
- )
-
- # Total should always be finite
- self.assertFinite(breakdown.total, name="breakdown.total")
-
-
-class TestBoundaryConditions(RewardSpaceTestBase):
- """Test boundary conditions and edge cases."""
-
- def test_extreme_parameter_values(self):
- """Test behavior with extreme parameter values."""
- # Test with very large parameters
- extreme_params = self.DEFAULT_PARAMS.copy()
- extreme_params["win_reward_factor"] = 1000.0
- extreme_params["base_factor"] = 10000.0
-
- context = RewardContext(
- pnl=0.05,
- trade_duration=50,
- idle_duration=0,
- max_trade_duration=100,
- max_unrealized_profit=0.06,
- min_unrealized_profit=0.02,
- position=Positions.Long,
- action=Actions.Long_exit,
- )
-
- breakdown = calculate_reward(
- context,
- extreme_params,
- base_factor=10000.0,
- profit_target=self.TEST_PROFIT_TARGET,
- risk_reward_ratio=self.TEST_RR,
- short_allowed=True,
- action_masking=True,
- )
-
- self.assertFinite(breakdown.total, name="breakdown.total")
-
- def test_different_exit_attenuation_modes(self):
- """Test different exit attenuation modes (legacy, sqrt, linear, power, half_life)."""
- modes = ATTENUATION_MODES_WITH_LEGACY
-
- for mode in modes:
- with self.subTest(mode=mode):
- test_params = self.DEFAULT_PARAMS.copy()
- test_params["exit_attenuation_mode"] = mode
-
- context = RewardContext(
- pnl=0.02,
- trade_duration=50,
- idle_duration=0,
- max_trade_duration=100,
- max_unrealized_profit=0.03,
- min_unrealized_profit=0.01,
- position=Positions.Long,
- action=Actions.Long_exit,
- )
-
- breakdown = calculate_reward(
- context,
- test_params,
- base_factor=self.TEST_BASE_FACTOR,
- profit_target=self.TEST_PROFIT_TARGET,
- risk_reward_ratio=self.TEST_RR,
- short_allowed=True,
- action_masking=True,
- )
-
- self.assertFinite(
- breakdown.exit_component, name="breakdown.exit_component"
- )
- self.assertFinite(breakdown.total, name="breakdown.total")
-
-
-class TestHelperFunctions(RewardSpaceTestBase):
- """Test utility and helper functions."""
+ # --- Helper functions ---
def test_to_bool(self):
"""Test _to_bool with various inputs."""
short_positions = (df_futures["position"] == float(Positions.Short.value)).sum()
self.assertGreater(
short_positions, 0, "Futures mode should allow short positions"
- )
-
- def test_statistical_functions(self):
- """Test statistical functions."""
- # Create test data with specific patterns
- np.random.seed(self.SEED)
- test_data = pd.DataFrame(
- {
- "reward_total": np.random.normal(0, 1, 200),
- "reward_idle": np.concatenate(
- [np.zeros(150), np.random.normal(-1, 0.5, 50)]
- ),
- "reward_hold": np.concatenate(
- [np.zeros(150), np.random.normal(-0.5, 0.3, 50)]
- ),
- "reward_exit": np.concatenate(
- [np.zeros(150), np.random.normal(2, 1, 50)]
- ),
- "pnl": np.random.normal(0.01, 0.02, 200),
- "trade_duration": np.random.uniform(10, 100, 200),
- "idle_duration": np.concatenate(
- [np.random.uniform(5, 50, 50), np.zeros(150)]
- ),
- "position": np.random.choice([0.0, 0.5, 1.0], 200),
- }
- )
-
- # Test hypothesis tests
- results = statistical_hypothesis_tests(test_data)
- self.assertIsInstance(results, dict)
+ )
def test_argument_parser_construction(self):
"""Test build_argument_parser function."""
+ breakdown.shaping_reward
+ breakdown.entry_additive
+ breakdown.exit_additive,
- tolerance=1e-9,
+ tolerance=self.TOL_IDENTITY_RELAXED,
msg="Total should equal sum of components (idle + shaping/additives)",
)
+ breakdown.shaping_reward
+ breakdown.entry_additive
+ breakdown.exit_additive,
- tolerance=1e-9,
+ tolerance=self.TOL_IDENTITY_RELAXED,
msg="Total should equal sum of components (hold + shaping/additives)",
)
+ breakdown.shaping_reward
+ breakdown.entry_additive
+ breakdown.exit_additive,
- tolerance=1e-9,
+ tolerance=self.TOL_IDENTITY_RELAXED,
msg="Total should equal invalid penalty plus shaping/additives",
)
+ breakdown.shaping_reward
+ breakdown.entry_additive
+ breakdown.exit_additive,
- tolerance=1e-9,
+ tolerance=self.TOL_IDENTITY_RELAXED,
msg=f"Total mismatch including shaping {description}",
)
self.assertFinite(breakdown.exit_component, name="exit_component")
-class TestRewardRobustness(RewardSpaceTestBase):
- """Tests implementing all prioritized robustness enhancements.
-
- Covers:
- - Reward decomposition integrity (total == core components + shaping + additives)
- - Exit factor monotonic attenuation per mode where mathematically expected
- - Boundary parameter conditions (tau extremes, plateau grace edges, linear slope = 0)
- - Non-linear power tests for idle & hold penalties (power != 1)
- - Warning emission (exit_factor_threshold) without capping
- """
+class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase):
+ """Robustness & boundary assertions: invariants, attenuation maths, parameter edges, scaling, warnings."""
def _mk_context(
self,
action=action,
)
- def test_decomposition_integrity(self):
- """Assert reward_total equals exactly the single active component (mutual exclusivity).
+ def make_params(self, **overrides: Any) -> Dict[str, Any]: # type: ignore[override]
+ """Factory for parameter dict used in robustness tests.
+
+ Returns a shallow copy of DEFAULT_PARAMS with optional overrides.
+ Type hints facilitate future mypy strict mode.
+ """
+ p: Dict[str, Any] = self.DEFAULT_PARAMS.copy()
+ p.update(overrides)
+ return p
+
+ def test_decomposition_integrity(self):
+ """Assert reward_total equals exactly the single active component (mutual exclusivity).
+
+ We sample a grid of mutually exclusive scenarios and validate decomposition.
+ """
+ scenarios = [
+ # Idle penalty only
+ dict(
+ ctx=RewardContext(
+ pnl=0.0,
+ trade_duration=0,
+ idle_duration=25,
+ max_trade_duration=100,
+ max_unrealized_profit=0.0,
+ min_unrealized_profit=0.0,
+ position=Positions.Neutral,
+ action=Actions.Neutral,
+ ),
+ active="idle_penalty",
+ ),
+ # Hold penalty only
+ dict(
+ ctx=RewardContext(
+ pnl=0.0,
+ trade_duration=150,
+ idle_duration=0,
+ max_trade_duration=100,
+ max_unrealized_profit=0.0,
+ min_unrealized_profit=0.0,
+ position=Positions.Long,
+ action=Actions.Neutral,
+ ),
+ active="hold_penalty",
+ ),
+ # Exit reward only (positive pnl)
+ dict(
+ ctx=self._mk_context(pnl=self.TEST_PROFIT_TARGET, trade_duration=60),
+ active="exit_component",
+ ),
+ # Invalid action only
+ dict(
+ ctx=RewardContext(
+ pnl=0.01,
+ trade_duration=10,
+ idle_duration=0,
+ max_trade_duration=100,
+ max_unrealized_profit=0.02,
+ min_unrealized_profit=0.0,
+ position=Positions.Short,
+ action=Actions.Long_exit, # invalid
+ ),
+ active="invalid_penalty",
+ ),
+ ]
+ for sc in scenarios: # type: ignore[var-annotated]
+ ctx_obj: RewardContext = sc["ctx"] # type: ignore[index]
+ active_label: str = sc["active"] # type: ignore[index]
+ with self.subTest(active=active_label):
+ # Build parameters disabling shaping and additives to enforce strict decomposition
+ params_local = self.DEFAULT_PARAMS.copy()
+ params_local.update(
+ {
+ "entry_additive_enabled": False,
+ "exit_additive_enabled": False,
+ "hold_potential_enabled": False,
+ "potential_gamma": 0.0,
+ # Ensure any invariance flags do not add shaping
+ "check_invariants": False,
+ }
+ )
+ br = calculate_reward(
+ ctx_obj,
+ params_local,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
+ short_allowed=True,
+ action_masking=True,
+ )
+ # Single active component must match total; others zero (or near zero for shaping/additives)
+ core_components = {
+ "exit_component": br.exit_component,
+ "idle_penalty": br.idle_penalty,
+ "hold_penalty": br.hold_penalty,
+ "invalid_penalty": br.invalid_penalty,
+ }
+ for name, value in core_components.items():
+ if name == active_label:
+ self.assertAlmostEqualFloat(
+ value,
+ br.total,
+ tolerance=self.TOL_IDENTITY_RELAXED,
+ msg=f"Active component {name} != total",
+ )
+ else:
+ self.assertNearZero(
+ value,
+ atol=self.TOL_IDENTITY_RELAXED,
+ msg=f"Inactive component {name} not near zero (val={value})",
+ )
+ # Shaping and additives explicitly disabled
+ self.assertAlmostEqualFloat(
+ br.shaping_reward, 0.0, tolerance=self.TOL_IDENTITY_RELAXED
+ )
+ self.assertAlmostEqualFloat(
+ br.entry_additive, 0.0, tolerance=self.TOL_IDENTITY_RELAXED
+ )
+ self.assertAlmostEqualFloat(
+ br.exit_additive, 0.0, tolerance=self.TOL_IDENTITY_RELAXED
+ )
+
+ def test_pnl_invariant_exit_only(self):
+ """Invariant: only exit actions have non-zero PnL (robustness category)."""
+ df = simulate_samples(
+ num_samples=200,
+ seed=42,
+ params=self.DEFAULT_PARAMS,
+ max_trade_duration=50,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
+ max_duration_ratio=2.0,
+ trading_mode="margin",
+ pnl_base_std=self.TEST_PNL_STD,
+ pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
+ )
+ total_pnl = df["pnl"].sum()
+ exit_mask = df["reward_exit"] != 0
+ exit_pnl_sum = df.loc[exit_mask, "pnl"].sum()
+ self.assertAlmostEqual(
+ total_pnl,
+ exit_pnl_sum,
+ places=10,
+ msg="PnL invariant violation: total PnL != sum of exit PnL",
+ )
+ non_zero_pnl_actions = set(df[df["pnl"] != 0]["action"].unique())
+ expected_exit_actions = {2.0, 4.0}
+ self.assertTrue(
+ non_zero_pnl_actions.issubset(expected_exit_actions),
+ f"Non-exit actions have PnL: {non_zero_pnl_actions - expected_exit_actions}",
+ )
+ invalid_combinations = df[(df["pnl"] == 0) & (df["reward_exit"] != 0)]
+ self.assertEqual(len(invalid_combinations), 0)
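+
+ # Mask-identity sketch: when pnl is zero outside exit rows, the exit-masked
+ # pnl sum must equal the full-column sum. Toy frame, hypothetical values.
+ def test_exit_mask_sum_identity_sketch(self):
+ df = pd.DataFrame(
+ {"pnl": [0.0, 0.02, 0.0, -0.01], "reward_exit": [0.0, 1.0, 0.0, -0.5]}
+ )
+ mask = df["reward_exit"] != 0
+ self.assertAlmostEqual(df["pnl"].sum(), df.loc[mask, "pnl"].sum(), places=12)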
+
+ def test_exit_factor_mathematical_formulas(self):
+ """Mathematical correctness of exit factor calculations across modes."""
+ context = self._mk_context(pnl=0.05, trade_duration=50)
+ params = self.DEFAULT_PARAMS.copy()
+ duration_ratio = 50 / 100
+ params["exit_attenuation_mode"] = "power"
+ params["exit_power_tau"] = 0.5
+ params["exit_plateau"] = False
+ reward_power = calculate_reward(
+ context,
+ params,
+ self.TEST_BASE_FACTOR,
+ self.TEST_PROFIT_TARGET,
+ self.TEST_RR,
+ short_allowed=True,
+ action_masking=True,
+ )
+ self.assertGreater(reward_power.exit_component, 0)
+ params["exit_attenuation_mode"] = "half_life"
+ params["exit_half_life"] = 0.5
+ reward_half_life = calculate_reward(
+ context,
+ params,
+ self.TEST_BASE_FACTOR,
+ self.TEST_PROFIT_TARGET,
+ self.TEST_RR,
+ short_allowed=True,
+ action_masking=True,
+ )
+ expected_half_life_factor = 2 ** (-duration_ratio / 0.5)
+ # Sanity check of the closed form: dr=0.5 with half-life 0.5 gives exactly 0.5
+ self.assertPlacesEqual(expected_half_life_factor, 0.5, places=6)
+ params["exit_attenuation_mode"] = "linear"
+ params["exit_linear_slope"] = 1.0
+ reward_linear = calculate_reward(
+ context,
+ params,
+ self.TEST_BASE_FACTOR,
+ self.TEST_PROFIT_TARGET,
+ self.TEST_RR,
+ short_allowed=True,
+ action_masking=True,
+ )
+ rewards = [
+ reward_power.exit_component,
+ reward_half_life.exit_component,
+ reward_linear.exit_component,
+ ]
+ self.assertTrue(all(r > 0 for r in rewards))
+ unique_rewards = set(f"{r:.6f}" for r in rewards)
+ self.assertGreater(len(unique_rewards), 1)
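+
+ # Worked check (sketch, pure math): the half-life curve 2^(-dr/tau) retains
+ # exactly one half at dr=tau and one quarter at dr=2*tau.
+ def test_half_life_worked_example_sketch(self):
+ for tau in (0.25, 0.5, 1.0):
+ self.assertAlmostEqual(2.0 ** (-tau / tau), 0.5, places=12)
+ self.assertAlmostEqual(2.0 ** (-2.0 * tau / tau), 0.25, places=12)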
+
+ def test_idle_penalty_fallback_and_proportionality(self):
+ """Idle penalty fallback denominator & proportional scaling (robustness)."""
+ params = self.DEFAULT_PARAMS.copy()
+ params["max_idle_duration_candles"] = None
+ base_factor = 90.0
+ profit_target = self.TEST_PROFIT_TARGET
+ risk_reward_ratio = 1.0
+ ctx_a = self._mk_context(
+ pnl=0.0,
+ trade_duration=0,
+ idle_duration=20,
+ max_trade_duration=100,
+ position=Positions.Neutral,
+ action=Actions.Neutral,
+ )
+ ctx_b = dataclasses.replace(ctx_a, idle_duration=40)
+ br_a = calculate_reward(
+ ctx_a,
+ params,
+ base_factor=base_factor,
+ profit_target=profit_target,
+ risk_reward_ratio=risk_reward_ratio,
+ short_allowed=True,
+ action_masking=True,
+ )
+ br_b = calculate_reward(
+ ctx_b,
+ params,
+ base_factor=base_factor,
+ profit_target=profit_target,
+ risk_reward_ratio=risk_reward_ratio,
+ short_allowed=True,
+ action_masking=True,
+ )
+ self.assertLess(br_a.idle_penalty, 0.0)
+ self.assertLess(br_b.idle_penalty, 0.0)
+ ratio = (
+ br_b.idle_penalty / br_a.idle_penalty if br_a.idle_penalty != 0 else None
+ )
+ self.assertIsNotNone(ratio)
+ self.assertAlmostEqualFloat(abs(ratio), 2.0, tolerance=0.2)
+ ctx_mid = dataclasses.replace(ctx_a, idle_duration=120, max_trade_duration=100)
+ br_mid = calculate_reward(
+ ctx_mid,
+ params,
+ base_factor=base_factor,
+ profit_target=profit_target,
+ risk_reward_ratio=risk_reward_ratio,
+ short_allowed=True,
+ action_masking=True,
+ )
+ self.assertLess(br_mid.idle_penalty, 0.0)
+ idle_penalty_scale = _get_float_param(params, "idle_penalty_scale", 0.5)
+ idle_penalty_power = _get_float_param(params, "idle_penalty_power", 1.025)
+ factor = _get_float_param(params, "base_factor", float(base_factor))
+ idle_factor = factor * (profit_target * risk_reward_ratio) / 4.0
+ observed_ratio = abs(br_mid.idle_penalty) / (idle_factor * idle_penalty_scale)
+ if observed_ratio > 0:
+ implied_D = 120 / (observed_ratio ** (1 / idle_penalty_power))
+ self.assertAlmostEqualFloat(implied_D, 400.0, tolerance=20.0)
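+
+ # Inversion sketch, assuming the closed form exercised above:
+ # |idle_penalty| = idle_factor * scale * (idle/D)^power. Dividing out the
+ # prefactor and inverting the power recovers D; numbers are hypothetical.
+ def test_idle_penalty_denominator_inversion_sketch(self):
+ power, D, idle = 1.025, 400.0, 120.0
+ observed_ratio = (idle / D) ** power
+ self.assertAlmostEqual(idle / (observed_ratio ** (1 / power)), D, places=9)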
+
+ def test_exit_factor_threshold_warning_and_non_capping(self):
+ """Warning emission without capping when exit_factor_threshold exceeded."""
+ params = self.make_params(exit_factor_threshold=10.0)
+ params.pop("base_factor", None)
+ context = self._mk_context(
+ pnl=0.08,
+ trade_duration=10,
+ max_unrealized_profit=0.09,
+ min_unrealized_profit=0.0,
+ )
+ with warnings.catch_warnings(record=True) as caught:
+ warnings.simplefilter("always")
+ baseline = calculate_reward(
+ context,
+ params,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR_HIGH,
+ short_allowed=True,
+ action_masking=True,
+ )
+ amplified_base_factor = self.TEST_BASE_FACTOR * 200.0
+ amplified = calculate_reward(
+ context,
+ params,
+ base_factor=amplified_base_factor,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR_HIGH,
+ short_allowed=True,
+ action_masking=True,
+ )
+ self.assertGreater(baseline.exit_component, 0.0)
+ self.assertGreater(amplified.exit_component, baseline.exit_component)
+ scale = amplified.exit_component / baseline.exit_component
+ self.assertGreater(scale, 10.0)
+ runtime_warnings = [w for w in caught if issubclass(w.category, RuntimeWarning)]
+ self.assertTrue(runtime_warnings)
+ self.assertTrue(
+ any(
+ (
+ "exceeded threshold" in str(w.message)
+ or "exceeds threshold" in str(w.message)
+ or "|factor|=" in str(w.message)
+ )
+ for w in runtime_warnings
+ )
+ )
+
+ def test_negative_slope_sanitization(self):
+ """Negative exit_linear_slope is sanitized to 1.0 (default) producing identical factors.
+
+ We compare exit factors with an explicit negative slope vs explicit slope=1.0.
+ The sanitized version should match reference within strict tolerance.
+ """
+ base_factor = 100.0
+ pnl = 0.03
+ pnl_factor = 1.0
+ duration_ratios = [0.0, 0.2, 0.5, 1.0, 1.5]
+ params_bad = self.make_params(
+ exit_attenuation_mode="linear", exit_linear_slope=-5.0, exit_plateau=False
+ )
+ params_ref = self.make_params(
+ exit_attenuation_mode="linear", exit_linear_slope=1.0, exit_plateau=False
+ )
+ for dr in duration_ratios:
+ f_bad = _get_exit_factor(base_factor, pnl, pnl_factor, dr, params_bad)
+ f_ref = _get_exit_factor(base_factor, pnl, pnl_factor, dr, params_ref)
+ self.assertAlmostEqualFloat(
+ f_bad,
+ f_ref,
+ tolerance=self.TOL_IDENTITY_RELAXED,
+ msg=f"Sanitized slope mismatch at dr={dr} f_bad={f_bad} f_ref={f_ref}",
+ )
+
+ def test_power_mode_alpha_formula(self):
+ """Power mode: alpha = -log(tau)/log(2); verify analytic attenuation.
+
+ For several tau values we evaluate the exit factor ratio between dr=0 and dr=1
+ and compare to expected 1/(1+1)^alpha = 2^{-alpha}.
+ """
+ base_factor = 200.0
+ pnl = 0.04
+ pnl_factor = 1.0
+ duration_ratio = 1.0
+ taus = [
+ 0.9,
+ 0.5,
+ 0.25,
+ 1.0,
+ ] # boundary: tau=1.0 => alpha = -log(1)/log(2) = 0 (no attenuation)
+ for tau in taus:
+ params = self.make_params(
+ exit_attenuation_mode="power", exit_power_tau=tau, exit_plateau=False
+ )
+ f0 = _get_exit_factor(base_factor, pnl, pnl_factor, 0.0, params)
+ f1 = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params)
+ # Extract alpha using same formula (replicate logic)
+ if 0.0 < tau <= 1.0:
+ alpha = -math.log(tau) / math.log(2.0)
+ else:
+ alpha = 1.0
+ expected_ratio = 1.0 / (1.0 + duration_ratio) ** alpha
+ observed_ratio = f1 / f0 if f0 != 0 else float("nan")
+ self.assertFinite(observed_ratio, name="observed_ratio")
+ self.assertLess(
+ abs(observed_ratio - expected_ratio),
+ 5e-12 if tau == 1.0 else 5e-9,
+ f"Alpha attenuation mismatch tau={tau} alpha={alpha} obs_ratio={observed_ratio} exp_ratio={expected_ratio}",
+ )
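+
+ # Closed-form sketch: with alpha = -log(tau)/log(2), the dr=1 attenuation
+ # ratio 1/(1+1)^alpha equals tau itself, i.e. tau is the retained fraction
+ # at unit duration ratio. Pure math, mirrors the replicated logic above.
+ def test_power_mode_tau_is_retention_at_unit_ratio(self):
+ for tau in (0.9, 0.5, 0.25):
+ alpha = -math.log(tau) / math.log(2.0)
+ self.assertAlmostEqual(1.0 / (1.0 + 1.0) ** alpha, tau, places=12)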
+
+ # Fused from former TestBoundaryConditions
+ def test_extreme_parameter_values(self):
+ extreme_params = self.DEFAULT_PARAMS.copy()
+ extreme_params["win_reward_factor"] = 1000.0
+ extreme_params["base_factor"] = 10000.0
+ context = RewardContext(
+ pnl=0.05,
+ trade_duration=50,
+ idle_duration=0,
+ max_trade_duration=100,
+ max_unrealized_profit=0.06,
+ min_unrealized_profit=0.02,
+ position=Positions.Long,
+ action=Actions.Long_exit,
+ )
+ br = calculate_reward(
+ context,
+ extreme_params,
+ base_factor=10000.0,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
+ short_allowed=True,
+ action_masking=True,
+ )
+ self.assertFinite(br.total, name="breakdown.total")
- We sample a grid of mutually exclusive scenarios and validate decomposition.
- """
- scenarios = [
- # Idle penalty only
- dict(
- ctx=RewardContext(
- pnl=0.0,
- trade_duration=0,
- idle_duration=25,
- max_trade_duration=100,
- max_unrealized_profit=0.0,
- min_unrealized_profit=0.0,
- position=Positions.Neutral,
- action=Actions.Neutral,
- ),
- active="idle_penalty",
- ),
- # Hold penalty only
- dict(
- ctx=RewardContext(
- pnl=0.0,
- trade_duration=150,
+ def test_exit_attenuation_modes_enumeration(self):
+ modes = ATTENUATION_MODES_WITH_LEGACY
+ for mode in modes:
+ with self.subTest(mode=mode):
+ test_params = self.DEFAULT_PARAMS.copy()
+ test_params["exit_attenuation_mode"] = mode
+ ctx = RewardContext(
+ pnl=0.02,
+ trade_duration=50,
idle_duration=0,
max_trade_duration=100,
- max_unrealized_profit=0.0,
- min_unrealized_profit=0.0,
+ max_unrealized_profit=0.03,
+ min_unrealized_profit=0.01,
position=Positions.Long,
- action=Actions.Neutral,
- ),
- active="hold_penalty",
- ),
- # Exit reward only (positive pnl)
- dict(
- ctx=self._mk_context(pnl=self.TEST_PROFIT_TARGET, trade_duration=60),
- active="exit_component",
- ),
- # Invalid action only
- dict(
- ctx=RewardContext(
- pnl=0.01,
- trade_duration=10,
- idle_duration=0,
- max_trade_duration=100,
- max_unrealized_profit=0.02,
- min_unrealized_profit=0.0,
- position=Positions.Short,
- action=Actions.Long_exit, # invalid
- ),
- active="invalid_penalty",
- ),
- ]
- for sc in scenarios: # type: ignore[var-annotated]
- ctx_obj: RewardContext = sc["ctx"] # type: ignore[index]
- active_label: str = sc["active"] # type: ignore[index]
- with self.subTest(active=active_label):
+ action=Actions.Long_exit,
+ )
br = calculate_reward(
- ctx_obj,
- self.DEFAULT_PARAMS,
+ ctx,
+ test_params,
base_factor=self.TEST_BASE_FACTOR,
profit_target=self.TEST_PROFIT_TARGET,
risk_reward_ratio=self.TEST_RR,
short_allowed=True,
- action_masking=(active_label != "invalid_penalty"),
- )
- components = [
- br.invalid_penalty,
- br.idle_penalty,
- br.hold_penalty,
- br.exit_component,
- ]
- non_zero = [
- c for c in components if not math.isclose(c, 0.0, abs_tol=1e-12)
- ]
- self.assertEqual(
- len(non_zero),
- 1,
- f"Exactly one component must be active for {sc['active']}",
- )
- self.assertAlmostEqualFloat(
- br.total,
- non_zero[0]
- + br.shaping_reward
- + br.entry_additive
- + br.exit_additive,
- tolerance=1e-9,
- msg=f"Total mismatch including shaping for {sc['active']}",
+ action_masking=True,
)
+ self.assertFinite(br.exit_component, name="breakdown.exit_component")
+ self.assertFinite(br.total, name="breakdown.total")
def test_exit_factor_monotonic_attenuation(self):
"""For attenuation modes: factor should be non-increasing w.r.t duration_ratio.
# Plateau+linear: ignore initial flat region when checking monotonic decrease
if mode == "plateau_linear":
grace = float(params["exit_plateau_grace"]) # type: ignore[index]
- filtered = [(r, v) for r, v in zip(ratios, values) if r >= grace - 1e-9]
+ filtered = [
+ (r, v)
+ for r, v in zip(ratios, values)
+ if r >= grace - self.TOL_IDENTITY_RELAXED
+ ]
values_to_check = [v for _, v in filtered]
else:
values_to_check = values
for earlier, later in zip(values_to_check, values_to_check[1:]):
self.assertLessEqual(
later,
- earlier + 1e-9,
+ earlier + self.TOL_IDENTITY_RELAXED,
f"Non-monotonic attenuation in mode={mode}",
)
params_hi = self.DEFAULT_PARAMS.copy()
params_hi.update({"exit_attenuation_mode": "power", "exit_power_tau": 0.999999})
params_lo = self.DEFAULT_PARAMS.copy()
- params_lo.update({"exit_attenuation_mode": "power", "exit_power_tau": 1e-6})
+ params_lo.update(
+ {
+ "exit_attenuation_mode": "power",
+ "exit_power_tau": self.MIN_EXIT_POWER_TAU,
+ }
+ )
r = 1.5
hi_val = _get_exit_factor(base_factor, pnl, pnl_factor, r, params_hi)
lo_val = _get_exit_factor(base_factor, pnl, pnl_factor, r, params_lo)
self.assertAlmostEqualFloat(
v,
first,
- tolerance=1e-9,
+ tolerance=self.TOL_IDENTITY_RELAXED,
msg=f"Plateau+linear slope=0 factor drift at ratio set {ratios} => {values}",
)
self.assertAlmostEqualFloat(
vals[i],
ref,
- 1e-9,
+ self.TOL_IDENTITY_RELAXED,
msg=f"Unexpected attenuation before grace end at ratio {r}",
)
# Last ratio (1.6) should be attenuated (strictly less than ref)
self.assertLess(vals[-1], ref, "Attenuation should begin after grace boundary")
- def test_legacy_step_non_monotonic(self):
- """Legacy mode applies step change at duration_ratio=1 (should not be monotonic)."""
-
- params = self.DEFAULT_PARAMS.copy()
- params["exit_attenuation_mode"] = "legacy"
- params["exit_plateau"] = False
- base_factor = self.TEST_BASE_FACTOR
- pnl = 0.02
- pnl_factor = 1.0
- # ratio below 1 vs above 1
- below = _get_exit_factor(base_factor, pnl, pnl_factor, 0.5, params)
- above = _get_exit_factor(base_factor, pnl, pnl_factor, 1.5, params)
- # Legacy multiplies by 1.5 then 0.5 -> below should be > above * 2 (since (1.5)/(0.5)=3)
- self.assertGreater(
- below, above, "Legacy pre-threshold factor should exceed post-threshold"
- )
- ratio = below / max(above, 1e-12)
- self.assertGreater(
- ratio,
- 2.5,
- f"Legacy step ratio unexpectedly small (expected ~3, got {ratio:.3f})",
- )
-
- def test_exit_factor_non_negative_with_positive_pnl(self):
- """Exit factor must not be negative when pnl >= 0 (invariant clamp)."""
-
- params = self.DEFAULT_PARAMS.copy()
- # All canonical modes + legacy + synthetic plateau variant
- modes = list(ATTENUATION_MODES_WITH_LEGACY) + ["plateau_linear"]
- base_factor = self.TEST_BASE_FACTOR
- pnl = 0.05
- pnl_factor = 2.0 # amplified
- for mode in modes:
- params_mode = params.copy()
- if mode == "plateau_linear":
- params_mode["exit_attenuation_mode"] = "linear"
- params_mode["exit_plateau"] = True
- params_mode["exit_plateau_grace"] = 0.4
- else:
- params_mode["exit_attenuation_mode"] = mode
- val = _get_exit_factor(base_factor, pnl, pnl_factor, 2.0, params_mode)
- self.assertGreaterEqual(
- val,
- 0.0,
- f"Exit factor should be >=0 for non-negative pnl in mode {mode}",
- )
-
-
-class TestParameterValidation(RewardSpaceTestBase):
- """Tests for validate_reward_parameters adjustments and reasons."""
-
- def test_validate_reward_parameters_adjustments(self):
- raw = self.DEFAULT_PARAMS.copy()
- # Introduce out-of-bound values
- raw["idle_penalty_scale"] = -5.0 # < min 0
- raw["efficiency_center"] = 2.0 # > max 1
- raw["exit_power_tau"] = 0.0 # below min (treated as 1e-6)
- sanitized, adjustments = validate_reward_parameters(raw)
- # Idle penalty scale should be clamped to 0.0
- self.assertEqual(sanitized["idle_penalty_scale"], 0.0)
- self.assertIn("idle_penalty_scale", adjustments)
- self.assertIsInstance(adjustments["idle_penalty_scale"]["reason"], str)
- self.assertIn("min=0.0", str(adjustments["idle_penalty_scale"]["reason"]))
- # Efficiency center should be clamped to 1.0
- self.assertEqual(sanitized["efficiency_center"], 1.0)
- self.assertIn("efficiency_center", adjustments)
- self.assertIn("max=1.0", str(adjustments["efficiency_center"]["reason"]))
- # exit_power_tau should be raised to lower bound (>=1e-6)
- self.assertGreaterEqual(float(sanitized["exit_power_tau"]), 1e-6)
- self.assertIn("exit_power_tau", adjustments)
- self.assertIn("min=", str(adjustments["exit_power_tau"]["reason"]))
-
- def test_idle_and_hold_penalty_power(self):
- """Test non-linear scaling when penalty powers != 1."""
- params = self.DEFAULT_PARAMS.copy()
- params["idle_penalty_power"] = 2.0
- params["max_idle_duration_candles"] = 100
- base_factor = 90.0
- profit_target = self.TEST_PROFIT_TARGET
- # Idle penalties for durations 20 vs 40 (quadratic → (40/100)^2 / (20/100)^2 = (0.4^2)/(0.2^2)=4)
- ctx_a = RewardContext(
- pnl=0.0,
- trade_duration=0,
- idle_duration=20,
- max_trade_duration=128,
- max_unrealized_profit=0.0,
- min_unrealized_profit=0.0,
- position=Positions.Neutral,
- action=Actions.Neutral,
- )
- ctx_b = dataclasses.replace(ctx_a, idle_duration=40)
- br_a = calculate_reward(
- ctx_a,
- params,
- base_factor=base_factor,
- profit_target=profit_target,
- risk_reward_ratio=self.TEST_RR,
- short_allowed=True,
- action_masking=True,
- )
- br_b = calculate_reward(
- ctx_b,
- params,
- base_factor=base_factor,
- profit_target=profit_target,
- risk_reward_ratio=self.TEST_RR,
- short_allowed=True,
- action_masking=True,
- )
- ratio_quadratic = 0.0
- if br_a.idle_penalty != 0:
- ratio_quadratic = br_b.idle_penalty / br_a.idle_penalty
- # Both negative; absolute ratio should be close to 4
- self.assertAlmostEqual(
- abs(ratio_quadratic),
- 4.0,
- delta=0.8,
- msg=f"Idle penalty quadratic scaling mismatch (ratio={ratio_quadratic})",
- )
- # Hold penalty with power 2: durations just above threshold
- params["hold_penalty_power"] = 2.0
- ctx_h1 = RewardContext(
- pnl=0.0,
- trade_duration=130,
- idle_duration=0,
- max_trade_duration=100,
- max_unrealized_profit=0.0,
- min_unrealized_profit=0.0,
- position=Positions.Long,
- action=Actions.Neutral,
- )
- ctx_h2 = dataclasses.replace(ctx_h1, trade_duration=140)
- # Compute baseline and comparison hold penalties
- br_h1 = calculate_reward(
- ctx_h1,
- params,
- base_factor=base_factor,
- profit_target=profit_target,
- risk_reward_ratio=self.TEST_RR,
- short_allowed=True,
- action_masking=True,
- )
- br_h2 = calculate_reward(
- ctx_h2,
- params,
- base_factor=base_factor,
- profit_target=profit_target,
- risk_reward_ratio=1.0,
- short_allowed=True,
- action_masking=True,
- )
- # Quadratic scaling: ((140-100)/(130-100))^2 = (40/30)^2 ≈ 1.777...
- hold_ratio = 0.0
- if br_h1.hold_penalty != 0:
- hold_ratio = br_h2.hold_penalty / br_h1.hold_penalty
- self.assertAlmostEqual(
- abs(hold_ratio),
- (40 / 30) ** 2,
- delta=0.4,
- msg=f"Hold penalty quadratic scaling mismatch (ratio={hold_ratio})",
- )
-
- def test_exit_factor_threshold_warning_emission(self):
- """Ensure a RuntimeWarning is emitted when exit_factor exceeds threshold (no capping)."""
-
- params = self.DEFAULT_PARAMS.copy()
- params["exit_factor_threshold"] = 10.0 # low threshold to trigger easily
- # Remove base_factor to allow argument override
- params.pop("base_factor", None)
-
- context = RewardContext(
- pnl=0.06,
- trade_duration=10,
- idle_duration=0,
- max_trade_duration=128,
- max_unrealized_profit=0.08,
- min_unrealized_profit=0.0,
- position=Positions.Long,
- action=Actions.Long_exit,
- )
- with warnings.catch_warnings(record=True) as w:
- warnings.simplefilter("always")
- br = calculate_reward(
- context,
- params,
- base_factor=5000.0, # large enough to exceed threshold
- profit_target=self.TEST_PROFIT_TARGET,
- risk_reward_ratio=self.TEST_RR_HIGH,
- short_allowed=True,
- action_masking=True,
- )
- self.assertGreater(br.exit_component, 0.0)
- emitted = [warn for warn in w if issubclass(warn.category, RuntimeWarning)]
- self.assertTrue(
- len(emitted) >= 1, "Expected a RuntimeWarning for exit factor threshold"
- )
- # Confirm message indicates threshold exceedance (supports legacy and new message format)
- self.assertTrue(
- any(
- (
- "exceeded threshold" in str(warn.message)
- or "exceeds threshold" in str(warn.message)
- or "|factor|=" in str(warn.message)
- )
- for warn in emitted
- ),
- "Warning message should indicate threshold exceedance",
- )
-
-
-class TestContinuityPlateau(RewardSpaceTestBase):
- """Continuity tests for plateau-enabled exit attenuation (excluding legacy)."""
-
def test_plateau_continuity_at_grace_boundary(self):
modes = ["sqrt", "linear", "power", "half_life"]
grace = 0.8
- eps = 1e-4
+ eps = self.CONTINUITY_EPS_SMALL
base_factor = self.TEST_BASE_FACTOR
pnl = 0.01
pnl_factor = 1.0
self.assertAlmostEqualFloat(
left,
boundary,
- tolerance=1e-9,
+ tolerance=self.TOL_IDENTITY_RELAXED,
msg=f"Left/boundary mismatch for mode {mode}",
)
self.assertLess(
mode = "linear"
grace = 0.6
- eps1 = 1e-3
- eps2 = 1e-4
+ eps1 = self.CONTINUITY_EPS_LARGE
+ eps2 = self.CONTINUITY_EPS_SMALL
base_factor = 80.0
pnl = 0.02
params = self.DEFAULT_PARAMS.copy()
diff1 = f_boundary - f1
diff2 = f_boundary - f2
- ratio = diff1 / max(diff2, 1e-12)
+ ratio = diff1 / max(diff2, self.TOL_NUMERIC_GUARD)
self.assertGreater(ratio, 5.0, f"Scaling ratio too small (ratio={ratio:.2f})")
self.assertLess(ratio, 15.0, f"Scaling ratio too large (ratio={ratio:.2f})")
class TestLoadRealEpisodes(RewardSpaceTestBase):
- """Unit tests for load_real_episodes (moved from separate file)."""
+ """Unit tests for load_real_episodes."""
def write_pickle(self, obj, path: Path):
with path.open("wb") as f:
_ = w
self.assertEqual(len(loaded), 1)
- self.assertAlmostEqual(float(loaded.iloc[0]["pnl"]), 0.02, places=7)
+ self.assertPlacesEqual(float(loaded.iloc[0]["pnl"]), 0.02, places=7)
def test_non_iterable_transitions_raises(self):
bad = {"transitions": 123}
loaded = load_real_episodes(p)
self.assertIn("pnl", loaded.columns)
self.assertIn(loaded["pnl"].dtype.kind, ("f", "i"))
- self.assertAlmostEqual(float(loaded.iloc[0]["pnl"]), 0.04, places=7)
+ self.assertPlacesEqual(float(loaded.iloc[0]["pnl"]), 0.04, places=7)
+
+ def test_pickled_dataframe_loads(self):
+ """Ensure a directly pickled DataFrame loads correctly."""
+ test_episodes = pd.DataFrame(
+ {
+ "pnl": [0.01, -0.02, 0.03],
+ "trade_duration": [10, 20, 15],
+ "idle_duration": [5, 0, 8],
+ "position": [1.0, 0.0, 1.0],
+ "action": [2.0, 0.0, 2.0],
+ "reward_total": [10.5, -5.2, 15.8],
+ }
+ )
+ p = Path(self.temp_dir) / "test_episodes.pkl"
+ self.write_pickle(test_episodes, p)
+
+ loaded_data = load_real_episodes(p)
+ self.assertIsInstance(loaded_data, pd.DataFrame)
+ self.assertEqual(len(loaded_data), 3)
+ self.assertIn("pnl", loaded_data.columns)
+
+
+class TestPBRS(RewardSpaceTestBase):
+ """PBRS mechanics tests (transforms, parameters, potentials, invariance)."""
+
+ def test_pbrs_progressive_release_decay_clamped(self):
+ """progressive_release decay>1 clamps -> Φ'=0 & Δ=-Φ_prev."""
+ params = self.DEFAULT_PARAMS.copy()
+ params.update(
+ {
+ "potential_gamma": DEFAULT_MODEL_REWARD_PARAMETERS["potential_gamma"],
+ "exit_potential_mode": "progressive_release",
+ "exit_potential_decay": 5.0, # clamp to 1.0
+ "hold_potential_enabled": True,
+ "entry_additive_enabled": False,
+ "exit_additive_enabled": False,
+ }
+ )
+ current_pnl = 0.02
+ current_dur = 0.5
+ prev_potential = _compute_hold_potential(current_pnl, current_dur, params)
+ _total_reward, shaping_reward, next_potential = apply_potential_shaping(
+ base_reward=0.0,
+ current_pnl=current_pnl,
+ current_duration_ratio=current_dur,
+ next_pnl=0.02,
+ next_duration_ratio=0.6,
+ is_terminal=True,
+ last_potential=prev_potential,
+ params=params,
+ )
+ self.assertAlmostEqualFloat(
+ next_potential, 0.0, tolerance=self.TOL_IDENTITY_RELAXED
+ )
+ self.assertAlmostEqualFloat(
+ shaping_reward, -prev_potential, tolerance=self.TOL_IDENTITY_RELAXED
+ )
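+
+ # Release-law sketch: Φ' = Φ * (1 - clamp(decay, 0, 1)), so decay values
+ # outside [0, 1] saturate (5.0 acts as 1.0, -0.75 as 0.0). Hypothetical Φ.
+ def test_progressive_release_clamp_sketch(self):
+ phi = 0.6
+ for decay, expected in ((0.25, 0.45), (1.0, 0.0), (5.0, 0.0), (-0.75, 0.6)):
+ clamped = min(1.0, max(0.0, decay))
+ self.assertAlmostEqual(phi * (1.0 - clamped), expected, places=12)
+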
- def test_pickled_dataframe_loads(self):
- """Ensure a directly pickled DataFrame loads correctly."""
- test_episodes = pd.DataFrame(
+ def test_pbrs_spike_cancel_invariance(self):
+ """spike_cancel terminal shaping ≈0 (Φ' inversion yields cancellation)."""
+ params = self.DEFAULT_PARAMS.copy()
+ params.update(
{
- "pnl": [0.01, -0.02, 0.03],
- "trade_duration": [10, 20, 15],
- "idle_duration": [5, 0, 8],
- "position": [1.0, 0.0, 1.0],
- "action": [2.0, 0.0, 2.0],
- "reward_total": [10.5, -5.2, 15.8],
+ "potential_gamma": 0.9,
+ "exit_potential_mode": "spike_cancel",
+ "hold_potential_enabled": True,
+ "entry_additive_enabled": False,
+ "exit_additive_enabled": False,
}
)
- p = Path(self.temp_dir) / "test_episodes.pkl"
- self.write_pickle(test_episodes, p)
-
- loaded_data = load_real_episodes(p)
- self.assertIsInstance(loaded_data, pd.DataFrame)
- self.assertEqual(len(loaded_data), 3)
- self.assertIn("pnl", loaded_data.columns)
-
-
-class TestPBRSIntegration(RewardSpaceTestBase):
- """Tests for PBRS (Potential-Based Reward Shaping) integration."""
+ current_pnl = 0.015
+ current_dur = 0.4
+ prev_potential = _compute_hold_potential(current_pnl, current_dur, params)
+ gamma = _get_float_param(
+ params,
+ "potential_gamma",
+ DEFAULT_MODEL_REWARD_PARAMETERS.get("potential_gamma", 0.95),
+ )
+ expected_next = (
+ prev_potential / gamma if gamma not in (0.0, None) else prev_potential
+ )
+ _total_reward, shaping_reward, next_potential = apply_potential_shaping(
+ base_reward=0.0,
+ current_pnl=current_pnl,
+ current_duration_ratio=current_dur,
+ next_pnl=0.016,
+ next_duration_ratio=0.45,
+ is_terminal=True,
+ last_potential=prev_potential,
+ params=params,
+ )
+ self.assertAlmostEqualFloat(
+ next_potential, expected_next, tolerance=self.TOL_IDENTITY_RELAXED
+ )
+ self.assertNearZero(shaping_reward, atol=self.TOL_IDENTITY_RELAXED)
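+
+ # Algebra sketch for spike_cancel: with Φ' = Φ/γ the terminal shaping term
+ # γ*Φ' - Φ collapses to zero for any γ > 0. Pure arithmetic, toy values.
+ def test_spike_cancel_algebra_sketch(self):
+ phi = 0.37
+ for gamma in (0.5, 0.9, 0.999):
+ self.assertAlmostEqual(gamma * (phi / gamma) - phi, 0.0, places=12)
+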
def test_tanh_transform(self):
"""tanh transform: tanh(x) in (-1, 1)."""
self.assertAlmostEqualFloat(
apply_transform("asinh", 1.2345),
-apply_transform("asinh", -1.2345),
- tolerance=1e-12,
+ tolerance=self.TOL_IDENTITY_STRICT,
)
# Monotonicity
vals = [apply_transform("asinh", x) for x in [-5.0, -1.0, 0.0, 1.0, 5.0]]
"""Test error handling for invalid transforms."""
# Environment falls back silently to tanh
self.assertAlmostEqualFloat(
- apply_transform("invalid_transform", 1.0), math.tanh(1.0), tolerance=1e-9
+ apply_transform("invalid_transform", 1.0),
+ math.tanh(1.0),
+ tolerance=self.TOL_IDENTITY_RELAXED,
)
def test_get_float_param(self):
self.assertEqual(val, 0.0)
def test_exit_potential_canonical(self):
- """Test exit potential in canonical mode."""
- params = {"exit_potential_mode": "canonical"}
- val = _compute_exit_potential(1.0, params)
- self.assertEqual(val, 0.0)
-
- def test_exit_potential_progressive_release(self):
- """Progressive release: Φ' = Φ * (1 - decay)."""
- params = {
- "exit_potential_mode": "progressive_release",
- "exit_potential_decay": 0.8,
- }
- # Expected: Φ' = Φ * (1 - decay) = 1 * (1 - 0.8) = 0.2
- val = _compute_exit_potential(1.0, params)
- self.assertAlmostEqual(val, 0.2)
+ params = DEFAULT_MODEL_REWARD_PARAMETERS.copy()
+ params.update(
+ {
+ "exit_potential_mode": "canonical",
+ "hold_potential_enabled": True,
+ "entry_additive_enabled": True, # expected to be auto-disabled by canonical invariance
+ "exit_additive_enabled": True, # expected to be auto-disabled by canonical invariance
+ }
+ )
+ base_reward = 0.25
+ current_pnl = 0.05
+ current_duration_ratio = 0.4
+ next_pnl = 0.0
+ next_duration_ratio = 0.0
+ total, shaping, next_potential = apply_potential_shaping(
+ base_reward=base_reward,
+ current_pnl=current_pnl,
+ current_duration_ratio=current_duration_ratio,
+ next_pnl=next_pnl,
+ next_duration_ratio=next_duration_ratio,
+ is_terminal=True,
+ last_potential=0.789, # arbitrary, should be ignored for Φ'
+ params=params,
+ )
+ # Canonical mode must auto-disable additives to preserve policy invariance
+ self.assertIn("_pbrs_invariance_applied", params)
+ self.assertFalse(
+ params["entry_additive_enabled"],
+ "Entry additive should be auto-disabled in canonical mode",
+ )
+ self.assertFalse(
+ params["exit_additive_enabled"],
+ "Exit additive should be auto-disabled in canonical mode",
+ )
+ # Next potential is forced to 0
+ self.assertPlacesEqual(next_potential, 0.0, places=12)
+ # Compute current potential independently to assert shaping = -Φ(s)
+ current_potential = _compute_hold_potential(
+ current_pnl,
+ current_duration_ratio,
+ {"hold_potential_enabled": True, "hold_potential_scale": 1.0},
+ )
+ # shaping should equal -current_potential within tolerance
+ self.assertAlmostEqual(
+ shaping, -current_potential, delta=self.TOL_IDENTITY_RELAXED
+ )
+ # Since additives are disabled, total ≈ base_reward + shaping (residual ~0)
+ residual = total - base_reward - shaping
+ self.assertAlmostEqual(residual, 0.0, delta=self.TOL_IDENTITY_RELAXED)
+ self.assertTrue(np.isfinite(total))
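+
+ # Terminal algebra sketch for canonical mode: Φ' is forced to 0, so the
+ # shaping term γ*Φ' - Φ(s) reduces to -Φ(s) for any γ. Toy values only.
+ def test_canonical_terminal_shaping_sketch(self):
+ phi = 0.42
+ for gamma in (0.0, 0.95, 1.0):
+ self.assertAlmostEqual(gamma * 0.0 - phi, -phi, places=12)
+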
- def test_exit_potential_spike_cancel(self):
- """Spike cancel: Φ' = Φ / γ (inversion)."""
- params = {
- "exit_potential_mode": "spike_cancel",
- "potential_gamma": DEFAULT_MODEL_REWARD_PARAMETERS["potential_gamma"],
- }
- val = _compute_exit_potential(1.0, params)
- self.assertAlmostEqual(val, 1.0 / 0.95, places=7)
-
- def test_exit_potential_retain_previous(self):
- """Test exit potential in retain previous mode."""
- params = {"exit_potential_mode": "retain_previous"}
- val = _compute_exit_potential(1.0, params)
- self.assertEqual(val, 1.0)
-
- def test_pbrs_terminal_canonical(self):
- """Canonical mode structural invariance checks.
-
- We do NOT enforce Σ shaping ≈ 0 here (finite-horizon drift acceptable). We verify:
- - Additive components auto-disabled (policy invariance enforcement)
- - Terminal potential always zero (Φ_terminal = 0)
- - Shaping magnitudes bounded (safety guard against spikes)
- """
+ def test_pbrs_invariance_internal_flag_set(self):
+ """Canonical path sets _pbrs_invariance_applied once; second call idempotent."""
params = DEFAULT_MODEL_REWARD_PARAMETERS.copy()
params.update(
{
"exit_potential_mode": "canonical",
"hold_potential_enabled": True,
- "entry_additive_enabled": False,
- "exit_additive_enabled": False,
+ "entry_additive_enabled": True, # will be auto-disabled
+ "exit_additive_enabled": True,
}
)
- rng = np.random.default_rng(123)
- last_potential = 0.0
- terminal_next_potentials: list[float] = []
- shaping_values: list[float] = []
- current_pnl = 0.0
- current_dur = 0.0
- for _ in range(350):
- is_terminal = rng.uniform() < 0.08
- # Sample candidate next state (ignored for terminal potential because canonical sets Φ'=0)
- next_pnl = 0.0 if is_terminal else float(rng.normal(0, 0.2))
- inc = rng.uniform(0, 0.12)
- next_dur = 0.0 if is_terminal else float(min(1.0, current_dur + inc))
- _total, shaping, next_potential = apply_potential_shaping(
- base_reward=0.0,
- current_pnl=current_pnl,
- current_duration_ratio=current_dur,
- next_pnl=next_pnl,
- next_duration_ratio=next_dur,
- is_terminal=is_terminal,
- last_potential=last_potential,
- params=params,
- )
- shaping_values.append(shaping)
- if is_terminal:
- terminal_next_potentials.append(next_potential)
- last_potential = 0.0
- current_pnl = 0.0
- current_dur = 0.0
- else:
- last_potential = next_potential
- current_pnl = next_pnl
- current_dur = next_dur
- # Assertions
- self.assertFalse(params["entry_additive_enabled"]) # enforced
- self.assertFalse(params["exit_additive_enabled"]) # enforced
+ # Structural sweep (ensures terminal Φ'==0 and shaping bounded)
+ terminal_next_potentials, shaping_values = self._canonical_sweep(params)
+
+ # First call (terminal, to force the exit path) activates the invariance flag
+ _t1, _s1, _n1 = apply_potential_shaping(
+ base_reward=0.0,
+ current_pnl=0.05,
+ current_duration_ratio=0.3,
+ next_pnl=0.0,
+ next_duration_ratio=0.0,
+ is_terminal=True,
+ last_potential=0.4,
+ params=params,
+ )
+ self.assertIn("_pbrs_invariance_applied", params)
+ self.assertFalse(params["entry_additive_enabled"])
+ self.assertFalse(params["exit_additive_enabled"])
if terminal_next_potentials:
- self.assertTrue(all(abs(p) < 1e-12 for p in terminal_next_potentials))
- max_abs = max(abs(v) for v in shaping_values) if shaping_values else 0.0
- self.assertLessEqual(max_abs, 5.0)
-
- # Test basic PBRS function integration works
- params = {"hold_potential_enabled": True, "hold_potential_scale": 1.0}
- potential = _compute_hold_potential(0.1, 0.2, params)
- self.assertFinite(potential, name="hold_potential")
-
- def test_pbrs_default_parameters_completeness(self):
- """Test that all required PBRS parameters have defaults."""
- for param in PBRS_REQUIRED_PARAMS:
- self.assertIn(
- param,
- DEFAULT_MODEL_REWARD_PARAMETERS,
- f"Missing PBRS parameter: {param}",
+ self.assertTrue(
+ all(abs(p) < self.PBRS_TERMINAL_TOL for p in terminal_next_potentials)
)
+ max_abs = max(abs(v) for v in shaping_values) if shaping_values else 0.0
+ self.assertLessEqual(max_abs, self.PBRS_MAX_ABS_SHAPING)
+
+ # Capture state and re-run (idempotence)
+ state_after = (
+ params["entry_additive_enabled"],
+ params["exit_additive_enabled"],
+ ) # type: ignore[index]
+ _t2, _s2, _n2 = apply_potential_shaping(
+ base_reward=0.0,
+ current_pnl=0.02,
+ current_duration_ratio=0.1,
+ next_pnl=0.0,
+ next_duration_ratio=0.0,
+ is_terminal=True,
+ last_potential=0.1,
+ params=params,
+ )
+ self.assertEqual(
+ state_after,
+ (params["entry_additive_enabled"], params["exit_additive_enabled"]),
+ )
- def test_pbrs_progressive_release_decay_clamped_zero_current(self):
- """progressive_release with decay>1 clamps to 1 (full release to 0).
-
- Scenario isolates current Φ(s)=0 and non-zero previous potential carried in last_potential,
- verifying clamp logic independent of current state potential.
- """
+ def test_progressive_release_negative_decay_clamped(self):
+ """Negative decay must clamp to 0 => next potential equals last potential (no release)."""
params = DEFAULT_MODEL_REWARD_PARAMETERS.copy()
params.update(
{
"exit_potential_mode": "progressive_release",
- "exit_potential_decay": 5.0, # will clamp to 1.0
+ "exit_potential_decay": -0.75, # clamped to 0
+ "hold_potential_enabled": True,
}
)
- _total, shaping, next_potential = apply_potential_shaping(
+ last_potential = 0.42
+ # Use a neutral current state (pnl=0, duration=0) so Φ(s) stays ≈ 0.
+ total, shaping, next_potential = apply_potential_shaping(
base_reward=0.0,
current_pnl=0.0,
current_duration_ratio=0.0,
next_pnl=0.0,
next_duration_ratio=0.0,
is_terminal=True,
- last_potential=0.8,
+ last_potential=last_potential,
params=params,
)
- # After clamp: next_potential = 0 and shaping = γ*0 - 0 (Φ(s)=0) => 0
- self.assertAlmostEqualFloat(next_potential, 0.0, tolerance=1e-12)
- self.assertAlmostEqualFloat(shaping, 0.0, tolerance=1e-9)
-
- def test_pbrs_invalid_transform_fallback_potential(self):
- """Invalid transform name in potential path must fall back without NaNs."""
- params = DEFAULT_MODEL_REWARD_PARAMETERS.copy()
- params.update({"hold_potential_transform_pnl": "not_a_transform"})
- _total, shaping, next_potential = apply_potential_shaping(
- base_reward=0.0,
- current_pnl=0.2,
- current_duration_ratio=0.3,
- next_pnl=0.25,
- next_duration_ratio=0.35,
+ self.assertPlacesEqual(next_potential, last_potential, places=12)
+ # shaping = γ*Φ' - Φ(s) ≈ γ*last_potential - 0
+ gamma_raw = DEFAULT_MODEL_REWARD_PARAMETERS.get("potential_gamma", 0.95)
+ try:
+ gamma = float(gamma_raw) # type: ignore[assignment]
+ except Exception:
+ gamma = 0.95
+ self.assertLessEqual(
+ abs(shaping - gamma * last_potential), self.TOL_GENERIC_EQ
+ )
+ self.assertPlacesEqual(total, shaping, places=12)
+
+ def test_potential_gamma_nan_fallback(self):
+ """potential_gamma=NaN should fall back to default value (indirect comparison)."""
+ base_params = DEFAULT_MODEL_REWARD_PARAMETERS.copy()
+ default_gamma = base_params.get("potential_gamma", 0.95)
+ params_nan = base_params.copy()
+ params_nan.update(
+ {"potential_gamma": float("nan"), "hold_potential_enabled": True}
+ )
+ # Non-terminal transition so Φ(s') is computed and depends on gamma
+ res_nan = apply_potential_shaping(
+ base_reward=0.1,
+ current_pnl=0.03,
+ current_duration_ratio=0.2,
+ next_pnl=0.035,
+ next_duration_ratio=0.25,
is_terminal=False,
last_potential=0.0,
- params=params,
+ params=params_nan,
)
- self.assertTrue(np.isfinite(shaping))
- self.assertTrue(np.isfinite(next_potential))
-
- def test_pbrs_gamma_zero_disables_progressive_effect(self):
- """Gamma=0 => shaping term = -Φ(s); potential still updated for next step (Φ' used only if γ>0).
-
- We only assert shaping bounded since Φ is transformed & scaled; ensures stability without exceptions.
- """
- params = DEFAULT_MODEL_REWARD_PARAMETERS.copy()
- params.update({"potential_gamma": 0.0, "hold_potential_enabled": True})
- base_reward = 1.23
- _total, shaping, next_potential = apply_potential_shaping(
- base_reward=base_reward,
- current_pnl=0.1,
+ params_ref = base_params.copy()
+ params_ref.update(
+ {"potential_gamma": default_gamma, "hold_potential_enabled": True}
+ )
+ res_ref = apply_potential_shaping(
+ base_reward=0.1,
+ current_pnl=0.03,
current_duration_ratio=0.2,
- next_pnl=0.15,
+ next_pnl=0.035,
next_duration_ratio=0.25,
is_terminal=False,
last_potential=0.0,
- params=params,
+ params=params_ref,
)
- # Shaping bounded (Φ in [-scale, scale] after transforms), conservatively check |shaping| <= 1
- self.assertLessEqual(abs(shaping), 1.0)
- self.assertTrue(np.isfinite(next_potential))
-
- def test_pbrs_non_canonical_runtime_behavior(self):
- """Non-canonical mode: Φ'=0 at terminal but additives remain enabled (should not be auto-disabled).
-
- We construct a simple scenario:
- - enable hold potential (so Φ(s) != 0)
- - set exit_potential_mode = 'non-canonical'
- - enable both entry & exit additives with small scales so they contribute deterministically
- - take a terminal transition
-
- Expectations:
- - canonical auto-disabling does NOT occur (additives stay True)
- - next_potential returned by _compute_exit_potential is exactly 0.0 (Φ'=0)
- - shaping_reward = γ * 0 - Φ(s) = -Φ(s)
- - total_reward = base + shaping + entry_add + exit_add (all finite)
- - invariance would be broken (but we just assert mechanism, not report here)
- """
+ # Compare shaping & total (deterministic path here)
+ self.assertLess(
+ abs(res_nan[1] - res_ref[1]),
+ self.TOL_IDENTITY_RELAXED,
+ "Unexpected shaping difference under gamma NaN fallback",
+ )
+ self.assertLess(
+ abs(res_nan[0] - res_ref[0]),
+ self.TOL_IDENTITY_RELAXED,
+ "Unexpected total difference under gamma NaN fallback",
+ )
+
+ def test_transform_bulk_monotonicity_and_bounds(self):
+ """Non-decreasing monotonicity & (-1,1) bounds for smooth transforms (excluding clip)."""
+ transforms = ["tanh", "softsign", "arctan", "sigmoid", "asinh"]
+ xs = [-5.0, -1.0, -0.5, 0.0, 0.5, 1.0, 5.0]
+ for name in transforms:
+ with self.subTest(transform=name):
+ vals = [apply_transform(name, x) for x in xs]
+ # Strict bounds (-1,1) (sigmoid & tanh asymptotic)
+ self.assertTrue(
+ all(-1.0 < v < 1.0 for v in vals), f"{name} out of bounds"
+ )
+ # Non-decreasing monotonicity
+ for a, b in zip(vals, vals[1:]):
+ self.assertLessEqual(
+ a,
+ b + self.TOL_IDENTITY_STRICT,
+ f"{name} not monotonic between {a} and {b}",
+ )
+
+ def test_pbrs_retain_previous_cumulative_drift(self):
+ """retain_previous mode accumulates negative shaping drift (non-invariant)."""
params = DEFAULT_MODEL_REWARD_PARAMETERS.copy()
params.update(
{
+ "exit_potential_mode": "retain_previous",
"hold_potential_enabled": True,
- "hold_potential_scale": 1.0,
- "exit_potential_mode": "non-canonical",
- "entry_additive_enabled": True,
- "exit_additive_enabled": True,
- # deterministic small values
- "entry_additive_scale": 0.5,
- "exit_additive_scale": 0.5,
- "entry_additive_gain": 1.0,
- "exit_additive_gain": 1.0,
+ "entry_additive_enabled": False,
+ "exit_additive_enabled": False,
+ "potential_gamma": 0.9,
}
)
- base_reward = 0.123
- current_pnl = 0.2
- current_duration_ratio = 0.4
- # terminal next state values (ignored for potential since Φ'=0 in non-canonical exit path)
- next_pnl = 0.0
- next_duration_ratio = 0.0
- total, shaping, next_potential = apply_potential_shaping(
- base_reward=base_reward,
- current_pnl=current_pnl,
- current_duration_ratio=current_duration_ratio,
- next_pnl=next_pnl,
- next_duration_ratio=next_duration_ratio,
- is_terminal=True,
- last_potential=0.789, # arbitrary, should be ignored for Φ'
- params=params,
+ gamma = _get_float_param(
+ params,
+ "potential_gamma",
+ DEFAULT_MODEL_REWARD_PARAMETERS.get("potential_gamma", 0.95),
)
- # Additives should not have been disabled
- self.assertTrue(params["entry_additive_enabled"])
- self.assertTrue(params["exit_additive_enabled"])
- # Next potential is forced to 0
- self.assertAlmostEqual(next_potential, 0.0, places=12)
- # Compute current potential independently to assert shaping = -Φ(s)
- current_potential = _compute_hold_potential(
- current_pnl,
- current_duration_ratio,
- {"hold_potential_enabled": True, "hold_potential_scale": 1.0},
+ rng = np.random.default_rng(555)
+ potentials = rng.uniform(0.05, 0.85, size=220)
+ deltas = [(gamma * p - p) for p in potentials]
+ cumulative = float(np.sum(deltas))
+ self.assertLess(cumulative, -self.TOL_NEGLIGIBLE)
+ self.assertGreater(abs(cumulative), 10 * self.TOL_IDENTITY_RELAXED)
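+
+ # Drift-sign sketch: retain_previous keeps Φ' = Φ, so each step contributes
+ # (γ - 1) * Φ, strictly negative for γ < 1 and Φ > 0; the cumulative sum
+ # can therefore only drift downward. Toy potentials.
+ def test_retain_previous_drift_sign_sketch(self):
+ gamma = 0.9
+ for phi in (0.05, 0.4, 0.85):
+ self.assertLess(gamma * phi - phi, 0.0)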
+
+ def test_normality_invariance_under_scaling(self):
+ """Skewness & excess kurtosis invariant under positive scaling of normal sample."""
+ rng = np.random.default_rng(808)
+ base = rng.normal(0.0, 1.0, size=7000)
+ scaled = 5.0 * base
+
+ def _skew_kurt(x: np.ndarray) -> tuple[float, float]:
+ m = np.mean(x)
+ c = x - m
+ m2 = np.mean(c**2)
+ m3 = np.mean(c**3)
+ m4 = np.mean(c**4)
+ skew = m3 / (m2**1.5 + 1e-18)
+ kurt = m4 / (m2**2 + 1e-18) - 3.0
+ return skew, kurt
+
+ s_base, k_base = _skew_kurt(base)
+ s_scaled, k_scaled = _skew_kurt(scaled)
+ self.assertAlmostEqualFloat(s_base, s_scaled, tolerance=self.TOL_DISTRIB_SHAPE)
+ self.assertAlmostEqualFloat(k_base, k_scaled, tolerance=self.TOL_DISTRIB_SHAPE)
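+
+ # Moment-scaling sketch: for a > 0, m3 scales as a^3 and m2^(3/2) as a^3,
+ # so their ratio (skewness) is scale-free; kurtosis likewise via a^4/a^4.
+ def test_standardized_moment_scale_free_sketch(self):
+ x = np.array([-1.0, 0.0, 0.5, 2.0])
+ def _skew(v: np.ndarray) -> float:
+ c = v - v.mean()
+ return float(np.mean(c**3) / np.mean(c**2) ** 1.5)
+ for a in (2.0, 5.0):
+ self.assertAlmostEqual(_skew(x), _skew(a * x), places=10)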
+
+ def test_js_symmetry_and_kl_relation_bound(self):
+ """JS distance symmetry & upper bound sqrt(log 2)."""
+ rng = np.random.default_rng(9090)
+ p_raw = rng.uniform(0.0, 1.0, size=300)
+ q_raw = rng.uniform(0.0, 1.0, size=300)
+ p = p_raw / p_raw.sum()
+ q = q_raw / q_raw.sum()
+ m = 0.5 * (p + q)
+
+ def _kl(a, b):
+ mask = (a > 0) & (b > 0)
+ return float(np.sum(a[mask] * np.log(a[mask] / b[mask])))
+
+ js_div = 0.5 * _kl(p, m) + 0.5 * _kl(q, m)
+ js_dist = math.sqrt(max(js_div, 0.0))
+ self.assertDistanceMetric(js_dist, name="js_distance")
+ # Upper bound plus strict identity epsilon guard
+ self.assertLessEqual(
+ js_dist,
+ self.JS_DISTANCE_UPPER_BOUND + self.TOL_IDENTITY_STRICT,
+ )
+ js_div_swap = 0.5 * _kl(q, m) + 0.5 * _kl(p, m)
+ js_dist_swap = math.sqrt(max(js_div_swap, 0.0))
+ self.assertAlmostEqualFloat(
+ js_dist, js_dist_swap, tolerance=self.TOL_GENERIC_EQ
)
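+
+ # Bound-attainment sketch: disjoint two-point supports maximize JS divergence
+ # at log(2), so the JS distance reaches its sqrt(log 2) ceiling exactly.
+ def test_js_distance_attains_upper_bound_sketch(self):
+ p, q = np.array([1.0, 0.0]), np.array([0.0, 1.0])
+ m = 0.5 * (p + q)
+ # 0*log(0) terms are dropped; only the unit-mass components contribute.
+ js_div = 0.5 * float(p[0] * np.log(p[0] / m[0])) + 0.5 * float(
+ q[1] * np.log(q[1] / m[1])
+ )
+ self.assertAlmostEqual(math.sqrt(js_div), math.sqrt(math.log(2.0)), places=12)
+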
- # shaping should equal -current_potential within tolerance
- self.assertAlmostEqual(shaping, -current_potential, delta=1e-9)
- # Total reward includes additives: ensure total - base - shaping differs from 0 (i.e., additives present)
- residual = total - base_reward - shaping
- self.assertNotAlmostEqual(residual, 0.0, delta=1e-12)
- self.assertTrue(np.isfinite(total))
class TestReportFormatting(RewardSpaceTestBase):
"""Tests for report formatting elements not previously covered."""
def test_abs_shaping_line_present_and_constant(self):
- """Ensure the report contains the Abs Σ Shaping Reward line and tolerance not hard-coded elsewhere.
-
- Strategy: run a minimal simulation via generate_report setUp artifact.
- We reuse helper simulate_samples (already tested) to produce output then search in report.
- """
- # Generate a tiny dataset directly using existing pathway by invoking analysis main logic would be heavy.
- # Instead we mimic the invariance block logic locally to ensure formatting; this keeps test fast and stable.
- # Create a temporary markdown file to exercise the function that writes invariance section.
- # Sanity check tolerance value
- self.assertAlmostEqual(PBRS_INVARIANCE_TOL, 1e-6, places=12)
+ """Abs Σ Shaping Reward line present, formatted, uses constant not literal."""
+ # Minimal synthetic construction to exercise invariance formatting logic.
+ self.assertPlacesEqual(PBRS_INVARIANCE_TOL, self.TOL_GENERIC_EQ, places=12)
# Use small synthetic DataFrame with zero shaping sum (pandas imported globally)
df = pd.DataFrame(
{
- "reward_shaping": [1e-12, -1e-12],
+ "reward_shaping": [self.TOL_IDENTITY_STRICT, -self.TOL_IDENTITY_STRICT],
"reward_entry_additive": [0.0, 0.0],
"reward_exit_additive": [0.0, 0.0],
}
# Ensure scientific notation magnitude consistent with small number
val = float(m.group(1)) if m else None # type: ignore[arg-type]
if val is not None:
- self.assertLess(val, 1e-8 + 1e-12)
+ self.assertLess(val, self.TOL_NEGLIGIBLE + self.TOL_IDENTITY_STRICT)
# Ensure no stray hard-coded tolerance string inside content
self.assertNotIn(
- "1e-6", content, "Tolerance should not be hard-coded in line output"
+ str(self.TOL_GENERIC_EQ),
+ content,
+ "Tolerance constant value should appear, not raw literal",
)
def test_pbrs_non_canonical_report_generation(self):
df = pd.DataFrame(
{
- "reward_shaping": [0.01, -0.002], # somme = 0.008 (>> tolérance)
+ "reward_shaping": [0.01, -0.002], # sum = 0.008 (>> tolerance)
"reward_entry_additive": [0.0, 0.0],
"reward_exit_additive": [0.001, 0.0],
}
if m_abs:
self.assertAlmostEqual(abs(total_shaping), float(m_abs.group(1)), places=12)
+ def test_additive_activation_deterministic_contribution(self):
+ """Additives enabled increase total reward; shaping impact limited."""
+ # Use a non-canonical exit mode to avoid automatic invariance enforcement
+ # disabling the additive components on first call (canonical path auto-disables).
+ base = base_params(
+ hold_potential_enabled=True,
+ entry_additive_enabled=False,
+ exit_additive_enabled=False,
+ exit_potential_mode="non-canonical",
+ )
+ with_add = base.copy()
+ with_add.update(
+ {
+ "entry_additive_enabled": True,
+ "exit_additive_enabled": True,
+ "entry_additive_scale": 0.4,
+ "exit_additive_scale": 0.4,
+ "entry_additive_gain": 1.0,
+ "exit_additive_gain": 1.0,
+ }
+ )
+ ctx = {
+ "base_reward": 0.05,
+ "current_pnl": 0.01,
+ "current_duration_ratio": 0.2,
+ "next_pnl": 0.012,
+ "next_duration_ratio": 0.25,
+ "is_terminal": False,
+ }
+ _t0, s0, _n0 = apply_potential_shaping(last_potential=0.0, params=base, **ctx)
+ t1, s1, _n1 = apply_potential_shaping(
+ last_potential=0.0, params=with_add, **ctx
+ )
+ self.assertFinite(t1)
+ self.assertFinite(s1)
+ # Additives should leave the shaping term essentially unchanged (small delta)
+ self.assertLess(abs(s1 - s0), 0.2)
+ self.assertGreater(
+ t1 - _t0, 0.0, "Total reward should increase with additives present"
+ )
+
+ def test_report_cumulative_invariance_aggregation(self):
+ """Canonical telescoping term: small per-step mean drift, bounded increments."""
+ params = base_params(
+ hold_potential_enabled=True,
+ entry_additive_enabled=False,
+ exit_additive_enabled=False,
+ exit_potential_mode="canonical",
+ )
+ gamma = _get_float_param(
+ params,
+ "potential_gamma",
+ DEFAULT_MODEL_REWARD_PARAMETERS.get("potential_gamma", 0.95),
+ )
+ rng = np.random.default_rng(321)
+ last_potential = 0.0
+ telescoping_sum = 0.0
+ max_abs_step = 0.0
+ steps = 0
+ for _ in range(500):
+ is_terminal = rng.uniform() < 0.1
+ current_pnl = float(rng.normal(0, 0.05))
+ current_dur = float(rng.uniform(0, 1))
+ next_pnl = 0.0 if is_terminal else float(rng.normal(0, 0.05))
+ next_dur = 0.0 if is_terminal else float(rng.uniform(0, 1))
+ _tot, _shap, next_potential = apply_potential_shaping(
+ base_reward=0.0,
+ current_pnl=current_pnl,
+ current_duration_ratio=current_dur,
+ next_pnl=next_pnl,
+ next_duration_ratio=next_dur,
+ is_terminal=is_terminal,
+ last_potential=last_potential,
+ params=params,
+ )
+ # Accumulate theoretical telescoping term: γ Φ(s') - Φ(s)
+ inc = gamma * next_potential - last_potential
+ telescoping_sum += inc
+ if abs(inc) > max_abs_step:
+ max_abs_step = abs(inc)
+ steps += 1
+ if is_terminal:
+ # Reset potential at terminal per canonical semantics
+ last_potential = 0.0
+ else:
+ last_potential = next_potential
+ mean_drift = telescoping_sum / max(1, steps)
+ self.assertLess(
+ abs(mean_drift),
+ 2e-2,
+ f"Per-step telescoping drift too large (mean={mean_drift}, steps={steps})",
+ )
+ self.assertLessEqual(
+ max_abs_step,
+ self.PBRS_MAX_ABS_SHAPING,
+ f"Unexpected large telescoping increment (max={max_abs_step})",
+ )
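+
+ # Telescoping identity sketch: with γ = 1 the sum Σ (Φ_{t+1} - Φ_t) collapses
+ # exactly to Φ_T - Φ_0; the test above bounds the residual drift for γ < 1.
+ def test_telescoping_identity_sketch(self):
+ phis = [0.0, 0.2, 0.5, 0.1, 0.0]
+ total = sum(b - a for a, b in zip(phis, phis[1:]))
+ self.assertAlmostEqual(total, phis[-1] - phis[0], places=12)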
+
+ def test_report_explicit_non_invariance_progressive_release(self):
+ """progressive_release should generally yield non-zero cumulative shaping (release leak)."""
+ params = base_params(
+ hold_potential_enabled=True,
+ entry_additive_enabled=False,
+ exit_additive_enabled=False,
+ exit_potential_mode="progressive_release",
+ exit_potential_decay=0.25,
+ )
+ rng = np.random.default_rng(321)
+ last_potential = 0.0
+ shaping_sum = 0.0
+ for _ in range(160):
+ is_terminal = rng.uniform() < 0.15
+ next_pnl = 0.0 if is_terminal else float(rng.normal(0, 0.07))
+ next_dur = 0.0 if is_terminal else float(rng.uniform(0, 1))
+ _tot, shap, next_pot = apply_potential_shaping(
+ base_reward=0.0,
+ current_pnl=float(rng.normal(0, 0.07)),
+ current_duration_ratio=float(rng.uniform(0, 1)),
+ next_pnl=next_pnl,
+ next_duration_ratio=next_dur,
+ is_terminal=is_terminal,
+ last_potential=last_potential,
+ params=params,
+ )
+ shaping_sum += shap
+ last_potential = 0.0 if is_terminal else next_pot
+ self.assertGreater(
+ abs(shaping_sum),
+ PBRS_INVARIANCE_TOL * 50,
+ f"Expected non-zero Σ shaping (got {shaping_sum})",
+ )
+
+ def test_gamma_extremes(self):
+ """Gamma=0 and gamma≈1 boundary behaviours produce bounded shaping and finite potentials."""
+ for gamma in [0.0, 0.999999]:
+ params = base_params(
+ hold_potential_enabled=True,
+ entry_additive_enabled=False,
+ exit_additive_enabled=False,
+ exit_potential_mode="canonical",
+ potential_gamma=gamma,
+ )
+ _tot, shap, next_pot = apply_potential_shaping(
+ base_reward=0.0,
+ current_pnl=0.02,
+ current_duration_ratio=0.3,
+ next_pnl=0.025,
+ next_duration_ratio=0.35,
+ is_terminal=False,
+ last_potential=0.0,
+ params=params,
+ )
+ self.assertTrue(np.isfinite(shap))
+ self.assertTrue(np.isfinite(next_pot))
+ self.assertLessEqual(abs(shap), self.PBRS_MAX_ABS_SHAPING)
+
if __name__ == "__main__":
# Configure test discovery and execution