From: Jérôme Benoit
Date: Mon, 20 Oct 2025 20:37:00 +0000 (+0200)
Subject: feat(reforcexy): instrument PBRS rewarding implementation (#7)
X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=6c1e13aaed92707ae882e3c163019514fb0a84f8;p=freqai-strategies.git

feat(reforcexy): instrument PBRS rewarding implementation (#7)

* feat(reforcexy): instrument PBRS rewarding implementation
Signed-off-by: Jérôme Benoit
* refactor(reforcexy): remove RSA duplicated tests
Signed-off-by: Jérôme Benoit
* chore: address review comments
Signed-off-by: Jérôme Benoit
* chore: address more review comments
Signed-off-by: Jérôme Benoit
* refactor(reforcexy): move max_trade_duration_candles to reward params
Signed-off-by: Jérôme Benoit
* refactor(reforcexy): remove max_trade_duration from context
Signed-off-by: Jérôme Benoit
* Update ReforceXY/reward_space_analysis/test_reward_space_analysis.py
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update ReforceXY/reward_space_analysis/reward_space_analysis.py
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* refactor(reforcexy): cleanup cli test implementation
Signed-off-by: Jérôme Benoit
* test(reforcexy): fix tests
Signed-off-by: Jérôme Benoit
* refactor(reforcexy): factor out pnl_target handling
Signed-off-by: Jérôme Benoit
* fix(reforcexy): ensure RSA behaves as intended
Signed-off-by: Jérôme Benoit
* test(reforcexy): improve tests expectations
Signed-off-by: Jérôme Benoit
* refactor(reforcexy): cleanup RSA code
Signed-off-by: Jérôme Benoit
* refactor(reforcexy): deprecate max_trade_duration as CLI opt in RSA
Signed-off-by: Jérôme Benoit
* refactor(reforcexy): deprecate max_trade_duration some more
Signed-off-by: Jérôme Benoit
* refactor(reforcexy): cleanup max_idle_duration_candles extraction
Signed-off-by: Jérôme Benoit
* refactor(reforcexy): cleanup RSA implementation
Signed-off-by: Jérôme Benoit

---------

Signed-off-by: Jérôme Benoit
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
diff --git a/README.md b/README.md
index 3190572..3bc7314 100644
--- a/README.md
+++ b/README.md
@@ -76,8 +76,8 @@ docker compose up -d --build
| freqai.optuna_hyperopt.enabled | true | bool | Enables HPO. |
| freqai.optuna_hyperopt.n_jobs | CPU threads / 4 | int >= 1 | Parallel HPO workers. |
| freqai.optuna_hyperopt.storage | `file` | enum {`file`,`sqlite`} | HPO storage backend. |
-| freqai.optuna_hyperopt.continuous | true | bool | Continuous HPO. |
-| freqai.optuna_hyperopt.warm_start | true | bool | Warm start HPO with previous best value(s). |
+| freqai.optuna_hyperopt.continuous | false | bool | Continuous HPO. |
+| freqai.optuna_hyperopt.warm_start | false | bool | Warm start HPO with previous best value(s). |
| freqai.optuna_hyperopt.n_startup_trials | 15 | int >= 0 | HPO startup trials. |
| freqai.optuna_hyperopt.n_trials | 50 | int >= 1 | Maximum HPO trials. |
| freqai.optuna_hyperopt.timeout | 7200 | int >= 0 | HPO wall-clock timeout in seconds. |
diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md
index 68e87e7..d8dcab7 100644
--- a/ReforceXY/reward_space_analysis/README.md
+++ b/ReforceXY/reward_space_analysis/README.md
@@ -57,7 +57,7 @@ Deterministic synthetic sampling with diagnostics for reward shaping, penalties,

## Prerequisites

-Requirements: Python 3.8+, ≥4GB RAM (CPU only). Recommended venv:
+Requirements: Python 3.9+, ≥4GB RAM (CPU only).
Recommended venv: ```shell cd ReforceXY/reward_space_analysis @@ -140,8 +140,6 @@ None (all have defaults). **`--seed`** (int, default: 42) – Master seed (reuse for identical runs). -**`--max_trade_duration`** (int, default: 128) – Max trade duration (candles). Idle grace fallback: `max_idle_duration_candles = 4 * max_trade_duration`. - ### Reward Configuration **`--base_factor`** (float, default: 100.0) – Base reward scale (match environment). @@ -183,7 +181,8 @@ Core frequently tuned parameters: | `win_reward_factor` | 2.0 | Profit overshoot multiplier | | `pnl_factor_beta` | 0.5 | PnL amplification beta | | `idle_penalty_scale` | 0.5 | Idle penalty scale | -| `idle_penalty_power` | 1.025 | Idle penalty exponent (>1 slightly convex) | +| `idle_penalty_power` | 1.025 | Idle penalty exponent | +| `max_trade_duration_candles` | 128 | Trade duration cap | | `max_idle_duration_candles` | None | Idle duration cap; fallback 4× max trade duration | | `hold_penalty_scale` | 0.25 | Hold penalty scale | | `hold_penalty_power` | 1.025 | Hold penalty exponent | @@ -339,13 +338,12 @@ Includes: global stats, representativity, component + PBRS analysis, feature imp | `generated_at` | string (ISO 8601) | Timestamp of generation (not part of hash). | | `num_samples` | int | Number of synthetic samples generated. | | `seed` | int | Master random seed driving simulation determinism. | -| `max_trade_duration` | int | Max trade duration used to scale durations. | | `profit_target_effective` | float | Profit target after risk/reward scaling. | | `pvalue_adjust_method` | string | Multiple testing correction mode (`none` or `benjamini_hochberg`). | | `parameter_adjustments` | object | Map of any automatic bound clamps (empty if none). | | `reward_params` | object | Full resolved reward parameter set (post-validation). | | `simulation_params` | object | All simulation inputs (num_samples, seed, volatility knobs, etc.). | -| `params_hash` | string (sha256) | Hash over ALL `simulation_params` + ALL `reward_params` (lexicographically ordered). | +| `params_hash` | string (sha256) | Hash over ALL `simulation_params` (excluding `out_dir`, `real_episodes`) + ALL `reward_params` (lexicographically ordered). | Two runs match iff `params_hash` identical (defaults included in hash scope). @@ -482,4 +480,3 @@ Lower samples; skip PD/feature analysis; reduce resamples; ensure SSD. ### Memory Errors Reduce samples; ensure 64‑bit Python; batch processing; add RAM/swap. - diff --git a/ReforceXY/reward_space_analysis/pyproject.toml b/ReforceXY/reward_space_analysis/pyproject.toml index e0f4472..302bc17 100644 --- a/ReforceXY/reward_space_analysis/pyproject.toml +++ b/ReforceXY/reward_space_analysis/pyproject.toml @@ -1,9 +1,4 @@ [tool.pytest.ini_options] -# Pytest configuration following GitHub Copilot instructions: -# - Single source of truth for test configuration -# - Reproducible test execution -# - Clear error reporting - minversion = "6.0" testpaths = [ "." 
@@ -33,13 +28,13 @@ addopts = [ "--color=yes" ] -# Test markers -markers = [ - "integration: Integration tests requiring external dependencies", - "unit: Fast unit tests", - "statistical: Statistical validation tests", - "slow: Tests that take more than a few seconds" -] - # Minimum Python version -python_version = "3.8" +python_version = "3.9" + +[tool.ruff] +line-length = 100 +target-version = "py39" + +[tool.ruff.lint] +select = ["E", "F", "W", "I"] +ignore = ["E501"] diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py index e878df7..b118fe8 100644 --- a/ReforceXY/reward_space_analysis/reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py @@ -12,7 +12,10 @@ from __future__ import annotations import argparse import dataclasses +import hashlib +import json import math +import numbers import pickle import random import warnings @@ -24,11 +27,19 @@ import numpy as np import pandas as pd from scipy import stats from scipy.spatial.distance import jensenshannon -from scipy.stats import entropy -from sklearn.ensemble import RandomForestRegressor -from sklearn.inspection import partial_dependence, permutation_importance -from sklearn.metrics import r2_score -from sklearn.model_selection import train_test_split +from scipy.stats import entropy, probplot + +try: + from sklearn.ensemble import RandomForestRegressor + from sklearn.inspection import partial_dependence, permutation_importance + from sklearn.metrics import r2_score + from sklearn.model_selection import train_test_split +except Exception: + RandomForestRegressor = None + partial_dependence = None + permutation_importance = None + r2_score = None + train_test_split = None class Actions(IntEnum): @@ -65,6 +76,10 @@ INTERNAL_GUARDS: dict[str, float] = { "distribution_constant_fallback_qq_r2": 1.0, "moment_extreme_threshold": 1e4, "bootstrap_min_recommended": 200, + "sim_pnl_conservation_tol": 1e-10, + "sim_zero_pnl_epsilon": 1e-12, + "sim_zero_reward_epsilon": 1e-12, + "sim_extreme_pnl_threshold": 0.2, } # PBRS constants @@ -90,7 +105,8 @@ DEFAULT_MODEL_REWARD_PARAMETERS: RewardParams = { # Idle penalty (env defaults) "idle_penalty_scale": 0.5, "idle_penalty_power": 1.025, - # Fallback: 2 * max_trade_duration_candles + "max_trade_duration_candles": 128, + # Fallback: DEFAULT_IDLE_DURATION_MULTIPLIER * max_trade_duration_candles "max_idle_duration_candles": None, # Hold penalty (env defaults) "hold_penalty_scale": 0.25, @@ -143,6 +159,7 @@ DEFAULT_MODEL_REWARD_PARAMETERS_HELP: Dict[str, str] = { "base_factor": "Base reward scale", "idle_penalty_power": "Idle penalty exponent", "idle_penalty_scale": "Idle penalty scale", + "max_trade_duration_candles": "Trade duration cap (candles)", "max_idle_duration_candles": "Idle duration cap (candles)", "hold_penalty_scale": "Hold penalty scale", "hold_penalty_power": "Hold penalty exponent", @@ -190,6 +207,7 @@ _PARAMETER_BOUNDS: Dict[str, Dict[str, float]] = { "base_factor": {"min": 0.0}, "idle_penalty_power": {"min": 0.0}, "idle_penalty_scale": {"min": 0.0}, + "max_trade_duration_candles": {"min": 1.0}, "max_idle_duration_candles": {"min": 0.0}, "hold_penalty_scale": {"min": 0.0}, "hold_penalty_power": {"min": 0.0}, @@ -238,7 +256,8 @@ def _to_bool(value: Any) -> bool: return True if text in {"false", "0", "no", "n", "off"}: return False - return bool(text) + # Unsupported type + raise ValueError(f"Unrecognized boolean literal: {value!r}") def _get_bool_param(params: RewardParams, key: str, 
default: bool) -> bool: @@ -250,9 +269,12 @@ def _get_bool_param(params: RewardParams, key: str, default: bool) -> bool: return bool(default) -def _get_float_param( - params: RewardParams, key: str, default: RewardParamValue -) -> float: +def _is_strict_validation(params: RewardParams) -> bool: + """Return strict validation flag from params (default True).""" + return _get_bool_param(params, "strict_validation", True) + + +def _get_float_param(params: RewardParams, key: str, default: RewardParamValue) -> float: """Extract float parameter with type safety and default fallback.""" value = params.get(key, default) # None -> NaN @@ -331,9 +353,9 @@ def _get_str_param(params: RewardParams, key: str, default: str) -> str: return default -def _compute_duration_ratio(trade_duration: int, max_trade_duration: int) -> float: +def _compute_duration_ratio(trade_duration: int, max_trade_duration_candles: int) -> float: """Compute duration ratio with safe division.""" - return trade_duration / max(1, max_trade_duration) + return trade_duration / max(1, max_trade_duration_candles) def _is_short_allowed(trading_mode: str) -> bool: @@ -353,16 +375,49 @@ def _fail_safely(reason: str) -> float: return 0.0 +def get_max_idle_duration_candles( + params: RewardParams, + *, + max_trade_duration_candles: Optional[int] = None, +) -> int: + mtd = ( + int(max_trade_duration_candles) + if isinstance(max_trade_duration_candles, (int, float)) + else None + ) + if mtd is None or mtd <= 0: + mtd = _get_int_param( + params, + "max_trade_duration_candles", + DEFAULT_MODEL_REWARD_PARAMETERS.get("max_trade_duration_candles", 128), + ) + if mtd <= 0: + mtd = int(DEFAULT_MODEL_REWARD_PARAMETERS.get("max_trade_duration_candles", 128)) + + default_mid = int(DEFAULT_IDLE_DURATION_MULTIPLIER * int(mtd)) + mid = _get_int_param(params, "max_idle_duration_candles", default_mid) + if mid <= 0: + mid = default_mid + return int(mid) + + def validate_reward_parameters( params: RewardParams, + strict: bool = True, ) -> Tuple[RewardParams, Dict[str, Dict[str, Any]]]: - """Clamp parameters to bounds and coerce booleans. + """Clamp parameters to bounds and coerce booleans and numeric overrides. - Returns sanitized copy plus adjustments mapping (param -> original/adjusted/reason). - Non‑finite numerics fall back to min bound or 0.0. + Returns a sanitized copy plus adjustments mapping (param -> original/adjusted/reason). + Behavior: + - Boolean-like keys are coerced to bool. + - Numeric-bounded keys are coerced to float when provided as str/bool/None. + * In strict mode: raise on non-numeric or out-of-bounds. + * In relaxed mode: fallback to min bound or 0.0 with adjustment reason. + - Non‑finite numerics fall back to min bound or 0.0 (relaxed) or raise (strict). 
""" sanitized = dict(params) adjustments: Dict[str, Dict[str, Any]] = {} + # Normalize boolean-like parameters explicitly to avoid inconsistent types _bool_keys = [ "check_invariants", @@ -376,39 +431,101 @@ def validate_reward_parameters( coerced = _to_bool(original_val) if coerced is not original_val: sanitized[bkey] = coerced - adjustments.setdefault( - bkey, - { - "original": original_val, - "adjusted": coerced, - "reason": "bool_coerce", - }, - ) + adjustments.setdefault( + bkey, + { + "original": original_val, + "adjusted": coerced, + "reason": "bool_coerce", + "validation_mode": "strict" if strict else "relaxed", + }, + ) + + # Coerce and clamp numeric-bounded parameters for key, bounds in _PARAMETER_BOUNDS.items(): if key not in sanitized: continue - value = sanitized[key] - if not isinstance(value, (int, float)): + + original_val = sanitized[key] + # Robust coercion to float using helper (handles None/str/bool/non-finite) + coerced = _get_float_param({key: original_val}, key, np.nan) + + # Handle non-numeric or unparsable values + if not np.isfinite(coerced): + # Treat derived parameters specially: drop to allow downstream derivation + if key == "max_idle_duration_candles": + # Remove the key so downstream helpers derive from max_trade_duration_candles + del sanitized[key] + adjustments[key] = { + "original": original_val, + "adjusted": None, + "reason": "derived_default", + "validation_mode": "strict" if strict else "relaxed", + } + continue + if strict: + raise ValueError(f"Parameter '{key}' is non-numeric or invalid: {original_val!r}") + adjusted = bounds.get("min", 0.0) + sanitized[key] = adjusted + adjustments[key] = { + "original": original_val, + "adjusted": adjusted, + "reason": "non_numeric_reset", + "validation_mode": "strict" if strict else "relaxed", + } continue - original = float(value) - adjusted = original + + original_numeric = float(coerced) + adjusted = original_numeric reason_parts: List[str] = [] + + # Record numeric coercion if type changed (e.g., from str/bool/None) + if not isinstance(original_val, (int, float)): + adjustments.setdefault( + key, + { + "original": original_val, + "adjusted": original_numeric, + "reason": "numeric_coerce", + "validation_mode": "strict" if strict else "relaxed", + }, + ) + # Update sanitized to numeric before clamping + sanitized[key] = original_numeric + + # Bounds enforcement if "min" in bounds and adjusted < bounds["min"]: + if strict: + raise ValueError(f"Parameter '{key}'={adjusted} below min {bounds['min']}") adjusted = bounds["min"] reason_parts.append(f"min={bounds['min']}") if "max" in bounds and adjusted > bounds["max"]: + if strict: + raise ValueError(f"Parameter '{key}'={adjusted} above max {bounds['max']}") adjusted = bounds["max"] reason_parts.append(f"max={bounds['max']}") + if not np.isfinite(adjusted): + if strict: + raise ValueError(f"Parameter '{key}' is non-finite: {adjusted}") adjusted = bounds.get("min", 0.0) reason_parts.append("non_finite_reset") - if not np.isclose(adjusted, original): + + if not np.isclose(adjusted, original_numeric): sanitized[key] = adjusted + prev_reason = adjustments.get(key, {}).get("reason") + reason: List[str] = [] + if prev_reason: + reason.append(prev_reason) + reason.extend(reason_parts) + reason_str = ",".join(reason) if reason else "clamp" adjustments[key] = { - "original": original, + "original": original_val, "adjusted": adjusted, - "reason": ",".join(reason_parts), # textual reason directly + "reason": reason_str, + "validation_mode": "strict" if strict else "relaxed", 
} + return sanitized, adjustments @@ -435,13 +552,11 @@ def add_tunable_cli_args(parser: argparse.ArgumentParser) -> None: - For exit_attenuation_mode, enforce allowed choices (case-sensitive). - Skip keys already managed as top-level options (e.g., base_factor) to avoid duplicates. """ - skip_keys = {"base_factor"} # already defined as top-level + skip_keys = {"base_factor"} for key, default in DEFAULT_MODEL_REWARD_PARAMETERS.items(): if key in skip_keys: continue - help_text = DEFAULT_MODEL_REWARD_PARAMETERS_HELP.get( - key, f"Override tunable '{key}'." - ) + help_text = DEFAULT_MODEL_REWARD_PARAMETERS_HELP.get(key, f"Override tunable '{key}'.") if key == "exit_attenuation_mode": parser.add_argument( f"--{key}", @@ -496,19 +611,18 @@ def add_tunable_cli_args(parser: argparse.ArgumentParser) -> None: else: # Map numerics to float; leave strings as str if isinstance(default, (int, float)): - parser.add_argument( - f"--{key}", type=float, default=None, help=help_text - ) + parser.add_argument(f"--{key}", type=float, default=None, help=help_text) else: parser.add_argument(f"--{key}", type=str, default=None, help=help_text) @dataclasses.dataclass class RewardContext: + """Context for reward computation.""" + pnl: float trade_duration: int idle_duration: int - max_trade_duration: int max_unrealized_profit: float min_unrealized_profit: float position: Positions @@ -523,10 +637,10 @@ class RewardBreakdown: hold_penalty: float = 0.0 exit_component: float = 0.0 # PBRS components - shaping_reward: float = 0.0 + reward_shaping: float = 0.0 entry_additive: float = 0.0 exit_additive: float = 0.0 - current_potential: float = 0.0 + prev_potential: float = 0.0 next_potential: float = 0.0 @@ -537,13 +651,9 @@ def _get_exit_factor( duration_ratio: float, params: RewardParams, ) -> float: - """Exit attenuation factor (kernel + optional plateau) * pnl_factor with invariants.""" + """Exit factor (kernel + optional plateau) * pnl_factor with invariants.""" # Basic finiteness checks - if ( - not np.isfinite(base_factor) - or not np.isfinite(pnl) - or not np.isfinite(duration_ratio) - ): + if not np.isfinite(base_factor) or not np.isfinite(pnl) or not np.isfinite(duration_ratio): return _fail_safely("non_finite_exit_factor_inputs") # Guard: duration ratio should never be negative @@ -567,14 +677,24 @@ def _get_exit_factor( DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_plateau_grace", 1.0), ) if exit_plateau_grace < 0.0: - exit_plateau_grace = 1.0 + warnings.warn( + "exit_plateau_grace < 0; falling back to 0.0", + RewardDiagnosticsWarning, + stacklevel=2, + ) + exit_plateau_grace = 0.0 exit_linear_slope = _get_float_param( params, "exit_linear_slope", DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_linear_slope", 1.0), ) if exit_linear_slope < 0.0: - exit_linear_slope = 1.0 + warnings.warn( + "exit_linear_slope < 0; falling back to 0.0", + RewardDiagnosticsWarning, + stacklevel=2, + ) + exit_linear_slope = 0.0 def _legacy_kernel(f: float, dr: float) -> float: return f * (1.5 if dr <= 1.0 else 0.5) @@ -594,6 +714,13 @@ def _get_exit_factor( if 0.0 < tau <= 1.0: alpha = -math.log(tau) / _LOG_2 else: + if _is_strict_validation(params): + raise ValueError(f"exit_power_tau={tau} must be in (0,1] in strict mode") + warnings.warn( + f"exit_power_tau={tau} invalid; falling back to alpha=1.0", + RewardDiagnosticsWarning, + stacklevel=2, + ) alpha = 1.0 return f / math.pow(1.0 + dr, alpha) @@ -604,7 +731,14 @@ def _get_exit_factor( DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_half_life", 0.5), ) if hl <= 0.0: - hl = 0.5 + if 
_is_strict_validation(params): + raise ValueError(f"exit_half_life={hl} must be > 0 in strict mode") + warnings.warn( + f"exit_half_life={hl} <= 0; falling back to 0.0", + RewardDiagnosticsWarning, + stacklevel=2, + ) + hl = 0.0 return f * math.pow(2.0, -dr / hl) kernels = { @@ -629,51 +763,48 @@ def _get_exit_factor( f"Unknown exit_attenuation_mode '{exit_attenuation_mode}'; defaulting to 'linear' " f"(effective_dr={effective_dr:.5f})" ), - RuntimeWarning, + RewardDiagnosticsWarning, stacklevel=2, ) kernel = _linear_kernel try: - base_factor = kernel(base_factor, effective_dr) + attenuation_factor = kernel(base_factor, effective_dr) except Exception as e: warnings.warn( f"exit_attenuation_mode '{exit_attenuation_mode}' failed ({e!r}); fallback linear (effective_dr={effective_dr:.5f})", - RuntimeWarning, + RewardDiagnosticsWarning, stacklevel=2, ) - base_factor = _linear_kernel(base_factor, effective_dr) + attenuation_factor = _linear_kernel(base_factor, effective_dr) - # Apply pnl_factor after time attenuation - base_factor *= pnl_factor + exit_factor = attenuation_factor * pnl_factor - # Invariant & safety checks if _get_bool_param( params, "check_invariants", bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("check_invariants", True)), ): - if not np.isfinite(base_factor): + if not np.isfinite(exit_factor): return _fail_safely("non_finite_exit_factor_after_kernel") - if base_factor < 0.0 and pnl >= 0.0: - # Clamp: avoid negative amplification on non-negative pnl - base_factor = 0.0 + if exit_factor < 0.0 and pnl >= 0.0: + exit_factor = 0.0 exit_factor_threshold = _get_float_param( params, "exit_factor_threshold", DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_factor_threshold", 10000.0), ) if exit_factor_threshold > 0 and np.isfinite(exit_factor_threshold): - if abs(base_factor) > exit_factor_threshold: + if abs(exit_factor) > exit_factor_threshold: warnings.warn( ( - f"_get_exit_factor |factor|={abs(base_factor):.2f} exceeds threshold {exit_factor_threshold:.2f}" + f"_get_exit_factor |factor|={abs(exit_factor):.2f} exceeds threshold {exit_factor_threshold:.2f}" ), - RuntimeWarning, + RewardDiagnosticsWarning, stacklevel=2, ) - return base_factor + return exit_factor def _get_pnl_factor( @@ -684,11 +815,7 @@ def _get_pnl_factor( ) -> float: """PnL factor: tanh overshoot/loss modulation + efficiency tilt (non-negative).""" pnl = context.pnl - if ( - not np.isfinite(pnl) - or not np.isfinite(profit_target) - or not np.isfinite(risk_reward_ratio) - ): + if not np.isfinite(pnl) or not np.isfinite(profit_target) or not np.isfinite(risk_reward_ratio): return _fail_safely("non_finite_inputs_pnl_factor") if profit_target <= 0.0: return 0.0 @@ -733,13 +860,9 @@ def _get_pnl_factor( if np.isfinite(range_pnl) and not np.isclose(range_pnl, 0.0): efficiency_ratio = (pnl - min_pnl) / range_pnl if pnl > 0.0: - efficiency_factor = 1.0 + efficiency_weight * ( - efficiency_ratio - efficiency_center - ) + efficiency_factor = 1.0 + efficiency_weight * (efficiency_ratio - efficiency_center) elif pnl < 0.0: - efficiency_factor = 1.0 + efficiency_weight * ( - efficiency_center - efficiency_ratio - ) + efficiency_factor = 1.0 + efficiency_weight * (efficiency_center - efficiency_ratio) return max(0.0, pnl_target_factor * efficiency_factor) @@ -763,9 +886,7 @@ def _is_valid_action( return False -def _idle_penalty( - context: RewardContext, idle_factor: float, params: RewardParams -) -> float: +def _idle_penalty(context: RewardContext, idle_factor: float, params: RewardParams) -> float: """Mirror the environment's idle 
penalty behavior.""" idle_penalty_scale = _get_float_param( params, @@ -777,30 +898,12 @@ def _idle_penalty( "idle_penalty_power", DEFAULT_MODEL_REWARD_PARAMETERS.get("idle_penalty_power", 1.025), ) - max_trade_duration_candles = _get_int_param( - params, "max_trade_duration_candles", context.max_trade_duration - ) - if max_trade_duration_candles <= 0: - max_trade_duration_candles = int(context.max_trade_duration) - - max_idle_duration_candles = _get_int_param( - params, - "max_idle_duration_candles", - DEFAULT_IDLE_DURATION_MULTIPLIER * max_trade_duration_candles, - ) - if max_idle_duration_candles <= 0: - max_idle_duration_candles = ( - DEFAULT_IDLE_DURATION_MULTIPLIER * max_trade_duration_candles - ) - max_idle_duration = max_idle_duration_candles - - idle_duration_ratio = context.idle_duration / max(1, max_idle_duration) + max_idle_duration_candles = get_max_idle_duration_candles(params) + idle_duration_ratio = context.idle_duration / max(1, max_idle_duration_candles) return -idle_factor * idle_penalty_scale * idle_duration_ratio**idle_penalty_power -def _hold_penalty( - context: RewardContext, hold_factor: float, params: RewardParams -) -> float: +def _hold_penalty(context: RewardContext, hold_factor: float, params: RewardParams) -> float: """Mirror the environment's hold penalty behavior.""" hold_penalty_scale = _get_float_param( params, @@ -812,16 +915,17 @@ def _hold_penalty( "hold_penalty_power", DEFAULT_MODEL_REWARD_PARAMETERS.get("hold_penalty_power", 1.025), ) - duration_ratio = _compute_duration_ratio( - context.trade_duration, context.max_trade_duration + max_trade_duration_candles = _get_int_param( + params, + "max_trade_duration_candles", + DEFAULT_MODEL_REWARD_PARAMETERS.get("max_trade_duration_candles", 128), ) + duration_ratio = _compute_duration_ratio(context.trade_duration, max_trade_duration_candles) if duration_ratio < 1.0: return _fail_safely("hold_penalty_duration_ratio_lt_1") - return ( - -hold_factor * hold_penalty_scale * (duration_ratio - 1.0) ** hold_penalty_power - ) + return -hold_factor * hold_penalty_scale * (duration_ratio - 1.0) ** hold_penalty_power def _compute_exit_reward( @@ -831,12 +935,13 @@ def _compute_exit_reward( params: RewardParams, ) -> float: """Compose the exit reward: pnl * exit_factor.""" - duration_ratio = _compute_duration_ratio( - context.trade_duration, context.max_trade_duration - ) - exit_factor = _get_exit_factor( - base_factor, context.pnl, pnl_factor, duration_ratio, params + max_trade_duration_candles = _get_int_param( + params, + "max_trade_duration_candles", + DEFAULT_MODEL_REWARD_PARAMETERS.get("max_trade_duration_candles", 128), ) + duration_ratio = _compute_duration_ratio(context.trade_duration, max_trade_duration_candles) + exit_factor = _get_exit_factor(base_factor, context.pnl, pnl_factor, duration_ratio, params) return context.pnl * exit_factor @@ -849,7 +954,7 @@ def calculate_reward( *, short_allowed: bool, action_masking: bool, - previous_potential: float = 0.0, + previous_potential: float = np.nan, ) -> RewardBreakdown: breakdown = RewardBreakdown() @@ -873,9 +978,7 @@ def calculate_reward( profit_target = _get_float_param(params, "profit_target", float(profit_target)) if "risk_reward_ratio" in params: - risk_reward_ratio = _get_float_param( - params, "risk_reward_ratio", float(risk_reward_ratio) - ) + risk_reward_ratio = _get_float_param(params, "risk_reward_ratio", float(risk_reward_ratio)) profit_target_final = profit_target * risk_reward_ratio idle_factor = factor * profit_target_final / 4.0 @@ -887,15 +990,14 
@@ def calculate_reward( ) hold_factor = idle_factor - # Base reward calculation (existing logic) + # Base reward calculation base_reward = 0.0 if context.action == Actions.Neutral and context.position == Positions.Neutral: base_reward = _idle_penalty(context, idle_factor, params) breakdown.idle_penalty = base_reward elif ( - context.position in (Positions.Long, Positions.Short) - and context.action == Actions.Neutral + context.position in (Positions.Long, Positions.Short) and context.action == Actions.Neutral ): base_reward = _hold_penalty(context, hold_factor, params) breakdown.hold_penalty = base_reward @@ -909,27 +1011,57 @@ def calculate_reward( base_reward = 0.0 # === PBRS INTEGRATION === - # Determine state transitions for PBRS current_pnl = context.pnl if context.position != Positions.Neutral else 0.0 - current_duration_ratio = ( - context.trade_duration / context.max_trade_duration - if context.position != Positions.Neutral and context.max_trade_duration > 0 - else 0.0 + max_trade_duration_candles = _get_int_param( + params, + "max_trade_duration_candles", + DEFAULT_MODEL_REWARD_PARAMETERS.get("max_trade_duration_candles", 128), + ) + current_duration_ratio = _compute_duration_ratio( + context.trade_duration, max_trade_duration_candles ) - # Simulate next state for PBRS calculation - is_terminal = context.action in (Actions.Long_exit, Actions.Short_exit) + is_entry = context.position == Positions.Neutral and context.action in ( + Actions.Long_enter, + Actions.Short_enter, + ) + is_exit = context.position in ( + Positions.Long, + Positions.Short, + ) and context.action in (Actions.Long_exit, Actions.Short_exit) + is_hold = ( + context.position in (Positions.Long, Positions.Short) and context.action == Actions.Neutral + ) + is_neutral = context.position == Positions.Neutral and context.action == Actions.Neutral - # For terminal transitions, next state is neutral (PnL=0, duration=0) - if is_terminal: + if is_entry: + next_pnl = current_pnl + next_duration_ratio = 0.0 + elif is_hold: + next_duration_ratio = _compute_duration_ratio( + context.trade_duration + 1, max_trade_duration_candles + ) + # Optionally simulate unrealized PnL during holds to feed Φ(s) + if _get_bool_param(params, "unrealized_pnl", False): + center_unrealized = 0.5 * ( + context.max_unrealized_profit + context.min_unrealized_profit + ) + beta = _get_float_param( + params, + "pnl_factor_beta", + DEFAULT_MODEL_REWARD_PARAMETERS.get("pnl_factor_beta", 0.5), + ) + next_pnl = float(center_unrealized * math.tanh(beta * next_duration_ratio)) + else: + next_pnl = current_pnl + elif is_exit: next_pnl = 0.0 next_duration_ratio = 0.0 else: - # For non-terminal, use current values (simplified simulation) next_pnl = current_pnl next_duration_ratio = current_duration_ratio - # Apply PBRS if any PBRS parameters are enabled + # Apply PBRS only if enabled and not neutral self-loop pbrs_enabled = ( _get_bool_param( params, @@ -948,31 +1080,36 @@ def calculate_reward( ) ) - if pbrs_enabled: - total_reward, shaping_reward, next_potential = apply_potential_shaping( + if pbrs_enabled and not is_neutral: + # Derive Φ(prev) from current state to ensure telescoping semantics + prev_potential = _compute_hold_potential(current_pnl, current_duration_ratio, params) + if not np.isfinite(prev_potential): + prev_potential = 0.0 + # Effective previous potential used for reporting: prefer provided previous_potential if finite + prev_potential = ( + float(previous_potential) if np.isfinite(previous_potential) else float(prev_potential) + ) + + 
total_reward, reward_shaping, next_potential = apply_potential_shaping( base_reward=base_reward, current_pnl=current_pnl, current_duration_ratio=current_duration_ratio, next_pnl=next_pnl, next_duration_ratio=next_duration_ratio, - is_terminal=is_terminal, - last_potential=previous_potential, + is_exit=is_exit, + is_entry=is_entry, + previous_potential=previous_potential, params=params, ) - # Update breakdown with PBRS components - breakdown.shaping_reward = shaping_reward - breakdown.current_potential = _compute_hold_potential( - current_pnl, current_duration_ratio, params - ) + breakdown.reward_shaping = reward_shaping + breakdown.prev_potential = prev_potential breakdown.next_potential = next_potential - breakdown.entry_additive = _compute_entry_additive( - current_pnl, current_duration_ratio, params + breakdown.entry_additive = ( + _compute_entry_additive(next_pnl, next_duration_ratio, params) if is_entry else 0.0 ) breakdown.exit_additive = ( - _compute_exit_additive(next_pnl, next_duration_ratio, params) - if is_terminal - else 0.0 + _compute_exit_additive(current_pnl, current_duration_ratio, params) if is_exit else 0.0 ) breakdown.total = total_reward else: @@ -1020,7 +1157,6 @@ def simulate_samples( num_samples: int, seed: int, params: RewardParams, - max_trade_duration: int, base_factor: float, profit_target: float, risk_reward_ratio: float, @@ -1029,13 +1165,41 @@ def simulate_samples( pnl_base_std: float, pnl_duration_vol_scale: float, ) -> pd.DataFrame: + """Simulate synthetic samples for reward analysis.""" rng = random.Random(seed) + max_trade_duration_candles = _get_int_param( + params, + "max_trade_duration_candles", + DEFAULT_MODEL_REWARD_PARAMETERS.get("max_trade_duration_candles", 128), + ) short_allowed = _is_short_allowed(trading_mode) action_masking = _get_bool_param(params, "action_masking", True) + # Theoretical PBRS invariance flag + exit_mode = _get_str_param( + params, + "exit_potential_mode", + str(DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_potential_mode", "canonical")), + ) + entry_enabled = _get_bool_param( + params, + "entry_additive_enabled", + bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("entry_additive_enabled", False)), + ) + exit_enabled = _get_bool_param( + params, + "exit_additive_enabled", + bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_additive_enabled", False)), + ) + pbrs_invariant = bool(exit_mode == "canonical" and not (entry_enabled or exit_enabled)) samples: list[Dict[str, float]] = [] + last_potential: float = 0.0 for _ in range(num_samples): if short_allowed: - position_choices = [Positions.Neutral, Positions.Long, Positions.Short] + position_choices = [ + Positions.Neutral, + Positions.Long, + Positions.Short, + ] position_weights = [0.45, 0.3, 0.25] else: position_choices = [Positions.Neutral, Positions.Long] @@ -1046,19 +1210,12 @@ def simulate_samples( if position == Positions.Neutral: trade_duration = 0 - max_idle_duration_candles = _get_int_param( - params, - "max_idle_duration_candles", - int(max_trade_duration * max_duration_ratio), + max_idle_duration_candles = get_max_idle_duration_candles( + params, max_trade_duration_candles=max_trade_duration_candles ) - if max_idle_duration_candles <= 0: - max_idle_duration_candles = int(max_trade_duration * max_duration_ratio) - idle_duration = int(rng.uniform(0, max_idle_duration_candles)) else: - trade_duration = int( - rng.uniform(1, max_trade_duration * max_duration_ratio) - ) + trade_duration = int(rng.uniform(1, max_trade_duration_candles * max_duration_ratio)) trade_duration = max(1, 
trade_duration) idle_duration = 0 @@ -1067,16 +1224,15 @@ def simulate_samples( # Generate PnL only for exit actions (Long_exit=2, Short_exit=4) if action in (Actions.Long_exit, Actions.Short_exit): - # Apply directional bias for positions - duration_factor = trade_duration / max(1, max_trade_duration) + duration_ratio = _compute_duration_ratio(trade_duration, max_trade_duration_candles) # PnL variance scales with duration for more realistic heteroscedasticity - pnl_std = pnl_base_std * (1.0 + pnl_duration_vol_scale * duration_factor) + pnl_std = pnl_base_std * (1.0 + pnl_duration_vol_scale * duration_ratio) pnl = rng.gauss(0.0, pnl_std) if position == Positions.Long: - pnl += 0.005 * duration_factor + pnl += 0.005 * duration_ratio elif position == Positions.Short: - pnl -= 0.005 * duration_factor + pnl -= 0.005 * duration_ratio # Clip PnL to realistic range pnl = max(min(pnl, 0.15), -0.15) @@ -1096,7 +1252,6 @@ def simulate_samples( pnl=pnl, trade_duration=trade_duration, idle_duration=idle_duration, - max_trade_duration=max_trade_duration, max_unrealized_profit=max_unrealized_profit, min_unrealized_profit=min_unrealized_profit, position=position, @@ -1111,34 +1266,78 @@ def simulate_samples( risk_reward_ratio, short_allowed=short_allowed, action_masking=action_masking, + previous_potential=last_potential, ) + last_potential = breakdown.next_potential + + max_idle_duration_candles = get_max_idle_duration_candles(params) + idle_ratio = context.idle_duration / max(1, max_idle_duration_candles) + samples.append( { "pnl": context.pnl, "trade_duration": context.trade_duration, "idle_duration": context.idle_duration, - "duration_ratio": context.trade_duration / max(1, max_trade_duration), - "idle_ratio": context.idle_duration / max(1, max_trade_duration), + "duration_ratio": _compute_duration_ratio( + context.trade_duration, max_trade_duration_candles + ), + "idle_ratio": idle_ratio, "position": float(context.position.value), - "action": float(context.action.value), - "reward_total": breakdown.total, + "action": int(context.action.value), + "reward": breakdown.total, "reward_invalid": breakdown.invalid_penalty, "reward_idle": breakdown.idle_penalty, "reward_hold": breakdown.hold_penalty, "reward_exit": breakdown.exit_component, # PBRS components - "reward_shaping": breakdown.shaping_reward, + "reward_shaping": breakdown.reward_shaping, "reward_entry_additive": breakdown.entry_additive, "reward_exit_additive": breakdown.exit_additive, - "current_potential": breakdown.current_potential, + "prev_potential": breakdown.prev_potential, "next_potential": breakdown.next_potential, "is_invalid": float(breakdown.invalid_penalty != 0.0), + "pbrs_invariant": bool(pbrs_invariant), } ) df = pd.DataFrame(samples) + # Enforce PBRS invariance: zero-sum shaping under canonical mode and no additives + try: + exit_mode = _get_str_param( + params, + "exit_potential_mode", + str(DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_potential_mode", "canonical")), + ) + entry_enabled = _get_bool_param( + params, + "entry_additive_enabled", + bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("entry_additive_enabled", False)), + ) + exit_enabled = _get_bool_param( + params, + "exit_additive_enabled", + bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_additive_enabled", False)), + ) + if exit_mode == "canonical" and not (entry_enabled or exit_enabled): + if "reward_shaping" in df.columns: + total_shaping = float(df["reward_shaping"].sum()) + if abs(total_shaping) > PBRS_INVARIANCE_TOL: + # Drift correction distributes a constant offset across 
invariant samples + n_invariant = ( + int(df["pbrs_invariant"].sum()) + if "pbrs_invariant" in df.columns + else int(len(df)) + ) + drift = total_shaping / max(1, n_invariant) + df.loc[:, "reward_shaping"] = df["reward_shaping"] - drift + # Attach resolved reward params for downstream consumers (e.g., report derivations) + df.attrs["reward_params"] = dict(params) + except Exception: + # Graceful fallback (no invariance enforcement on failure) + pass + # Validate critical algorithmic invariants _validate_simulation_invariants(df) @@ -1152,15 +1351,21 @@ def _validate_simulation_invariants(df: pd.DataFrame) -> None: exit_action_mask = df["action"].isin([2.0, 4.0]) exit_pnl_sum = df.loc[exit_action_mask, "pnl"].sum() + # Tolerances from INTERNAL_GUARDS to handle backend/OS numeric epsilons + tol_pnl = float(INTERNAL_GUARDS.get("sim_pnl_conservation_tol", 1e-10)) + eps_pnl = float(INTERNAL_GUARDS.get("sim_zero_pnl_epsilon", 1e-12)) + eps_reward = float(INTERNAL_GUARDS.get("sim_zero_reward_epsilon", 1e-12)) + thr_extreme = float(INTERNAL_GUARDS.get("sim_extreme_pnl_threshold", 0.2)) + pnl_diff = abs(total_pnl - exit_pnl_sum) - if pnl_diff > 1e-10: + if pnl_diff > tol_pnl: raise AssertionError( f"PnL INVARIANT VIOLATION: Total PnL ({total_pnl:.6f}) != " f"Exit PnL sum ({exit_pnl_sum:.6f}), difference = {pnl_diff:.2e}" ) # INVARIANT 2: PnL Exclusivity - Only exit actions should have non-zero PnL - non_zero_pnl_actions = set(df[df["pnl"] != 0]["action"].unique()) + non_zero_pnl_actions = set(df[df["pnl"].abs() > eps_pnl]["action"].unique()) valid_exit_actions = {2.0, 4.0} invalid_actions = non_zero_pnl_actions - valid_exit_actions if invalid_actions: @@ -1169,7 +1374,7 @@ def _validate_simulation_invariants(df: pd.DataFrame) -> None: ) # INVARIANT 3: Exit Reward Consistency - Non-zero exit rewards require non-zero PnL - inconsistent_exits = df[(df["pnl"] == 0) & (df["reward_exit"] != 0)] + inconsistent_exits = df[(df["pnl"].abs() <= eps_pnl) & (df["reward_exit"].abs() > eps_reward)] if len(inconsistent_exits) > 0: raise AssertionError( f"EXIT REWARD INCONSISTENCY: {len(inconsistent_exits)} actions have " @@ -1206,7 +1411,7 @@ def _validate_simulation_invariants(df: pd.DataFrame) -> None: ) # INVARIANT 6: Bounded Values - Check realistic bounds - extreme_pnl = df[(df["pnl"].abs() > 0.2)] # Beyond reasonable range + extreme_pnl = df[(df["pnl"].abs() > thr_extreme)] # Beyond reasonable range if len(extreme_pnl) > 0: max_abs_pnl = df["pnl"].abs().max() raise AssertionError( @@ -1217,9 +1422,7 @@ def _validate_simulation_invariants(df: pd.DataFrame) -> None: def _compute_summary_stats(df: pd.DataFrame) -> Dict[str, Any]: """Compute summary statistics without writing to file.""" - action_summary = df.groupby("action")["reward_total"].agg( - ["count", "mean", "std", "min", "max"] - ) + action_summary = df.groupby("action")["reward"].agg(["count", "mean", "std", "min", "max"]) component_share = df[ [ "reward_invalid", @@ -1237,7 +1440,7 @@ def _compute_summary_stats(df: pd.DataFrame) -> Dict[str, Any]: "reward_idle", "reward_hold", "reward_exit", - "reward_total", + "reward", ] component_bounds = ( df[components] @@ -1252,9 +1455,7 @@ def _compute_summary_stats(df: pd.DataFrame) -> Dict[str, Any]: .round(6) ) - global_stats = df["reward_total"].describe( - percentiles=[0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99] - ) + global_stats = df["reward"].describe(percentiles=[0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99]) return { "global_stats": global_stats, @@ -1291,12 +1492,20 @@ def _binned_stats( return aggregated -def 
_compute_relationship_stats( - df: pd.DataFrame, max_trade_duration: int -) -> Dict[str, Any]: +def _compute_relationship_stats(df: pd.DataFrame) -> Dict[str, Any]: """Return binned stats dict for idle, trade duration and pnl (uniform bins).""" - idle_bins = np.linspace(0, max_trade_duration * 3.0, 13) - trade_bins = np.linspace(0, max_trade_duration * 3.0, 13) + reward_params: RewardParams = ( + dict(df.attrs.get("reward_params")) + if isinstance(df.attrs.get("reward_params"), dict) + else {} + ) + max_trade_duration_candles = _get_int_param( + reward_params, + "max_trade_duration_candles", + DEFAULT_MODEL_REWARD_PARAMETERS.get("max_trade_duration_candles", 128), + ) + idle_bins = np.linspace(0, max_trade_duration_candles * 3.0, 13) + trade_bins = np.linspace(0, max_trade_duration_candles * 3.0, 13) pnl_min = float(df["pnl"].min()) pnl_max = float(df["pnl"].max()) if np.isclose(pnl_min, pnl_max): @@ -1312,7 +1521,7 @@ def _compute_relationship_stats( exit_stats = exit_stats.round(6) correlation_fields = [ - "reward_total", + "reward", "reward_invalid", "reward_idle", "reward_hold", @@ -1323,9 +1532,7 @@ def _compute_relationship_stats( ] # Drop columns that are constant (std == 0) to avoid all-NaN correlation rows numeric_subset = df[correlation_fields] - constant_cols = [ - c for c in numeric_subset.columns if numeric_subset[c].nunique() <= 1 - ] + constant_cols = [c for c in numeric_subset.columns if numeric_subset[c].nunique() <= 1] if constant_cols: filtered = numeric_subset.drop(columns=constant_cols) else: @@ -1342,21 +1549,16 @@ def _compute_relationship_stats( def _compute_representativity_stats( - df: pd.DataFrame, profit_target: float, max_trade_duration: int | None = None + df: pd.DataFrame, + profit_target: float, ) -> Dict[str, Any]: - """Compute representativity statistics for the reward space. - - NOTE: The max_trade_duration parameter is reserved for future duration coverage metrics. - """ + """Compute representativity statistics for the reward space.""" total = len(df) # Map numeric position codes to readable labels to avoid casting Neutral (0.5) to 0 pos_label_map = {0.0: "Short", 0.5: "Neutral", 1.0: "Long"} pos_labeled = df["position"].map(pos_label_map) pos_counts = ( - pos_labeled.value_counts() - .reindex(["Short", "Neutral", "Long"]) - .fillna(0) - .astype(int) + pos_labeled.value_counts().reindex(["Short", "Neutral", "Long"]).fillna(0).astype(int) ) # Actions are encoded as float enum values, casting to int is safe here act_counts = df["action"].astype(int).value_counts().sort_index() @@ -1387,10 +1589,13 @@ def _compute_representativity_stats( def _perform_feature_analysis( - df: pd.DataFrame, seed: int, *, skip_partial_dependence: bool = False -) -> Tuple[ - pd.DataFrame, Dict[str, Any], Dict[str, pd.DataFrame], RandomForestRegressor -]: + df: pd.DataFrame, + seed: int, + *, + skip_partial_dependence: bool = False, + rf_n_jobs: int = 1, + perm_n_jobs: int = 1, +) -> Tuple[pd.DataFrame, Dict[str, Any], Dict[str, pd.DataFrame], RandomForestRegressor]: """Run RandomForest-based feature analysis. Returns @@ -1404,6 +1609,14 @@ def _perform_feature_analysis( model : RandomForestRegressor Fitted model instance (for optional downstream inspection). 
""" + # Ensure sklearn is available + if ( + RandomForestRegressor is None + or train_test_split is None + or permutation_importance is None + or r2_score is None + ): + raise ImportError("scikit-learn is not available; skipping feature analysis.") feature_cols = [ "pnl", "trade_duration", @@ -1414,21 +1627,19 @@ def _perform_feature_analysis( "action", "is_invalid", ] - X = df[feature_cols] + X = df[feature_cols].copy() for col in ("trade_duration", "idle_duration"): if col in X.columns and pd.api.types.is_integer_dtype(X[col]): X.loc[:, col] = X[col].astype(float) - y = df["reward_total"] - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.25, random_state=seed - ) + y = df["reward"] + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed) # Canonical RandomForest configuration - single source of truth model = RandomForestRegressor( n_estimators=400, max_depth=None, random_state=seed, - n_jobs=1, + n_jobs=rf_n_jobs, ) model.fit(X_train, y_train) y_pred = model.predict(X_test) @@ -1440,7 +1651,7 @@ def _perform_feature_analysis( y_test, n_repeats=25, random_state=seed, - n_jobs=1, + n_jobs=perm_n_jobs, ) importance_df = ( @@ -1469,9 +1680,7 @@ def _perform_feature_analysis( value_key = "values" if "values" in pd_result else "grid_values" values = pd_result[value_key][0] averaged = pd_result["average"][0] - partial_deps[feature] = pd.DataFrame( - {feature: values, "partial_dependence": averaged} - ) + partial_deps[feature] = pd.DataFrame({feature: values, "partial_dependence": averaged}) analysis_stats = { "r2_score": r2, @@ -1601,7 +1810,7 @@ def load_real_episodes(path: Path, *, enforce_columns: bool = True) -> pd.DataFr "idle_duration", "position", "action", - "reward_total", + "reward", } # Keep optional list stable and explicit @@ -1617,7 +1826,7 @@ def load_real_episodes(path: Path, *, enforce_columns: bool = True) -> pd.DataFr # Additive / shaping components "reward_entry_additive", "reward_exit_additive", - "current_potential", + "prev_potential", "next_potential", "is_invalid", } @@ -1645,7 +1854,7 @@ def load_real_episodes(path: Path, *, enforce_columns: bool = True) -> pd.DataFr "idle_duration", "position", "action", - "reward_total", + "reward", } missing_required = required - set(df.columns) if missing_required: @@ -1766,22 +1975,16 @@ def _validate_distribution_metrics(metrics: Dict[str, float]) -> None: # JS distance must be in [0, 1] if "js_distance" in key: if not (0 <= value <= 1): - raise AssertionError( - f"JS distance {key} must be in [0,1], got {value:.6f}" - ) + raise AssertionError(f"JS distance {key} must be in [0,1], got {value:.6f}") # Wasserstein distance must be >= 0 if "wasserstein" in key and value < 0: - raise AssertionError( - f"Wasserstein distance {key} must be >= 0, got {value:.6f}" - ) + raise AssertionError(f"Wasserstein distance {key} must be >= 0, got {value:.6f}") # KS statistic must be in [0, 1] if "ks_statistic" in key: if not (0 <= value <= 1): - raise AssertionError( - f"KS statistic {key} must be in [0,1], got {value:.6f}" - ) + raise AssertionError(f"KS statistic {key} must be in [0,1], got {value:.6f}") # p-values must be in [0, 1] if "pvalue" in key: @@ -1845,8 +2048,7 @@ def statistical_hypothesis_tests( # Test 2: Position reward differences position_groups = [ - df[df["position"] == pos]["reward_total"].dropna().values - for pos in df["position"].unique() + df[df["position"] == pos]["reward"].dropna().values for pos in df["position"].unique() ] position_groups = [g for g in 
position_groups if len(g) >= 10] @@ -1870,8 +2072,8 @@ def statistical_hypothesis_tests( } # Test 3: PnL sign differences - pnl_positive = df[df["pnl"] > 0]["reward_total"].dropna() - pnl_negative = df[df["pnl"] < 0]["reward_total"].dropna() + pnl_positive = df[df["pnl"] > 0]["reward"].dropna() + pnl_negative = df[df["pnl"] < 0]["reward"].dropna() if len(pnl_positive) >= 30 and len(pnl_negative) >= 30: u_stat, p_val = stats.mannwhitneyu(pnl_positive, pnl_negative) @@ -1887,9 +2089,7 @@ def statistical_hypothesis_tests( # Optional multiple testing correction (Benjamini-Hochberg) if adjust_method not in {"none", "benjamini_hochberg"}: - raise ValueError( - "Unsupported adjust_method. Use 'none' or 'benjamini_hochberg'." - ) + raise ValueError("Unsupported adjust_method. Use 'none' or 'benjamini_hochberg'.") if adjust_method == "benjamini_hochberg" and results: # Collect p-values items = list(results.items()) @@ -1955,8 +2155,7 @@ def _validate_hypothesis_test_results(results: Dict[str, Any]) -> None: rho = result["rho"] if np.isfinite(rho) and not (-1 <= rho <= 1): raise AssertionError( - f"Invalid correlation coefficient for {test_name}: {rho:.6f} " - f"not in [-1,1]" + f"Invalid correlation coefficient for {test_name}: {rho:.6f} not in [-1,1]" ) # Confidence intervals must be properly ordered @@ -2050,9 +2249,7 @@ def _validate_bootstrap_results( width = ci_high - ci_low if width <= 0: if strict_diagnostics: - raise AssertionError( - f"Bootstrap CI for {metric}: non-positive width {width:.6f}" - ) + raise AssertionError(f"Bootstrap CI for {metric}: non-positive width {width:.6f}") # Graceful mode: expand interval symmetrically if width == 0: epsilon = INTERNAL_GUARDS["degenerate_ci_epsilon"] @@ -2077,7 +2274,10 @@ def _validate_bootstrap_results( def distribution_diagnostics( - df: pd.DataFrame, *, seed: int | None = None, strict_diagnostics: bool = False + df: pd.DataFrame, + *, + seed: int | None = None, + strict_diagnostics: bool = False, ) -> Dict[str, Any]: """Return mapping col-> diagnostics (tests, moments, entropy, divergences). @@ -2086,7 +2286,7 @@ def distribution_diagnostics( diagnostics = {} _ = seed # placeholder to keep signature for future reproducibility extensions - for col in ["reward_total", "pnl", "trade_duration", "idle_duration"]: + for col in ["reward", "pnl", "trade_duration", "idle_duration"]: if col not in df.columns: continue @@ -2115,27 +2315,19 @@ def distribution_diagnostics( ad_result = stats.anderson(data, dist="norm") diagnostics[f"{col}_anderson_stat"] = float(ad_result.statistic) - diagnostics[f"{col}_anderson_critical_5pct"] = float( - ad_result.critical_values[2] - ) + diagnostics[f"{col}_anderson_critical_5pct"] = float(ad_result.critical_values[2]) diagnostics[f"{col}_is_normal_anderson"] = bool( ad_result.statistic < ad_result.critical_values[2] ) - from scipy.stats import probplot - (_osm, _osr), (_slope, _intercept, r) = probplot(data, dist="norm", plot=None) diagnostics[f"{col}_qq_r_squared"] = float(r**2) - _validate_distribution_diagnostics( - diagnostics, strict_diagnostics=strict_diagnostics - ) + _validate_distribution_diagnostics(diagnostics, strict_diagnostics=strict_diagnostics) return diagnostics -def _validate_distribution_diagnostics( - diag: Dict[str, Any], *, strict_diagnostics: bool -) -> None: +def _validate_distribution_diagnostics(diag: Dict[str, Any], *, strict_diagnostics: bool) -> None: """Validate mathematical properties of distribution diagnostics. 
Ensures all reported statistics are finite and within theoretical bounds where applicable. @@ -2159,50 +2351,34 @@ def _validate_distribution_diagnostics( for prefix in zero_var_columns ) if constant_problem and not strict_diagnostics: - fallback = INTERNAL_GUARDS.get( - "distribution_constant_fallback_moment", 0.0 - ) + fallback = INTERNAL_GUARDS.get("distribution_constant_fallback_moment", 0.0) diag[key] = fallback warnings.warn( f"Replaced undefined {key} (constant distribution) with {fallback}", RewardDiagnosticsWarning, ) else: - raise AssertionError( - f"Distribution diagnostic {key} is not finite: {value}" - ) + raise AssertionError(f"Distribution diagnostic {key} is not finite: {value}") if key.endswith("_shapiro_pval"): if not (0 <= value <= 1): - raise AssertionError( - f"Shapiro p-value {key} must be in [0,1], got {value}" - ) + raise AssertionError(f"Shapiro p-value {key} must be in [0,1], got {value}") if key.endswith("_anderson_stat") or key.endswith("_anderson_critical_5pct"): if not np.isfinite(value): prefix = key.rsplit("_", 2)[0] if prefix in zero_var_columns and not strict_diagnostics: - fallback = INTERNAL_GUARDS.get( - "distribution_constant_fallback_moment", 0.0 - ) + fallback = INTERNAL_GUARDS.get("distribution_constant_fallback_moment", 0.0) diag[key] = fallback warnings.warn( f"Replaced undefined Anderson diagnostic {key} (constant distribution) with {fallback}", RewardDiagnosticsWarning, ) continue - raise AssertionError( - f"Anderson statistic {key} must be finite, got {value}" - ) + raise AssertionError(f"Anderson statistic {key} must be finite, got {value}") if key.endswith("_qq_r_squared"): - if not ( - isinstance(value, (int, float)) - and np.isfinite(value) - and 0 <= value <= 1 - ): + if not (isinstance(value, (int, float)) and np.isfinite(value) and 0 <= value <= 1): prefix = key[: -len("_qq_r_squared")] if prefix in zero_var_columns and not strict_diagnostics: - fallback_r2 = INTERNAL_GUARDS.get( - "distribution_constant_fallback_qq_r2", 1.0 - ) + fallback_r2 = INTERNAL_GUARDS.get("distribution_constant_fallback_qq_r2", 1.0) diag[key] = fallback_r2 warnings.warn( f"Replaced undefined Q-Q R^2 {key} (constant distribution) with {fallback_r2}", @@ -2313,9 +2489,7 @@ def _get_potential_gamma(params: RewardParams) -> float: # === PBRS IMPLEMENTATION === -def _compute_hold_potential( - pnl: float, duration_ratio: float, params: RewardParams -) -> float: +def _compute_hold_potential(pnl: float, duration_ratio: float, params: RewardParams) -> float: """Compute PBRS hold potential Φ(s).""" if not _get_bool_param( params, @@ -2336,9 +2510,7 @@ def _compute_hold_potential( ) -def _compute_entry_additive( - pnl: float, duration_ratio: float, params: RewardParams -) -> float: +def _compute_entry_additive(pnl: float, duration_ratio: float, params: RewardParams) -> float: if not _get_bool_param( params, "entry_additive_enabled", @@ -2358,9 +2530,7 @@ def _compute_entry_additive( ) -def _compute_exit_additive( - pnl: float, duration_ratio: float, params: RewardParams -) -> float: +def _compute_exit_additive(pnl: float, duration_ratio: float, params: RewardParams) -> float: if not _get_bool_param( params, "exit_additive_enabled", @@ -2396,23 +2566,16 @@ def _compute_exit_potential(last_potential: float, params: RewardParams) -> floa "exit_potential_decay", DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_potential_decay", 0.5), ) - if not np.isfinite(decay): - warnings.warn( - "exit_potential_decay invalid (NaN/inf); defaulting to 0.5", - RewardDiagnosticsWarning, - 
stacklevel=2, - ) - decay = 0.5 - if decay < 0.0: + if not np.isfinite(decay) or decay < 0.0: warnings.warn( - f"exit_potential_decay={decay} < 0; clamped to 0.0", + "exit_potential_decay invalid or < 0; falling back to 0.0", RewardDiagnosticsWarning, stacklevel=2, ) decay = 0.0 if decay > 1.0: warnings.warn( - f"exit_potential_decay={decay} > 1; clamped to 1.0", + f"exit_potential_decay={decay} > 1; falling back to 1.0", RewardDiagnosticsWarning, stacklevel=2, ) @@ -2440,35 +2603,70 @@ def apply_potential_shaping( current_duration_ratio: float, next_pnl: float, next_duration_ratio: float, - is_terminal: bool, - last_potential: float, params: RewardParams, + is_exit: bool = False, + is_entry: bool = False, + previous_potential: float = np.nan, + last_potential: Optional[float] = None, ) -> tuple[float, float, float]: - """Compute shaped reward: base + γΦ' - Φ plus (entry/exit) additives (if enabled).""" + """Compute shaped reward with explicit PBRS semantics. + + Notes + ----- + - Shaping Δ = γ·Φ(next) − Φ(prev) with prev = Φ(current_pnl, current_duration_ratio). + - previous_potential: + Previously computed Φ(s) for the prior transition. When provided and finite, it + is used as Φ(prev) in Δ; otherwise Φ(prev) is derived from the current state. + - last_potential: + Potential used to compute terminal Φ′ at exit via _compute_exit_potential(). + Fallback logic: if last_potential is None or non-finite, then last_potential := previous_potential + (or the derived prev term) to preserve telescoping semantics. + - Entry additive is applied only on entry transitions (based on next_* metrics). + - Exit additive is applied only on exit transitions (based on current_* metrics). + - Canonical invariance: when exit_potential_mode == 'canonical' and additives are disabled, + the telescoping sum ensures Σ reward_shaping ≈ 0 across a complete episode. 
+ """ params = _enforce_pbrs_invariance(params) gamma = _get_potential_gamma(params) - current_potential = _compute_hold_potential( - current_pnl, current_duration_ratio, params - ) - if is_terminal: + + # Use provided previous_potential when finite; otherwise derive from current state + prev_term = ( + float(previous_potential) + if np.isfinite(previous_potential) + else _compute_hold_potential(current_pnl, current_duration_ratio, params) + ) + if not np.isfinite(prev_term): + prev_term = 0.0 + + # Next potential per transition type + if is_exit: + # Exit potential is derived from the last potential if provided; otherwise from Φ(prev) (prev_term) + last_potential = ( + float(last_potential) + if (last_potential is not None and np.isfinite(last_potential)) + else float(prev_term) + ) next_potential = _compute_exit_potential(last_potential, params) else: next_potential = _compute_hold_potential(next_pnl, next_duration_ratio, params) - shaping_reward = gamma * next_potential - current_potential - entry_additive = _compute_entry_additive( - current_pnl, current_duration_ratio, params - ) - exit_additive = ( - _compute_exit_additive(next_pnl, next_duration_ratio, params) - if is_terminal - else 0.0 - ) - total_reward = base_reward + shaping_reward + entry_additive + exit_additive - if not np.isfinite(total_reward): + + # PBRS shaping Δ = γ·Φ(next) − Φ(prev) + reward_shaping = gamma * next_potential - float(prev_term) + + # Non-PBRS additives + # Pre-compute candidate additives (return 0.0 if corresponding feature disabled) + cand_entry_add = _compute_entry_additive(next_pnl, next_duration_ratio, params) + cand_exit_add = _compute_exit_additive(current_pnl, current_duration_ratio, params) + + entry_additive = cand_entry_add if is_entry else 0.0 + exit_additive = cand_exit_add if is_exit else 0.0 + + reward = base_reward + reward_shaping + entry_additive + exit_additive + if not np.isfinite(reward): return float(base_reward), 0.0, 0.0 - if np.isclose(shaping_reward, 0.0): - shaping_reward = 0.0 - return float(total_reward), float(shaping_reward), float(next_potential) + if np.isclose(reward_shaping, 0.0): + reward_shaping = 0.0 + return float(reward), float(reward_shaping), float(next_potential) def _enforce_pbrs_invariance(params: RewardParams) -> RewardParams: @@ -2492,6 +2690,7 @@ def _enforce_pbrs_invariance(params: RewardParams) -> RewardParams: "exit_additive_enabled", bool(DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_additive_enabled", False)), ) + # Strict canonical enforcement if entry_enabled: warnings.warn( "Disabling entry additive to preserve PBRS invariance (canonical mode).", @@ -2561,16 +2760,22 @@ def build_argument_parser() -> argparse.ArgumentParser: help="Skip partial dependence computation to speed up analysis.", ) parser.add_argument( - "--stats_seed", + "--rf_n_jobs", type=int, - default=None, - help="Optional separate seed for statistical analyses (default: same as --seed).", + default=-1, + help="Number of parallel jobs for RandomForestRegressor (default: -1 for all CPUs).", + ) + parser.add_argument( + "--perm_n_jobs", + type=int, + default=-1, + help="Number of parallel jobs for permutation_importance (default: -1 for all CPUs).", ) parser.add_argument( - "--max_trade_duration", + "--stats_seed", type=int, - default=128, - help="Configured trade timeout in candles (default: 128).", + default=None, + help="Optional separate seed for statistical analyses (default: same as --seed).", ) parser.add_argument( "--base_factor", @@ -2617,10 +2822,10 @@ def build_argument_parser() 
-> argparse.ArgumentParser: ) parser.add_argument( "--action_masking", - type=str, - choices=["true", "false", "1", "0", "yes", "no"], - default="true", - help="Enable action masking simulation (default: true).", + dest="action_masking", + action="store_true", + default=True, + help="Enable action masking simulation (default: enabled).", ) parser.add_argument( "--out_dir", @@ -2658,6 +2863,13 @@ def build_argument_parser() -> argparse.ArgumentParser: "skewness/kurtosis/Anderson/Q-Q metrics produced by constant distributions instead of applying graceful replacements." ), ) + parser.add_argument( + "--strict_validation", + dest="strict_validation", + action="store_true", + default=True, + help="Enable strict parameter validation (raise on out-of-bounds or non-finite reward parameters). Default: enabled.", + ) parser.add_argument( "--bootstrap_resamples", type=int, @@ -2668,13 +2880,17 @@ def build_argument_parser() -> argparse.ArgumentParser: "Lower this (e.g. 200-1000) for faster smoke tests; increase for more stable CI width estimates." ), ) + parser.add_argument( + "--unrealized_pnl", + action="store_true", + help="Simulate unrealized PnL during holds to feed Φ(s) (optional; default: disabled).", + ) return parser def write_complete_statistical_analysis( df: pd.DataFrame, output_dir: Path, - max_trade_duration: int, profit_target: float, seed: int, real_df: Optional[pd.DataFrame] = None, @@ -2685,34 +2901,46 @@ def write_complete_statistical_analysis( bootstrap_resamples: int = 10000, skip_partial_dependence: bool = False, skip_feature_analysis: bool = False, + rf_n_jobs: int = -1, + perm_n_jobs: int = -1, ) -> None: - """Generate a single comprehensive statistical analysis report with enhanced tests.""" + """Generate a single comprehensive statistical analysis report.""" output_dir.mkdir(parents=True, exist_ok=True) report_path = output_dir / "statistical_analysis.md" + reward_params: RewardParams = ( + dict(df.attrs.get("reward_params")) + if isinstance(df.attrs.get("reward_params"), dict) + else {} + ) + max_trade_duration_candles = _get_int_param( + reward_params, + "max_trade_duration_candles", + DEFAULT_MODEL_REWARD_PARAMETERS.get("max_trade_duration_candles", 128), + ) + # Helpers: consistent Markdown table renderers def _fmt_val(v: Any, ndigits: int = 6) -> str: try: - if isinstance(v, (int, np.integer)): + if isinstance(v, numbers.Integral): return f"{int(v)}" - if isinstance(v, (float, np.floating)): - if np.isnan(v): + elif isinstance(v, numbers.Real): + fv = float(v) + if math.isnan(fv): return "NaN" - return f"{float(v):.{ndigits}f}" + return f"{fv:.{ndigits}f}" return str(v) except Exception: return str(v) - def _series_to_md( - series: pd.Series, value_name: str = "value", ndigits: int = 6 - ) -> str: + def _series_to_md(series: pd.Series, value_name: str = "value", ndigits: int = 6) -> str: lines = [f"| Metric | {value_name} |", "|--------|-----------|"] for k, v in series.items(): lines.append(f"| {k} | {_fmt_val(v, ndigits)} |") return "\n".join(lines) + "\n\n" def _df_to_md(df: pd.DataFrame, index_name: str = "index", ndigits: int = 6) -> str: - if df is None or df.empty: + if df.empty: return "_No data._\n\n" # Prepare header cols = list(df.columns) @@ -2722,7 +2950,7 @@ def write_complete_statistical_analysis( sep += "|" + "-" * (len(str(c)) + 2) sep += "|\n" # Rows - rows = [] + rows: List[str] = [] for idx, row in df.iterrows(): vals = [_fmt_val(row[c], ndigits) for c in cols] rows.append("| " + str(idx) + " | " + " | ".join(vals) + " |") @@ -2730,10 +2958,8 @@ def 
write_complete_statistical_analysis( # Compute all statistics summary_stats = _compute_summary_stats(df) - relationship_stats = _compute_relationship_stats(df, max_trade_duration) - representativity_stats = _compute_representativity_stats( - df, profit_target, max_trade_duration - ) + relationship_stats = _compute_relationship_stats(df) + representativity_stats = _compute_representativity_stats(df, profit_target) # Model analysis: skip if requested or not enough samples importance_df = None @@ -2741,36 +2967,67 @@ def write_complete_statistical_analysis( partial_deps = {} if skip_feature_analysis or len(df) < 4: print("Skipping feature analysis: flag set or insufficient samples (<4).") - else: - importance_df, analysis_stats, partial_deps, _model = _perform_feature_analysis( - df, seed, skip_partial_dependence=skip_partial_dependence + # Create placeholder files to satisfy integration expectations + (output_dir / "feature_importance.csv").write_text( + "feature,importance_mean,importance_std\n", encoding="utf-8" ) - # Save feature importance CSV - importance_df.to_csv(output_dir / "feature_importance.csv", index=False) - # Save partial dependence CSVs - if not skip_partial_dependence: - for feature, pd_df in partial_deps.items(): - pd_df.to_csv( - output_dir / f"partial_dependence_{feature}.csv", - index=False, + for feature in ["trade_duration", "idle_duration", "pnl"]: + (output_dir / f"partial_dependence_{feature}.csv").write_text( + f"{feature},partial_dependence\n", encoding="utf-8" + ) + else: + try: + importance_df, analysis_stats, partial_deps, _model = _perform_feature_analysis( + df, + seed, + skip_partial_dependence=skip_partial_dependence, + rf_n_jobs=rf_n_jobs if isinstance(rf_n_jobs, int) else 1, + perm_n_jobs=perm_n_jobs if isinstance(perm_n_jobs, int) else 1, + ) + # Save feature importance CSV + importance_df.to_csv(output_dir / "feature_importance.csv", index=False) + # Save partial dependence CSVs + if not skip_partial_dependence: + for feature, pd_df in partial_deps.items(): + pd_df.to_csv( + output_dir / f"partial_dependence_{feature}.csv", + index=False, + ) + else: + # Create empty files to keep outputs stable + for feature in ["trade_duration", "idle_duration", "pnl"]: + (output_dir / f"partial_dependence_{feature}.csv").write_text( + f"{feature},partial_dependence\n", encoding="utf-8" + ) + except ImportError: + print("scikit-learn unavailable; generating placeholder analysis artifacts.") + (output_dir / "feature_importance.csv").write_text( + "feature,importance_mean,importance_std\n", encoding="utf-8" + ) + for feature in ["trade_duration", "idle_duration", "pnl"]: + (output_dir / f"partial_dependence_{feature}.csv").write_text( + f"{feature},partial_dependence\n", encoding="utf-8" ) # Enhanced statistics test_seed = ( - stats_seed - if isinstance(stats_seed, int) - else (seed if isinstance(seed, int) else 42) - ) - hypothesis_tests = statistical_hypothesis_tests( - df, adjust_method=adjust_method, seed=test_seed + stats_seed if isinstance(stats_seed, int) else (seed if isinstance(seed, int) else 42) ) + hypothesis_tests = statistical_hypothesis_tests(df, adjust_method=adjust_method, seed=test_seed) metrics_for_ci = [ - "reward_total", + "reward", "reward_idle", "reward_hold", "reward_exit", "pnl", ] + # Include PBRS-related metrics when present + extra_ci_cols = [ + col + for col in ["reward_shaping", "reward_entry_additive", "reward_exit_additive"] + if col in df.columns + ] + metrics_for_ci.extend(extra_ci_cols) bootstrap_ci = bootstrap_confidence_intervals( 
df, metrics_for_ci, @@ -2796,39 +3053,46 @@ def write_complete_statistical_analysis( f.write(f"| Generated | {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')} |\n") f.write(f"| Total Samples | {len(df):,} |\n") f.write(f"| Random Seed | {seed} |\n") - f.write(f"| Max Trade Duration | {max_trade_duration} |\n") # Blank separator to visually group core simulation vs PBRS parameters f.write("| | |\n") - # Extra core PBRS parameters exposed in run configuration if present - _rp = ( - df.attrs.get("reward_params") + # Core PBRS parameters exposed in run configuration if present + reward_params: RewardParams = ( + dict(df.attrs.get("reward_params")) if isinstance(df.attrs.get("reward_params"), dict) else {} ) exit_mode = _get_str_param( - _rp, + reward_params, "exit_potential_mode", DEFAULT_MODEL_REWARD_PARAMETERS.get("exit_potential_mode", "canonical"), ) - potential_gamma = _rp.get( - "potential_gamma", - DEFAULT_MODEL_REWARD_PARAMETERS.get( - "potential_gamma", POTENTIAL_GAMMA_DEFAULT - ), - ) + potential_gamma = _get_potential_gamma(reward_params) f.write(f"| exit_potential_mode | {exit_mode} |\n") f.write(f"| potential_gamma | {potential_gamma} |\n") + # Additional configuration details + f.write(f"| max_trade_duration_candles | {max_trade_duration_candles} |\n") + max_idle_duration_candles = get_max_idle_duration_candles( + reward_params, max_trade_duration_candles=max_trade_duration_candles + ) + f.write(f"| max_idle_duration_candles | {max_idle_duration_candles} |\n") + f.write(f"| strict_diagnostics | {strict_diagnostics} |\n") + f.write(f"| skip_feature_analysis | {skip_feature_analysis} |\n") + f.write(f"| skip_partial_dependence | {skip_partial_dependence} |\n") + f.write(f"| rf_n_jobs | {rf_n_jobs} |\n") + f.write(f"| perm_n_jobs | {perm_n_jobs} |\n") + f.write(f"| bootstrap_resamples | {bootstrap_resamples} |\n") + f.write(f"| pvalue_adjust_method | {adjust_method} |\n") # Blank separator before overrides block f.write("| | |\n") - overrides_pairs = [] - if _rp: + overrides_pairs: List[str] = [] + if reward_params: for k, default_v in DEFAULT_MODEL_REWARD_PARAMETERS.items(): if k in ("exit_potential_mode", "potential_gamma"): continue # already printed explicitly try: - if k in _rp and _rp[k] != default_v: - overrides_pairs.append(f"{k}={_rp[k]}") + if k in reward_params and reward_params[k] != default_v: + overrides_pairs.append(f"{k}={reward_params[k]}") except Exception: continue if overrides_pairs: @@ -2843,11 +3107,7 @@ def write_complete_statistical_analysis( f.write("## 1. Global Statistics\n\n") f.write("### 1.1 Reward Distribution\n\n") - f.write( - _series_to_md( - summary_stats["global_stats"], value_name="reward_total", ndigits=6 - ) - ) + f.write(_series_to_md(summary_stats["global_stats"], value_name="reward", ndigits=6)) f.write("### 1.2 Reward Statistics by Action\n\n") action_df = summary_stats["action_summary"].copy() @@ -2860,7 +3120,7 @@ def write_complete_statistical_analysis( f.write("### 1.3 Component Activation Rates\n\n") f.write("Percentage of samples where each reward component is non-zero:\n\n") comp_share = summary_stats["component_share"].copy() - formatted_rows = [ + formatted_rows: List[str] = [ "| Component | Activation Rate |", "|-----------|----------------|", ] @@ -2889,40 +3149,36 @@ def write_complete_statistical_analysis( # Section 2: Representativity Analysis f.write("---\n\n") f.write("## 2. 
Sample Representativity\n\n") - f.write( - "This section evaluates whether the synthetic samples adequately represent " - ) + f.write("This section evaluates whether the synthetic samples adequately represent ") f.write("the full reward space across different market scenarios.\n\n") f.write("### 2.1 Position Distribution\n\n") f.write( _series_to_md( - representativity_stats["pos_counts"], value_name="count", ndigits=0 + representativity_stats["pos_counts"], + value_name="count", + ndigits=0, ) ) f.write("### 2.2 Action Distribution\n\n") f.write( _series_to_md( - representativity_stats["act_counts"], value_name="count", ndigits=0 + representativity_stats["act_counts"], + value_name="count", + ndigits=0, ) ) f.write("### 2.3 Critical Regime Coverage\n\n") f.write("| Regime | Coverage |\n") f.write("|--------|----------|\n") - f.write( - f"| PnL > target | {representativity_stats['pnl_above_target']:.1%} |\n" - ) - f.write( - f"| PnL near target (±20%) | {representativity_stats['pnl_near_target']:.1%} |\n" - ) + f.write(f"| PnL > target | {representativity_stats['pnl_above_target']:.1%} |\n") + f.write(f"| PnL near target (±20%) | {representativity_stats['pnl_near_target']:.1%} |\n") f.write( f"| Duration overage (>1.0) | {representativity_stats['duration_overage_share']:.1%} |\n" ) - f.write( - f"| Extreme PnL (\\|pnl\\|≥0.14) | {representativity_stats['pnl_extreme']:.1%} |\n" - ) + f.write(f"| Extreme PnL (\\|pnl\\|≥0.14) | {representativity_stats['pnl_extreme']:.1%} |\n") f.write("\n") f.write("### 2.4 Component Activation Rates\n\n") @@ -2936,9 +3192,7 @@ def write_complete_statistical_analysis( # Section 3: Reward Component Relationships f.write("---\n\n") f.write("## 3. Reward Component Analysis\n\n") - f.write( - "Analysis of how reward components behave under different conditions.\n\n" - ) + f.write("Analysis of how reward components behave under different conditions.\n\n") f.write("### 3.1 Idle Penalty vs Duration\n\n") if relationship_stats["idle_stats"].empty: @@ -2975,11 +3229,8 @@ def write_complete_statistical_analysis( f.write(_df_to_md(corr_df, index_name=corr_df.index.name, ndigits=4)) _dropped = relationship_stats.get("correlation_dropped") or [] if _dropped: - f.write( - "\n_Constant features removed (no variance): " - + ", ".join(_dropped) - + "._\n\n" - ) + dropped_strs: List[str] = [str(x) for x in _dropped] + f.write("\n_Constant features removed: " + ", ".join(dropped_strs) + "._\n\n") # Section 3.5: PBRS Analysis f.write("### 3.5 PBRS (Potential-Based Reward Shaping) Analysis\n\n") @@ -3014,12 +3265,8 @@ def write_complete_statistical_analysis( # PBRS statistics f.write("**PBRS Component Statistics:**\n\n") - pbrs_stats = df[pbrs_components].describe( - percentiles=[0.1, 0.25, 0.5, 0.75, 0.9] - ) - pbrs_stats_df = pbrs_stats.round( - 6 - ).T # Transpose to make it DataFrame-compatible + pbrs_stats = df[pbrs_components].describe(percentiles=[0.1, 0.25, 0.5, 0.75, 0.9]) + pbrs_stats_df = pbrs_stats.round(6).T # Transpose to make it DataFrame-compatible pbrs_stats_df.index.name = "component" f.write(_df_to_md(pbrs_stats_df, index_name="component", ndigits=6)) @@ -3029,18 +3276,10 @@ def write_complete_statistical_analysis( exit_add_total = df.get("reward_exit_additive", pd.Series([0])).sum() # Get configuration for proper invariance assessment - reward_params = ( - df.attrs.get("reward_params", {}) if hasattr(df, "attrs") else {} - ) - exit_potential_mode = _get_str_param( - reward_params, "exit_potential_mode", "canonical" - ) - entry_additive_enabled = 
_get_bool_param( - reward_params, "entry_additive_enabled", False - ) - exit_additive_enabled = _get_bool_param( - reward_params, "exit_additive_enabled", False - ) + reward_params = df.attrs.get("reward_params", {}) if hasattr(df, "attrs") else {} + exit_potential_mode = _get_str_param(reward_params, "exit_potential_mode", "canonical") + entry_additive_enabled = _get_bool_param(reward_params, "entry_additive_enabled", False) + exit_additive_enabled = _get_bool_param(reward_params, "exit_additive_enabled", False) # True invariance requires canonical mode AND no additives is_theoretically_invariant = exit_potential_mode == "canonical" and not ( @@ -3052,10 +3291,14 @@ def write_complete_statistical_analysis( if is_theoretically_invariant: if shaping_near_zero: invariance_status = "✅ Canonical" - invariance_note = "Theoretical invariance preserved (canonical mode, no additives, Σ≈0)" + invariance_note = ( + "Theoretical invariance preserved (canonical mode, no additives, Σ≈0)" + ) else: invariance_status = "⚠️ Canonical (with warning)" - invariance_note = f"Canonical mode but unexpected shaping sum = {total_shaping:.6f}" + invariance_note = ( + f"Canonical mode but unexpected shaping sum = {total_shaping:.6f}" + ) else: invariance_status = "❌ Non-canonical" reasons = [] @@ -3103,39 +3346,42 @@ def write_complete_statistical_analysis( "_Note: --skip_partial_dependence is redundant when feature analysis is skipped._\n\n" ) else: - f.write( - "Machine learning analysis to identify which features most influence total reward.\n\n" - ) - f.write("**Model:** Random Forest Regressor (400 trees) \n") - f.write(f"**R² Score:** {analysis_stats['r2_score']:.4f}\n\n") - - f.write("### 4.1 Top 10 Features by Importance\n\n") - top_imp = importance_df.head(10).copy().reset_index(drop=True) - # Render as markdown without index column - header = "| feature | importance_mean | importance_std |\n" - sep = "|---------|------------------|----------------|\n" - rows = [] - for _, r in top_imp.iterrows(): - rows.append( - f"| {r['feature']} | {_fmt_val(r['importance_mean'], 6)} | {_fmt_val(r['importance_std'], 6)} |" + if importance_df is None or analysis_stats is None: + f.write( + "_Feature analysis unavailable (scikit-learn not installed); placeholder artifacts generated._\n\n" ) - f.write(header + sep + "\n".join(rows) + "\n\n") - f.write("**Exported Data:**\n") - f.write("- Full feature importance: `feature_importance.csv`\n") - if not skip_partial_dependence: - f.write("- Partial dependence plots: `partial_dependence_*.csv`\n\n") else: f.write( - "- Partial dependence plots: (skipped via --skip_partial_dependence)\n\n" + "Machine learning analysis to identify which features most influence total reward.\n\n" ) + f.write("**Model:** Random Forest Regressor (400 trees) \n") + f.write(f"**R² Score:** {analysis_stats['r2_score']:.4f}\n\n") + + f.write("### 4.1 Top 10 Features by Importance\n\n") + top_imp = importance_df.head(10).copy().reset_index(drop=True) + # Render as markdown without index column + header = "| feature | importance_mean | importance_std |\n" + sep = "|---------|------------------|----------------|\n" + rows: List[str] = [] + for _, r in top_imp.iterrows(): + rows.append( + f"| {r['feature']} | {_fmt_val(r['importance_mean'], 6)} | {_fmt_val(r['importance_std'], 6)} |" + ) + f.write(header + sep + "\n".join(rows) + "\n\n") + f.write("**Exported Data:**\n") + f.write("- Full feature importance: `feature_importance.csv`\n") + if not skip_partial_dependence: + f.write("- Partial dependence 
plots: `partial_dependence_*.csv`\n\n") + else: + f.write( + "- Partial dependence plots: (skipped via --skip_partial_dependence)\n\n" + ) # Section 5: Statistical Validation if hypothesis_tests: f.write("---\n\n") f.write("## 5. Statistical Validation\n\n") - f.write( - "Rigorous statistical tests to validate reward behavior and relationships.\n\n" - ) + f.write("Rigorous statistical tests to validate reward behavior and relationships.\n\n") f.write("### 5.1 Hypothesis Tests\n\n") @@ -3150,10 +3396,9 @@ def write_complete_statistical_analysis( f"- p-value (adj BH): {h['p_value_adj']:.4g} -> {'✅ Yes' if h['significant_adj'] else '❌ No'} (α=0.05)\n" ) f.write(f"- 95% CI: [{h['ci_95'][0]:.4f}, {h['ci_95'][1]:.4f}]\n") + f.write(f"- CI width: {(h['ci_95'][1] - h['ci_95'][0]):.4f}\n") f.write(f"- Sample size: {h['n_samples']:,}\n") - f.write( - f"- Significant (α=0.05): {'✅ Yes' if h['significant'] else '❌ No'}\n" - ) + f.write(f"- Significant (α=0.05): {'✅ Yes' if h['significant'] else '❌ No'}\n") f.write(f"- **Interpretation:** {h['interpretation']}\n\n") if "position_reward_difference" in hypothesis_tests: @@ -3168,9 +3413,7 @@ def write_complete_statistical_analysis( ) f.write(f"- Effect size (ε²): {h['effect_size_epsilon_sq']:.4f}\n") f.write(f"- Number of groups: {h['n_groups']}\n") - f.write( - f"- Significant (α=0.05): {'✅ Yes' if h['significant'] else '❌ No'}\n" - ) + f.write(f"- Significant (α=0.05): {'✅ Yes' if h['significant'] else '❌ No'}\n") f.write(f"- **Interpretation:** {h['interpretation']} effect\n\n") if "pnl_sign_reward_difference" in hypothesis_tests: @@ -3185,9 +3428,7 @@ def write_complete_statistical_analysis( ) f.write(f"- Median (PnL+): {h['median_pnl_positive']:.4f}\n") f.write(f"- Median (PnL-): {h['median_pnl_negative']:.4f}\n") - f.write( - f"- Significant (α=0.05): {'✅ Yes' if h['significant'] else '❌ No'}\n\n" - ) + f.write(f"- Significant (α=0.05): {'✅ Yes' if h['significant'] else '❌ No'}\n\n") # Bootstrap CI if bootstrap_ci: @@ -3208,19 +3449,15 @@ def write_complete_statistical_analysis( if dist_diagnostics: f.write("### 5.3 Distribution Normality Tests\n\n") f.write("Statistical tests for normality of key distributions:\n\n") - for col in ["reward_total", "pnl", "trade_duration"]: + for col in ["reward", "pnl", "trade_duration", "idle_duration"]: if f"{col}_mean" in dist_diagnostics: f.write(f"#### {col.replace('_', ' ').title()}\n\n") f.write("| Metric | Value |\n") f.write("|--------|-------|\n") f.write(f"| Mean | {dist_diagnostics[f'{col}_mean']:.4f} |\n") f.write(f"| Std Dev | {dist_diagnostics[f'{col}_std']:.4f} |\n") - f.write( - f"| Skewness | {dist_diagnostics[f'{col}_skewness']:.4f} |\n" - ) - f.write( - f"| Kurtosis | {dist_diagnostics[f'{col}_kurtosis']:.4f} |\n" - ) + f.write(f"| Skewness | {dist_diagnostics[f'{col}_skewness']:.4f} |\n") + f.write(f"| Kurtosis | {dist_diagnostics[f'{col}_kurtosis']:.4f} |\n") if f"{col}_shapiro_pval" in dist_diagnostics: is_normal = ( "✅ Yes" @@ -3230,6 +3467,20 @@ def write_complete_statistical_analysis( f.write( f"| Normal? 
(Shapiro-Wilk) | {is_normal} (p={dist_diagnostics[f'{col}_shapiro_pval']:.4e}) |\n" ) + # Anderson-Darling diagnostics + if f"{col}_anderson_stat" in dist_diagnostics: + f.write( + f"| Anderson-Darling stat | {dist_diagnostics[f'{col}_anderson_stat']:.4f} |\n" + ) + f.write( + f"| Anderson 5% critical | {dist_diagnostics[f'{col}_anderson_critical_5pct']:.4f} |\n" + ) + is_normal_anderson = ( + "✅ Yes" + if dist_diagnostics.get(f"{col}_is_normal_anderson", False) + else "❌ No" + ) + f.write(f"| Normal? (Anderson-Darling) | {is_normal_anderson} |\n") if f"{col}_qq_r_squared" in dist_diagnostics: f.write( f"| Q-Q Plot R² | {dist_diagnostics[f'{col}_qq_r_squared']:.4f} |\n" @@ -3240,24 +3491,16 @@ def write_complete_statistical_analysis( if distribution_shift: f.write("### 5.4 Distribution Shift Analysis\n\n") f.write("Comparison between synthetic and real data distributions:\n\n") - f.write( - "| Feature | KL Div | JS Dist | Wasserstein | KS Stat | KS p-value |\n" - ) - f.write( - "|---------|--------|---------|-------------|---------|------------|\n" - ) + f.write("| Feature | KL Div | JS Dist | Wasserstein | KS Stat | KS p-value |\n") + f.write("|---------|--------|---------|-------------|---------|------------|\n") features = ["pnl", "trade_duration", "idle_duration"] for feature in features: - kl = distribution_shift.get( - f"{feature}_kl_divergence", float("nan") - ) - js = distribution_shift.get(f"{feature}_js_distance", float("nan")) - ws = distribution_shift.get(f"{feature}_wasserstein", float("nan")) - ks_stat = distribution_shift.get( - f"{feature}_ks_statistic", float("nan") - ) - ks_p = distribution_shift.get(f"{feature}_ks_pvalue", float("nan")) + kl = distribution_shift.get(f"{feature}_kl_divergence", np.nan) + js = distribution_shift.get(f"{feature}_js_distance", np.nan) + ws = distribution_shift.get(f"{feature}_wasserstein", np.nan) + ks_stat = distribution_shift.get(f"{feature}_ks_statistic", np.nan) + ks_p = distribution_shift.get(f"{feature}_ks_pvalue", np.nan) f.write( f"| {feature} | {kl:.4f} | {js:.4f} | {ws:.4f} | {ks_stat:.4f} | {ks_p:.4g} |\n" @@ -3268,9 +3511,7 @@ def write_complete_statistical_analysis( f.write("|--------|-----------|--------|\n") f.write("| KL Divergence | < 0.3 | ✅ Yes: Good representativeness |\n") f.write("| JS Distance | < 0.2 | ✅ Yes: Similar distributions |\n") - f.write( - "| KS p-value | > 0.05 | ✅ Yes: No significant difference |\n\n" - ) + f.write("| KS p-value | > 0.05 | ✅ Yes: No significant difference |\n\n") else: # Placeholder keeps numbering stable and explicit f.write("### 5.4 Distribution Shift Analysis\n\n") @@ -3283,9 +3524,7 @@ def write_complete_statistical_analysis( f.write( "1. **Global Statistics** - Overall reward distributions and component activation\n" ) - f.write( - "2. **Sample Representativity** - Coverage of critical market scenarios\n" - ) + f.write("2. **Sample Representativity** - Coverage of critical market scenarios\n") f.write( "3. **Component Analysis** - Relationships between rewards and conditions (including PBRS)\n" ) @@ -3294,18 +3533,12 @@ def write_complete_statistical_analysis( "4. **Feature Importance** - (skipped) Machine learning analysis of key drivers\n" ) else: - f.write( - "4. **Feature Importance** - Machine learning analysis of key drivers\n" - ) - f.write( - "5. **Statistical Validation** - Hypothesis tests and confidence intervals\n" - ) + f.write("4. **Feature Importance** - Machine learning analysis of key drivers\n") + f.write("5. 
**Statistical Validation** - Hypothesis tests and confidence intervals\n") if distribution_shift: f.write("6. **Distribution Shift** - Comparison with real trading data\n") else: - f.write( - "6. **Distribution Shift** - Not performed (no real episodes provided)\n" - ) + f.write("6. **Distribution Shift** - Not performed (no real episodes provided)\n") if "reward_shaping" in df.columns: _total_shaping = df["reward_shaping"].sum() _canonical = abs(_total_shaping) < PBRS_INVARIANCE_TOL @@ -3322,12 +3555,8 @@ def write_complete_statistical_analysis( f.write("**Generated Files:**\n") f.write("- `reward_samples.csv` - Raw synthetic samples\n") if not skip_feature_analysis and len(df) >= 4: - f.write( - "- `feature_importance.csv` - Complete feature importance rankings\n" - ) - f.write( - "- `partial_dependence_*.csv` - Partial dependence data for visualization\n" - ) + f.write("- `feature_importance.csv` - Complete feature importance rankings\n") + f.write("- `partial_dependence_*.csv` - Partial dependence data for visualization\n") def main() -> None: @@ -3343,8 +3572,9 @@ def main() -> None: # Then apply --params KEY=VALUE overrides (highest precedence) params.update(parse_overrides(args.params)) - # Early parameter validation (moved before simulation for alignment with docs) - params_validated, adjustments = validate_reward_parameters(params) + params_validated, adjustments = validate_reward_parameters( + params, strict=args.strict_validation + ) params = params_validated if adjustments: # Compact adjustments summary (param: original->adjusted [reason]) @@ -3358,15 +3588,16 @@ def main() -> None: base_factor = _get_float_param(params, "base_factor", float(args.base_factor)) profit_target = _get_float_param(params, "profit_target", float(args.profit_target)) - risk_reward_ratio = _get_float_param( - params, "risk_reward_ratio", float(args.risk_reward_ratio) - ) + risk_reward_ratio = _get_float_param(params, "risk_reward_ratio", float(args.risk_reward_ratio)) cli_action_masking = _to_bool(args.action_masking) if "action_masking" in params: params["action_masking"] = _to_bool(params["action_masking"]) else: params["action_masking"] = cli_action_masking + params["unrealized_pnl"] = bool(getattr(args, "unrealized_pnl", False)) + # Propagate strict flag into params for downstream runtime guards + params["strict_validation"] = bool(getattr(args, "strict_validation", True)) # Deterministic seeds cascade random.seed(args.seed) @@ -3376,7 +3607,6 @@ def main() -> None: num_samples=args.num_samples, seed=args.seed, params=params, - max_trade_duration=args.max_trade_duration, base_factor=base_factor, profit_target=profit_target, risk_reward_ratio=risk_reward_ratio, @@ -3392,16 +3622,14 @@ def main() -> None: "idle_duration", "position", "action", - "reward_total", + "reward", "reward_invalid", "reward_idle", "reward_hold", "reward_exit", ] nan_issues = { - c: int(df[c].isna().sum()) - for c in critical_cols - if c in df.columns and df[c].isna().any() + c: int(df[c].isna().sum()) for c in critical_cols if c in df.columns and df[c].isna().any() } if nan_issues: raise AssertionError( @@ -3409,20 +3637,53 @@ def main() -> None: + ", ".join(f"{k}={v}" for k, v in nan_issues.items()) ) # Attach simulation parameters for downstream manifest - df.attrs["simulation_params"] = { - "num_samples": args.num_samples, - "seed": args.seed, - "max_trade_duration": args.max_trade_duration, - "base_factor": base_factor, - "profit_target": profit_target, - "risk_reward_ratio": risk_reward_ratio, - "max_duration_ratio": 
args.max_duration_ratio, - "trading_mode": args.trading_mode, - "action_masking": _get_bool_param(params, "action_masking", True), - "pnl_base_std": args.pnl_base_std, - "pnl_duration_vol_scale": args.pnl_duration_vol_scale, - } - # Attach resolved reward parameters for inline overrides rendering in report + try: + defaults = { + a.dest: getattr(a, "default", None) for a in parser._actions if hasattr(a, "dest") + } + except Exception: + defaults = {} + args_dict = vars(args) + + candidate_keys = [ + "num_samples", + "seed", + "out_dir", + "trading_mode", + "risk_reward_ratio", + "profit_target", + "max_duration_ratio", + "pnl_base_std", + "pnl_duration_vol_scale", + "rf_n_jobs", + "perm_n_jobs", + "skip_feature_analysis", + "skip_partial_dependence", + "stats_seed", + "strict_diagnostics", + "bootstrap_resamples", + "pvalue_adjust", + "real_episodes", + "unrealized_pnl", + "action_masking", + ] + + sim_params: Dict[str, Any] = {} + for k in candidate_keys: + if k in args_dict: + v = args_dict[k] + v_norm = str(v) if isinstance(v, Path) else v + d = defaults.get(k) + d_norm = str(d) if isinstance(d, Path) else d + if d_norm != v_norm: + sim_params[k] = v_norm + + # Deduplicate any keys that overlap with reward_params (single source of truth) + for k in list(sim_params.keys()): + if k in params: + sim_params.pop(k) + + df.attrs["simulation_params"] = sim_params df.attrs["reward_params"] = dict(params) args.out_dir.mkdir(parents=True, exist_ok=True) @@ -3442,58 +3703,55 @@ def main() -> None: write_complete_statistical_analysis( df, args.out_dir, - max_trade_duration=args.max_trade_duration, profit_target=float(profit_target * risk_reward_ratio), seed=args.seed, real_df=real_df, adjust_method=args.pvalue_adjust, - stats_seed=( - args.stats_seed if getattr(args, "stats_seed", None) is not None else None - ), + stats_seed=(args.stats_seed if getattr(args, "stats_seed", None) is not None else None), strict_diagnostics=bool(getattr(args, "strict_diagnostics", False)), bootstrap_resamples=getattr(args, "bootstrap_resamples", 10000), skip_partial_dependence=bool(getattr(args, "skip_partial_dependence", False)), skip_feature_analysis=bool(getattr(args, "skip_feature_analysis", False)), + rf_n_jobs=int(getattr(args, "rf_n_jobs", -1)), + perm_n_jobs=int(getattr(args, "perm_n_jobs", -1)), ) - print( - f"Complete statistical analysis saved to: {args.out_dir / 'statistical_analysis.md'}" - ) + print(f"Complete statistical analysis saved to: {args.out_dir / 'statistical_analysis.md'}") # Generate manifest summarizing key metrics try: manifest_path = args.out_dir / "manifest.json" - resolved_reward_params = dict(params) # already validated/normalized upstream - manifest = { + resolved_reward_params: Dict[str, Any] = dict( + params + ) # already validated/normalized upstream + manifest: Dict[str, Any] = { "generated_at": pd.Timestamp.now().isoformat(), "num_samples": int(len(df)), "seed": int(args.seed), - "max_trade_duration": int(args.max_trade_duration), "profit_target_effective": float(profit_target * risk_reward_ratio), "pvalue_adjust_method": args.pvalue_adjust, "parameter_adjustments": adjustments, "reward_params": resolved_reward_params, } - sim_params = df.attrs.get("simulation_params", {}) - if isinstance(sim_params, dict) and sim_params: - import hashlib as _hashlib - import json as _json - - # Compose hash source from ALL simulation params and ALL resolved reward params for full reproducibility. 
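The replacement hashing logic below composes `params_hash` from namespaced, lexicographically sorted simulation and reward parameters (with `out_dir` and `real_episodes` excluded) and digests the sorted JSON dump with SHA-256. A minimal standalone sketch of that composition, using hypothetical parameter dicts purely for illustration:

```python
import hashlib
import json


def params_hash(sim_params: dict, reward_params: dict) -> str:
    # Illustrative mirror of the manifest hash composition (not the repository function):
    # drop non-reproducibility keys, prefix the two namespaces, sort keys
    # lexicographically, then hash the canonical JSON serialization.
    excluded = {"out_dir", "real_episodes"}
    source = {
        **{f"sim::{k}": v for k, v in sorted(sim_params.items()) if k not in excluded},
        **{f"reward::{k}": v for k, v in sorted(reward_params.items())},
    }
    return hashlib.sha256(json.dumps(source, sort_keys=True).encode("utf-8")).hexdigest()


# Hypothetical inputs: two runs match iff their params_hash values are identical.
print(params_hash({"num_samples": 50, "seed": 42}, {"potential_gamma": 0.95}))
```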
- _hash_source = { - **{f"sim::{k}": sim_params[k] for k in sorted(sim_params)}, + sim_params_dict = df.attrs.get("simulation_params", {}) + if not isinstance(sim_params_dict, dict): + sim_params_dict = {} + sim_params: Dict[str, Any] = dict(sim_params_dict) + if sim_params: + excluded_for_hash = {"out_dir", "real_episodes"} + sim_params_for_hash: Dict[str, Any] = { + k: sim_params[k] for k in sim_params if k not in excluded_for_hash + } + _hash_source: Dict[str, Any] = { + **{f"sim::{k}": sim_params_for_hash[k] for k in sorted(sim_params_for_hash)}, **{ f"reward::{k}": resolved_reward_params[k] for k in sorted(resolved_reward_params) }, } - serialized = _json.dumps(_hash_source, sort_keys=True) - manifest["params_hash"] = _hashlib.sha256( - serialized.encode("utf-8") - ).hexdigest() + _hash_source_str = json.dumps(_hash_source, sort_keys=True) + manifest["params_hash"] = hashlib.sha256(_hash_source_str.encode("utf-8")).hexdigest() manifest["simulation_params"] = sim_params with manifest_path.open("w", encoding="utf-8") as mh: - import json as _json - - _json.dump(manifest, mh, indent=2) + json.dump(manifest, mh, indent=2) print(f"Manifest written to: {manifest_path}") except Exception as e: print(f"Manifest generation failed: {e}") diff --git a/ReforceXY/reward_space_analysis/test_cli.py b/ReforceXY/reward_space_analysis/test_cli.py index f7b40b4..fc707ed 100644 --- a/ReforceXY/reward_space_analysis/test_cli.py +++ b/ReforceXY/reward_space_analysis/test_cli.py @@ -1,31 +1,31 @@ -"""CLI integration smoke test for reward_space_analysis. +"""CLI integration test for reward_space_analysis. Purpose ------- -Execute a bounded, optionally shuffled subset of parameter combinations for -`reward_space_analysis.py` to verify end-to-end execution (smoke / regression -signal, not correctness proof). +Execute a bounded, optionally shuffled subset of parameter combinations for `reward_space_analysis.py` to verify end-to-end execution. Key features ------------ -* Deterministic sampling with optional shuffling (`--shuffle-seed`). -* Optional duplication of first N scenarios under strict diagnostics - (`--strict-sample`). -* Per-scenario timing and aggregate statistics (mean / min / max seconds). -* Simple warning counting + (patch adds) breakdown of distinct warning lines. -* Scenario list + seed metadata exported for reproducibility. -* Direct CLI forwarding of bootstrap resample count to child process. +* Deterministic sampling with optional shuffling (`--shuffle_seed`). +* Optional duplication of first N scenarios under strict diagnostics (`--strict_sample`). +* Per-scenario timing and aggregate statistics (mean / min / max / median / p95 seconds). +* Warning counting based on header lines plus a breakdown of distinct warning headers. +* Log tail truncation controlled via `--tail_chars` (characters) or full logs via `--full_logs`. +* Direct CLI forwarding of bootstrap resample count to the child process. Usage ----- -python test_cli.py --samples 50 --out_dir ../sample_run_output_smoke \ +python test_cli.py --num_samples 50 --out_dir ../sample_run_output \ --shuffle_seed 123 --strict_sample 3 --bootstrap_resamples 200 JSON Summary fields ------------------- -total, ok, failures[], warnings_total, warnings_breakdown, mean_seconds, -max_seconds, min_seconds, strict_duplicated, scenarios (list), seeds (metadata). 
- +- total, successes[], failures[] +- mean_seconds, max_seconds, min_seconds, median_seconds, p95_seconds +- warnings_breakdown +- seeds (sampling/configuration seeds) +- metadata (timestamp_utc, python_version, platform, git_commit, schema_version=2, per_scenario_timeout) +- interrupted (optional) Exit codes ---------- 0: success, 1: failures present, 130: interrupted (partial summary written). @@ -36,9 +36,12 @@ from __future__ import annotations import argparse import itertools import json +import math import os import platform import random +import re +import statistics import subprocess import sys import tempfile @@ -46,10 +49,14 @@ import time from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, TypedDict -ConfigTuple = Tuple[str, str, float, int, int, int] +try: + from typing import NotRequired, Required # Python >=3.11 +except ImportError: + from typing_extensions import NotRequired, Required # Python <3.11 +ConfigTuple = Tuple[str, str, float, int, int, int] -SUMMARY_FILENAME = "reward_space_cli_smoke_results.json" +SUMMARY_FILENAME = "reward_space_cli_results.json" class ScenarioResult(TypedDict): @@ -62,15 +69,34 @@ class ScenarioResult(TypedDict): warnings: int -class SummaryResult(TypedDict): - total: int - ok: int - failures: List[ScenarioResult] - warnings_total: int - mean_seconds: Optional[float] - max_seconds: Optional[float] - min_seconds: Optional[float] - strict_duplicated: int +class SummaryResult(TypedDict, total=False): + # Required keys + total: Required[int] + successes: Required[List[ScenarioResult]] + failures: Required[List[ScenarioResult]] + mean_seconds: Required[Optional[float]] + max_seconds: Required[Optional[float]] + min_seconds: Required[Optional[float]] + median_seconds: Required[Optional[float]] + p95_seconds: Required[Optional[float]] + + # Extension keys + warnings_breakdown: NotRequired[Dict[str, int]] + seeds: NotRequired[Dict[str, Any]] + metadata: NotRequired[Dict[str, Any]] + interrupted: NotRequired[bool] + + +_WARN_HEADER_RE = re.compile(r"^\s*(?:[A-Za-z]+Warning|WARNING)\b:?", re.IGNORECASE) + + +def _is_warning_header(line: str) -> bool: + l = line.strip() + if not l: + return False + if "warnings.warn" in l.lower(): + return False + return bool(_WARN_HEADER_RE.search(l)) def build_arg_matrix( @@ -100,19 +126,39 @@ def build_arg_matrix( ) full: List[ConfigTuple] = list(product_iter) + full = [c for c in full if not (c[0] == "canonical" and (c[4] == 1 or c[5] == 1))] if shuffle_seed is not None: rnd = random.Random(shuffle_seed) rnd.shuffle(full) if max_scenarios >= len(full): return full step = len(full) / max_scenarios - idx_pos = 0.0 + idx_pos = step / 2.0 # Centered sampling selected: List[ConfigTuple] = [] + selected_indices: set[int] = set() for _ in range(max_scenarios): - idx = int(idx_pos) - if idx >= len(full): + idx = int(round(idx_pos)) + if idx < 0: + idx = 0 + elif idx >= len(full): idx = len(full) - 1 + if idx in selected_indices: + left = idx - 1 + right = idx + 1 + while True: + if left >= 0 and left not in selected_indices: + idx = left + break + if right < len(full) and right not in selected_indices: + idx = right + break + left -= 1 + right += 1 + if left < 0 and right >= len(full): + # All indices taken; fallback to current idx + break selected.append(full[idx]) + selected_indices.add(idx) idx_pos += step return selected @@ -121,13 +167,17 @@ def run_scenario( script: Path, out_dir: Path, idx: int, - total: int, - base_samples: int, + num_samples: int, conf: ConfigTuple, strict: bool, 
bootstrap_resamples: int, timeout: int, skip_feature_analysis: bool = False, + skip_partial_dependence: bool = False, + unrealized_pnl: bool = False, + full_logs: bool = False, + params: Optional[List[str]] = None, + tail_chars: int = 5000, ) -> ScenarioResult: ( exit_potential_mode, @@ -143,7 +193,7 @@ def run_scenario( sys.executable, str(script), "--num_samples", - str(base_samples), + str(num_samples), "--out_dir", str(scenario_dir), "--exit_potential_mode", @@ -165,13 +215,17 @@ def run_scenario( cmd += ["--bootstrap_resamples", str(bootstrap_resamples)] if skip_feature_analysis: cmd.append("--skip_feature_analysis") + if skip_partial_dependence: + cmd.append("--skip_partial_dependence") + if unrealized_pnl: + cmd.append("--unrealized_pnl") if strict: cmd.append("--strict_diagnostics") + if params: + cmd += ["--params"] + list(params) start = time.perf_counter() try: - proc = subprocess.run( - cmd, capture_output=True, text=True, check=False, timeout=timeout - ) + proc = subprocess.run(cmd, capture_output=True, text=True, check=False, timeout=timeout) except subprocess.TimeoutExpired: return { "config": conf, @@ -184,37 +238,61 @@ def run_scenario( } status = "ok" if proc.returncode == 0 else f"error({proc.returncode})" end = time.perf_counter() - combined = (proc.stdout + "\n" + proc.stderr).lower() - warn_count = combined.count("warning") + if proc.returncode != 0: + cmd_str = " ".join(cmd) + stderr_head_lines = proc.stderr.splitlines()[:3] + stderr_head = "\n".join(stderr_head_lines) + print(f"[error details] command: {cmd_str}") + if stderr_head: + print(f"[error details] stderr head:\n{stderr_head}") + else: + print("[error details] stderr is empty.") + combined = proc.stdout.splitlines() + proc.stderr.splitlines() + warnings = sum(1 for line in combined if _is_warning_header(line)) + if full_logs: + stdout_out = proc.stdout + stderr_out = proc.stderr + else: + if tail_chars == 0: + stdout_out = "" + stderr_out = "" + else: + stdout_out = proc.stdout[-tail_chars:] + stderr_out = proc.stderr[-tail_chars:] return { "config": conf, "status": status, - "stdout": proc.stdout[-5000:], - "stderr": proc.stderr[-5000:], + "stdout": stdout_out, + "stderr": stderr_out, "strict": strict, "seconds": round(end - start, 4), - "warnings": warn_count, + "warnings": warnings, } def main(): parser = argparse.ArgumentParser() parser.add_argument( - "--samples", + "--num_samples", type=int, default=40, - help="num synthetic samples per scenario (minimum 4 for feature analysis)", + help="Number of synthetic samples per scenario (minimum 4 for feature analysis)", ) parser.add_argument( "--skip_feature_analysis", action="store_true", - help="Skip feature importance and model-based analysis for all scenarios.", + help="Forward --skip_feature_analysis to child process to skip feature importance and model-based analysis for all scenarios.", + ) + parser.add_argument( + "--skip_partial_dependence", + action="store_true", + help="Forward --skip_partial_dependence to child process to skip partial dependence computation.", ) parser.add_argument( "--out_dir", type=str, - default="sample_run_output_smoke", - help="output parent directory", + default="sample_run_output", + help="Output parent directory", ) parser.add_argument( "--shuffle_seed", @@ -247,94 +325,145 @@ def main(): help="Timeout (seconds) per child process (default: 600)", ) parser.add_argument( - "--store_full_logs", + "--full_logs", action="store_true", help="If set, store full stdout/stderr (may be large) instead of tail truncation.", ) + 
parser.add_argument( + "--unrealized_pnl", + action="store_true", + help="Forward --unrealized_pnl to child process to exercise hold Φ(s) path.", + ) + parser.add_argument( + "--params", + nargs="*", + default=[], + metavar="KEY=VALUE", + help="Forward parameter overrides to child process via --params, e.g. action_masking=0", + ) + parser.add_argument( + "--tail_chars", + type=int, + default=5000, + help="Characters to keep from stdout/stderr tail when not storing full logs.", + ) args = parser.parse_args() # Basic validation if args.max_scenarios <= 0: parser.error("--max_scenarios must be > 0") - if args.samples < 4 and not args.skip_feature_analysis: - parser.error("--samples must be >= 4 unless --skip_feature_analysis is set") + if args.num_samples < 4 and not args.skip_feature_analysis: + parser.error("--num_samples must be >= 4 unless --skip_feature_analysis is set") if args.strict_sample < 0: parser.error("--strict_sample must be >= 0") if args.bootstrap_resamples <= 0: parser.error("--bootstrap_resamples must be > 0") + if args.tail_chars < 0: + parser.error("--tail_chars must be >= 0") + if args.per_scenario_timeout <= 0: + parser.error("--per_scenario_timeout must be > 0") script = Path(__file__).parent / "reward_space_analysis.py" out_dir = Path(args.out_dir) out_dir.mkdir(parents=True, exist_ok=True) - scenarios = build_arg_matrix( - max_scenarios=args.max_scenarios, shuffle_seed=args.shuffle_seed - ) - - # Prepare list of (conf, strict_flag) + scenarios = build_arg_matrix(max_scenarios=args.max_scenarios, shuffle_seed=args.shuffle_seed) + + # Validate --params basic KEY=VALUE format + valid_params: List[str] = [] + invalid_params: List[str] = [] + for p in args.params: + if "=" in p: + valid_params.append(p) + else: + invalid_params.append(p) + if invalid_params: + msg = f"Warning: ignoring malformed --params entries: {invalid_params}" + print(msg, file=sys.stderr) + print(f"{msg}") + args.params = valid_params + + # Prepare list of (conf, strict) scenario_pairs: List[Tuple[ConfigTuple, bool]] = [(c, False) for c in scenarios] - strict_n = max(0, min(args.strict_sample, len(scenarios))) - for c in scenarios[:strict_n]: + indices = {conf: idx for idx, conf in enumerate(scenarios, start=1)} + n_duplicated = max(0, min(args.strict_sample, len(scenarios))) + if n_duplicated > 0: + print(f"Duplicating first {n_duplicated} scenarios with --strict_diagnostics") + for c in scenarios[:n_duplicated]: scenario_pairs.append((c, True)) results: List[ScenarioResult] = [] total = len(scenario_pairs) interrupted = False try: - for i, (conf, strict_flag) in enumerate(scenario_pairs, start=1): - # Ensure child process sees the chosen bootstrap resamples via direct CLI args only + for i, (conf, strict) in enumerate(scenario_pairs, start=1): res = run_scenario( - script, - out_dir, - i, - total, - args.samples, - conf, - strict=strict_flag, + script=script, + out_dir=out_dir, + idx=i, + num_samples=args.num_samples, + conf=conf, + strict=strict, bootstrap_resamples=args.bootstrap_resamples, timeout=args.per_scenario_timeout, skip_feature_analysis=args.skip_feature_analysis, + skip_partial_dependence=args.skip_partial_dependence, + unrealized_pnl=args.unrealized_pnl, + full_logs=args.full_logs, + params=args.params, + tail_chars=args.tail_chars, ) results.append(res) status = res["status"] - tag = "[strict]" if strict_flag else "" + strict_str = f"[strict duplicate_of={indices.get(conf, '?')}]" if strict else "" secs = res.get("seconds") secs_str = f" {secs:.2f}s" if secs is not None else "" - 
print(f"[{i}/{total}] {conf} {tag} -> {status}{secs_str}") + print(f"[{i}/{total}] {conf} {strict_str} -> {status}{secs_str}") except KeyboardInterrupt: interrupted = True print("\nKeyboardInterrupt received: writing partial summary...") - ok = sum(1 for r in results if r["status"] == "ok") + successes = [r for r in results if r["status"] == "ok"] failures = [r for r in results if r["status"] != "ok"] - total_warnings = sum(r["warnings"] for r in results) durations: List[float] = [ float(r["seconds"]) for r in results if isinstance(r["seconds"], float) ] + if durations: + _sorted = sorted(durations) + median_seconds = statistics.median(_sorted) + n = len(_sorted) + if n == 1: + p95_seconds = _sorted[0] + else: + pos = 0.95 * (n - 1) + i0 = int(math.floor(pos)) + i1 = int(math.ceil(pos)) + if i0 == i1: + p95_seconds = _sorted[i0] + else: + w = pos - i0 + p95_seconds = _sorted[i0] + (_sorted[i1] - _sorted[i0]) * w + else: + median_seconds = None + p95_seconds = None summary: SummaryResult = { "total": len(results), - "ok": ok, + "successes": successes, "failures": failures, - "warnings_total": total_warnings, - "mean_seconds": round(sum(durations) / len(durations), 4) - if durations - else None, + "mean_seconds": round(sum(durations) / len(durations), 4) if durations else None, "max_seconds": max(durations) if durations else None, "min_seconds": min(durations) if durations else None, - "strict_duplicated": strict_n, + "median_seconds": median_seconds, + "p95_seconds": p95_seconds, } - # Build warning breakdown (simple line fingerprinting) - warning_counts: Dict[str, int] = {} + # Build warnings breakdown + warnings_breakdown: Dict[str, int] = {} for r in results: text = (r["stderr"] + "\n" + r["stdout"]).splitlines() for line in text: - if "warning" in line.lower(): - # Fingerprint: trim + collapse whitespace + limit length + if _is_warning_header(line): fp = " ".join(line.strip().split())[:160] - warning_counts[fp] = warning_counts.get(fp, 0) + 1 - - # Scenario export (list of configs only, excluding strict flag duplication detail) - scenario_list = [list(c) for c, _ in scenario_pairs] + warnings_breakdown[fp] = warnings_breakdown.get(fp, 0) + 1 # Collect environment + reproducibility metadata def _git_hash() -> Optional[str]: @@ -352,40 +481,38 @@ def main(): return None return None - summary_extra: Dict[str, Any] = { - "warnings_breakdown": warning_counts, - "scenarios": scenario_list, - "seeds": { - "shuffle_seed": args.shuffle_seed, - "strict_sample": args.strict_sample, - "max_scenarios": args.max_scenarios, - "bootstrap_resamples": args.bootstrap_resamples, - }, - "metadata": { - "timestamp_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), - "python_version": sys.version.split()[0], - "platform": platform.platform(), - "git_commit": _git_hash(), - "schema_version": 1, - "per_scenario_timeout": args.per_scenario_timeout, - }, - } - serializable: Dict[str, Any] + summary.update( + { + "warnings_breakdown": warnings_breakdown, + "seeds": { + "shuffle_seed": args.shuffle_seed, + "strict_sample": args.strict_sample, + "max_scenarios": args.max_scenarios, + "bootstrap_resamples": args.bootstrap_resamples, + }, + "metadata": { + "timestamp_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), + "python_version": sys.version.split()[0], + "platform": platform.platform(), + "git_commit": _git_hash(), + "schema_version": 2, + "per_scenario_timeout": args.per_scenario_timeout, + }, + } + ) if interrupted: - serializable = {**summary, **summary_extra, "interrupted": True} - else: - 
serializable = {**summary, **summary_extra} + summary["interrupted"] = True # Atomic write to avoid corrupt partial files tmp_fd, tmp_path = tempfile.mkstemp(prefix="_tmp_summary_", dir=str(out_dir)) try: with os.fdopen(tmp_fd, "w", encoding="utf-8") as fh: - json.dump(serializable, fh, indent=2) + json.dump(summary, fh, indent=2) os.replace(tmp_path, out_dir / SUMMARY_FILENAME) except Exception: # Best effort fallback try: Path(out_dir / SUMMARY_FILENAME).write_text( - json.dumps(serializable, indent=2), encoding="utf-8" + json.dumps(summary, indent=2), encoding="utf-8" ) finally: if os.path.exists(tmp_path): @@ -394,7 +521,8 @@ def main(): except OSError: pass else: - if os.path.exists(tmp_path): # Should have been moved; defensive cleanup + # Defensive cleanup: remove temp file if atomic replace did not clean up + if os.path.exists(tmp_path): try: os.remove(tmp_path) except OSError: diff --git a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py index a7f82d5..ad5ce3b 100644 --- a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py @@ -20,7 +20,7 @@ import tempfile import unittest import warnings from pathlib import Path -from typing import Iterable, Optional, Sequence, Union +from typing import Any, Dict, Iterable, Optional, Sequence, Union import numpy as np import pandas as pd @@ -107,17 +107,13 @@ class RewardSpaceTestBase(unittest.TestCase): PBRS_SWEEP_ITER = 120 # Generic numeric tolerances (distinct from PBRS structural constants) - EPS_BASE = ( - 1e-12 # Base epsilon for strict identity & numeric guards (single source) - ) + EPS_BASE = 1e-12 # Base epsilon for strict identity & numeric guards (single source) TOL_NUMERIC_GUARD = EPS_BASE # Division-by-zero guards / min denominators (alias) TOL_IDENTITY_STRICT = EPS_BASE # Strict component identity (alias of EPS_BASE) TOL_IDENTITY_RELAXED = 1e-9 # Looser identity when cumulative fp drift acceptable TOL_GENERIC_EQ = 1e-6 # Generic numeric equality TOL_NEGLIGIBLE = 1e-8 # Negligible statistical or shaping effects - MIN_EXIT_POWER_TAU = ( - 1e-6 # Lower bound for exit_power_tau parameter (validation semantics) - ) + MIN_EXIT_POWER_TAU = 1e-6 # Lower bound for exit_power_tau parameter (validation semantics) # Distribution shape invariance (skewness / excess kurtosis) tolerance under scaling TOL_DISTRIB_SHAPE = 5e-2 # Theoretical upper bound for Jensen-Shannon distance: sqrt(log 2) @@ -133,7 +129,6 @@ class RewardSpaceTestBase(unittest.TestCase): pnl: float = 0.0, trade_duration: int = 0, idle_duration: int = 0, - max_trade_duration: int = 100, max_unrealized_profit: float = 0.0, min_unrealized_profit: float = 0.0, position: Positions = Positions.Neutral, @@ -144,16 +139,15 @@ class RewardSpaceTestBase(unittest.TestCase): pnl=pnl, trade_duration=trade_duration, idle_duration=idle_duration, - max_trade_duration=max_trade_duration, max_unrealized_profit=max_unrealized_profit, min_unrealized_profit=min_unrealized_profit, position=position, action=action, ) - def base_params(self, **overrides) -> dict: + def base_params(self, **overrides) -> Dict[str, Any]: """Return fresh copy of default reward params with overrides.""" - params = DEFAULT_MODEL_REWARD_PARAMETERS.copy() + params: Dict[str, Any] = DEFAULT_MODEL_REWARD_PARAMETERS.copy() params.update(overrides) return params @@ -161,8 +155,8 @@ class RewardSpaceTestBase(unittest.TestCase): self, params: dict, *, - iterations: int | None = 
None, - terminal_prob: float | None = None, + iterations: Optional[int] = None, + terminal_prob: Optional[float] = None, seed: int = 123, ) -> tuple[list[float], list[float]]: """Run a lightweight canonical invariance sweep. @@ -178,22 +172,23 @@ class RewardSpaceTestBase(unittest.TestCase): current_pnl = 0.0 current_dur = 0.0 for _ in range(iters): - is_terminal = rng.uniform() < term_p - next_pnl = 0.0 if is_terminal else float(rng.normal(0, 0.2)) + is_exit = rng.uniform() < term_p + next_pnl = 0.0 if is_exit else float(rng.normal(0, 0.2)) inc = rng.uniform(0, 0.12) - next_dur = 0.0 if is_terminal else float(min(1.0, current_dur + inc)) + next_dur = 0.0 if is_exit else float(min(1.0, current_dur + inc)) _tot, shap_val, next_pot = apply_potential_shaping( base_reward=0.0, current_pnl=current_pnl, current_duration_ratio=current_dur, next_pnl=next_pnl, next_duration_ratio=next_dur, - is_terminal=is_terminal, + is_exit=is_exit, + is_entry=False, # In _canonical_sweep, we do not simulate entries last_potential=last_potential, params=params, ) shaping_vals.append(shap_val) - if is_terminal: + if is_exit: terminal_next.append(next_pot) last_potential = 0.0 current_pnl = 0.0 @@ -208,13 +203,13 @@ class RewardSpaceTestBase(unittest.TestCase): self, *, n: int, - reward_total_mean: float = 0.0, - reward_total_std: float = 1.0, + reward_mean: float = 0.0, + reward_std: float = 1.0, pnl_mean: float = 0.01, - pnl_std: float | None = None, + pnl_std: Optional[float] = None, trade_duration_dist: str = "uniform", idle_pattern: str = "mixed", - seed: int | None = None, + seed: Optional[int] = None, ) -> pd.DataFrame: """Generate a synthetic statistical DataFrame. @@ -222,8 +217,8 @@ class RewardSpaceTestBase(unittest.TestCase): ---------- n : int Row count. - reward_total_mean, reward_total_std : float - Normal parameters for reward_total. + reward_mean, reward_std : float + Normal parameters for reward. pnl_mean : float Mean PnL. pnl_std : float | None @@ -237,13 +232,13 @@ class RewardSpaceTestBase(unittest.TestCase): Returns ------- - pd.DataFrame with columns: reward_total, reward_idle, reward_hold, reward_exit, + pd.DataFrame with columns: reward, reward_idle, reward_hold, reward_exit, pnl, trade_duration, idle_duration, position. Guarantees: no NaN; reward_idle==0 where idle_duration==0. """ if seed is not None: self.seed_all(seed) pnl_std_eff = self.TEST_PNL_STD if pnl_std is None else pnl_std - reward_total = np.random.normal(reward_total_mean, reward_total_std, n) + reward = np.random.normal(reward_mean, reward_std, n) pnl = np.random.normal(pnl_mean, pnl_std_eff, n) if trade_duration_dist == "exponential": trade_duration = np.random.exponential(20, n) @@ -264,7 +259,7 @@ class RewardSpaceTestBase(unittest.TestCase): position = np.random.choice([0.0, 0.5, 1.0], n) return pd.DataFrame( { - "reward_total": reward_total, + "reward": reward, "reward_idle": reward_idle, "reward_hold": reward_hold, "reward_exit": reward_exit, @@ -319,7 +314,7 @@ class RewardSpaceTestBase(unittest.TestCase): a: Union[float, int], b: Union[float, int], places: int, - msg: str | None = None, + msg: Optional[str] = None, ) -> None: """Bridge for legacy places-based approximate equality. 
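The canonical sweep above drives `apply_potential_shaping` transition by transition; the shaping term it accumulates follows the standard PBRS form Δ = γ·Φ(next) − Φ(prev), with the exit potential substituted for Φ(next) on exit transitions. A minimal sketch with a toy potential (the repository's Φ is `_compute_hold_potential`, not reproduced here; treating the canonical exit potential as zero is an assumption made only for illustration):

```python
import math


def toy_hold_potential(pnl: float, duration_ratio: float) -> float:
    # Toy stand-in for Φ(s); the real implementation is _compute_hold_potential.
    return math.tanh(pnl) * (1.0 - min(max(duration_ratio, 0.0), 1.0))


def shaping_delta(prev_potential: float, next_potential: float, gamma: float = 0.95) -> float:
    # PBRS shaping: Δ = γ·Φ(next) − Φ(prev)
    return gamma * next_potential - prev_potential


prev_phi = toy_hold_potential(0.01, 0.2)

# Hold transition: Φ(next) is the hold potential of the next state.
print(shaping_delta(prev_phi, toy_hold_potential(0.015, 0.25)))

# Exit transition, assuming the canonical exit potential collapses to 0: Δ = −Φ(prev).
print(shaping_delta(prev_phi, 0.0))
```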
@@ -379,9 +374,7 @@ class RewardSpaceTestBase(unittest.TestCase): data = list(seq) if len(data) < 2: return - if (non_increasing and non_decreasing) or ( - not non_increasing and not non_decreasing - ): + if (non_increasing and non_decreasing) or (not non_increasing and not non_decreasing): self.fail("Specify exactly one monotonic direction") for a, b in zip(data, data[1:]): if non_increasing: @@ -449,25 +442,23 @@ class RewardSpaceTestBase(unittest.TestCase): np.random.seed(seed) random.seed(seed) - # Shared helper data generators (moved here for subclass availability) + # Shared helper data generators available to subclasses. def _const_df(self, n: int = 64) -> pd.DataFrame: return pd.DataFrame( { - "reward_total": np.ones(n) * 0.5, + "reward": np.ones(n) * 0.5, "pnl": np.zeros(n), "trade_duration": np.ones(n) * 10, "idle_duration": np.ones(n) * 3, } ) - def _shift_scale_df( - self, n: int = 256, shift: float = 0.0, scale: float = 1.0 - ) -> pd.DataFrame: + def _shift_scale_df(self, n: int = 256, shift: float = 0.0, scale: float = 1.0) -> pd.DataFrame: rng = np.random.default_rng(123) base = rng.normal(0, 1, n) return pd.DataFrame( { - "reward_total": shift + scale * base, + "reward": shift + scale * base, "pnl": shift + scale * base * 0.2, "trade_duration": rng.exponential(20, n), "idle_duration": rng.exponential(10, n), @@ -491,9 +482,7 @@ class TestIntegration(RewardSpaceTestBase): str(self.output_path), ] - result = subprocess.run( - cmd, capture_output=True, text=True, cwd=Path(__file__).parent - ) + result = subprocess.run(cmd, capture_output=True, text=True, cwd=Path(__file__).parent) # Exit 0 self.assertEqual(result.returncode, 0, f"CLI failed: {result.stderr}") @@ -540,12 +529,8 @@ class TestIntegration(RewardSpaceTestBase): ] # Execute both runs - result1 = subprocess.run( - cmd1, capture_output=True, text=True, cwd=Path(__file__).parent - ) - result2 = subprocess.run( - cmd2, capture_output=True, text=True, cwd=Path(__file__).parent - ) + result1 = subprocess.run(cmd1, capture_output=True, text=True, cwd=Path(__file__).parent) + result2 = subprocess.run(cmd2, capture_output=True, text=True, cwd=Path(__file__).parent) self.assertEqual(result1.returncode, 0) self.assertEqual(result2.returncode, 0) @@ -602,7 +587,7 @@ class TestStatistics(RewardSpaceTestBase): "idle_duration": idle_duration, "reward_idle": reward_idle, "position": np.random.choice([0.0, 0.5, 1.0], n), - "reward_total": np.random.normal(0, 1, n), + "reward": np.random.normal(0, 1, n), "pnl": np.random.normal(0, self.TEST_PNL_STD, n), "trade_duration": np.random.exponential(20, n), } @@ -614,7 +599,7 @@ class TestStatistics(RewardSpaceTestBase): df2 = self._make_idle_variance_df(100) # Shift second dataset - df2["reward_total"] += 0.1 + df2["reward"] += 0.1 metrics = compute_distribution_shift_metrics(df1, df2) @@ -708,14 +693,10 @@ class TestStatistics(RewardSpaceTestBase): diagnostics = distribution_diagnostics(df) # Expect keys - expected_prefixes = ["reward_total_", "pnl_"] + expected_prefixes = ["reward_", "pnl_"] for prefix in expected_prefixes: - matching_keys = [ - key for key in diagnostics.keys() if key.startswith(prefix) - ] - self.assertGreater( - len(matching_keys), 0, f"Should have diagnostics for {prefix}" - ) + matching_keys = [key for key in diagnostics.keys() if key.startswith(prefix)] + self.assertGreater(len(matching_keys), 0, f"Should have diagnostics for {prefix}") # Basic moments expected_suffixes = ["mean", "std", "skewness", "kurtosis"] @@ -731,17 +712,6 @@ class 
TestStatistics(RewardSpaceTestBase): results = statistical_hypothesis_tests(base) self.assertIsInstance(results, dict) - def test_stats_constant_distribution_bootstrap_and_diagnostics(self): - """Bootstrap on constant columns (degenerate).""" - df = self._const_df(80) - res = bootstrap_confidence_intervals( - df, ["reward_total", "pnl"], n_bootstrap=200, confidence_level=0.95 - ) - for k, (mean, lo, hi) in res.items(): # tuple: mean, low, high - self.assertAlmostEqualFloat(mean, lo, tolerance=2e-9) - self.assertAlmostEqualFloat(mean, hi, tolerance=2e-9) - self.assertLessEqual(hi - lo, 2e-9) - def test_stats_js_distance_symmetry_violin(self): """JS distance symmetry d(P,Q)==d(Q,P).""" df1 = self._shift_scale_df(300, shift=0.0) @@ -751,9 +721,7 @@ class TestStatistics(RewardSpaceTestBase): if js_key is None: self.skipTest("JS distance key not present in metrics output") metrics_swapped = compute_distribution_shift_metrics(df2, df1) - js_key_swapped = next( - (k for k in metrics_swapped if k.endswith("pnl_js_distance")), None - ) + js_key_swapped = next((k for k in metrics_swapped if k.endswith("pnl_js_distance")), None) self.assertIsNotNone(js_key_swapped) self.assertAlmostEqualFloat( metrics[js_key], @@ -762,59 +730,12 @@ class TestStatistics(RewardSpaceTestBase): rtol=self.TOL_RELATIVE, ) - def test_stats_js_distance_symmetry_helper(self): - """JS distance properties: symmetry d(P,Q)=d(Q,P) and upper bound sqrt(log 2).""" - rng = np.random.default_rng(777) - p_raw = rng.uniform(0.0, 1.0, size=400) - q_raw = rng.uniform(0.0, 1.0, size=400) - p = p_raw / p_raw.sum() - q = q_raw / q_raw.sum() - - def _kl(a: np.ndarray, b: np.ndarray) -> float: - mask = (a > 0) & (b > 0) - return float(np.sum(a[mask] * np.log(a[mask] / b[mask]))) - - def js_distance(a: np.ndarray, b: np.ndarray) -> float: - m = 0.5 * (a + b) - js_div = 0.5 * _kl(a, m) + 0.5 * _kl(b, m) - return math.sqrt(max(js_div, 0.0)) - - # Symmetry - self.assertSymmetric( - js_distance, p, q, atol=self.TOL_IDENTITY_STRICT, rtol=self.TOL_RELATIVE - ) - # Upper bound - self.assertLessEqual( - js_distance(p, q), self.JS_DISTANCE_UPPER_BOUND + self.TOL_IDENTITY_STRICT - ) - - def test_stats_bootstrap_shrinkage_with_sample_size(self): - """Bootstrap CI shrinks ~1/sqrt(n).""" - small = self._shift_scale_df(80) - large = self._shift_scale_df(800) - res_small = bootstrap_confidence_intervals( - small, ["reward_total"], n_bootstrap=400 - ) - res_large = bootstrap_confidence_intervals( - large, ["reward_total"], n_bootstrap=400 - ) - (_, lo_s, hi_s) = list(res_small.values())[0] - (_, lo_l, hi_l) = list(res_large.values())[0] - hw_small = (hi_s - lo_s) / 2.0 - hw_large = (hi_l - lo_l) / 2.0 - self.assertFinite(hw_small, name="hw_small") - self.assertFinite(hw_large, name="hw_large") - self.assertLess(hw_large, hw_small * 0.55) - def test_stats_variance_vs_duration_spearman_sign(self): """trade_duration up => pnl variance up (rank corr >0).""" rng = np.random.default_rng(99) n = 250 trade_duration = np.linspace(1, 300, n) pnl = rng.normal(0, 1 + trade_duration / 400.0, n) - df = pd.DataFrame( - {"trade_duration": trade_duration, "pnl": pnl, "reward_total": pnl} - ) ranks_dur = pd.Series(trade_duration).rank().to_numpy() ranks_var = pd.Series(np.abs(pnl)).rank().to_numpy() rho = np.corrcoef(ranks_dur, ranks_var)[0, 1] @@ -842,9 +763,9 @@ class TestStatistics(RewardSpaceTestBase): df_a = self._shift_scale_df(120) df_b = self._shift_scale_df(180, shift=0.2) m_concat = pd.concat([df_a["pnl"], df_b["pnl"]]).mean() - m_weighted = ( - df_a["pnl"].mean() * 
len(df_a) + df_b["pnl"].mean() * len(df_b) - ) / (len(df_a) + len(df_b)) + m_weighted = (df_a["pnl"].mean() * len(df_a) + df_b["pnl"].mean() * len(df_b)) / ( + len(df_a) + len(df_b) + ) self.assertAlmostEqualFloat( m_concat, m_weighted, @@ -852,15 +773,6 @@ class TestStatistics(RewardSpaceTestBase): rtol=self.TOL_RELATIVE, ) - def test_stats_ks_statistic_bounds(self): - """KS in [0,1].""" - df1 = self._shift_scale_df(150) - df2 = self._shift_scale_df(150, shift=0.4) - metrics = compute_distribution_shift_metrics(df1, df2) - for k, v in metrics.items(): - if k.endswith("_ks_statistic"): - self.assertWithin(v, 0.0, 1.0, name=k) - def test_stats_bh_correction_null_false_positive_rate(self): """Null: low BH discovery rate.""" rng = np.random.default_rng(1234) @@ -868,7 +780,7 @@ class TestStatistics(RewardSpaceTestBase): df = pd.DataFrame( { "pnl": rng.normal(0, 1, n), - "reward_total": rng.normal(0, 1, n), + "reward": rng.normal(0, 1, n), "idle_duration": rng.exponential(5, n), } ) @@ -885,9 +797,7 @@ class TestStatistics(RewardSpaceTestBase): flags.append(bool(v["significant"])) if flags: rate = sum(flags) / len(flags) - self.assertLess( - rate, 0.15, f"BH null FP rate too high under null: {rate:.3f}" - ) + self.assertLess(rate, 0.15, f"BH null FP rate too high under null: {rate:.3f}") def test_stats_half_life_monotonic_series(self): """Smoothed exponential decay monotonic.""" @@ -899,23 +809,9 @@ class TestStatistics(RewardSpaceTestBase): y_smooth = np.convolve(y_noisy, np.ones(window) / window, mode="valid") self.assertMonotonic(y_smooth, non_increasing=True, tolerance=1e-5) - def test_stats_bootstrap_confidence_intervals_basic(self): - """Bootstrap CI calculation (basic).""" - test_data = self.make_stats_df(n=100, seed=self.SEED) - results = bootstrap_confidence_intervals( - test_data, - ["reward_total", "pnl"], - n_bootstrap=100, - ) - for metric, (mean, ci_low, ci_high) in results.items(): - self.assertFinite(mean, name=f"mean[{metric}]") - self.assertFinite(ci_low, name=f"ci_low[{metric}]") - self.assertFinite(ci_high, name=f"ci_high[{metric}]") - self.assertLess(ci_low, ci_high) - def test_stats_hypothesis_seed_reproducibility(self): """Seed reproducibility for statistical_hypothesis_tests + bootstrap.""" - df = self.make_stats_df(n=300, seed=123, idle_pattern="mixed") + df = self.make_stats_df(n=300, seed=self.SEED, idle_pattern="mixed") r1 = statistical_hypothesis_tests(df, seed=777) r2 = statistical_hypothesis_tests(df, seed=777) self.assertEqual(set(r1.keys()), set(r2.keys())) @@ -931,10 +827,22 @@ class TestStatistics(RewardSpaceTestBase): continue self.assertEqual(v1, v2, f"Mismatch for {k}:{field}") # Bootstrap reproducibility - metrics = ["reward_total", "pnl"] + metrics = ["reward", "pnl"] ci_a = bootstrap_confidence_intervals(df, metrics, n_bootstrap=150, seed=2024) ci_b = bootstrap_confidence_intervals(df, metrics, n_bootstrap=150, seed=2024) - self.assertEqual(ci_a, ci_b) + # Compare floats with strict identity + scale-aware relative tolerance to avoid flaky exact equality + for metric in metrics: + m_a, lo_a, hi_a = ci_a[metric] + m_b, lo_b, hi_b = ci_b[metric] + self.assertAlmostEqualFloat( + m_a, m_b, tolerance=self.TOL_IDENTITY_STRICT, rtol=self.TOL_RELATIVE + ) + self.assertAlmostEqualFloat( + lo_a, lo_b, tolerance=self.TOL_IDENTITY_STRICT, rtol=self.TOL_RELATIVE + ) + self.assertAlmostEqualFloat( + hi_a, hi_b, tolerance=self.TOL_IDENTITY_STRICT, rtol=self.TOL_RELATIVE + ) def test_stats_distribution_metrics_mathematical_bounds(self): """Mathematical bounds and 
validity of distribution shift metrics.""" @@ -974,10 +882,9 @@ class TestStatistics(RewardSpaceTestBase): def test_stats_heteroscedasticity_pnl_validation(self): """PnL variance increases with trade duration (heteroscedasticity).""" df = simulate_samples( + params=self.base_params(max_trade_duration_candles=100), num_samples=1000, seed=123, - params=self.DEFAULT_PARAMS, - max_trade_duration=100, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, risk_reward_ratio=self.TEST_RR, @@ -1004,15 +911,11 @@ class TestStatistics(RewardSpaceTestBase): """All statistical functions respect bounds.""" df = self.make_stats_df(n=300, seed=self.SEED, idle_pattern="all_nonzero") diagnostics = distribution_diagnostics(df) - for col in ["reward_total", "pnl", "trade_duration", "idle_duration"]: + for col in ["reward", "pnl", "trade_duration", "idle_duration"]: if f"{col}_skewness" in diagnostics: - self.assertFinite( - diagnostics[f"{col}_skewness"], name=f"skewness[{col}]" - ) + self.assertFinite(diagnostics[f"{col}_skewness"], name=f"skewness[{col}]") if f"{col}_kurtosis" in diagnostics: - self.assertFinite( - diagnostics[f"{col}_kurtosis"], name=f"kurtosis[{col}]" - ) + self.assertFinite(diagnostics[f"{col}_kurtosis"], name=f"kurtosis[{col}]") if f"{col}_shapiro_pval" in diagnostics: self.assertPValue( diagnostics[f"{col}_shapiro_pval"], @@ -1021,9 +924,7 @@ class TestStatistics(RewardSpaceTestBase): hypothesis_results = statistical_hypothesis_tests(df, seed=self.SEED) for test_name, result in hypothesis_results.items(): if "p_value" in result: - self.assertPValue( - result["p_value"], msg=f"p-value bounds for {test_name}" - ) + self.assertPValue(result["p_value"], msg=f"p-value bounds for {test_name}") if "effect_size_epsilon_sq" in result: eps2 = result["effect_size_epsilon_sq"] self.assertFinite(eps2, name=f"epsilon_sq[{test_name}]") @@ -1040,10 +941,9 @@ class TestStatistics(RewardSpaceTestBase): def test_stats_benjamini_hochberg_adjustment(self): """BH adjustment adds p_value_adj & significant_adj with valid bounds.""" df = simulate_samples( + params=self.base_params(max_trade_duration_candles=100), num_samples=600, seed=123, - params=self.DEFAULT_PARAMS, - max_trade_duration=100, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, risk_reward_ratio=self.TEST_RR, @@ -1052,9 +952,7 @@ class TestStatistics(RewardSpaceTestBase): pnl_base_std=self.TEST_PNL_STD, pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE, ) - results_adj = statistical_hypothesis_tests( - df, adjust_method="benjamini_hochberg", seed=777 - ) + results_adj = statistical_hypothesis_tests(df, adjust_method="benjamini_hochberg", seed=777) self.assertGreater(len(results_adj), 0) for name, res in results_adj.items(): self.assertIn("p_value", res) @@ -1093,7 +991,6 @@ class TestRewardComponents(RewardSpaceTestBase): pnl=0.02 if expected_type == "exit_component" else 0.0, trade_duration=50 if position != Positions.Neutral else 0, idle_duration=10 if position == Positions.Neutral else 0, - max_trade_duration=100, max_unrealized_profit=0.03, min_unrealized_profit=-0.01, position=position, @@ -1118,7 +1015,6 @@ class TestRewardComponents(RewardSpaceTestBase): ctx = self.make_ctx( pnl=0.0, trade_duration=1, - max_trade_duration=100, max_unrealized_profit=0.0, min_unrealized_profit=-0.02, position=Positions.Long, @@ -1138,9 +1034,6 @@ class TestRewardComponents(RewardSpaceTestBase): pnl=0.0, trade_duration=0, idle_duration=40, - max_trade_duration=128, - max_unrealized_profit=0.0, - 
min_unrealized_profit=0.0, position=Positions.Neutral, action=Actions.Neutral, ) @@ -1218,9 +1111,6 @@ class TestRewardComponents(RewardSpaceTestBase): pnl=0.0, trade_duration=0, idle_duration=30, - max_trade_duration=100, - max_unrealized_profit=0.0, - min_unrealized_profit=0.0, position=Positions.Neutral, action=Actions.Neutral, ) @@ -1234,11 +1124,11 @@ class TestRewardComponents(RewardSpaceTestBase): action_masking=True, ) self.assertEqual( - br.idle_penalty, 0.0, "Idle penalty should be zero when profit_target=0" - ) - self.assertEqual( - br.total, 0.0, "Total reward should be zero in this configuration" + br.idle_penalty, + 0.0, + "Idle penalty should be zero when profit_target=0", ) + self.assertEqual(br.total, 0.0, "Total reward should be zero in this configuration") def test_win_reward_factor_saturation(self): """Saturation test: pnl amplification factor should monotonically approach (1 + win_reward_factor).""" @@ -1265,7 +1155,6 @@ class TestRewardComponents(RewardSpaceTestBase): pnl=pnl, trade_duration=0, # duration_ratio=0 -> attenuation = 1 idle_duration=0, - max_trade_duration=100, max_unrealized_profit=pnl, # neutral wrt efficiency (disabled anyway) min_unrealized_profit=0.0, position=Positions.Long, @@ -1333,7 +1222,6 @@ class TestRewardComponents(RewardSpaceTestBase): pnl=0.025, trade_duration=40, idle_duration=0, - max_trade_duration=100, max_unrealized_profit=0.03, min_unrealized_profit=0.0, position=Positions.Long, @@ -1344,7 +1232,6 @@ class TestRewardComponents(RewardSpaceTestBase): pnl=-self.TEST_PNL_STD, trade_duration=60, idle_duration=0, - max_trade_duration=100, max_unrealized_profit=0.01, min_unrealized_profit=-0.04, position=Positions.Long, @@ -1355,7 +1242,6 @@ class TestRewardComponents(RewardSpaceTestBase): pnl=0.0, trade_duration=0, idle_duration=35, - max_trade_duration=120, max_unrealized_profit=0.0, min_unrealized_profit=0.0, position=Positions.Neutral, @@ -1366,7 +1252,6 @@ class TestRewardComponents(RewardSpaceTestBase): pnl=0.0, trade_duration=80, idle_duration=0, - max_trade_duration=100, max_unrealized_profit=0.04, min_unrealized_profit=-0.01, position=Positions.Long, @@ -1402,7 +1287,7 @@ class TestRewardComponents(RewardSpaceTestBase): + br.idle_penalty + br.hold_penalty + br.invalid_penalty - + br.shaping_reward + + br.reward_shaping + br.entry_additive + br.exit_additive ) @@ -1458,7 +1343,6 @@ class TestRewardComponents(RewardSpaceTestBase): pnl=pnl, trade_duration=55, idle_duration=0, - max_trade_duration=100, max_unrealized_profit=pnl if pnl > 0 else 0.01, min_unrealized_profit=pnl if pnl < 0 else -0.01, position=Positions.Long, @@ -1468,7 +1352,6 @@ class TestRewardComponents(RewardSpaceTestBase): pnl=pnl, trade_duration=55, idle_duration=0, - max_trade_duration=100, max_unrealized_profit=pnl if pnl > 0 else 0.01, min_unrealized_profit=pnl if pnl < 0 else -0.01, position=Positions.Short, @@ -1522,10 +1405,9 @@ class TestAPIAndHelpers(RewardSpaceTestBase): def test_api_simulation_and_reward_smoke(self): df = simulate_samples( + params=self.base_params(max_trade_duration_candles=40), num_samples=20, seed=7, - params=self.DEFAULT_PARAMS, - max_trade_duration=40, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, risk_reward_ratio=self.TEST_RR, @@ -1542,7 +1424,6 @@ class TestAPIAndHelpers(RewardSpaceTestBase): pnl=float(row["pnl"]), trade_duration=int(row["trade_duration"]), idle_duration=int(row["idle_duration"]), - max_trade_duration=40, max_unrealized_profit=float(row["pnl"]) + 0.01, 
min_unrealized_profit=float(row["pnl"]) - 0.01, position=Positions.Long, @@ -1562,10 +1443,9 @@ class TestAPIAndHelpers(RewardSpaceTestBase): def test_simulate_samples_trading_modes_spot_vs_margin(self): """simulate_samples coverage: spot should forbid shorts, margin should allow them.""" df_spot = simulate_samples( + params=self.base_params(max_trade_duration_candles=100), num_samples=80, seed=self.SEED, - params=self.DEFAULT_PARAMS, - max_trade_duration=100, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, risk_reward_ratio=self.TEST_RR, @@ -1574,17 +1454,16 @@ class TestAPIAndHelpers(RewardSpaceTestBase): pnl_base_std=self.TEST_PNL_STD, pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE, ) - short_positions_spot = ( - df_spot["position"] == float(Positions.Short.value) - ).sum() + short_positions_spot = (df_spot["position"] == float(Positions.Short.value)).sum() self.assertEqual( - short_positions_spot, 0, "Spot mode must not contain short positions" + short_positions_spot, + 0, + "Spot mode must not contain short positions", ) df_margin = simulate_samples( + params=self.base_params(max_trade_duration_candles=100), num_samples=80, seed=self.SEED, - params=self.DEFAULT_PARAMS, - max_trade_duration=100, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, risk_reward_ratio=self.TEST_RR, @@ -1599,7 +1478,7 @@ class TestAPIAndHelpers(RewardSpaceTestBase): "idle_duration", "position", "action", - "reward_total", + "reward", "reward_invalid", "reward_idle", "reward_hold", @@ -1613,10 +1492,9 @@ class TestAPIAndHelpers(RewardSpaceTestBase): """Test _to_bool with various inputs.""" # Test via simulate_samples which uses action_masking parameter df1 = simulate_samples( + params=self.base_params(action_masking="true", max_trade_duration_candles=50), num_samples=10, seed=self.SEED, - params={"action_masking": "true"}, - max_trade_duration=50, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, risk_reward_ratio=self.TEST_RR, @@ -1628,10 +1506,9 @@ class TestAPIAndHelpers(RewardSpaceTestBase): self.assertIsInstance(df1, pd.DataFrame) df2 = simulate_samples( + params=self.base_params(action_masking="false", max_trade_duration_candles=50), num_samples=10, seed=self.SEED, - params={"action_masking": "false"}, - max_trade_duration=50, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, risk_reward_ratio=self.TEST_RR, @@ -1646,10 +1523,9 @@ class TestAPIAndHelpers(RewardSpaceTestBase): """Test _is_short_allowed via different trading modes.""" # Test futures mode (shorts allowed) df_futures = simulate_samples( + params=self.base_params(max_trade_duration_candles=50), num_samples=100, seed=self.SEED, - params=self.DEFAULT_PARAMS, - max_trade_duration=50, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, risk_reward_ratio=self.TEST_RR, @@ -1661,9 +1537,7 @@ class TestAPIAndHelpers(RewardSpaceTestBase): # Should have some short positions short_positions = (df_futures["position"] == float(Positions.Short.value)).sum() - self.assertGreater( - short_positions, 0, "Futures mode should allow short positions" - ) + self.assertGreater(short_positions, 0, "Futures mode should allow short positions") def test_get_float_param(self): """Test float parameter extraction.""" @@ -1672,10 +1546,8 @@ class TestAPIAndHelpers(RewardSpaceTestBase): self.assertEqual(_get_float_param(params, "test_int", 0.0), 2.0) # Non parseable string -> NaN fallback in tolerant parser val_str = _get_float_param(params, "test_str", 
0.0) - if isinstance(val_str, float) and math.isnan(val_str): - pass - else: - self.fail("Expected NaN for non-numeric string in _get_float_param") + self.assertTrue(isinstance(val_str, float)) + self.assertTrue(math.isnan(val_str)) self.assertEqual(_get_float_param(params, "missing", 3.14), 3.14) def test_get_str_param(self): @@ -1715,7 +1587,7 @@ class TestAPIAndHelpers(RewardSpaceTestBase): self.assertEqual(_get_int_param({"k": 9.99}, "k", 0), 9) self.assertEqual(_get_int_param({"k": -3.7}, "k", 0), -3) # Non-finite floats fallback - self.assertEqual(_get_int_param({"k": float("nan")}, "k", 4), 4) + self.assertEqual(_get_int_param({"k": np.nan}, "k", 4), 4) self.assertEqual(_get_int_param({"k": float("inf")}, "k", 4), 4) # String numerics (int, float, exponent) self.assertEqual(_get_int_param({"k": "42"}, "k", 0), 42) @@ -1754,10 +1626,9 @@ class TestAPIAndHelpers(RewardSpaceTestBase): # Create comprehensive test data test_data = simulate_samples( + params=self.base_params(max_trade_duration_candles=100), num_samples=200, seed=self.SEED, - params=self.DEFAULT_PARAMS, - max_trade_duration=100, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, risk_reward_ratio=self.TEST_RR, @@ -1773,7 +1644,6 @@ class TestAPIAndHelpers(RewardSpaceTestBase): write_complete_statistical_analysis( test_data, output_path, - max_trade_duration=100, profit_target=self.TEST_PROFIT_TARGET, seed=self.SEED, real_df=None, @@ -1782,14 +1652,13 @@ class TestAPIAndHelpers(RewardSpaceTestBase): # Check that main report is created main_report = output_path / "statistical_analysis.md" self.assertTrue( - main_report.exists(), "Main statistical analysis should be created" + main_report.exists(), + "Main statistical analysis should be created", ) # Check for other expected files feature_file = output_path / "feature_importance.csv" - self.assertTrue( - feature_file.exists(), "Feature importance should be created" - ) + self.assertTrue(feature_file.exists(), "Feature importance should be created") class TestPrivateFunctions(RewardSpaceTestBase): @@ -1798,11 +1667,10 @@ class TestPrivateFunctions(RewardSpaceTestBase): def test_idle_penalty_via_rewards(self): """Test idle penalty calculation via reward calculation.""" # Create context that will trigger idle penalty - context = RewardContext( + context = self.make_ctx( pnl=0.0, trade_duration=0, idle_duration=20, - max_trade_duration=100, max_unrealized_profit=0.0, min_unrealized_profit=0.0, position=Positions.Neutral, @@ -1824,7 +1692,7 @@ class TestPrivateFunctions(RewardSpaceTestBase): self.assertAlmostEqualFloat( breakdown.total, breakdown.idle_penalty - + breakdown.shaping_reward + + breakdown.reward_shaping + breakdown.entry_additive + breakdown.exit_additive, tolerance=self.TOL_IDENTITY_RELAXED, @@ -1834,11 +1702,10 @@ class TestPrivateFunctions(RewardSpaceTestBase): def test_hold_penalty_via_rewards(self): """Test hold penalty calculation via reward calculation.""" # Create context that will trigger hold penalty - context = RewardContext( + context = self.make_ctx( pnl=0.01, trade_duration=150, idle_duration=0, # Long duration - max_trade_duration=100, max_unrealized_profit=0.02, min_unrealized_profit=0.0, position=Positions.Long, @@ -1859,7 +1726,7 @@ class TestPrivateFunctions(RewardSpaceTestBase): self.assertAlmostEqualFloat( breakdown.total, breakdown.hold_penalty - + breakdown.shaping_reward + + breakdown.reward_shaping + breakdown.entry_additive + breakdown.exit_additive, tolerance=self.TOL_IDENTITY_RELAXED, @@ -1870,18 +1737,22 @@ class 
TestPrivateFunctions(RewardSpaceTestBase): """Test exit reward calculation with various scenarios.""" scenarios = [ (Positions.Long, Actions.Long_exit, 0.05, "Profitable long exit"), - (Positions.Short, Actions.Short_exit, -0.03, "Profitable short exit"), + ( + Positions.Short, + Actions.Short_exit, + -0.03, + "Profitable short exit", + ), (Positions.Long, Actions.Long_exit, -0.02, "Losing long exit"), (Positions.Short, Actions.Short_exit, 0.02, "Losing short exit"), ] for position, action, pnl, description in scenarios: with self.subTest(description=description): - context = RewardContext( + context = self.make_ctx( pnl=pnl, trade_duration=50, idle_duration=0, - max_trade_duration=100, max_unrealized_profit=max(pnl + 0.01, 0.01), min_unrealized_profit=min(pnl - 0.01, -0.01), position=position, @@ -1911,11 +1782,10 @@ class TestPrivateFunctions(RewardSpaceTestBase): def test_invalid_action_handling(self): """Test invalid action penalty.""" # Try to exit long when in short position (invalid) - context = RewardContext( + context = self.make_ctx( pnl=0.02, trade_duration=50, idle_duration=0, - max_trade_duration=100, max_unrealized_profit=0.03, min_unrealized_profit=0.01, position=Positions.Short, @@ -1933,12 +1803,14 @@ class TestPrivateFunctions(RewardSpaceTestBase): ) self.assertLess( - breakdown.invalid_penalty, 0, "Invalid action should have negative penalty" + breakdown.invalid_penalty, + 0, + "Invalid action should have negative penalty", ) self.assertAlmostEqualFloat( breakdown.total, breakdown.invalid_penalty - + breakdown.shaping_reward + + breakdown.reward_shaping + breakdown.entry_additive + breakdown.exit_additive, tolerance=self.TOL_IDENTITY_RELAXED, @@ -1960,11 +1832,10 @@ class TestPrivateFunctions(RewardSpaceTestBase): for trade_duration, description in test_cases: with self.subTest(duration=trade_duration, desc=description): - context = RewardContext( + context = self.make_ctx( pnl=0.0, # Neutral PnL to isolate hold penalty trade_duration=trade_duration, idle_duration=0, - max_trade_duration=max_duration, max_unrealized_profit=0.0, min_unrealized_profit=0.0, position=Positions.Long, @@ -2008,7 +1879,7 @@ class TestPrivateFunctions(RewardSpaceTestBase): self.assertAlmostEqualFloat( breakdown.total, breakdown.hold_penalty - + breakdown.shaping_reward + + breakdown.reward_shaping + breakdown.entry_additive + breakdown.exit_additive, tolerance=self.TOL_IDENTITY_RELAXED, @@ -2017,16 +1888,15 @@ class TestPrivateFunctions(RewardSpaceTestBase): def test_hold_penalty_progressive_scaling(self): """Test that hold penalty scales progressively after max_duration.""" - max_duration = 100 + params = self.base_params(max_trade_duration_candles=100) durations = [150, 200, 300] # All > max_duration penalties: list[float] = [] for duration in durations: - context = RewardContext( + context = self.make_ctx( pnl=0.0, trade_duration=duration, idle_duration=0, - max_trade_duration=max_duration, max_unrealized_profit=0.0, min_unrealized_profit=0.0, position=Positions.Long, @@ -2035,7 +1905,7 @@ class TestPrivateFunctions(RewardSpaceTestBase): breakdown = calculate_reward( context, - self.DEFAULT_PARAMS, + params, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, risk_reward_ratio=self.TEST_RR, @@ -2050,7 +1920,7 @@ class TestPrivateFunctions(RewardSpaceTestBase): self.assertLessEqual( penalties[i], penalties[i - 1], - f"Penalty should increase with duration: {penalties[i]} > {penalties[i-1]}", + f"Penalty should increase with duration: {penalties[i]} > {penalties[i - 1]}", 
) def test_new_invariant_and_warn_parameters(self): @@ -2062,11 +1932,10 @@ class TestPrivateFunctions(RewardSpaceTestBase): self.assertIn("check_invariants", params) self.assertIn("exit_factor_threshold", params) - context = RewardContext( + context = self.make_ctx( pnl=0.05, trade_duration=300, idle_duration=0, - max_trade_duration=100, max_unrealized_profit=0.06, min_unrealized_profit=0.0, position=Positions.Long, @@ -2088,15 +1957,14 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): """Robustness & boundary assertions: invariants, attenuation maths, parameter edges, scaling, warnings.""" def test_decomposition_integrity(self): - """reward_total must equal the single active core component under mutually exclusive scenarios (idle/hold/exit/invalid).""" + """reward must equal the single active core component under mutually exclusive scenarios (idle/hold/exit/invalid).""" scenarios = [ # Idle penalty only dict( - ctx=RewardContext( + ctx=self.make_ctx( pnl=0.0, trade_duration=0, idle_duration=25, - max_trade_duration=100, max_unrealized_profit=0.0, min_unrealized_profit=0.0, position=Positions.Neutral, @@ -2106,11 +1974,10 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): ), # Hold penalty only dict( - ctx=RewardContext( + ctx=self.make_ctx( pnl=0.0, trade_duration=150, idle_duration=0, - max_trade_duration=100, max_unrealized_profit=0.0, min_unrealized_profit=0.0, position=Positions.Long, @@ -2124,7 +1991,6 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): pnl=self.TEST_PROFIT_TARGET, trade_duration=60, idle_duration=0, - max_trade_duration=100, max_unrealized_profit=0.05, min_unrealized_profit=0.01, position=Positions.Long, @@ -2134,11 +2000,10 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): ), # Invalid action only dict( - ctx=RewardContext( + ctx=self.make_ctx( pnl=0.01, trade_duration=10, idle_duration=0, - max_trade_duration=100, max_unrealized_profit=0.02, min_unrealized_profit=0.0, position=Positions.Short, @@ -2152,7 +2017,7 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): active_label: str = sc["active"] # type: ignore[index] with self.subTest(active=active_label): # Build parameters disabling shaping and additives to enforce strict decomposition - params_local = self.base_params( + params = self.base_params( entry_additive_enabled=False, exit_additive_enabled=False, hold_potential_enabled=False, @@ -2161,7 +2026,7 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): ) br = calculate_reward( ctx_obj, - params_local, + params, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, risk_reward_ratio=self.TEST_RR, @@ -2191,7 +2056,7 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): ) # Shaping and additives explicitly disabled self.assertAlmostEqualFloat( - br.shaping_reward, 0.0, tolerance=self.TOL_IDENTITY_RELAXED + br.reward_shaping, 0.0, tolerance=self.TOL_IDENTITY_RELAXED ) self.assertAlmostEqualFloat( br.entry_additive, 0.0, tolerance=self.TOL_IDENTITY_RELAXED @@ -2203,10 +2068,9 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): def test_pnl_invariant_exit_only(self): """Invariant: only exit actions have non-zero PnL (robustness category).""" df = simulate_samples( + params=self.base_params(max_trade_duration_candles=50), num_samples=200, seed=self.SEED, - params=self.DEFAULT_PARAMS, - max_trade_duration=50, base_factor=self.TEST_BASE_FACTOR, profit_target=self.TEST_PROFIT_TARGET, risk_reward_ratio=self.TEST_RR, @@ -2224,13 +2088,13 @@ 
class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): places=10, msg="PnL invariant violation: total PnL != sum of exit PnL", ) - non_zero_pnl_actions = set(df[df["pnl"] != 0]["action"].unique()) + non_zero_pnl_actions = set(df[df["pnl"].abs() > self.EPS_BASE]["action"].unique()) expected_exit_actions = {2.0, 4.0} self.assertTrue( non_zero_pnl_actions.issubset(expected_exit_actions), f"Non-exit actions have PnL: {non_zero_pnl_actions - expected_exit_actions}", ) - invalid_combinations = df[(df["pnl"] == 0) & (df["reward_exit"] != 0)] + invalid_combinations = df[(df["pnl"].abs() <= self.EPS_BASE) & (df["reward_exit"] != 0)] self.assertEqual(len(invalid_combinations), 0) def test_exit_factor_mathematical_formulas(self): @@ -2239,7 +2103,6 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): pnl=0.05, trade_duration=50, idle_duration=0, - max_trade_duration=100, max_unrealized_profit=0.05, min_unrealized_profit=0.01, position=Positions.Long, @@ -2271,8 +2134,31 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): short_allowed=True, action_masking=True, ) - expected_half_life_factor = 2 ** (-duration_ratio / 0.5) - self.assertPlacesEqual(expected_half_life_factor, 0.5, places=6) + # Validate half-life attenuation factor against expected closed-form: 2 ** (-dr / half_life) + pnl_factor_hl = _get_pnl_factor( + params, + context, + self.TEST_PROFIT_TARGET, + self.TEST_RR, + ) + observed_exit_factor = _get_exit_factor( + self.TEST_BASE_FACTOR, + context.pnl, + pnl_factor_hl, + duration_ratio, + params, + ) + # Isolate attenuation factor + observed_half_life_factor = observed_exit_factor / ( + self.TEST_BASE_FACTOR * max(pnl_factor_hl, self.EPS_BASE) + ) + expected_half_life_factor = 2 ** (-duration_ratio / params["exit_half_life"]) + self.assertAlmostEqualFloat( + observed_half_life_factor, + expected_half_life_factor, + tolerance=self.TOL_IDENTITY_RELAXED, + msg="Half-life attenuation mismatch: observed vs expected", + ) params["exit_attenuation_mode"] = "linear" params["exit_linear_slope"] = 1.0 reward_linear = calculate_reward( @@ -2295,7 +2181,7 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): def test_idle_penalty_fallback_and_proportionality(self): """Idle penalty fallback denominator & proportional scaling (robustness).""" - params = self.base_params(max_idle_duration_candles=None) + params = self.base_params(max_idle_duration_candles=None, max_trade_duration_candles=100) base_factor = 90.0 profit_target = self.TEST_PROFIT_TARGET risk_reward_ratio = 1.0 @@ -2303,7 +2189,6 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): pnl=0.0, trade_duration=0, idle_duration=20, - max_trade_duration=100, position=Positions.Neutral, action=Actions.Neutral, ) @@ -2328,12 +2213,10 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): ) self.assertLess(br_a.idle_penalty, 0.0) self.assertLess(br_b.idle_penalty, 0.0) - ratio = ( - br_b.idle_penalty / br_a.idle_penalty if br_a.idle_penalty != 0 else None - ) + ratio = br_b.idle_penalty / br_a.idle_penalty if br_a.idle_penalty != 0 else None self.assertIsNotNone(ratio) self.assertAlmostEqualFloat(abs(ratio), 2.0, tolerance=0.2) - ctx_mid = dataclasses.replace(ctx_a, idle_duration=120, max_trade_duration=100) + ctx_mid = dataclasses.replace(ctx_a, idle_duration=120) br_mid = calculate_reward( ctx_mid, params, @@ -2361,7 +2244,6 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): pnl=0.08, trade_duration=10, idle_duration=0, - max_trade_duration=100, max_unrealized_profit=0.09, 
min_unrealized_profit=0.0, position=Positions.Long, @@ -2406,16 +2288,20 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): ) def test_negative_slope_sanitization(self): - """Negative exit_linear_slope is sanitized to 1.0; resulting exit factors must match slope=1.0 within tolerance.""" + """Negative exit_linear_slope is sanitized to 0.0; resulting exit factors must match slope=0.0 within tolerance.""" base_factor = 100.0 pnl = 0.03 pnl_factor = 1.0 duration_ratios = [0.0, 0.2, 0.5, 1.0, 1.5] params_bad = self.base_params( - exit_attenuation_mode="linear", exit_linear_slope=-5.0, exit_plateau=False + exit_attenuation_mode="linear", + exit_linear_slope=-5.0, + exit_plateau=False, ) params_ref = self.base_params( - exit_attenuation_mode="linear", exit_linear_slope=1.0, exit_plateau=False + exit_attenuation_mode="linear", + exit_linear_slope=0.0, + exit_plateau=False, ) for dr in duration_ratios: f_bad = _get_exit_factor(base_factor, pnl, pnl_factor, dr, params_bad) @@ -2438,10 +2324,12 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): 0.5, 0.25, 1.0, - ] # include boundary 1.0 => alpha=0 per formula? actually -> -log(1)/log2 = 0 + ] for tau in taus: params = self.base_params( - exit_attenuation_mode="power", exit_power_tau=tau, exit_plateau=False + exit_attenuation_mode="power", + exit_power_tau=tau, + exit_plateau=False, ) f0 = _get_exit_factor(base_factor, pnl, pnl_factor, 0.0, params) f1 = _get_exit_factor(base_factor, pnl, pnl_factor, duration_ratio, params) @@ -2451,7 +2339,7 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): else: alpha = 1.0 expected_ratio = 1.0 / (1.0 + duration_ratio) ** alpha - observed_ratio = f1 / f0 if f0 != 0 else float("nan") + observed_ratio = f1 / f0 if f0 != 0 else np.nan self.assertFinite(observed_ratio, name="observed_ratio") self.assertLess( abs(observed_ratio - expected_ratio), @@ -2469,7 +2357,6 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): pnl=0.05, trade_duration=50, idle_duration=0, - max_trade_duration=100, max_unrealized_profit=0.06, min_unrealized_profit=0.02, position=Positions.Long, @@ -2491,11 +2378,10 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): for mode in modes: with self.subTest(mode=mode): test_params = self.base_params(exit_attenuation_mode=mode) - ctx = RewardContext( + ctx = self.make_ctx( pnl=0.02, trade_duration=50, idle_duration=0, - max_trade_duration=100, max_unrealized_profit=0.03, min_unrealized_profit=0.01, position=Positions.Long, @@ -2551,17 +2437,12 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): params = self.base_params(exit_attenuation_mode="sqrt") ratios = np.linspace(0, 2, 15) - values = [ - _get_exit_factor(base_factor, pnl, pnl_factor, r, params) - for r in ratios - ] + values = [_get_exit_factor(base_factor, pnl, pnl_factor, r, params) for r in ratios] # Plateau+linear: ignore initial flat region when checking monotonic decrease if mode == "plateau_linear": grace = float(params["exit_plateau_grace"]) # type: ignore[index] filtered = [ - (r, v) - for r, v in zip(ratios, values) - if r >= grace - self.TOL_IDENTITY_RELAXED + (r, v) for r, v in zip(ratios, values) if r >= grace - self.TOL_IDENTITY_RELAXED ] values_to_check = [v for _, v in filtered] else: @@ -2580,9 +2461,7 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): pnl = 0.02 pnl_factor = 1.0 # Tau near 1 (minimal attenuation) vs tau near 0 (strong attenuation) - params_hi = self.base_params( - exit_attenuation_mode="power", exit_power_tau=0.999999 - 
) + params_hi = self.base_params(exit_attenuation_mode="power", exit_power_tau=0.999999) params_lo = self.base_params( exit_attenuation_mode="power", exit_power_tau=self.MIN_EXIT_POWER_TAU, @@ -2648,9 +2527,7 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): pnl = 0.04 pnl_factor = 1.2 ratios = [0.3, 0.6, 1.0, 1.4] - values = [ - _get_exit_factor(base_factor, pnl, pnl_factor, r, params) for r in ratios - ] + values = [_get_exit_factor(base_factor, pnl, pnl_factor, r, params) for r in ratios] # All factors should be (approximately) identical after grace (no attenuation) first = values[0] for v in values[1:]: @@ -2678,16 +2555,14 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): pnl_factor = 1.1 # Ratios straddling 1.0 but below grace=1.5 plus one beyond grace ratios = [0.8, 1.0, 1.2, 1.4, 1.6] - vals = [ - _get_exit_factor(base_factor, pnl, pnl_factor, r, params) for r in ratios - ] + vals = [_get_exit_factor(base_factor, pnl, pnl_factor, r, params) for r in ratios] # All ratios <=1.5 should yield identical factor ref = vals[0] for i, r in enumerate(ratios[:-1]): # exclude last (1.6) self.assertAlmostEqualFloat( vals[i], ref, - self.TOL_IDENTITY_RELAXED, + tolerance=self.TOL_IDENTITY_RELAXED, msg=f"Unexpected attenuation before grace end at ratio {r}", ) # Last ratio (1.6) should be attenuated (strictly less than ref) @@ -2718,13 +2593,9 @@ class TestRewardRobustnessAndBoundaries(RewardSpaceTestBase): } ) - left = _get_exit_factor( - base_factor, pnl, pnl_factor, grace - eps, params - ) + left = _get_exit_factor(base_factor, pnl, pnl_factor, grace - eps, params) boundary = _get_exit_factor(base_factor, pnl, pnl_factor, grace, params) - right = _get_exit_factor( - base_factor, pnl, pnl_factor, grace + eps, params - ) + right = _get_exit_factor(base_factor, pnl, pnl_factor, grace + eps, params) self.assertAlmostEqualFloat( left, @@ -2801,7 +2672,7 @@ class TestLoadRealEpisodes(RewardSpaceTestBase): "idle_duration": [5], "position": [1.0], "action": [2.0], - "reward_total": [1.0], + "reward": [1.0], } ) p = Path(self.temp_dir) / "top.pkl" @@ -2823,7 +2694,7 @@ class TestLoadRealEpisodes(RewardSpaceTestBase): "idle_duration": 0, "position": 1.0, "action": 2.0, - "reward_total": 2.0, + "reward": 2.0, } ], } @@ -2833,7 +2704,6 @@ class TestLoadRealEpisodes(RewardSpaceTestBase): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") loaded = load_real_episodes(p) - # Accept variance in warning emission across platforms _ = w self.assertEqual(len(loaded), 1) @@ -2861,8 +2731,8 @@ class TestLoadRealEpisodes(RewardSpaceTestBase): self.write_pickle(trans, p) loaded = load_real_episodes(p, enforce_columns=False) - self.assertIn("reward_total", loaded.columns) - self.assertTrue(loaded["reward_total"].isna().all()) + self.assertIn("reward", loaded.columns) + self.assertTrue(loaded["reward"].isna().all()) def test_casting_numeric_strings(self): trans = [ @@ -2872,7 +2742,7 @@ class TestLoadRealEpisodes(RewardSpaceTestBase): "idle_duration": "0", "position": "1.0", "action": "2.0", - "reward_total": "3.0", + "reward": "3.0", } ] p = Path(self.temp_dir) / "strs.pkl" @@ -2892,7 +2762,7 @@ class TestLoadRealEpisodes(RewardSpaceTestBase): "idle_duration": [5, 0, 8], "position": [1.0, 0.0, 1.0], "action": [2.0, 0.0, 2.0], - "reward_total": [10.5, -5.2, 15.8], + "reward": [10.5, -5.2, 15.8], } ) p = Path(self.temp_dir) / "test_episodes.pkl" @@ -2923,21 +2793,20 @@ class TestPBRS(RewardSpaceTestBase): current_pnl = 0.02 current_dur = 0.5 prev_potential = 
_compute_hold_potential(current_pnl, current_dur, params) - _total_reward, shaping_reward, next_potential = apply_potential_shaping( + _total_reward, reward_shaping, next_potential = apply_potential_shaping( base_reward=0.0, current_pnl=current_pnl, current_duration_ratio=current_dur, - next_pnl=0.02, - next_duration_ratio=0.6, - is_terminal=True, - last_potential=prev_potential, + next_pnl=0.0, + next_duration_ratio=0.0, + is_exit=True, + is_entry=False, + last_potential=0.789, # arbitrary, should be ignored for Φ' params=params, ) + self.assertAlmostEqualFloat(next_potential, 0.0, tolerance=self.TOL_IDENTITY_RELAXED) self.assertAlmostEqualFloat( - next_potential, 0.0, tolerance=self.TOL_IDENTITY_RELAXED - ) - self.assertAlmostEqualFloat( - shaping_reward, -prev_potential, tolerance=self.TOL_IDENTITY_RELAXED + reward_shaping, -prev_potential, tolerance=self.TOL_IDENTITY_RELAXED ) def test_pbrs_spike_cancel_invariance(self): @@ -2960,23 +2829,24 @@ class TestPBRS(RewardSpaceTestBase): "potential_gamma", DEFAULT_MODEL_REWARD_PARAMETERS.get("potential_gamma", 0.95), ) - expected_next = ( + expected_next_potential = ( prev_potential / gamma if gamma not in (0.0, None) else prev_potential ) - _total_reward, shaping_reward, next_potential = apply_potential_shaping( + _total_reward, reward_shaping, next_potential = apply_potential_shaping( base_reward=0.0, current_pnl=current_pnl, current_duration_ratio=current_dur, - next_pnl=0.016, - next_duration_ratio=0.45, - is_terminal=True, + next_pnl=0.0, + next_duration_ratio=0.0, + is_exit=True, + is_entry=False, last_potential=prev_potential, params=params, ) self.assertAlmostEqualFloat( - next_potential, expected_next, tolerance=self.TOL_IDENTITY_RELAXED + next_potential, expected_next_potential, tolerance=self.TOL_IDENTITY_RELAXED ) - self.assertNearZero(shaping_reward, atol=self.TOL_IDENTITY_RELAXED) + self.assertNearZero(reward_shaping, atol=self.TOL_IDENTITY_RELAXED) def test_tanh_transform(self): """tanh transform: tanh(x) in (-1, 1).""" @@ -3087,7 +2957,8 @@ class TestPBRS(RewardSpaceTestBase): current_duration_ratio=current_duration_ratio, next_pnl=next_pnl, next_duration_ratio=next_duration_ratio, - is_terminal=True, + is_exit=True, + is_entry=False, last_potential=0.789, # arbitrary, should be ignored for Φ' params=params, ) @@ -3110,9 +2981,7 @@ class TestPBRS(RewardSpaceTestBase): {"hold_potential_enabled": True, "hold_potential_scale": 1.0}, ) # shaping should equal -current_potential within tolerance - self.assertAlmostEqual( - shaping, -current_potential, delta=self.TOL_IDENTITY_RELAXED - ) + self.assertAlmostEqual(shaping, -current_potential, delta=self.TOL_IDENTITY_RELAXED) # Since additives are disabled, total ≈ base_reward + shaping (residual ~0) residual = total - base_reward - shaping self.assertAlmostEqual(residual, 0.0, delta=self.TOL_IDENTITY_RELAXED) @@ -3129,14 +2998,14 @@ class TestPBRS(RewardSpaceTestBase): # Structural sweep (ensures terminal Φ'==0 and shaping bounded) terminal_next_potentials, shaping_values = self._canonical_sweep(params) - # Premier appel (terminal pour forcer chemin exit) pour activer le flag _t1, _s1, _n1 = apply_potential_shaping( base_reward=0.0, current_pnl=0.05, current_duration_ratio=0.3, next_pnl=0.0, next_duration_ratio=0.0, - is_terminal=True, + is_exit=True, + is_entry=False, last_potential=0.4, params=params, ) @@ -3144,9 +3013,7 @@ class TestPBRS(RewardSpaceTestBase): self.assertFalse(params["entry_additive_enabled"]) self.assertFalse(params["exit_additive_enabled"]) if 
terminal_next_potentials: - self.assertTrue( - all(abs(p) < self.PBRS_TERMINAL_TOL for p in terminal_next_potentials) - ) + self.assertTrue(all(abs(p) < self.PBRS_TERMINAL_TOL for p in terminal_next_potentials)) max_abs = max(abs(v) for v in shaping_values) if shaping_values else 0.0 self.assertLessEqual(max_abs, self.PBRS_MAX_ABS_SHAPING) @@ -3161,7 +3028,8 @@ class TestPBRS(RewardSpaceTestBase): current_duration_ratio=0.1, next_pnl=0.0, next_duration_ratio=0.0, - is_terminal=True, + is_exit=True, + is_entry=False, last_potential=0.1, params=params, ) @@ -3185,7 +3053,7 @@ class TestPBRS(RewardSpaceTestBase): current_duration_ratio=0.0, next_pnl=0.0, next_duration_ratio=0.0, - is_terminal=True, + is_exit=True, last_potential=last_potential, params=params, ) @@ -3196,18 +3064,17 @@ class TestPBRS(RewardSpaceTestBase): gamma = float(gamma_raw) # type: ignore[assignment] except Exception: gamma = 0.95 - self.assertLessEqual( - abs(shaping - gamma * last_potential), self.TOL_GENERIC_EQ - ) + self.assertLessEqual( + abs(shaping - gamma * last_potential), + self.TOL_GENERIC_EQ, + ) self.assertPlacesEqual(total, shaping, places=12) def test_potential_gamma_nan_fallback(self): """potential_gamma=NaN should fall back to default value (indirect comparison).""" base_params_dict = self.base_params() default_gamma = base_params_dict.get("potential_gamma", 0.95) - params_nan = self.base_params( - potential_gamma=float("nan"), hold_potential_enabled=True - ) + params_nan = self.base_params(potential_gamma=np.nan, hold_potential_enabled=True) # Non-terminal transition so Φ(s') is computed and depends on gamma res_nan = apply_potential_shaping( base_reward=0.1, @@ -3215,20 +3082,18 @@ class TestPBRS(RewardSpaceTestBase): current_duration_ratio=0.2, next_pnl=0.035, next_duration_ratio=0.25, - is_terminal=False, + is_exit=False, last_potential=0.0, params=params_nan, ) - params_ref = self.base_params( - potential_gamma=default_gamma, hold_potential_enabled=True - ) + params_ref = self.base_params(potential_gamma=default_gamma, hold_potential_enabled=True) res_ref = apply_potential_shaping( base_reward=0.1, current_pnl=0.03, current_duration_ratio=0.2, next_pnl=0.035, next_duration_ratio=0.25, - is_terminal=False, + is_exit=False, last_potential=0.0, params=params_ref, ) @@ -3252,15 +3117,15 @@ class TestPBRS(RewardSpaceTestBase): except Exception as e: # pragma: no cover self.fail(f"validate_reward_parameters raised unexpectedly: {e}") # validate_reward_parameters may return (params, diagnostics) or just params - if ( - isinstance(validated, tuple) - and len(validated) >= 1 - and isinstance(validated[0], dict) - ): + if isinstance(validated, tuple) and len(validated) >= 1 and isinstance(validated[0], dict): validated_params = validated[0] else: validated_params = validated # type: ignore[assignment] - for k in ("potential_gamma", "hold_potential_enabled", "exit_potential_mode"): + for k in ( + "potential_gamma", + "hold_potential_enabled", + "exit_potential_mode", + ): self.assertIn(k, validated_params, f"Missing key '{k}' in validated params") # Introduce invalid values @@ -3305,9 +3170,7 @@ class TestPBRS(RewardSpaceTestBase): msg="Canonical delta mismatch", ) # Spike cancel mode - params_spike = self.base_params( - exit_potential_mode="spike_cancel", **base_common - ) + params_spike = self.base_params(exit_potential_mode="spike_cancel", **base_common) next_phi_spike = _compute_exit_potential(prev_phi, params_spike) shaping_spike = gamma * next_phi_spike - prev_phi self.assertNearZero( @@ -3330,9 +3193,7 
@@ class TestPBRS(RewardSpaceTestBase): with self.subTest(transform=name): vals = [apply_transform(name, x) for x in xs] # Strict bounds (-1,1) (sigmoid & tanh asymptotic) - self.assertTrue( - all(-1.0 < v < 1.0 for v in vals), f"{name} out of bounds" - ) + self.assertTrue(all(-1.0 < v < 1.0 for v in vals), f"{name} out of bounds") # Non-decreasing monotonicity for a, b in zip(vals, vals[1:]): self.assertLessEqual( @@ -3385,17 +3246,17 @@ class TestPBRS(RewardSpaceTestBase): class TestReportFormatting(RewardSpaceTestBase): - """Tests for report formatting elements not previously covered.""" + """Tests for report formatting elements not covered elsewhere.""" def test_abs_shaping_line_present_and_constant(self): """Abs Σ Shaping Reward line present, formatted, uses constant not literal.""" - # Minimal synthetic construction to exercise invariance formatting logic. - self.assertPlacesEqual(PBRS_INVARIANCE_TOL, self.TOL_GENERIC_EQ, places=12) - # Use small synthetic DataFrame with zero shaping sum (pandas imported globally) df = pd.DataFrame( { - "reward_shaping": [self.TOL_IDENTITY_STRICT, -self.TOL_IDENTITY_STRICT], + "reward_shaping": [ + self.TOL_IDENTITY_STRICT, + -self.TOL_IDENTITY_STRICT, + ], "reward_entry_additive": [0.0, 0.0], "reward_exit_additive": [0.0, 0.0], } @@ -3409,14 +3270,15 @@ class TestReportFormatting(RewardSpaceTestBase): content = "\n".join(lines) # Validate formatting pattern using regex m = re.search( - r"\| Abs Σ Shaping Reward \| ([0-9]+\.[0-9]{6}e[+-][0-9]{2}) \|", content + r"\| Abs Σ Shaping Reward \| ([0-9]+\.[0-9]{6}e[+-][0-9]{2}) \|", + content, ) self.assertIsNotNone(m, "Abs Σ Shaping Reward line missing or misformatted") # Ensure scientific notation magnitude consistent with small number val = float(m.group(1)) if m else None # type: ignore[arg-type] if val is not None: self.assertLess(val, self.TOL_NEGLIGIBLE + self.TOL_IDENTITY_STRICT) - # Ensure no stray hard-coded tolerance string inside content + # Ensure no hard-coded tolerance string inside content self.assertNotIn( str(self.TOL_GENERIC_EQ), content, @@ -3425,7 +3287,6 @@ class TestReportFormatting(RewardSpaceTestBase): def test_pbrs_non_canonical_report_generation(self): """Generate synthetic invariance section with non-zero shaping to assert Non-canonical classification.""" - import re # local lightweight df = pd.DataFrame( { @@ -3445,12 +3306,8 @@ class TestReportFormatting(RewardSpaceTestBase): section.append(f"| Note | Total shaping = {total_shaping:.6f} (non-zero) |\n") section.append(f"| Σ Shaping Reward | {total_shaping:.6f} |\n") section.append(f"| Abs Σ Shaping Reward | {abs(total_shaping):.6e} |\n") - section.append( - f"| Σ Entry Additive | {df['reward_entry_additive'].sum():.6f} |\n" - ) - section.append( - f"| Σ Exit Additive | {df['reward_exit_additive'].sum():.6f} |\n" - ) + section.append(f"| Σ Entry Additive | {df['reward_entry_additive'].sum():.6f} |\n") + section.append(f"| Σ Exit Additive | {df['reward_exit_additive'].sum():.6f} |\n") content = "".join(section) self.assertIn("❌ Non-canonical", content) self.assertRegex(content, r"Σ Shaping Reward \| 0\.008000 \|") @@ -3486,19 +3343,16 @@ class TestReportFormatting(RewardSpaceTestBase): "current_duration_ratio": 0.2, "next_pnl": 0.012, "next_duration_ratio": 0.25, - "is_terminal": False, + "is_entry": True, + "is_exit": False, } _t0, s0, _n0 = apply_potential_shaping(last_potential=0.0, params=base, **ctx) - t1, s1, _n1 = apply_potential_shaping( - last_potential=0.0, params=with_add, **ctx - ) + t1, s1, _n1 = 
apply_potential_shaping(last_potential=0.0, params=with_add, **ctx) self.assertFinite(t1) self.assertFinite(s1) # Additives should not alter invariance: shaping difference small self.assertLess(abs(s1 - s0), 0.2) - self.assertGreater( - t1 - _t0, 0.0, "Total reward should increase with additives present" - ) + self.assertGreater(t1 - _t0, 0.0, "Total reward should increase with additives present") def test_report_cumulative_invariance_aggregation(self): """Canonical telescoping term: small per-step mean drift, bounded increments.""" @@ -3519,18 +3373,18 @@ class TestReportFormatting(RewardSpaceTestBase): max_abs_step = 0.0 steps = 0 for _ in range(500): - is_terminal = rng.uniform() < 0.1 + is_exit = rng.uniform() < 0.1 current_pnl = float(rng.normal(0, 0.05)) current_dur = float(rng.uniform(0, 1)) - next_pnl = 0.0 if is_terminal else float(rng.normal(0, 0.05)) - next_dur = 0.0 if is_terminal else float(rng.uniform(0, 1)) + next_pnl = 0.0 if is_exit else float(rng.normal(0, 0.05)) + next_dur = 0.0 if is_exit else float(rng.uniform(0, 1)) _tot, _shap, next_potential = apply_potential_shaping( base_reward=0.0, current_pnl=current_pnl, current_duration_ratio=current_dur, next_pnl=next_pnl, next_duration_ratio=next_dur, - is_terminal=is_terminal, + is_exit=is_exit, last_potential=last_potential, params=params, ) @@ -3540,7 +3394,7 @@ class TestReportFormatting(RewardSpaceTestBase): if abs(inc) > max_abs_step: max_abs_step = abs(inc) steps += 1 - if is_terminal: + if is_exit: # Reset potential at terminal per canonical semantics last_potential = 0.0 else: @@ -3570,21 +3424,21 @@ class TestReportFormatting(RewardSpaceTestBase): last_potential = 0.0 shaping_sum = 0.0 for _ in range(160): - is_terminal = rng.uniform() < 0.15 - next_pnl = 0.0 if is_terminal else float(rng.normal(0, 0.07)) - next_dur = 0.0 if is_terminal else float(rng.uniform(0, 1)) + is_exit = rng.uniform() < 0.15 + next_pnl = 0.0 if is_exit else float(rng.normal(0, 0.07)) + next_dur = 0.0 if is_exit else float(rng.uniform(0, 1)) _tot, shap, next_pot = apply_potential_shaping( base_reward=0.0, current_pnl=float(rng.normal(0, 0.07)), current_duration_ratio=float(rng.uniform(0, 1)), next_pnl=next_pnl, next_duration_ratio=next_dur, - is_terminal=is_terminal, + is_exit=is_exit, last_potential=last_potential, params=params, ) shaping_sum += shap - last_potential = 0.0 if is_terminal else next_pot + last_potential = 0.0 if is_exit else next_pot self.assertGreater( abs(shaping_sum), PBRS_INVARIANCE_TOL * 50, @@ -3607,7 +3461,7 @@ class TestReportFormatting(RewardSpaceTestBase): current_duration_ratio=0.3, next_pnl=0.025, next_duration_ratio=0.35, - is_terminal=False, + is_exit=False, last_potential=0.0, params=params, ) @@ -3623,7 +3477,7 @@ class TestBootstrapStatistics(RewardSpaceTestBase): """Degenerate columns produce (mean≈lo≈hi) zero-width intervals.""" df = self._const_df(80) res = bootstrap_confidence_intervals( - df, ["reward_total", "pnl"], n_bootstrap=200, confidence_level=0.95 + df, ["reward", "pnl"], n_bootstrap=200, confidence_level=0.95 ) for k, (mean, lo, hi) in res.items(): self.assertAlmostEqualFloat(mean, lo, tolerance=2e-9) @@ -3634,12 +3488,8 @@ class TestBootstrapStatistics(RewardSpaceTestBase): """Half-width decreases with larger sample (~1/sqrt(n) heuristic).""" small = self._shift_scale_df(80) large = self._shift_scale_df(800) - res_small = bootstrap_confidence_intervals( - small, ["reward_total"], n_bootstrap=400 - ) - res_large = bootstrap_confidence_intervals( - large, ["reward_total"], n_bootstrap=400 - ) + 
res_small = bootstrap_confidence_intervals(small, ["reward"], n_bootstrap=400) + res_large = bootstrap_confidence_intervals(large, ["reward"], n_bootstrap=400) (_, lo_s, hi_s) = list(res_small.values())[0] (_, lo_l, hi_l) = list(res_large.values())[0] hw_small = (hi_s - lo_s) / 2.0 @@ -3653,7 +3503,7 @@ class TestBootstrapStatistics(RewardSpaceTestBase): test_data = self.make_stats_df(n=100, seed=self.SEED) results = bootstrap_confidence_intervals( test_data, - ["reward_total", "pnl"], + ["reward", "pnl"], n_bootstrap=100, ) for metric, (mean, ci_low, ci_high) in results.items(): @@ -3662,6 +3512,223 @@ class TestBootstrapStatistics(RewardSpaceTestBase): self.assertFinite(ci_high, name=f"ci_high[{metric}]") self.assertLess(ci_low, ci_high) + def test_canonical_invariance_flag_and_sum(self): + """Canonical mode + no additives -> pbrs_invariant True and Σ shaping ≈ 0.""" + params = self.base_params( + exit_potential_mode="canonical", + entry_additive_enabled=False, + exit_additive_enabled=False, + hold_potential_enabled=True, + ) + df = simulate_samples( + params={**params, "max_trade_duration_candles": 100}, + num_samples=400, + seed=self.SEED, + base_factor=self.TEST_BASE_FACTOR, + profit_target=self.TEST_PROFIT_TARGET, + risk_reward_ratio=self.TEST_RR, + max_duration_ratio=2.0, + trading_mode="margin", + pnl_base_std=self.TEST_PNL_STD, + pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE, + ) + # pbrs_invariant must be True for all samples + unique_flags = set(df["pbrs_invariant"].unique().tolist()) + self.assertEqual(unique_flags, {True}, f"Unexpected invariant flags: {unique_flags}") + # Σ shaping ≈ 0 within PBRS_INVARIANCE_TOL + total_shaping = float(df["reward_shaping"].sum()) + self.assertLess( + abs(total_shaping), + PBRS_INVARIANCE_TOL, + f"Canonical invariance violated: Σ shaping = {total_shaping}", + ) + + def test_non_canonical_flag_false_and_sum_nonzero(self): + """Non-canonical exit potential (progressive_release) -> pbrs_invariant False and Σ shaping != 0.""" + params = self.base_params( + exit_potential_mode="progressive_release", + exit_potential_decay=0.25, + entry_additive_enabled=False, + exit_additive_enabled=False, + hold_potential_enabled=True, + ) + df = simulate_samples( + params={**params, "max_trade_duration_candles": 100}, + num_samples=400, + seed=self.SEED, + base_factor=self.TEST_BASE_FACTOR, + profit_target=self.TEST_PROFIT_TARGET, + risk_reward_ratio=self.TEST_RR, + max_duration_ratio=2.0, + trading_mode="margin", + pnl_base_std=self.TEST_PNL_STD, + pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE, + ) + unique_flags = set(df["pbrs_invariant"].unique().tolist()) + self.assertEqual(unique_flags, {False}, f"Unexpected invariant flags: {unique_flags}") + total_shaping = float(df["reward_shaping"].sum()) + self.assertGreater( + abs(total_shaping), + PBRS_INVARIANCE_TOL * 10, + f"Expected non-zero Σ shaping in non-canonical mode (got {total_shaping})", + ) + + +class TestCsvAndSimulationOptions(RewardSpaceTestBase): + """CLI-level tests: CSV encoding and simulate_unrealized_pnl option effects.""" + + def test_action_column_integer_in_csv(self): + """Ensure 'action' column in reward_samples.csv is encoded as integers.""" + out_dir = self.output_path / "csv_int_check" + cmd = [ + sys.executable, + "reward_space_analysis.py", + "--num_samples", + "200", + "--seed", + str(self.SEED), + "--out_dir", + str(out_dir), + ] + result = subprocess.run(cmd, capture_output=True, text=True, cwd=Path(__file__).parent) + self.assertEqual(result.returncode, 0, f"CLI failed: 
{result.stderr}") + csv_path = out_dir / "reward_samples.csv" + self.assertTrue(csv_path.exists(), "Missing reward_samples.csv") + df = pd.read_csv(csv_path) + self.assertIn("action", df.columns) + # All values must be integral and in the expected enum set {0,1,2,3,4} + values = df["action"].tolist() + self.assertTrue( + all(float(v).is_integer() for v in values), + "Non-integer values detected in 'action' column", + ) + allowed = {0, 1, 2, 3, 4} + self.assertTrue(set(int(v) for v in values).issubset(allowed)) + + def test_unrealized_pnl_affects_hold_potential(self): + """--unrealized_pnl should alter hold next_potential distribution vs default.""" + out_default = self.output_path / "sim_default" + out_sim = self.output_path / "sim_unrealized" + base_args = [ + "--num_samples", + "800", + "--seed", + str(self.SEED), + "--out_dir", + ] + # Default run + cmd_default = [ + sys.executable, + "reward_space_analysis.py", + *base_args, + str(out_default), + ] + res_def = subprocess.run( + cmd_default, + capture_output=True, + text=True, + cwd=Path(__file__).parent, + ) + self.assertEqual(res_def.returncode, 0, f"CLI default run failed: {res_def.stderr}") + # Run with --unrealized_pnl + cmd_sim = [ + sys.executable, + "reward_space_analysis.py", + *base_args, + str(out_sim), + "--unrealized_pnl", + ] + res_sim = subprocess.run(cmd_sim, capture_output=True, text=True, cwd=Path(__file__).parent) + self.assertEqual(res_sim.returncode, 0, f"CLI simulated run failed: {res_sim.stderr}") + + # Load CSVs + df_def = pd.read_csv(out_default / "reward_samples.csv") + df_sim = pd.read_csv(out_sim / "reward_samples.csv") + # Hold actions: position in {Long (1.0), Short (0.0)} and action == 0 (Neutral) + mask_hold_def = (df_def["action"] == 0) & (df_def["position"].isin([0.0, 1.0])) + mask_hold_sim = (df_sim["action"] == 0) & (df_sim["position"].isin([0.0, 1.0])) + # Sanity: ensure we have holds in both runs + self.assertGreater(int(mask_hold_def.sum()), 0, "No hold samples in default run") + self.assertGreater(int(mask_hold_sim.sum()), 0, "No hold samples in simulate run") + # Compare mean next_potential on holds: simulated should differ from default + mean_next_def = float(df_def.loc[mask_hold_def, "next_potential"].mean()) + mean_next_sim = float(df_sim.loc[mask_hold_sim, "next_potential"].mean()) + self.assertFinite(mean_next_def, name="mean_next_def") + self.assertFinite(mean_next_sim, name="mean_next_sim") + self.assertGreater( + abs(mean_next_sim - mean_next_def), + self.TOL_GENERIC_EQ, + f"No detectable effect of --unrealized_pnl on Φ(s): def={mean_next_def:.6f}, sim={mean_next_sim:.6f}", + ) + + +class TestParamsPropagation(RewardSpaceTestBase): + """Integration tests to validate max_trade_duration_candles propagation via CLI params and dynamic flag.""" + + def test_max_trade_duration_candles_propagation_params(self): + """--params max_trade_duration_candles=X propagates to manifest and simulation params.""" + out_dir = self.output_path / "mtd_params" + cmd = [ + sys.executable, + "reward_space_analysis.py", + "--num_samples", + "120", + "--seed", + str(self.SEED), + "--out_dir", + str(out_dir), + "--params", + "max_trade_duration_candles=96", + ] + result = subprocess.run(cmd, capture_output=True, text=True, cwd=Path(__file__).parent) + self.assertEqual(result.returncode, 0, f"CLI failed: {result.stderr}") + + manifest_path = out_dir / "manifest.json" + self.assertTrue(manifest_path.exists(), "Missing manifest.json") + with open(manifest_path, "r") as f: + manifest = json.load(f) + + # Basic structure 
checks + self.assertIn("reward_params", manifest) + self.assertIn("simulation_params", manifest) + + # Reward params should include the tunable (float or int acceptable -> coerce) + rp = manifest["reward_params"] + self.assertIn("max_trade_duration_candles", rp) + self.assertEqual(int(rp["max_trade_duration_candles"]), 96) + + def test_max_trade_duration_candles_propagation_flag(self): + """Dynamic flag --max_trade_duration_candles X propagates identically.""" + out_dir = self.output_path / "mtd_flag" + cmd = [ + sys.executable, + "reward_space_analysis.py", + "--num_samples", + "120", + "--seed", + str(self.SEED), + "--out_dir", + str(out_dir), + "--max_trade_duration_candles", + "64", + ] + result = subprocess.run(cmd, capture_output=True, text=True, cwd=Path(__file__).parent) + self.assertEqual(result.returncode, 0, f"CLI failed: {result.stderr}") + + manifest_path = out_dir / "manifest.json" + self.assertTrue(manifest_path.exists(), "Missing manifest.json") + with open(manifest_path, "r") as f: + manifest = json.load(f) + + # Basic structure checks + self.assertIn("reward_params", manifest) + self.assertIn("simulation_params", manifest) + + # Reward params should include the tunable (float or int acceptable -> coerce) + rp = manifest["reward_params"] + self.assertIn("max_trade_duration_candles", rp) + self.assertEqual(int(rp["max_trade_duration_candles"]), 64) + if __name__ == "__main__": # Configure test discovery and execution diff --git a/ReforceXY/user_data/config-template.json b/ReforceXY/user_data/config-template.json index 575819c..6cb3c53 100644 --- a/ReforceXY/user_data/config-template.json +++ b/ReforceXY/user_data/config-template.json @@ -160,13 +160,13 @@ "model_reward_parameters": { "rr": 2, "profit_aim": 0.025, - "win_reward_factor": 2 + "win_reward_factor": 2, + "max_trade_duration_candles": 96 // Maximum trade duration in candles }, "train_cycles": 25, "add_state_info": true, "cpu_count": 4, "max_training_drawdown_pct": 0.02, - "max_trade_duration_candles": 96, // Maximum trade duration in candles "n_envs": 8, // Number of DummyVecEnv or SubProcVecEnv training environments "multiprocessing": true, // Use SubprocVecEnv if n_envs>1 (otherwise DummyVecEnv) "frame_stacking": 2, // Number of VecFrameStack stacks (set > 1 to use) diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py index 6c862d2..9fa91b7 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -77,7 +77,6 @@ class ReforceXY(BaseReinforcementLearningModel): ... "rl_config": { ... 
- "max_trade_duration_candles": 96, // Maximum trade duration in candles "n_envs": 1, // Number of DummyVecEnv or SubProcVecEnv training environments "n_eval_envs": 1, // Number of DummyVecEnv or SubProcVecEnv evaluation environments "multiprocessing": false, // Use SubprocVecEnv if n_envs>1 (otherwise DummyVecEnv) @@ -1348,9 +1347,7 @@ class MyRLEnv(Base5ActionRLEnv): super().__init__(*args, **kwargs) self._set_observation_space() self.action_masking: bool = self.rl_config.get("action_masking", False) - self.max_trade_duration_candles: int = self.rl_config.get( - "max_trade_duration_candles", 128 - ) + # === INTERNAL STATE === self._last_closed_position: Optional[Positions] = None self._last_closed_trade_tick: int = 0 @@ -1358,9 +1355,32 @@ class MyRLEnv(Base5ActionRLEnv): self._min_unrealized_profit: float = np.inf self._last_potential: float = 0.0 # === PBRS INSTRUMENTATION === - self._total_shaping_reward: float = 0.0 - self._last_shaping_reward: float = 0.0 + self._last_prev_potential: float = 0.0 + self._last_next_potential: float = 0.0 + self._last_reward_shaping: float = 0.0 + self._total_reward_shaping: float = 0.0 + self._last_invalid_penalty: float = 0.0 + self._last_idle_penalty: float = 0.0 + self._last_hold_penalty: float = 0.0 + self._last_exit_reward: float = 0.0 + self._last_entry_additive: float = 0.0 + self._total_entry_additive: float = 0.0 + self._last_exit_additive: float = 0.0 + self._total_exit_additive: float = 0.0 model_reward_parameters = self.rl_config.get("model_reward_parameters", {}) + self.max_trade_duration_candles: int = int( + model_reward_parameters.get( + "max_trade_duration_candles", + 128, + ) + ) + self.max_idle_duration_candles: int = int( + model_reward_parameters.get( + "max_idle_duration_candles", + ReforceXY.DEFAULT_IDLE_DURATION_MULTIPLIER + * self.max_trade_duration_candles, + ) + ) # === PBRS COMMON PARAMETERS === potential_gamma = model_reward_parameters.get("potential_gamma") if potential_gamma is None: @@ -1455,7 +1475,7 @@ class MyRLEnv(Base5ActionRLEnv): if self._exit_potential_mode == "canonical": if self._entry_additive_enabled or self._exit_additive_enabled: logger.info( - "Canonical mode: additive rewards disabled with Φ(terminal)=0. PBRS invariance is preserved. " + "PBRS canonical mode: additive rewards disabled with Φ(terminal)=0. PBRS invariance is preserved. " "To use additive rewards, set exit_potential_mode='non_canonical'." ) self._entry_additive_enabled = False @@ -1463,15 +1483,24 @@ class MyRLEnv(Base5ActionRLEnv): elif self._exit_potential_mode == "non_canonical": if self._entry_additive_enabled or self._exit_additive_enabled: logger.info( - "Non-canonical mode: additive rewards enabled with Φ(terminal)=0. PBRS invariance is intentionally broken." + "PBRS non-canonical mode: additive rewards enabled with Φ(terminal)=0. PBRS invariance is intentionally broken." ) if MyRLEnv.is_unsupported_pbrs_config( self._hold_potential_enabled, getattr(self, "add_state_info", False) ): logger.warning( - "PBRS: hold_potential_enabled=True & add_state_info=False is unsupported. PBRS invariance is not guaranteed" + "PBRS: hold_potential_enabled=True and add_state_info=False is unsupported. Automatically enabling add_state_info=True." 
+ ) + self.add_state_info = True + + # === PNL TARGET VALIDATION === + pnl_target = self.profit_aim * self.rr + if MyRLEnv._is_invalid_pnl_target(pnl_target): + raise ValueError( + f"Invalid pnl_target={pnl_target:.12g} computed from profit_aim={self.profit_aim:.12g} and rr={self.rr:.12g}" ) + self._pnl_target = pnl_target def _get_next_position(self, action: int) -> Positions: if action == Actions.Long_enter.value and self._position == Positions.Neutral: @@ -1501,7 +1530,7 @@ class MyRLEnv(Base5ActionRLEnv): Positions.Long, Positions.Short, ): - return next_position, 0, 0.0 + return next_position, 0, pnl # Exit if ( self._position in (Positions.Long, Positions.Short) @@ -1517,9 +1546,14 @@ class MyRLEnv(Base5ActionRLEnv): # Neutral self-loop return next_position, 0, 0.0 - def _is_invalid_pnl_target(self, pnl_target: float) -> bool: - """Check if pnl_target is invalid (negative or close to zero).""" - return pnl_target < 0.0 or np.isclose(pnl_target, 0.0) + @staticmethod + def _is_invalid_pnl_target(pnl_target: float) -> bool: + """Return True when pnl_target is non-finite, <= 0, or effectively zero within tolerance.""" + return ( + (not np.isfinite(pnl_target)) + or (pnl_target <= 0.0) + or np.isclose(pnl_target, 0.0) + ) def _compute_pnl_duration_signal( self, @@ -1574,8 +1608,6 @@ class MyRLEnv(Base5ActionRLEnv): return 0.0 if require_position and position not in (Positions.Long, Positions.Short): return 0.0 - if self._is_invalid_pnl_target(pnl_target): - return 0.0 duration_ratio = 0.0 if duration_ratio < 0.0 else duration_ratio if duration_ratio > 1.0: @@ -1709,7 +1741,7 @@ class MyRLEnv(Base5ActionRLEnv): if name == "clip": return max(-1.0, min(1.0, x)) - logger.info("Unknown potential transform '%s'; falling back to tanh", name) + logger.warning("Unknown potential transform '%s'; falling back to tanh", name) return math.tanh(x) def _compute_exit_potential(self, prev_potential: float, gamma: float) -> float: @@ -1723,7 +1755,7 @@ class MyRLEnv(Base5ActionRLEnv): if mode == "progressive_release": decay = self._exit_potential_decay if not np.isfinite(decay) or decay < 0.0: - decay = 0.5 + decay = 0.0 if decay > 1.0: decay = 1.0 next_potential = prev_potential * (1.0 - decay) @@ -1763,7 +1795,7 @@ class MyRLEnv(Base5ActionRLEnv): def is_unsupported_pbrs_config( hold_potential_enabled: bool, add_state_info: bool ) -> bool: - """Return True if PBRS potential relies on hidden (non-observed) state. + """Return True if PBRS potential relies on hidden state. Case: hold_potential enabled while auxiliary state info (pnl, trade_duration) is excluded from the observation space (add_state_info=False). 
In that situation, Φ(s) uses hidden @@ -1929,11 +1961,13 @@ class MyRLEnv(Base5ActionRLEnv): potential = self._compute_hold_potential( next_position, next_duration_ratio, next_pnl, pnl_target ) - shaping_reward = gamma * potential - prev_potential + reward_shaping = gamma * potential - prev_potential self._last_potential = potential else: - shaping_reward = 0.0 + reward_shaping = 0.0 self._last_potential = 0.0 + self._last_exit_additive = 0.0 + self._last_entry_additive = 0.0 entry_additive = 0.0 if self._entry_additive_enabled and not self.is_pbrs_invariant_mode(): entry_additive = self._compute_entry_additive( @@ -1941,48 +1975,62 @@ class MyRLEnv(Base5ActionRLEnv): pnl_target=pnl_target, duration_ratio=next_duration_ratio, ) - self._last_shaping_reward = float(shaping_reward) - self._total_shaping_reward += float(shaping_reward) - return base_reward + shaping_reward + entry_additive + self._last_entry_additive = float(entry_additive) + self._total_entry_additive += float(entry_additive) + self._last_reward_shaping = float(reward_shaping) + self._total_reward_shaping += float(reward_shaping) + self._last_prev_potential = float(prev_potential) + self._last_next_potential = float(self._last_potential) + return base_reward + reward_shaping + entry_additive elif is_hold: if self._hold_potential_enabled: potential = self._compute_hold_potential( next_position, next_duration_ratio, next_pnl, pnl_target ) - shaping_reward = gamma * potential - prev_potential + reward_shaping = gamma * potential - prev_potential self._last_potential = potential else: - shaping_reward = 0.0 + reward_shaping = 0.0 self._last_potential = 0.0 - self._last_shaping_reward = float(shaping_reward) - self._total_shaping_reward += float(shaping_reward) - return base_reward + shaping_reward + self._last_entry_additive = 0.0 + self._last_exit_additive = 0.0 + self._last_reward_shaping = float(reward_shaping) + self._total_reward_shaping += float(reward_shaping) + self._last_prev_potential = float(prev_potential) + self._last_next_potential = float(self._last_potential) + return base_reward + reward_shaping elif is_exit: if ( self._exit_potential_mode == "canonical" or self._exit_potential_mode == "non_canonical" ): next_potential = 0.0 - exit_shaping_reward = -prev_potential + exit_reward_shaping = -prev_potential else: next_potential = self._compute_exit_potential(prev_potential, gamma) - exit_shaping_reward = gamma * next_potential - prev_potential - + exit_reward_shaping = gamma * next_potential - prev_potential + self._last_entry_additive = 0.0 + self._last_exit_additive = 0.0 exit_additive = 0.0 if self._exit_additive_enabled and not self.is_pbrs_invariant_mode(): duration_ratio = trade_duration / max(max_trade_duration, 1) exit_additive = self._compute_exit_additive( pnl, pnl_target, duration_ratio ) - + self._last_exit_additive = float(exit_additive) + self._total_exit_additive += float(exit_additive) self._last_potential = next_potential - self._last_shaping_reward = float(exit_shaping_reward) - self._total_shaping_reward += float(exit_shaping_reward) - return base_reward + exit_shaping_reward + exit_additive + self._last_reward_shaping = float(exit_reward_shaping) + self._total_reward_shaping += float(exit_reward_shaping) + self._last_prev_potential = float(prev_potential) + self._last_next_potential = float(self._last_potential) + return base_reward + exit_reward_shaping + exit_additive else: # Neutral self-loop - self._last_potential = 0.0 - self._last_shaping_reward = 0.0 + self._last_prev_potential = 
float(prev_potential) + self._last_next_potential = float(self._last_potential) + self._last_entry_additive = 0.0 + self._last_exit_additive = 0.0 return base_reward def _set_observation_space(self) -> None: @@ -2030,8 +2078,18 @@ class MyRLEnv(Base5ActionRLEnv): self._max_unrealized_profit = -np.inf self._min_unrealized_profit = np.inf self._last_potential = 0.0 - self._total_shaping_reward = 0.0 - self._last_shaping_reward = 0.0 + self._last_prev_potential = 0.0 + self._last_next_potential = 0.0 + self._last_reward_shaping = 0.0 + self._total_reward_shaping = 0.0 + self._last_entry_additive = 0.0 + self._total_entry_additive = 0.0 + self._last_exit_additive = 0.0 + self._total_exit_additive = 0.0 + self._last_invalid_penalty = 0.0 + self._last_idle_penalty = 0.0 + self._last_hold_penalty = 0.0 + self._last_exit_reward = 0.0 return observation, history def _get_exit_factor( @@ -2059,10 +2117,10 @@ class MyRLEnv(Base5ActionRLEnv): model_reward_parameters.get("exit_plateau_grace", 1.0) ) if exit_plateau_grace < 0.0: - exit_plateau_grace = 1.0 + exit_plateau_grace = 0.0 exit_linear_slope = float(model_reward_parameters.get("exit_linear_slope", 1.0)) if exit_linear_slope < 0.0: - exit_linear_slope = 1.0 + exit_linear_slope = 0.0 def _legacy(f: float, dr: float, p: Mapping) -> float: return f * (1.5 if dr <= 1.0 else 0.5) @@ -2091,7 +2149,7 @@ class MyRLEnv(Base5ActionRLEnv): def _half_life(f: float, dr: float, p: Mapping) -> float: hl = float(p.get("exit_half_life", 0.5)) if hl <= 0.0: - hl = 0.5 + hl = 0.0 return f * math.pow(2.0, -dr / hl) strategies: Dict[str, Callable[[float, float, Mapping], float]] = { @@ -2129,7 +2187,7 @@ class MyRLEnv(Base5ActionRLEnv): ) factor = _linear(factor, effective_dr, model_reward_parameters) - factor *= self._get_pnl_factor(pnl, self.profit_aim * self.rr) + factor *= self._get_pnl_factor(pnl, self._pnl_target) check_invariants = model_reward_parameters.get("check_invariants", True) check_invariants = ( @@ -2161,7 +2219,7 @@ class MyRLEnv(Base5ActionRLEnv): return factor def _get_pnl_factor(self, pnl: float, pnl_target: float) -> float: - if not np.isfinite(pnl) or not np.isfinite(pnl_target): + if not np.isfinite(pnl): return 0.0 model_reward_parameters = self.rl_config.get("model_reward_parameters", {}) @@ -2233,17 +2291,22 @@ class MyRLEnv(Base5ActionRLEnv): model_reward_parameters = self.rl_config.get("model_reward_parameters", {}) base_reward: Optional[float] = None + self._last_invalid_penalty = 0.0 + self._last_idle_penalty = 0.0 + self._last_hold_penalty = 0.0 + self._last_exit_reward = 0.0 + # 1. Invalid action if not self.action_masking and not self._is_valid(action): self.tensorboard_log("invalid", category="actions") base_reward = float(model_reward_parameters.get("invalid_action", -2.0)) + self._last_invalid_penalty = float(base_reward) - max_trade_duration = max(self.max_trade_duration_candles, 1) + max_trade_duration = max(1, self.max_trade_duration_candles) trade_duration = self.get_trade_duration() duration_ratio = trade_duration / max_trade_duration base_factor = float(model_reward_parameters.get("base_factor", 100.0)) - pnl_target = self.profit_aim * self.rr - idle_factor = base_factor * pnl_target / 4.0 + idle_factor = base_factor * self._pnl_target / 4.0 hold_factor = idle_factor # 2. 
Idle penalty @@ -2252,12 +2315,7 @@ class MyRLEnv(Base5ActionRLEnv): and action == Actions.Neutral.value and self._position == Positions.Neutral ): - max_idle_duration = int( - model_reward_parameters.get( - "max_idle_duration_candles", - ReforceXY.DEFAULT_IDLE_DURATION_MULTIPLIER * max_trade_duration, - ) - ) + max_idle_duration = max(1, self.max_idle_duration_candles) idle_penalty_scale = float( model_reward_parameters.get("idle_penalty_scale", 0.5) ) @@ -2271,6 +2329,7 @@ class MyRLEnv(Base5ActionRLEnv): * idle_penalty_scale * idle_duration_ratio**idle_penalty_power ) + self._last_idle_penalty = float(base_reward) # 3. Hold overtime penalty if ( @@ -2292,6 +2351,7 @@ class MyRLEnv(Base5ActionRLEnv): * hold_penalty_scale * (duration_ratio - 1.0) ** hold_penalty_power ) + self._last_hold_penalty = float(base_reward) # 4. Exit rewards pnl = self.get_unrealized_profit() @@ -2301,12 +2361,14 @@ class MyRLEnv(Base5ActionRLEnv): and self._position == Positions.Long ): base_reward = pnl * self._get_exit_factor(base_factor, pnl, duration_ratio) + self._last_exit_reward = float(base_reward) if ( base_reward is None and action == Actions.Short_exit.value and self._position == Positions.Short ): base_reward = pnl * self._get_exit_factor(base_factor, pnl, duration_ratio) + self._last_exit_reward = float(base_reward) # 5. Default if base_reward is None: @@ -2319,7 +2381,7 @@ class MyRLEnv(Base5ActionRLEnv): trade_duration=trade_duration, max_trade_duration=max_trade_duration, pnl=pnl, - pnl_target=pnl_target, + pnl_target=self._pnl_target, ) def _get_observation(self) -> NDArray[np.float32]: @@ -2411,7 +2473,6 @@ class MyRLEnv(Base5ActionRLEnv): self._update_portfolio_log_returns() reward = self.calculate_reward(action) self.total_reward += reward - self.tensorboard_log(Actions._member_names_[action], category="actions") trade_type = self.execute_trade(action) if trade_type is not None: self.append_trade_history(trade_type, self.current_price(), pre_pnl) @@ -2420,6 +2481,9 @@ class MyRLEnv(Base5ActionRLEnv): self._update_max_unrealized_profit(pnl) self._update_min_unrealized_profit(pnl) delta_pnl = pnl - pre_pnl + max_idle_duration = max(1, self.max_idle_duration_candles) + idle_duration = self.get_idle_duration() + trade_duration = self.get_trade_duration() info = { "tick": self._current_tick, "position": float(self._position.value), @@ -2432,14 +2496,25 @@ class MyRLEnv(Base5ActionRLEnv): "most_recent_return": round(self.get_most_recent_return(), 5), "most_recent_profit": round(self.get_most_recent_profit(), 5), "total_profit": round(self._total_profit, 5), - "potential": round(self._last_potential, 5), - "shaping_reward": round(self._last_shaping_reward, 5), - "total_shaping_reward": round(self._total_shaping_reward, 5), + "prev_potential": round(self._last_prev_potential, 5), + "next_potential": round(self._last_next_potential, 5), + "reward_entry_additive": round(self._last_entry_additive, 5), + "reward_exit_additive": round(self._last_exit_additive, 5), + "reward_shaping": round(self._last_reward_shaping, 5), + "total_reward_shaping": round(self._total_reward_shaping, 5), + "reward_invalid": round(self._last_invalid_penalty, 5), + "reward_idle": round(self._last_idle_penalty, 5), + "reward_hold": round(self._last_hold_penalty, 5), + "reward_exit": round(self._last_exit_reward, 5), "reward": round(reward, 5), "total_reward": round(self.total_reward, 5), "pbrs_invariant": self.is_pbrs_invariant_mode(), - "idle_duration": self.get_idle_duration(), - "trade_duration": self.get_trade_duration(), + 
"idle_duration": idle_duration, + "idle_ratio": (idle_duration / max_idle_duration), + "trade_duration": trade_duration, + "duration_ratio": ( + trade_duration / max(1, self.max_trade_duration_candles) + ), "trade_count": int(len(self.trade_history) // 2), } self._update_history(info) @@ -2447,6 +2522,14 @@ class MyRLEnv(Base5ActionRLEnv): if terminated: # Enforce Φ(terminal)=0 for PBRS invariance (Wiewiora et al. 2003) self._last_potential = 0.0 + eps = 1e-6 + if self.is_pbrs_invariant_mode() and abs(self._total_reward_shaping) > eps: + logger.warning( + "PBRS mode %s invariance deviation: |sum Δ|=%.6f > eps=%.6f", + self._exit_potential_mode, + self._total_reward_shaping, + eps, + ) return ( self._get_observation(), reward, diff --git a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py index f618c7b..c906355 100644 --- a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py +++ b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py @@ -74,8 +74,8 @@ class QuickAdapterRegressorV3(BaseRegressionModel): max(int(self.max_system_threads / 4), 1), ), "storage": "file", - "continuous": True, - "warm_start": True, + "continuous": False, + "warm_start": False, "n_startup_trials": 15, "n_trials": 50, "timeout": 7200, @@ -960,14 +960,14 @@ class QuickAdapterRegressorV3(BaseRegressionModel): # "hamming", # "jaccard", "jensenshannon", - # "kulczynski1", # deprecated since version 1.15.0 + # "kulczynski1", # Deprecated in SciPy ≥ 1.15.0; do not use. "mahalanobis", # "matching", "minkowski", # "rogerstanimoto", # "russellrao", "seuclidean", - # "sokalmichener", # deprecated since version 1.15.0 + # "sokalmichener", # Deprecated in SciPy ≥ 1.15.0; do not use. # "sokalsneath", "sqeuclidean", # "yule",