From c2f1bbf44a86004c8cc2c66b478cfbe4b4889d2c Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?=
Date: Wed, 15 Oct 2025 01:35:57 +0200
Subject: [PATCH] fix(reforcexy): add non-canonical exit to use PBRS additives
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jérôme Benoit
---
 ReforceXY/reward_space_analysis/README.md     |  35 ++-
 .../reward_space_analysis.py                  | 233 +++++++++++-------
 ReforceXY/reward_space_analysis/test_cli.py   |  19 +-
 .../test_reward_space_analysis.py             | 140 ++++++++++-
 ReforceXY/user_data/freqaimodels/ReforceXY.py |  55 +++--
 5 files changed, 365 insertions(+), 117 deletions(-)

diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md
index 35b5f1c..a498e02 100644
--- a/ReforceXY/reward_space_analysis/README.md
+++ b/ReforceXY/reward_space_analysis/README.md
@@ -318,7 +318,7 @@ _PBRS (Potential-Based Reward Shaping) configuration:_

 - `potential_gamma` (default: 0.95) - Discount factor γ for PBRS potential term (0 ≤ γ ≤ 1)
-- `potential_softsign_sharpness` (default: 1.0) - Sharpness parameter for softsign_sharp transform (smaller = sharper)
-- `exit_potential_mode` (default: canonical) - Exit potential mode: 'canonical' (Φ=0), 'progressive_release', 'spike_cancel', 'retain_previous'
+- `potential_softsign_sharpness` (default: 1.0) - Sharpness parameter for softsign_sharp transform (larger = sharper)
+- `exit_potential_mode` (default: canonical) - Exit potential mode: 'canonical' (Φ=0, preserves invariance, disables additives), 'non-canonical' (Φ=0, allows additives, breaks invariance), 'progressive_release', 'spike_cancel', 'retain_previous'
 - `exit_potential_decay` (default: 0.5) - Decay factor for progressive_release exit mode (0 ≤ decay ≤ 1)
 - `hold_potential_enabled` (default: true) - Enable PBRS hold potential function Φ(s)
 - `hold_potential_scale` (default: 1.0) - Scale factor for hold potential function
@@ -342,10 +342,10 @@ _PBRS (Potential-Based Reward Shaping) configuration:_

 |-----------|---------|-------|-----------------|----------|
 | `tanh` | tanh(x) | (-1, 1) | Smooth sigmoid, symmetric around 0 | Balanced PnL/duration transforms (default) |
 | `softsign` | x / (1 + \|x\|) | (-1, 1) | Smoother than tanh, linear near 0 | Less aggressive saturation |
-| `softsign_sharp` | x / (sharpness + \|x\|) | (-1, 1) | Tunable sharpness via `potential_softsign_sharpness` | Custom saturation control |
+| `softsign_sharp` | (sharpness * x) / (1 + \|sharpness * x\|) | (-1, 1) | Tunable sharpness via `potential_softsign_sharpness` | Custom saturation control |
 | `arctan` | (2/π) × arctan(x) | (-1, 1) | Slower saturation than tanh | Wide dynamic range |
 | `logistic` | 2 / (1 + e^(-x)) - 1 | (-1, 1) | Equivalent to tanh(x/2), gentler curve | Mild non-linearity |
-| `asinh_norm` | asinh(x) / asinh(10) | (-1, 1) | Normalized asinh, handles large values | Extreme outlier robustness |
+| `asinh_norm` | x / √(1 + x²) | (-1, 1) | Normalized asinh-like transform | Extreme outlier robustness |
 | `clip` | clip(x, -1, 1) | [-1, 1] | Hard clipping at ±1 | Preserve linearity within bounds |

 _Invariant / safety controls:_
@@ -393,6 +393,27 @@ Use strict mode in CI or research contexts requiring hard guarantees; keep defau

 - When set, skips computation and export of partial dependence CSV files, reducing runtime (often 30–60% faster for large sample sizes) at the cost of losing marginal response curve inspection.
 - Feature importance (RandomForest Gini importance + permutation importance) is still computed.
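+
+A typical invocation that keeps feature importance but skips the partial dependence export (sample size and output directory are illustrative):
+
+```bash
+python reward_space_analysis.py \
+    --num_samples 50000 \
+    --skip_partial_dependence \
+    --output fast_no_pd
+```
+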
+**`--skip-feature-analysis`** (flag, default: disabled)
+
+- Skips the entire model-based feature analysis block: no RandomForest training, no permutation importance, no `feature_importance.csv`, no `partial_dependence_*.csv` (regardless of `--skip_partial_dependence`).
+- Automatically suppresses any partial dependence computation even if `--skip_partial_dependence` is not provided (strict superset of that flag).
+- Useful for ultra-fast smoke/CI runs or very low-sample exploratory checks (e.g. `--num_samples < 4`) where the model would not be statistically meaningful.
+
+Hierarchy / precedence of skip flags:
+
+| Scenario | `--skip-feature-analysis` | `--skip_partial_dependence` | Feature Importance | Partial Dependence | Report Section 4 |
+|----------|---------------------------|-----------------------------|--------------------|-------------------|------------------|
+| Default (no flags) | ✗ | ✗ | Yes | Yes | Full (R², top features, exported data) |
+| PD only skipped | ✗ | ✓ | Yes | No | Full (PD line shows skipped note) |
+| Feature analysis skipped | ✓ | ✗ | No | No | Marked “(skipped)” with reason(s) |
+| Both flags | ✓ | ✓ | No | No | Marked “(skipped)”; PD flag noted as redundant |
+
+Additional notes:
+
+- If `--num_samples < 4`, feature analysis is automatically skipped (insufficient rows to perform a train/test split) and the summary marks the section as skipped with the reason.
+- Providing `--skip_partial_dependence` together with `--skip-feature-analysis` is harmless; the report clarifies the redundancy.
+- Skipping feature analysis significantly reduces runtime and memory footprint for large `--num_samples` (avoids building a 400-tree forest and running permutation loops).
+
 ### Reproducibility Model

 | Component | Controlled By | Notes |
@@ -546,12 +567,18 @@ python reward_space_analysis.py \
     --params hold_penalty_scale=0.5 \
     --output aggressive_hold

-# Test PBRS configurations
+# Canonical PBRS (strict invariance, additives disabled)
 python reward_space_analysis.py \
     --num_samples 25000 \
     --params hold_potential_enabled=true entry_additive_enabled=true exit_additive_enabled=false exit_potential_mode=canonical \
     --output pbrs_canonical

+# Non-canonical PBRS (allows additives with Φ(terminal)=0, breaks invariance)
+python reward_space_analysis.py \
+    --num_samples 25000 \
+    --params hold_potential_enabled=true entry_additive_enabled=true exit_additive_enabled=true exit_potential_mode=non-canonical \
+    --output pbrs_non_canonical
+
 python reward_space_analysis.py \
     --num_samples 25000 \
     --params hold_potential_transform_pnl=softsign_sharp potential_softsign_sharpness=0.5 \

diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py
index a5af370..9e5c252 100644
--- a/ReforceXY/reward_space_analysis/reward_space_analysis.py
+++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py
@@ -176,6 +176,7 @@ ALLOWED_TRANSFORMS = {
 }
 ALLOWED_EXIT_POTENTIAL_MODES = {
     "canonical",
+    "non-canonical",
     "progressive_release",
     "spike_cancel",
     "retain_previous",
@@ -213,7 +214,7 @@ DEFAULT_MODEL_REWARD_PARAMETERS: RewardParams = {
     # Discount factor γ for potential term (0 ≤ γ ≤ 1)
     "potential_gamma": POTENTIAL_GAMMA_DEFAULT,
     "potential_softsign_sharpness": 1.0,
-    # Exit potential modes: canonical | progressive_release | spike_cancel | retain_previous
+    # Exit potential modes: canonical | non-canonical | progressive_release | spike_cancel | retain_previous
     "exit_potential_mode": "canonical",
     "exit_potential_decay": 0.5,
     # Hold potential (PBRS function Φ)
@@ -259,7 +260,7 @@ DEFAULT_MODEL_REWARD_PARAMETERS_HELP: Dict[str, str] = {
     # PBRS parameters
     "potential_gamma": "Discount factor γ for PBRS potential-based reward shaping (0 ≤ γ ≤ 1).",
-    "potential_softsign_sharpness": "Sharpness parameter for softsign_sharp transform (smaller = sharper).",
-    "exit_potential_mode": "Exit potential mode: 'canonical' (Φ=0), 'progressive_release', 'spike_cancel', 'retain_previous'.",
+    "potential_softsign_sharpness": "Sharpness parameter for softsign_sharp transform (larger = sharper).",
+    "exit_potential_mode": "Exit potential mode: 'canonical' (Φ=0 & additives disabled), 'non-canonical' (Φ=0 & additives allowed), 'progressive_release', 'spike_cancel', 'retain_previous'.",
     "exit_potential_decay": "Decay factor for progressive_release exit mode (0 ≤ decay ≤ 1).",
     "hold_potential_enabled": "Enable PBRS hold potential function Φ(s).",
     "hold_potential_scale": "Scale factor for hold potential function.",
@@ -2388,6 +2389,11 @@ def build_argument_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser(
         description="Synthetic stress-test of the ReforceXY reward shaping logic."
     )
+    parser.add_argument(
+        "--skip-feature-analysis",
+        action="store_true",
+        help="Skip feature importance and model-based analysis (report section 4).",
+    )
     parser.add_argument(
         "--num_samples",
         type=int,
@@ -2529,6 +2535,7 @@ def write_complete_statistical_analysis(
     strict_diagnostics: bool = False,
     bootstrap_resamples: int = 10000,
     skip_partial_dependence: bool = False,
+    skip_feature_analysis: bool = False,
 ) -> None:
     """Generate a single comprehensive statistical analysis report with enhanced tests."""
     output_dir.mkdir(parents=True, exist_ok=True)
@@ -2579,21 +2586,25 @@ def write_complete_statistical_analysis(
         df, profit_target, max_trade_duration
     )

-    # Model analysis
-    importance_df, analysis_stats, partial_deps, _model = _perform_feature_analysis(
-        df, seed, skip_partial_dependence=skip_partial_dependence
-    )
-
-    # Save feature importance CSV
-    importance_df.to_csv(output_dir / "feature_importance.csv", index=False)
-
-    # Save partial dependence CSVs
-    if not skip_partial_dependence:
-        for feature, pd_df in partial_deps.items():
-            pd_df.to_csv(
-                output_dir / f"partial_dependence_{feature}.csv",
-                index=False,
-            )
+    # Model analysis: skip if requested or not enough samples
+    importance_df = None
+    analysis_stats = None
+    partial_deps = {}
+    if skip_feature_analysis or len(df) < 4:
+        print("Skipping feature analysis: flag set or insufficient samples (<4).")
+    else:
+        importance_df, analysis_stats, partial_deps, _model = _perform_feature_analysis(
+            df, seed, skip_partial_dependence=skip_partial_dependence
+        )
+        # Save feature importance CSV
+        importance_df.to_csv(output_dir / "feature_importance.csv", index=False)
+        # Save partial dependence CSVs
+        if not skip_partial_dependence:
+            for feature, pd_df in partial_deps.items():
+                pd_df.to_csv(
+                    output_dir / f"partial_dependence_{feature}.csv",
+                    index=False,
+                )

     # Enhanced statistics
     test_seed = (
@@ -2859,24 +2870,56 @@ def write_complete_statistical_analysis(
             pbrs_stats_df.index.name = "component"
             f.write(_df_to_md(pbrs_stats_df, index_name="component", ndigits=6))

-            # PBRS invariance check (canonical mode)
+            # PBRS invariance check
             total_shaping = df["reward_shaping"].sum()
             entry_add_total = df.get("reward_entry_additive", pd.Series([0])).sum()
             exit_add_total = df.get("reward_exit_additive", pd.Series([0])).sum()
+
+            # Get configuration for proper invariance assessment
+            reward_params = (
+                df.attrs.get("reward_params", {}) if hasattr(df, "attrs") else {}
+            )
+            exit_potential_mode = reward_params.get("exit_potential_mode", "canonical")
"canonical") + entry_additive_enabled = reward_params.get("entry_additive_enabled", False) + exit_additive_enabled = reward_params.get("exit_additive_enabled", False) + + # True invariance requires canonical mode AND no additives + is_theoretically_invariant = exit_potential_mode == "canonical" and not ( + entry_additive_enabled or exit_additive_enabled + ) + shaping_near_zero = abs(total_shaping) < PBRS_INVARIANCE_TOL + # Prepare invariance summary markdown block - if abs(total_shaping) < PBRS_INVARIANCE_TOL: - invariance_status = "✅ Canonical" - invariance_note = "Total shaping ≈ 0 (canonical invariance)" + if is_theoretically_invariant: + if shaping_near_zero: + invariance_status = "✅ Canonical" + invariance_note = "Theoretical invariance preserved (canonical mode, no additives, Σ≈0)" + else: + invariance_status = "⚠️ Canonical (with warning)" + invariance_note = f"Canonical mode but unexpected shaping sum = {total_shaping:.6f}" else: invariance_status = "❌ Non-canonical" - invariance_note = f"Total shaping = {total_shaping:.6f} (non-zero)" + reasons = [] + if exit_potential_mode != "canonical": + reasons.append(f"exit_potential_mode='{exit_potential_mode}'") + if entry_additive_enabled or exit_additive_enabled: + additive_types = [] + if entry_additive_enabled: + additive_types.append("entry") + if exit_additive_enabled: + additive_types.append("exit") + reasons.append(f"additives={additive_types}") + invariance_note = f"Modified for flexibility: {', '.join(reasons)}" # Summarize PBRS invariance f.write("**PBRS Invariance Summary:**\n\n") f.write("| Field | Value |\n") f.write("|-------|-------|\n") - f.write(f"| Invariance | {invariance_status} |\n") - f.write(f"| Note | {invariance_note} |\n") + f.write(f"| Invariance Status | {invariance_status} |\n") + f.write(f"| Analysis Note | {invariance_note} |\n") + f.write(f"| Exit Potential Mode | {exit_potential_mode} |\n") + f.write(f"| Entry Additive Enabled | {entry_additive_enabled} |\n") + f.write(f"| Exit Additive Enabled | {exit_additive_enabled} |\n") f.write(f"| Σ Shaping Reward | {total_shaping:.6f} |\n") f.write(f"| Abs Σ Shaping Reward | {abs(total_shaping):.6e} |\n") f.write(f"| Σ Entry Additive | {entry_add_total:.6f} |\n") @@ -2888,31 +2931,44 @@ def write_complete_statistical_analysis( # Section 4: Feature Importance Analysis f.write("---\n\n") f.write("## 4. 
Feature Importance\n\n") - f.write( - "Machine learning analysis to identify which features most influence total reward.\n\n" - ) - f.write("**Model:** Random Forest Regressor (400 trees) \n") - f.write(f"**R² Score:** {analysis_stats['r2_score']:.4f}\n\n") - - f.write("### 4.1 Top 10 Features by Importance\n\n") - top_imp = importance_df.head(10).copy().reset_index(drop=True) - # Render as markdown without index column - header = "| feature | importance_mean | importance_std |\n" - sep = "|---------|------------------|----------------|\n" - rows = [] - for _, r in top_imp.iterrows(): - rows.append( - f"| {r['feature']} | {_fmt_val(r['importance_mean'], 6)} | {_fmt_val(r['importance_std'], 6)} |" - ) - f.write(header + sep + "\n".join(rows) + "\n\n") - f.write("**Exported Data:**\n") - f.write("- Full feature importance: `feature_importance.csv`\n") - if not skip_partial_dependence: - f.write("- Partial dependence plots: `partial_dependence_*.csv`\n\n") + if skip_feature_analysis or len(df) < 4: + reason = [] + if skip_feature_analysis: + reason.append("flag --skip-feature-analysis set") + if len(df) < 4: + reason.append("insufficient samples <4") + reason_str = "; ".join(reason) if reason else "skipped" + f.write(f"_Skipped ({reason_str})._\n\n") + if skip_partial_dependence: + f.write( + "_Note: --skip_partial_dependence is redundant when feature analysis is skipped._\n\n" + ) else: f.write( - "- Partial dependence plots: (skipped via --skip_partial_dependence)\n\n" + "Machine learning analysis to identify which features most influence total reward.\n\n" ) + f.write("**Model:** Random Forest Regressor (400 trees) \n") + f.write(f"**R² Score:** {analysis_stats['r2_score']:.4f}\n\n") + + f.write("### 4.1 Top 10 Features by Importance\n\n") + top_imp = importance_df.head(10).copy().reset_index(drop=True) + # Render as markdown without index column + header = "| feature | importance_mean | importance_std |\n" + sep = "|---------|------------------|----------------|\n" + rows = [] + for _, r in top_imp.iterrows(): + rows.append( + f"| {r['feature']} | {_fmt_val(r['importance_mean'], 6)} | {_fmt_val(r['importance_std'], 6)} |" + ) + f.write(header + sep + "\n".join(rows) + "\n\n") + f.write("**Exported Data:**\n") + f.write("- Full feature importance: `feature_importance.csv`\n") + if not skip_partial_dependence: + f.write("- Partial dependence plots: `partial_dependence_*.csv`\n\n") + else: + f.write( + "- Partial dependence plots: (skipped via --skip_partial_dependence)\n\n" + ) # Section 5: Statistical Validation if hypothesis_tests: @@ -3074,9 +3130,14 @@ def write_complete_statistical_analysis( f.write( "3. **Component Analysis** - Relationships between rewards and conditions (including PBRS)\n" ) - f.write( - "4. **Feature Importance** - Machine learning analysis of key drivers\n" - ) + if skip_feature_analysis or len(df) < 4: + f.write( + "4. **Feature Importance** - (skipped) Machine learning analysis of key drivers\n" + ) + else: + f.write( + "4. **Feature Importance** - Machine learning analysis of key drivers\n" + ) f.write( "5. 
**Statistical Validation** - Hypothesis tests and confidence intervals\n" ) @@ -3101,10 +3162,13 @@ def write_complete_statistical_analysis( f.write("\n") f.write("**Generated Files:**\n") f.write("- `reward_samples.csv` - Raw synthetic samples\n") - f.write("- `feature_importance.csv` - Complete feature importance rankings\n") - f.write( - "- `partial_dependence_*.csv` - Partial dependence data for visualization\n" - ) + if not skip_feature_analysis and len(df) >= 4: + f.write( + "- `feature_importance.csv` - Complete feature importance rankings\n" + ) + f.write( + "- `partial_dependence_*.csv` - Partial dependence data for visualization\n" + ) def main() -> None: @@ -3230,6 +3294,7 @@ def main() -> None: strict_diagnostics=bool(getattr(args, "strict_diagnostics", False)), bootstrap_resamples=getattr(args, "bootstrap_resamples", 10000), skip_partial_dependence=bool(getattr(args, "skip_partial_dependence", False)), + skip_feature_analysis=bool(getattr(args, "skip_feature_analysis", False)), ) print( f"Complete statistical analysis saved to: {args.output / 'statistical_analysis.md'}" @@ -3282,34 +3347,31 @@ def main() -> None: # === PBRS TRANSFORM FUNCTIONS === -def _apply_transform_tanh(value: float, scale: float = 1.0) -> float: - """tanh(scale*value) ∈ (-1,1).""" - return float(np.tanh(scale * value)) +def _apply_transform_tanh(value: float) -> float: + """tanh(value) ∈ (-1,1).""" + return float(np.tanh(value)) -def _apply_transform_softsign(value: float, scale: float = 1.0) -> float: - """softsign: x/(1+|x|) with x=scale*value.""" - x = scale * value +def _apply_transform_softsign(value: float) -> float: + """softsign: value/(1+|value|).""" + x = value return float(x / (1.0 + abs(x))) -def _apply_transform_softsign_sharp( - value: float, scale: float = 1.0, sharpness: float = 1.0 -) -> float: - """softsign_sharp: x/(sharpness+|x|) with x=scale*value (smaller sharpness = steeper).""" - x = scale * value - return float(x / (sharpness + abs(x))) +def _apply_transform_softsign_sharp(value: float, sharpness: float = 1.0) -> float: + """softsign_sharp: (sharpness*value)/(1+|sharpness*value|) - multiplicative sharpness.""" + xs = sharpness * value + return float(xs / (1.0 + abs(xs))) -def _apply_transform_arctan(value: float, scale: float = 1.0) -> float: - """arctan normalized: (2/pi)*atan(scale*value) ∈ (-1,1).""" - x = scale * value - return float((2.0 / math.pi) * math.atan(x)) +def _apply_transform_arctan(value: float) -> float: + """arctan normalized: (2/pi)*atan(value) ∈ (-1,1).""" + return float((2.0 / math.pi) * math.atan(value)) -def _apply_transform_logistic(value: float, scale: float = 1.0) -> float: - """Overflow‑safe logistic transform mapped to (-1,1): 2σ(kx)−1 where k=scale.""" - x = scale * value +def _apply_transform_logistic(value: float) -> float: + """Overflow‑safe logistic transform mapped to (-1,1): 2σ(x)−1.""" + x = value try: if x >= 0: z = math.exp(-x) # z in (0,1] @@ -3321,15 +3383,14 @@ def _apply_transform_logistic(value: float, scale: float = 1.0) -> float: return 1.0 if x > 0 else -1.0 -def _apply_transform_asinh_norm(value: float, scale: float = 1.0) -> float: - """Normalized asinh: x / sqrt(1 + x²) producing range (-1,1).""" - scaled = scale * value - return float(scaled / math.hypot(1.0, scaled)) +def _apply_transform_asinh_norm(value: float) -> float: + """Normalized asinh: value / sqrt(1 + value²) producing range (-1,1).""" + return float(value / math.hypot(1.0, value)) -def _apply_transform_clip(value: float, scale: float = 1.0) -> float: - 
"""clip(scale*value) to [-1,1].""" - return float(np.clip(scale * value, -1.0, 1.0)) +def _apply_transform_clip(value: float) -> float: + """clip(value) to [-1,1].""" + return float(np.clip(value, -1.0, 1.0)) def apply_transform(transform_name: str, value: float, **kwargs: Any) -> float: @@ -3350,8 +3411,7 @@ def apply_transform(transform_name: str, value: float, **kwargs: Any) -> float: RewardDiagnosticsWarning, stacklevel=2, ) - return _apply_transform_tanh(value, **kwargs) - + return _apply_transform_tanh(value) return transforms[transform_name](value, **kwargs) @@ -3465,7 +3525,8 @@ def _compute_exit_potential(last_potential: float, params: RewardParams) -> floa """Compute next potential Φ(s') for closing/exit transitions. Semantics: - - canonical: Φ' = 0.0 + - canonical: Φ' = 0.0 (preserves invariance, disables additives) + - non-canonical: Φ' = 0.0 (allows additives, breaks invariance) - progressive_release: Φ' = Φ * (1 - decay) with decay clamped to [0,1] - spike_cancel: Φ' = Φ / γ (neutralizes shaping spike ≈ 0 net effect) if γ>0 else Φ - retain_previous: Φ' = Φ @@ -3474,7 +3535,7 @@ def _compute_exit_potential(last_potential: float, params: RewardParams) -> floa coerced to 0.0. """ mode = _get_str_param(params, "exit_potential_mode", "canonical") - if mode == "canonical": + if mode == "canonical" or mode == "non-canonical": return _fail_safely("canonical_exit_potential") if mode == "progressive_release": @@ -3503,10 +3564,10 @@ def _compute_exit_potential(last_potential: float, params: RewardParams) -> floa next_potential = last_potential * (1.0 - decay) elif mode == "spike_cancel": gamma = _get_potential_gamma(params) - if gamma > 0.0 and np.isfinite(gamma): - next_potential = last_potential / gamma - else: + if gamma <= 0.0 or not np.isfinite(gamma): next_potential = last_potential + else: + next_potential = last_potential / gamma elif mode == "retain_previous": next_potential = last_potential else: diff --git a/ReforceXY/reward_space_analysis/test_cli.py b/ReforceXY/reward_space_analysis/test_cli.py index 71e2f6d..68bb555 100644 --- a/ReforceXY/reward_space_analysis/test_cli.py +++ b/ReforceXY/reward_space_analysis/test_cli.py @@ -79,6 +79,7 @@ def build_arg_matrix( ) -> List[ConfigTuple]: exit_potential_modes = [ "canonical", + "non-canonical", "progressive_release", "retain_previous", "spike_cancel", @@ -126,6 +127,7 @@ def run_scenario( strict: bool, bootstrap_resamples: int, timeout: int, + skip_feature_analysis: bool = False, ) -> ScenarioResult: ( exit_potential_mode, @@ -161,6 +163,8 @@ def run_scenario( ] # Forward bootstrap resamples explicitly cmd += ["--bootstrap_resamples", str(bootstrap_resamples)] + if skip_feature_analysis: + cmd.append("--skip-feature-analysis") if strict: cmd.append("--strict_diagnostics") start = time.perf_counter() @@ -196,7 +200,15 @@ def run_scenario( def main(): parser = argparse.ArgumentParser() parser.add_argument( - "--samples", type=int, default=40, help="num synthetic samples per scenario" + "--samples", + type=int, + default=40, + help="num synthetic samples per scenario (minimum 4 for feature analysis)", + ) + parser.add_argument( + "--skip-feature-analysis", + action="store_true", + help="Skip feature importance and model-based analysis for all scenarios.", ) parser.add_argument( "--out-dir", @@ -244,8 +256,8 @@ def main(): # Basic validation if args.max_scenarios <= 0: parser.error("--max-scenarios must be > 0") - if args.samples <= 0: - parser.error("--samples must be > 0") + if args.samples < 4 and not 
args.skip_feature_analysis: + parser.error("--samples must be >= 4 unless --skip-feature-analysis is set") if args.strict_sample < 0: parser.error("--strict-sample must be >= 0") if args.bootstrap_resamples <= 0: @@ -281,6 +293,7 @@ def main(): strict=strict_flag, bootstrap_resamples=args.bootstrap_resamples, timeout=args.per_scenario_timeout, + skip_feature_analysis=args.skip_feature_analysis, ) results.append(res) status = res["status"] diff --git a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py index 0f8c6ae..068375d 100644 --- a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py @@ -3080,7 +3080,7 @@ class TestPBRSIntegration(RewardSpaceTestBase): """Tests for PBRS (Potential-Based Reward Shaping) integration.""" def test_tanh_transform(self): - """tanh transform: bounded in [-1,1], symmetric.""" + """tanh transform: bounded in (-1,1), symmetric.""" self.assertAlmostEqualFloat(apply_transform("tanh", 0.0), 0.0) self.assertAlmostEqualFloat(apply_transform("tanh", 1.0), math.tanh(1.0)) self.assertAlmostEqualFloat(apply_transform("tanh", -1.0), math.tanh(-1.0)) @@ -3095,9 +3095,46 @@ class TestPBRSIntegration(RewardSpaceTestBase): self.assertTrue(abs(apply_transform("softsign", 100.0)) < 1.0) self.assertTrue(abs(apply_transform("softsign", -100.0)) < 1.0) + def test_softsign_sharp_transform(self): + """softsign_sharp transform: (s*x)/(1+|s*x|) in (-1,1) with sharpness s.""" + # Baseline: s=1 should match softsign + self.assertAlmostEqualFloat( + apply_transform("softsign_sharp", 0.0, sharpness=1.0), 0.0 + ) + self.assertAlmostEqualFloat( + apply_transform("softsign_sharp", 1.0, sharpness=1.0), + apply_transform("softsign", 1.0), + ) + # Higher sharpness => faster saturation + v_low = apply_transform("softsign_sharp", 0.5, sharpness=1.0) + v_high = apply_transform("softsign_sharp", 0.5, sharpness=4.0) + self.assertTrue(abs(v_high) > abs(v_low)) + # Boundedness stress + self.assertTrue( + abs(apply_transform("softsign_sharp", 100.0, sharpness=10.0)) < 1.0 + ) + self.assertTrue( + abs(apply_transform("softsign_sharp", -100.0, sharpness=10.0)) < 1.0 + ) + + def test_asinh_norm_transform(self): + """asinh_norm transform: x/sqrt(1+x^2) in (-1,1).""" + self.assertAlmostEqualFloat(apply_transform("asinh_norm", 0.0), 0.0) + # Symmetry + self.assertAlmostEqualFloat( + apply_transform("asinh_norm", 1.2345), + -apply_transform("asinh_norm", -1.2345), + tolerance=1e-12, + ) + # Monotonicity (sampled) + vals = [apply_transform("asinh_norm", x) for x in [-5.0, -1.0, 0.0, 1.0, 5.0]] + self.assertTrue(all(vals[i] < vals[i + 1] for i in range(len(vals) - 1))) + # Bounded + self.assertTrue(abs(apply_transform("asinh_norm", 1e6)) < 1.0) + self.assertTrue(abs(apply_transform("asinh_norm", -1e6)) < 1.0) + def test_arctan_transform(self): - """arctan transform: normalized (2/pi)atan(x) bounded [-1,1].""" - # Environment uses normalized arctan: (2/pi)*atan(x) + """arctan transform: normalized (2/pi)atan(x) bounded (-1,1).""" self.assertAlmostEqualFloat(apply_transform("arctan", 0.0), 0.0) self.assertAlmostEqualFloat( apply_transform("arctan", 1.0), @@ -3116,6 +3153,38 @@ class TestPBRSIntegration(RewardSpaceTestBase): self.assertTrue(-1 < apply_transform("logistic", 10.0) < 1) self.assertTrue(-1 < apply_transform("logistic", -10.0) < 1) + def test_logistic_equivalence_tanh_half(self): + """logistic(x) must equal tanh(x/2) within tight tolerance across 
representative domain. + + Uses identity: 2/(1+e^{-x}) - 1 = tanh(x/2). + """ + samples = [ + 0.0, + 1e-6, + -1e-6, + 0.5, + -0.5, + 1.0, + -1.0, + 2.5, + -2.5, + 5.0, + -5.0, + 10.0, + -10.0, + ] + for x in samples: + with self.subTest(x=x): + v_log = apply_transform("logistic", x) + v_tanh = math.tanh(x / 2.0) + tol = 1e-12 if abs(x) <= 5 else 1e-10 + self.assertAlmostEqualFloat( + v_log, + v_tanh, + tolerance=tol, + msg=f"Mismatch logistic vs tanh(x/2) at x={x}: {v_log} vs {v_tanh}", + ) + def test_clip_transform(self): """clip transform: clamp to [-1,1].""" self.assertAlmostEqualFloat(apply_transform("clip", 0.0), 0.0) @@ -3357,6 +3426,71 @@ class TestPBRSIntegration(RewardSpaceTestBase): self.assertLessEqual(abs(shaping), 1.0) self.assertTrue(np.isfinite(next_potential)) + def test_pbrs_non_canonical_runtime_behavior(self): + """Non-canonical mode: Φ'=0 at terminal but additives remain enabled (should not be auto-disabled). + + We construct a simple scenario: + - enable hold potential (so Φ(s) != 0) + - set exit_potential_mode = 'non-canonical' + - enable both entry & exit additives with small scales so they contribute deterministically + - take a terminal transition + + Expectations: + - canonical auto-disabling does NOT occur (additives stay True) + - next_potential returned by _compute_exit_potential is exactly 0.0 (Φ'=0) + - shaping_reward = γ * 0 - Φ(s) = -Φ(s) + - total_reward = base + shaping + entry_add + exit_add (all finite) + - invariance would be broken (but we just assert mechanism, not report here) + """ + params = DEFAULT_MODEL_REWARD_PARAMETERS.copy() + params.update( + { + "hold_potential_enabled": True, + "hold_potential_scale": 1.0, + "exit_potential_mode": "non-canonical", + "entry_additive_enabled": True, + "exit_additive_enabled": True, + # deterministic small values + "entry_additive_scale": 0.5, + "exit_additive_scale": 0.5, + "entry_additive_gain": 1.0, + "exit_additive_gain": 1.0, + } + ) + base_reward = 0.123 + current_pnl = 0.2 + current_duration_ratio = 0.4 + # terminal next state values (ignored for potential since Φ'=0 in non-canonical exit path) + next_pnl = 0.0 + next_duration_ratio = 0.0 + total, shaping, next_potential = apply_potential_shaping( + base_reward=base_reward, + current_pnl=current_pnl, + current_duration_ratio=current_duration_ratio, + next_pnl=next_pnl, + next_duration_ratio=next_duration_ratio, + is_terminal=True, + last_potential=0.789, # arbitrary, should be ignored for Φ' + params=params, + ) + # Additives should not have been disabled + self.assertTrue(params["entry_additive_enabled"]) + self.assertTrue(params["exit_additive_enabled"]) + # Next potential is forced to 0 + self.assertAlmostEqual(next_potential, 0.0, places=12) + # Compute current potential independently to assert shaping = -Φ(s) + current_potential = _compute_hold_potential( + current_pnl, + current_duration_ratio, + {"hold_potential_enabled": True, "hold_potential_scale": 1.0}, + ) + # shaping should equal -current_potential within tolerance + self.assertAlmostEqual(shaping, -current_potential, delta=1e-9) + # Total reward includes additives: ensure total - base - shaping differs from 0 (i.e., additives present) + residual = total - base_reward - shaping + self.assertNotAlmostEqual(residual, 0.0, delta=1e-12) + self.assertTrue(np.isfinite(total)) + class TestReportFormatting(RewardSpaceTestBase): """Tests for report formatting elements not previously covered.""" diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py 
b/ReforceXY/user_data/freqaimodels/ReforceXY.py index bdf005f..3512947 100644 --- a/ReforceXY/user_data/freqaimodels/ReforceXY.py +++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py @@ -1372,7 +1372,8 @@ class MyRLEnv(Base5ActionRLEnv): ) # === EXIT POTENTIAL MODE === # exit_potential_mode options: - # 'canonical' -> Φ(s')=0 (baseline PBRS, preserves invariance) + # 'canonical' -> Φ(s')=0 (preserves invariance, disables additives) + # 'non-canonical' -> Φ(s')=0 (allows additives, breaks invariance) # 'progressive_release' -> Φ(s')=Φ(s)*(1-decay_factor) # 'spike_cancel' -> Φ(s')=Φ(s)/γ (Δ ≈ 0, cancels shaping) # 'retain_previous' -> Φ(s')=Φ(s) @@ -1381,6 +1382,7 @@ class MyRLEnv(Base5ActionRLEnv): ) _allowed_exit_modes = { "canonical", + "non-canonical", "progressive_release", "spike_cancel", "retain_previous", @@ -1445,16 +1447,17 @@ class MyRLEnv(Base5ActionRLEnv): # === PBRS INVARIANCE CHECKS === if self._exit_potential_mode == "canonical": if self._entry_additive_enabled or self._exit_additive_enabled: - if self._entry_additive_enabled: - logger.info( - "Disabling entry additive to preserve PBRS invariance (canonical mode)." - ) - if self._exit_additive_enabled: - logger.info( - "Disabling exit additive to preserve PBRS invariance (canonical mode)." - ) + logger.info( + "Canonical mode: additive rewards disabled with Φ(terminal)=0. PBRS invariance is preserved. " + "To use additive rewards, set exit_potential_mode='non-canonical'." + ) self._entry_additive_enabled = False self._exit_additive_enabled = False + elif self._exit_potential_mode == "non-canonical": + if self._entry_additive_enabled or self._exit_additive_enabled: + logger.info( + "Non-canonical mode: additive rewards enabled with Φ(terminal)=0. PBRS invariance is intentionally broken." + ) if MyRLEnv.is_unsupported_pbrs_config( self._hold_potential_enabled, getattr(self, "add_state_info", False) @@ -1688,12 +1691,15 @@ class MyRLEnv(Base5ActionRLEnv): return (2.0 / math.pi) * math.atan(x) if name == "logistic": - if x >= 0: - z = math.exp(-x) # z in (0,1] - return (1.0 - z) / (1.0 + z) - else: - z = math.exp(x) # z in (0,1] - return (z - 1.0) / (z + 1.0) + try: + if x >= 0: + z = math.exp(-x) # z in (0,1] + return (1.0 - z) / (1.0 + z) + else: + z = math.exp(x) # z in (0,1] + return (z - 1.0) / (z + 1.0) + except OverflowError: + return 1.0 if x > 0 else -1.0 if name == "asinh_norm": return x / math.hypot(1.0, x) @@ -1710,7 +1716,7 @@ class MyRLEnv(Base5ActionRLEnv): See ``_apply_potential_shaping`` for complete PBRS documentation. 
""" mode = self._exit_potential_mode - if mode == "canonical": + if mode == "canonical" or mode == "non-canonical": return 0.0 if mode == "progressive_release": decay = self._exit_potential_decay @@ -1821,7 +1827,7 @@ class MyRLEnv(Base5ActionRLEnv): **Bounded Transform Functions** (range [-1,1]): - tanh: smooth saturation, tanh(x) - softsign: x/(1+|x|), gentler than tanh - - softsign_sharp: softsign(sharpness*x), tunable steepness + - softsign_sharp: (sharpness*x)/(1+|sharpness*x|), custom saturation control - arctan: (2/π)*arctan(x), linear near origin - logistic: 2σ(x)-1 where σ(x)=1/(1+e^(-x)), numerically stable implementation - asinh_norm: x/√(1+x²), normalized asinh-like @@ -1944,9 +1950,13 @@ class MyRLEnv(Base5ActionRLEnv): else: shaping_reward = 0.0 self._last_potential = 0.0 - entry_additive = self._compute_entry_additive( - pnl=next_pnl, pnl_target=pnl_target, duration_ratio=next_duration_ratio - ) + entry_additive = 0.0 + if self._entry_additive_enabled and not self.is_pbrs_invariant_mode(): + entry_additive = self._compute_entry_additive( + pnl=next_pnl, + pnl_target=pnl_target, + duration_ratio=next_duration_ratio, + ) self._last_shaping_reward = float(shaping_reward) self._total_shaping_reward += float(shaping_reward) return base_reward + shaping_reward + entry_additive @@ -1964,7 +1974,10 @@ class MyRLEnv(Base5ActionRLEnv): self._total_shaping_reward += float(shaping_reward) return base_reward + shaping_reward elif is_exit: - if self._exit_potential_mode == "canonical": + if ( + self._exit_potential_mode == "canonical" + or self._exit_potential_mode == "non-canonical" + ): next_potential = 0.0 exit_shaping_reward = -prev_potential else: -- 2.43.0