From 4941849a106902bde67c0bcbdb75084bf4bf3ae1 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Wed, 15 Oct 2025 22:50:14 +0200 Subject: [PATCH] refactor(reforcexy): align CLI namespace MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme Benoit --- ReforceXY/reward_space_analysis/README.md | 46 +++++++++---------- .../reward_space_analysis.py | 24 +++++----- ReforceXY/reward_space_analysis/test_cli.py | 32 ++++++------- .../test_reward_space_analysis.py | 17 +++++-- 4 files changed, 63 insertions(+), 56 deletions(-) diff --git a/ReforceXY/reward_space_analysis/README.md b/ReforceXY/reward_space_analysis/README.md index 631796a..0caa2f2 100644 --- a/ReforceXY/reward_space_analysis/README.md +++ b/ReforceXY/reward_space_analysis/README.md @@ -68,7 +68,7 @@ pip install pandas numpy scipy scikit-learn pytest Run: ```shell -python reward_space_analysis.py --num_samples 20000 --output out +python reward_space_analysis.py --num_samples 20000 --out_dir out ``` ## Common Use Cases @@ -76,7 +76,7 @@ python reward_space_analysis.py --num_samples 20000 --output out ### 1. Validate Reward Logic ```shell -python reward_space_analysis.py --num_samples 20000 --output reward_space_outputs +python reward_space_analysis.py --num_samples 20000 --out_dir reward_space_outputs ``` See `statistical_analysis.md` (1–3): positive exit averages (long & short), negative invalid penalties, monotonic idle reduction, zero invariance failures. @@ -88,18 +88,18 @@ See `statistical_analysis.md` (1–3): positive exit averages (long & short), ne python reward_space_analysis.py \ --num_samples 30000 \ --params win_reward_factor=2.0 \ - --output conservative_rewards + --out_dir conservative_rewards python reward_space_analysis.py \ --num_samples 30000 \ --params win_reward_factor=4.0 \ - --output aggressive_rewards + --out_dir aggressive_rewards # Test PBRS potential shaping python reward_space_analysis.py \ --num_samples 30000 \ --params hold_potential_enabled=true potential_gamma=0.9 exit_potential_mode=progressive_release \ - --output pbrs_analysis + --out_dir pbrs_analysis ``` Compare reward distribution & component share deltas across runs. @@ -110,7 +110,7 @@ Compare reward distribution & component share deltas across runs. # Generate detailed analysis python reward_space_analysis.py \ --num_samples 50000 \ - --output debug_analysis + --out_dir debug_analysis ``` Focus: feature importance, shaping activation, invariance drift, extremes. @@ -123,7 +123,7 @@ Focus: feature importance, shaping activation, invariance drift, extremes. python reward_space_analysis.py \ --num_samples 100000 \ --real_episodes path/to/episode_rewards.pkl \ - --output real_vs_synthetic + --out_dir real_vs_synthetic ``` --- @@ -168,7 +168,7 @@ Controls synthetic PnL variance (heteroscedastic; grows with duration): ### Output & Overrides -**`--output`** (path, default: reward_space_outputs) – Output directory (auto-created). +**`--out_dir`** (path, default: reward_space_outputs) – Output directory (auto-created). **`--params`** (k=v ...) – Override reward params. Example: `--params win_reward_factor=3.0 idle_penalty_scale=2.0`. @@ -237,11 +237,11 @@ Invariant toggle: disable only for performance experiments (diagnostics become a **`--skip_partial_dependence`**: skip PD curves (faster). -**`--skip_feature-analysis`**: skip model, importance, PD. +**`--skip_feature_analysis`**: skip model, importance, PD. Hierarchy / precedence of skip flags: -| Scenario | `--skip_feature-analysis` | `--skip_partial_dependence` | Feature Importance | Partial Dependence | Report Section 4 | +| Scenario | `--skip_feature_analysis` | `--skip_partial_dependence` | Feature Importance | Partial Dependence | Report Section 4 | |----------|---------------------------|-----------------------------|--------------------|-------------------|------------------| | Default (no flags) | ✗ | ✗ | Yes | Yes | Full (R², top features, exported data) | | PD only skipped | ✗ | ✓ | Yes | No | Full (PD line shows skipped note) | @@ -262,8 +262,8 @@ Auto-skip if `num_samples < 4`. Patterns: ```shell # Same synthetic data, two different statistical re-analysis runs -python reward_space_analysis.py --num_samples 50000 --seed 123 --stats_seed 9001 --output run_stats1 -python reward_space_analysis.py --num_samples 50000 --seed 123 --stats_seed 9002 --output run_stats2 +python reward_space_analysis.py --num_samples 50000 --seed 123 --stats_seed 9001 --out_dir run_stats1 +python reward_space_analysis.py --num_samples 50000 --seed 123 --stats_seed 9002 --out_dir run_stats2 # Fully reproducible end-to-end (all aspects deterministic) python reward_space_analysis.py --num_samples 50000 --seed 777 @@ -294,25 +294,25 @@ python reward_space_analysis.py \ --num_samples 50000 \ --profit_target 0.05 \ --trading_mode futures \ - --output custom_analysis + --out_dir custom_analysis # Parameter sensitivity testing python reward_space_analysis.py \ --num_samples 30000 \ --params win_reward_factor=3.0 idle_penalty_scale=1.5 \ - --output sensitivity_test + --out_dir sensitivity_test # PBRS potential shaping analysis python reward_space_analysis.py \ --num_samples 40000 \ --params hold_potential_enabled=true exit_potential_mode=spike_cancel potential_gamma=0.95 \ - --output pbrs_test + --out_dir pbrs_test # Real vs synthetic comparison python reward_space_analysis.py \ --num_samples 100000 \ --real_episodes path/to/episode_rewards.pkl \ - --output validation + --out_dir validation ``` --- @@ -374,30 +374,30 @@ Test reward parameter configurations: python reward_space_analysis.py \ --num_samples 25000 \ --params exit_attenuation_mode=power exit_power_tau=0.5 efficiency_weight=0.8 \ - --output custom_test + --out_dir custom_test # Test aggressive hold penalties python reward_space_analysis.py \ --num_samples 25000 \ --params hold_penalty_scale=0.5 \ - --output aggressive_hold + --out_dir aggressive_hold # Canonical PBRS (strict invariance, additives disabled) python reward_space_analysis.py \ --num_samples 25000 \ --params hold_potential_enabled=true entry_additive_enabled=true exit_additive_enabled=false exit_potential_mode=canonical \ - --output pbrs_canonical + --out_dir pbrs_canonical # Non-canonical PBRS (allows additives with Φ(terminal)=0, breaks invariance) python reward_space_analysis.py \ --num_samples 25000 \ --params hold_potential_enabled=true entry_additive_enabled=true exit_additive_enabled=true exit_potential_mode=non-canonical \ - --output pbrs_non_canonical + --out_dir pbrs_non_canonical python reward_space_analysis.py \ --num_samples 25000 \ --params hold_potential_transform_pnl=sigmoid hold_potential_gain=2.0 \ - --output pbrs_sigmoid_transforms + --out_dir pbrs_sigmoid_transforms ``` ### Real Data Comparison @@ -408,7 +408,7 @@ Compare with real trading episodes: python reward_space_analysis.py \ --num_samples 100000 \ --real_episodes path/to/episode_rewards.pkl \ - --output real_vs_synthetic + --out_dir real_vs_synthetic ``` Shift metrics: lower is better (except p-value: higher ⇒ cannot reject equality). @@ -421,7 +421,7 @@ for factor in 1.5 2.0 2.5 3.0; do python reward_space_analysis.py \ --num_samples 20000 \ --params win_reward_factor=$factor \ - --output analysis_factor_$factor + --out_dir analysis_factor_$factor done ``` diff --git a/ReforceXY/reward_space_analysis/reward_space_analysis.py b/ReforceXY/reward_space_analysis/reward_space_analysis.py index 42b344b..b51cd50 100644 --- a/ReforceXY/reward_space_analysis/reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/reward_space_analysis.py @@ -2539,7 +2539,7 @@ def build_argument_parser() -> argparse.ArgumentParser: description="Synthetic stress-test of the ReforceXY reward shaping logic." ) parser.add_argument( - "--skip_feature-analysis", + "--skip_feature_analysis", action="store_true", help="Skip feature importance and model-based analysis for all scenarios.", ) @@ -2623,7 +2623,7 @@ def build_argument_parser() -> argparse.ArgumentParser: help="Enable action masking simulation (default: true).", ) parser.add_argument( - "--output", + "--out_dir", type=Path, default=Path("reward_space_outputs"), help="Output directory for artifacts (default: reward_space_outputs).", @@ -3093,7 +3093,7 @@ def write_complete_statistical_analysis( if skip_feature_analysis or len(df) < 4: reason = [] if skip_feature_analysis: - reason.append("flag --skip_feature-analysis set") + reason.append("flag --skip_feature_analysis set") if len(df) < 4: reason.append("insufficient samples <4") reason_str = "; ".join(reason) if reason else "skipped" @@ -3425,8 +3425,8 @@ def main() -> None: # Attach resolved reward parameters for inline overrides rendering in report df.attrs["reward_params"] = dict(params) - args.output.mkdir(parents=True, exist_ok=True) - csv_path = args.output / "reward_samples.csv" + args.out_dir.mkdir(parents=True, exist_ok=True) + csv_path = args.out_dir / "reward_samples.csv" df.to_csv(csv_path, index=False) sample_output_message = f"Samples saved to {csv_path}" @@ -3441,26 +3441,26 @@ def main() -> None: write_complete_statistical_analysis( df, - args.output, + args.out_dir, max_trade_duration=args.max_trade_duration, profit_target=float(profit_target * risk_reward_ratio), seed=args.seed, real_df=real_df, adjust_method=args.pvalue_adjust, - stats_seed=args.stats_seed - if getattr(args, "stats_seed", None) is not None - else None, + stats_seed=( + args.stats_seed if getattr(args, "stats_seed", None) is not None else None + ), strict_diagnostics=bool(getattr(args, "strict_diagnostics", False)), bootstrap_resamples=getattr(args, "bootstrap_resamples", 10000), skip_partial_dependence=bool(getattr(args, "skip_partial_dependence", False)), skip_feature_analysis=bool(getattr(args, "skip_feature_analysis", False)), ) print( - f"Complete statistical analysis saved to: {args.output / 'statistical_analysis.md'}" + f"Complete statistical analysis saved to: {args.out_dir / 'statistical_analysis.md'}" ) # Generate manifest summarizing key metrics try: - manifest_path = args.output / "manifest.json" + manifest_path = args.out_dir / "manifest.json" resolved_reward_params = dict(params) # already validated/normalized upstream manifest = { "generated_at": pd.Timestamp.now().isoformat(), @@ -3500,7 +3500,7 @@ def main() -> None: print(f"Generated {len(df):,} synthetic samples.") print(sample_output_message) - print(f"Artifacts saved to: {args.output.resolve()}") + print(f"Artifacts saved to: {args.out_dir.resolve()}") if __name__ == "__main__": diff --git a/ReforceXY/reward_space_analysis/test_cli.py b/ReforceXY/reward_space_analysis/test_cli.py index d1947c9..da0c37d 100644 --- a/ReforceXY/reward_space_analysis/test_cli.py +++ b/ReforceXY/reward_space_analysis/test_cli.py @@ -18,8 +18,8 @@ Key features Usage ----- -python test_cli.py --samples 50 --out-dir ../sample_run_output_smoke \ - --shuffle-seed 123 --strict-sample 3 --bootstrap-resamples 200 +python test_cli.py --samples 50 --out_dir ../sample_run_output_smoke \ + --shuffle_seed 123 --strict_sample 3 --bootstrap_resamples 200 JSON Summary fields ------------------- @@ -144,7 +144,7 @@ def run_scenario( str(script), "--num_samples", str(base_samples), - "--output", + "--out_dir", str(scenario_dir), "--exit_potential_mode", exit_potential_mode, @@ -164,7 +164,7 @@ def run_scenario( # Forward bootstrap resamples explicitly cmd += ["--bootstrap_resamples", str(bootstrap_resamples)] if skip_feature_analysis: - cmd.append("--skip_feature-analysis") + cmd.append("--skip_feature_analysis") if strict: cmd.append("--strict_diagnostics") start = time.perf_counter() @@ -206,48 +206,48 @@ def main(): help="num synthetic samples per scenario (minimum 4 for feature analysis)", ) parser.add_argument( - "--skip_feature-analysis", + "--skip_feature_analysis", action="store_true", help="Skip feature importance and model-based analysis for all scenarios.", ) parser.add_argument( - "--out-dir", + "--out_dir", type=str, default="sample_run_output_smoke", help="output parent directory", ) parser.add_argument( - "--shuffle-seed", + "--shuffle_seed", type=int, default=None, help="If set, shuffle full scenario space before sampling a diverse subset", ) parser.add_argument( - "--strict-sample", + "--strict_sample", type=int, default=0, help="Duplicate the first N scenarios executed again with --strict_diagnostics", ) parser.add_argument( - "--max-scenarios", + "--max_scenarios", type=int, default=40, help="Maximum number of (non-strict) scenarios before strict duplication", ) parser.add_argument( - "--bootstrap-resamples", + "--bootstrap_resamples", type=int, default=120, help="Number of bootstrap resamples to pass to child processes (speed/perf tradeoff)", ) parser.add_argument( - "--per-scenario-timeout", + "--per_scenario_timeout", type=int, default=600, help="Timeout (seconds) per child process (default: 600)", ) parser.add_argument( - "--store-full-logs", + "--store_full_logs", action="store_true", help="If set, store full stdout/stderr (may be large) instead of tail truncation.", ) @@ -255,13 +255,13 @@ def main(): # Basic validation if args.max_scenarios <= 0: - parser.error("--max-scenarios must be > 0") + parser.error("--max_scenarios must be > 0") if args.samples < 4 and not args.skip_feature_analysis: - parser.error("--samples must be >= 4 unless --skip_feature-analysis is set") + parser.error("--samples must be >= 4 unless --skip_feature_analysis is set") if args.strict_sample < 0: - parser.error("--strict-sample must be >= 0") + parser.error("--strict_sample must be >= 0") if args.bootstrap_resamples <= 0: - parser.error("--bootstrap-resamples must be > 0") + parser.error("--bootstrap_resamples must be > 0") script = Path(__file__).parent / "reward_space_analysis.py" out_dir = Path(args.out_dir) diff --git a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py index b967e28..4c52640 100644 --- a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py +++ b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py @@ -461,7 +461,7 @@ class TestIntegration(RewardSpaceTestBase): str(self.TEST_SAMPLES), "--seed", str(self.SEED), - "--output", + "--out_dir", str(self.output_path), ] @@ -497,7 +497,7 @@ class TestIntegration(RewardSpaceTestBase): str(self.TEST_SAMPLES), "--seed", str(self.SEED), - "--output", + "--out_dir", str(self.output_path / "run1"), ] @@ -509,7 +509,7 @@ class TestIntegration(RewardSpaceTestBase): str(self.TEST_SAMPLES), "--seed", str(self.SEED), - "--output", + "--out_dir", str(self.output_path / "run2"), ] @@ -1688,9 +1688,16 @@ class TestAPIAndHelpers(RewardSpaceTestBase): self.assertIsNotNone(parser) # Test parsing with minimal arguments - args = parser.parse_args(["--num_samples", "100", "--output", "test_output"]) + args = parser.parse_args( + [ + "--num_samples", + "100", + "--out_dir", + "test_output", + ] + ) self.assertEqual(args.num_samples, 100) - self.assertEqual(str(args.output), "test_output") + self.assertEqual(str(args.out_dir), "test_output") def test_complete_statistical_analysis_writer(self): """Test write_complete_statistical_analysis function.""" -- 2.43.0