]> Piment Noir Git Repositories - freqai-strategies.git/commitdiff
refactor(reforcexy): refine reward CLI API
author: Jérôme Benoit <jerome.benoit@piment-noir.org>
Tue, 7 Oct 2025 22:09:17 +0000 (00:09 +0200)
committer: Jérôme Benoit <jerome.benoit@piment-noir.org>
Tue, 7 Oct 2025 22:09:17 +0000 (00:09 +0200)
Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
ReforceXY/reward_space_analysis/README.md
ReforceXY/reward_space_analysis/reward_space_analysis.py
ReforceXY/reward_space_analysis/test_reward_space_analysis.py

index 5eaec89b3ce5479745769cc6226b5c5e5e71754f..7822f53efcdf2be22ba5b42fddd15cee1468282e 100644 (file)
@@ -86,12 +86,11 @@ source .venv/bin/activate
 pip install pandas numpy scipy scikit-learn
 ```
 
-Whenever you need to run analyses or tests, activate the environment first:
+Whenever you need to run analyses, activate the environment first and execute:
 
 ```shell
 source .venv/bin/activate
 python reward_space_analysis.py --num_samples 20000 --output reward_space_outputs
-python test_reward_space_analysis.py
 ```
 
 > Deactivate the environment with `deactivate` when you're done.
@@ -204,14 +203,14 @@ None - all parameters have sensible defaults.
 **`--profit_target`** (float, default: 0.03)
 
 - Target profit threshold as decimal (e.g., 0.03 = 3%)
-- Used for efficiency calculations and holding penalties
+- Used for exit reward
 
 **`--risk_reward_ratio`** (float, default: 1.0)
 
 - Risk/reward ratio multiplier
 - Affects profit target adjustment in reward calculations
 
-**`--holding_max_ratio`** (float, default: 2.5)
+**`--max_duration_ratio`** (float, default: 2.5)
 
 - Multiple of max_trade_duration used for sampling trade/idle durations
 - Higher = more variety in duration scenarios
@@ -289,7 +288,7 @@ effective_r = r - grace    if exit_plateau and r >  grace
 effective_r = r            if not exit_plateau
 ```
 
-| Mode | Multiplier (applied to base_factor * pnl * pnl_factor * efficiency) | Monotonic ↓ | Notes |
+| Mode | Multiplier (applied to base_factor * pnl * pnl_factor * efficiency_factor) | Monotonic ↓ | Notes |
 |------|---------------------------------------------------------------------|-------------|-------|
 | legacy | step: ×1.5 if r* ≤ 1 else ×0.5 | No | Historical discontinuity retained (not smoothed) |
 | sqrt | 1 / sqrt(1 + r*) | Yes | Sub-linear decay |
@@ -299,11 +298,6 @@ effective_r = r            if not exit_plateau
 
 Where r* = `effective_r` above.
 
-Notes:
-- Plateau guarantees continuity at the boundary r = grace for all monotonic kernels; only `legacy` may jump.
-- A single implementation in code (`_get_exit_factor`) mirrors this table; this README is the canonical human-readable mapping.
-- Continuity tests assert small‑epsilon bounded attenuation onset (excluding `legacy`).
-
 _Efficiency configuration:_
 
 - `efficiency_weight` (default: 1.0) - Weight for efficiency factor in exit reward
@@ -515,10 +509,13 @@ done
 ### Run Tests
 
 ```shell
-python test_reward_space_analysis.py
+# activate the venv first
+source .venv/bin/activate
+pip install pytest packaging
+pytest -q
 ```
 
-The suite currently contains 59 tests (current state; this number evolves as new invariants and attenuation modes are added). Always run the full suite after modifying reward logic or attenuation parameters.
+Always run the full suite after modifying reward logic or attenuation parameters.
 
 ### Test Categories
 
index eae0fd96a4dc5aadc84f7c83f2fbbfd095c1a258..865c9fac3c97738f3d4c57622d2ac9f3942ee0a3 100644 (file)
@@ -698,7 +698,7 @@ def simulate_samples(
     base_factor: float,
     profit_target: float,
     risk_reward_ratio: float,
-    holding_max_ratio: float,
+    max_duration_ratio: float,
     trading_mode: str,
     pnl_base_std: float,
     pnl_duration_vol_scale: float,
@@ -737,9 +737,25 @@ def simulate_samples(
 
         if position == Positions.Neutral:
             trade_duration = 0
-            idle_duration = int(rng.uniform(0, max_trade_duration * holding_max_ratio))
+            max_idle_duration_candles = params.get("max_idle_duration_candles")
+            try:
+                if max_idle_duration_candles is not None:
+                    max_idle_duration_candles = int(max_idle_duration_candles)
+                else:
+                    max_idle_duration_candles = int(
+                        max_trade_duration * max_duration_ratio
+                    )
+            except (TypeError, ValueError):
+                max_idle_duration_candles = int(max_trade_duration * max_duration_ratio)
+
+            if max_idle_duration_candles <= 0:
+                max_idle_duration_candles = int(max_trade_duration * max_duration_ratio)
+
+            idle_duration = int(rng.uniform(0, max_idle_duration_candles))
         else:
-            trade_duration = int(rng.uniform(1, max_trade_duration * holding_max_ratio))
+            trade_duration = int(
+                rng.uniform(1, max_trade_duration * max_duration_ratio)
+            )
             trade_duration = max(1, trade_duration)
             idle_duration = 0
 
@@ -1980,7 +1996,7 @@ def build_argument_parser() -> argparse.ArgumentParser:
         help="Risk reward ratio multiplier (default: 1.0).",
     )
     parser.add_argument(
-        "--holding_max_ratio",
+        "--max_duration_ratio",
         type=float,
         default=2.5,
         help="Multiple of max duration used when sampling trade/idle durations.",
@@ -2536,7 +2552,7 @@ def main() -> None:
         base_factor=base_factor,
         profit_target=profit_target,
         risk_reward_ratio=risk_reward_ratio,
-        holding_max_ratio=args.holding_max_ratio,
+        max_duration_ratio=args.max_duration_ratio,
         trading_mode=args.trading_mode,
         pnl_base_std=args.pnl_base_std,
         pnl_duration_vol_scale=args.pnl_duration_vol_scale,
@@ -2549,7 +2565,7 @@ def main() -> None:
         "base_factor": base_factor,
         "profit_target": profit_target,
         "risk_reward_ratio": risk_reward_ratio,
-        "holding_max_ratio": args.holding_max_ratio,
+        "max_duration_ratio": args.max_duration_ratio,
         "trading_mode": args.trading_mode,
         "action_masking": params.get("action_masking", True),
         "pnl_base_std": args.pnl_base_std,
index 460f4ed5fdd2e6f3c1cd421516bab38a6bdb542b..2bd0918e887fefa7c48c702074cedecdb11925e8 100644 (file)
@@ -1252,7 +1252,7 @@ class TestStatisticalValidation(RewardSpaceTestBase):
             base_factor=TEST_BASE_FACTOR,
             profit_target=TEST_PROFIT_TARGET,
             risk_reward_ratio=TEST_RR,
-            holding_max_ratio=2.0,
+            max_duration_ratio=2.0,
             trading_mode="margin",
             pnl_base_std=TEST_PNL_STD,
             pnl_duration_vol_scale=TEST_PNL_DUR_VOL_SCALE,
@@ -1369,7 +1369,7 @@ class TestStatisticalValidation(RewardSpaceTestBase):
             base_factor=TEST_BASE_FACTOR,
             profit_target=TEST_PROFIT_TARGET,
             risk_reward_ratio=1.0,
-            holding_max_ratio=2.0,
+            max_duration_ratio=2.0,
             trading_mode="margin",
             pnl_base_std=TEST_PNL_STD,
             pnl_duration_vol_scale=TEST_PNL_DUR_VOL_SCALE,
@@ -1602,7 +1602,7 @@ class TestStatisticalValidation(RewardSpaceTestBase):
             base_factor=TEST_BASE_FACTOR,
             profit_target=TEST_PROFIT_TARGET,
             risk_reward_ratio=1.0,
-            holding_max_ratio=2.0,
+            max_duration_ratio=2.0,
             trading_mode="margin",
             pnl_base_std=TEST_PNL_STD,
             pnl_duration_vol_scale=TEST_PNL_DUR_VOL_SCALE,
@@ -1663,7 +1663,7 @@ class TestStatisticalValidation(RewardSpaceTestBase):
             base_factor=TEST_BASE_FACTOR,
             profit_target=TEST_PROFIT_TARGET,
             risk_reward_ratio=TEST_RR,
-            holding_max_ratio=2.0,
+            max_duration_ratio=2.0,
             trading_mode="spot",
             pnl_base_std=0.02,
             pnl_duration_vol_scale=0.5,
@@ -1684,7 +1684,7 @@ class TestStatisticalValidation(RewardSpaceTestBase):
             base_factor=100.0,
             profit_target=0.03,
             risk_reward_ratio=1.0,
-            holding_max_ratio=2.0,
+            max_duration_ratio=2.0,
             trading_mode="margin",
             pnl_base_std=0.02,
             pnl_duration_vol_scale=0.5,
@@ -1867,7 +1867,7 @@ class TestHelperFunctions(RewardSpaceTestBase):
             base_factor=100.0,
             profit_target=0.03,
             risk_reward_ratio=1.0,
-            holding_max_ratio=2.0,
+            max_duration_ratio=2.0,
             trading_mode="spot",
             pnl_base_std=0.02,
             pnl_duration_vol_scale=0.5,
@@ -1882,7 +1882,7 @@ class TestHelperFunctions(RewardSpaceTestBase):
             base_factor=100.0,
             profit_target=0.03,
             risk_reward_ratio=1.0,
-            holding_max_ratio=2.0,
+            max_duration_ratio=2.0,
             trading_mode="spot",
             pnl_base_std=0.02,
             pnl_duration_vol_scale=0.5,
@@ -1900,7 +1900,7 @@ class TestHelperFunctions(RewardSpaceTestBase):
             base_factor=100.0,
             profit_target=0.03,
             risk_reward_ratio=1.0,
-            holding_max_ratio=2.0,
+            max_duration_ratio=2.0,
             trading_mode="futures",
             pnl_base_std=0.02,
             pnl_duration_vol_scale=0.5,
@@ -1925,7 +1925,7 @@ class TestHelperFunctions(RewardSpaceTestBase):
             base_factor=100.0,
             profit_target=0.03,
             risk_reward_ratio=1.0,
-            holding_max_ratio=2.0,
+            max_duration_ratio=2.0,
             trading_mode="spot",
             pnl_base_std=0.02,
             pnl_duration_vol_scale=0.5,
@@ -1959,7 +1959,7 @@ class TestHelperFunctions(RewardSpaceTestBase):
             base_factor=100.0,
             profit_target=0.03,
             risk_reward_ratio=1.0,
-            holding_max_ratio=2.0,
+            max_duration_ratio=2.0,
             trading_mode="spot",
             pnl_base_std=0.02,
             pnl_duration_vol_scale=0.5,
@@ -2084,7 +2084,7 @@ class TestHelperFunctions(RewardSpaceTestBase):
             base_factor=100.0,
             profit_target=0.03,
             risk_reward_ratio=1.0,
-            holding_max_ratio=2.0,
+            max_duration_ratio=2.0,
             trading_mode="margin",
             pnl_base_std=0.02,
             pnl_duration_vol_scale=0.5,