pip install pandas numpy scipy scikit-learn
```
-Whenever you need to run analyses or tests, activate the environment first:
+Whenever you need to run analyses, activate the environment first and execute:
```shell
source .venv/bin/activate
python reward_space_analysis.py --num_samples 20000 --output reward_space_outputs
-python test_reward_space_analysis.py
```
> Deactivate the environment with `deactivate` when you're done.
**`--profit_target`** (float, default: 0.03)
- Target profit threshold as decimal (e.g., 0.03 = 3%)
-- Used for efficiency calculations and holding penalties
+- Used when computing the exit reward
**`--risk_reward_ratio`** (float, default: 1.0)
- Risk/reward ratio multiplier
- Affects profit target adjustment in reward calculations
-**`--holding_max_ratio`** (float, default: 2.5)
+**`--max_duration_ratio`** (float, default: 2.5)
- Multiple of max_trade_duration used for sampling trade/idle durations
- Higher = more variety in duration scenarios
effective_r = r if not exit_plateau
```
-| Mode | Multiplier (applied to base_factor * pnl * pnl_factor * efficiency) | Monotonic ↓ | Notes |
+| Mode | Multiplier (applied to base_factor * pnl * pnl_factor * efficiency_factor) | Monotonic ↓ | Notes |
|------|---------------------------------------------------------------------|-------------|-------|
| legacy | step: ×1.5 if r* ≤ 1 else ×0.5 | No | Historical discontinuity retained (not smoothed) |
| sqrt | 1 / sqrt(1 + r*) | Yes | Sub-linear decay |
Where r* = `effective_r` above.
-Notes:
-- Plateau guarantees continuity at the boundary r = grace for all monotonic kernels; only `legacy` may jump.
-- A single implementation in code (`_get_exit_factor`) mirrors this table; this README is the canonical human-readable mapping.
-- Continuity tests assert small‑epsilon bounded attenuation onset (excluding `legacy`).
-
_Efficiency configuration:_
- `efficiency_weight` (default: 1.0) - Weight for efficiency factor in exit reward
### Run Tests
```shell
-python test_reward_space_analysis.py
+# activate the venv first
+source .venv/bin/activate
+pip install pytest packaging
+pytest -q
```
-The suite currently contains 59 tests (current state; this number evolves as new invariants and attenuation modes are added). Always run the full suite after modifying reward logic or attenuation parameters.
+Always run the full suite after modifying reward logic or attenuation parameters.
### Test Categories
base_factor: float,
profit_target: float,
risk_reward_ratio: float,
- holding_max_ratio: float,
+ max_duration_ratio: float,
trading_mode: str,
pnl_base_std: float,
pnl_duration_vol_scale: float,
if position == Positions.Neutral:
trade_duration = 0
- idle_duration = int(rng.uniform(0, max_trade_duration * holding_max_ratio))
+ max_idle_duration_candles = params.get("max_idle_duration_candles")
+ try:
+ if max_idle_duration_candles is not None:
+ max_idle_duration_candles = int(max_idle_duration_candles)
+ else:
+ max_idle_duration_candles = int(
+ max_trade_duration * max_duration_ratio
+ )
+ except (TypeError, ValueError):
+ max_idle_duration_candles = int(max_trade_duration * max_duration_ratio)
+
+ if max_idle_duration_candles <= 0:
+ max_idle_duration_candles = int(max_trade_duration * max_duration_ratio)
+
+ idle_duration = int(rng.uniform(0, max_idle_duration_candles))
else:
- trade_duration = int(rng.uniform(1, max_trade_duration * holding_max_ratio))
+ trade_duration = int(
+ rng.uniform(1, max_trade_duration * max_duration_ratio)
+ )
trade_duration = max(1, trade_duration)
idle_duration = 0
help="Risk reward ratio multiplier (default: 1.0).",
)
parser.add_argument(
- "--holding_max_ratio",
+ "--max_duration_ratio",
type=float,
default=2.5,
help="Multiple of max duration used when sampling trade/idle durations.",
base_factor=base_factor,
profit_target=profit_target,
risk_reward_ratio=risk_reward_ratio,
- holding_max_ratio=args.holding_max_ratio,
+ max_duration_ratio=args.max_duration_ratio,
trading_mode=args.trading_mode,
pnl_base_std=args.pnl_base_std,
pnl_duration_vol_scale=args.pnl_duration_vol_scale,
"base_factor": base_factor,
"profit_target": profit_target,
"risk_reward_ratio": risk_reward_ratio,
- "holding_max_ratio": args.holding_max_ratio,
+ "max_duration_ratio": args.max_duration_ratio,
"trading_mode": args.trading_mode,
"action_masking": params.get("action_masking", True),
"pnl_base_std": args.pnl_base_std,
base_factor=TEST_BASE_FACTOR,
profit_target=TEST_PROFIT_TARGET,
risk_reward_ratio=TEST_RR,
- holding_max_ratio=2.0,
+ max_duration_ratio=2.0,
trading_mode="margin",
pnl_base_std=TEST_PNL_STD,
pnl_duration_vol_scale=TEST_PNL_DUR_VOL_SCALE,
base_factor=TEST_BASE_FACTOR,
profit_target=TEST_PROFIT_TARGET,
risk_reward_ratio=1.0,
- holding_max_ratio=2.0,
+ max_duration_ratio=2.0,
trading_mode="margin",
pnl_base_std=TEST_PNL_STD,
pnl_duration_vol_scale=TEST_PNL_DUR_VOL_SCALE,
base_factor=TEST_BASE_FACTOR,
profit_target=TEST_PROFIT_TARGET,
risk_reward_ratio=1.0,
- holding_max_ratio=2.0,
+ max_duration_ratio=2.0,
trading_mode="margin",
pnl_base_std=TEST_PNL_STD,
pnl_duration_vol_scale=TEST_PNL_DUR_VOL_SCALE,
base_factor=TEST_BASE_FACTOR,
profit_target=TEST_PROFIT_TARGET,
risk_reward_ratio=TEST_RR,
- holding_max_ratio=2.0,
+ max_duration_ratio=2.0,
trading_mode="spot",
pnl_base_std=0.02,
pnl_duration_vol_scale=0.5,
base_factor=100.0,
profit_target=0.03,
risk_reward_ratio=1.0,
- holding_max_ratio=2.0,
+ max_duration_ratio=2.0,
trading_mode="margin",
pnl_base_std=0.02,
pnl_duration_vol_scale=0.5,
base_factor=100.0,
profit_target=0.03,
risk_reward_ratio=1.0,
- holding_max_ratio=2.0,
+ max_duration_ratio=2.0,
trading_mode="spot",
pnl_base_std=0.02,
pnl_duration_vol_scale=0.5,
base_factor=100.0,
profit_target=0.03,
risk_reward_ratio=1.0,
- holding_max_ratio=2.0,
+ max_duration_ratio=2.0,
trading_mode="spot",
pnl_base_std=0.02,
pnl_duration_vol_scale=0.5,
base_factor=100.0,
profit_target=0.03,
risk_reward_ratio=1.0,
- holding_max_ratio=2.0,
+ max_duration_ratio=2.0,
trading_mode="futures",
pnl_base_std=0.02,
pnl_duration_vol_scale=0.5,
base_factor=100.0,
profit_target=0.03,
risk_reward_ratio=1.0,
- holding_max_ratio=2.0,
+ max_duration_ratio=2.0,
trading_mode="spot",
pnl_base_std=0.02,
pnl_duration_vol_scale=0.5,
base_factor=100.0,
profit_target=0.03,
risk_reward_ratio=1.0,
- holding_max_ratio=2.0,
+ max_duration_ratio=2.0,
trading_mode="spot",
pnl_base_std=0.02,
pnl_duration_vol_scale=0.5,
base_factor=100.0,
profit_target=0.03,
risk_reward_ratio=1.0,
- holding_max_ratio=2.0,
+ max_duration_ratio=2.0,
trading_mode="margin",
pnl_base_std=0.02,
pnl_duration_vol_scale=0.5,