print(f"Import error: {e}")
sys.exit(1)
-# Canonical test constants
-TEST_BASE_FACTOR: float = 100.0
-TEST_PROFIT_TARGET: float = 0.03
-TEST_RR: float = 1.0
-TEST_RR_HIGH: float = 2.0
-TEST_PNL_STD: float = 0.02
-TEST_PNL_DUR_VOL_SCALE: float = 0.5
-
class RewardSpaceTestBase(unittest.TestCase):
"""Base class with common test utilities."""
cls.SEED = 42
cls.DEFAULT_PARAMS = DEFAULT_MODEL_REWARD_PARAMETERS.copy()
cls.TEST_SAMPLES = 50 # Small for speed
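+ # Canonical test constants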
+ cls.TEST_BASE_FACTOR = 100.0
+ cls.TEST_PROFIT_TARGET = 0.03
+ cls.TEST_RR = 1.0
+ cls.TEST_RR_HIGH = 2.0
+ cls.TEST_PNL_STD = 0.02
+ cls.TEST_PNL_DUR_VOL_SCALE = 0.5
def setUp(self):
"""Set up test fixtures with reproducible random seed."""
"reward_idle": reward_idle,
"position": np.random.choice([0.0, 0.5, 1.0], n),
"reward_total": np.random.normal(0, 1, n),
- "pnl": np.random.normal(0, TEST_PNL_STD, n),
+ "pnl": np.random.normal(0, self.TEST_PNL_STD, n),
"trade_duration": np.random.exponential(20, n),
}
)
def test_basic_reward_calculation(self):
"""Test basic reward calculation consistency."""
context = RewardContext(
- pnl=TEST_PROFIT_TARGET,
+ pnl=self.TEST_PROFIT_TARGET,
trade_duration=10,
idle_duration=0,
max_trade_duration=100,
breakdown = calculate_reward(
context,
self.DEFAULT_PARAMS,
- base_factor=TEST_BASE_FACTOR,
+ base_factor=self.TEST_BASE_FACTOR,
profit_target=0.06, # Scenario-specific larger target kept explicit
- risk_reward_ratio=TEST_RR_HIGH,
+ risk_reward_ratio=self.TEST_RR_HIGH,
short_allowed=True,
action_masking=True,
)
tp_breakdown = calculate_reward(
tp_context,
self.DEFAULT_PARAMS,
- base_factor=TEST_BASE_FACTOR,
+ base_factor=self.TEST_BASE_FACTOR,
profit_target=profit_target,
- risk_reward_ratio=TEST_RR_HIGH,
+ risk_reward_ratio=self.TEST_RR_HIGH,
short_allowed=True,
action_masking=True,
)
sl_breakdown = calculate_reward(
sl_context,
self.DEFAULT_PARAMS,
- base_factor=TEST_BASE_FACTOR,
+ base_factor=self.TEST_BASE_FACTOR,
profit_target=profit_target,
- risk_reward_ratio=TEST_RR_HIGH,
+ risk_reward_ratio=self.TEST_RR_HIGH,
short_allowed=True,
action_masking=True,
)
to_breakdown = calculate_reward(
to_context,
self.DEFAULT_PARAMS,
- base_factor=TEST_BASE_FACTOR,
+ base_factor=self.TEST_BASE_FACTOR,
profit_target=profit_target,
- risk_reward_ratio=TEST_RR_HIGH,
+ risk_reward_ratio=self.TEST_RR_HIGH,
short_allowed=True,
action_masking=True,
)
params_small["max_idle_duration_candles"] = 50
params_large["max_idle_duration_candles"] = 200
- base_factor = TEST_BASE_FACTOR
+ base_factor = self.TEST_BASE_FACTOR
idle_duration = 40 # below large threshold, near small threshold
context = RewardContext(
pnl=0.0,
context,
params_small,
base_factor,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
short_allowed=True,
action_masking=True,
)
breakdown_large = calculate_reward(
context,
params_large,
- base_factor=TEST_BASE_FACTOR,
+ base_factor=base_factor,
profit_target=0.06,
- risk_reward_ratio=TEST_RR,
+ risk_reward_ratio=self.TEST_RR,
short_allowed=True,
action_masking=True,
)
params = self.DEFAULT_PARAMS.copy()
params["max_idle_duration_candles"] = 0 # force fallback
base_factor = 90.0
- profit_target = TEST_PROFIT_TARGET
+ profit_target = self.TEST_PROFIT_TARGET
risk_reward_ratio = 1.0
# Two contexts with different idle durations
baseline = calculate_reward(
context,
params,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR_HIGH,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR_HIGH,
short_allowed=True,
action_masking=True,
)
# Amplified: choose a base_factor large enough that base_factor * pnl / rr exceeds the threshold
amplified_base_factor = max(
- TEST_BASE_FACTOR * 50, threshold * TEST_RR_HIGH / max(context.pnl, 1e-9)
+ self.TEST_BASE_FACTOR * 50,
+ threshold * self.TEST_RR_HIGH / max(context.pnl, 1e-9),
)
amplified = calculate_reward(
context,
params,
base_factor=amplified_base_factor,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR_HIGH,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR_HIGH,
short_allowed=True,
action_masking=True,
)
def test_negative_slope_sanitization(self):
"""Negative slopes for linear must be sanitized to positive default (1.0)."""
- base_factor = TEST_BASE_FACTOR
+ base_factor = self.TEST_BASE_FACTOR
pnl = 0.04
pnl_factor = 1.0
duration_ratio_linear = 1.2 # any positive ratio
br = calculate_reward(
context,
self.DEFAULT_PARAMS,
- base_factor=TEST_BASE_FACTOR,
+ base_factor=self.TEST_BASE_FACTOR,
profit_target=0.0,  # edge case: zero profit target
- risk_reward_ratio=TEST_RR,
+ risk_reward_ratio=self.TEST_RR,
short_allowed=True,
action_masking=True,
)
tau = 0.5
r = 1.2
alpha = -math.log(tau) / math.log(2.0)
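+ # By construction 2 ** (-alpha) == tau, so tau = 0.5 gives alpha = 1.0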
- base_factor = TEST_BASE_FACTOR
- pnl = TEST_PROFIT_TARGET
+ base_factor = self.TEST_BASE_FACTOR
+ pnl = self.TEST_PROFIT_TARGET
pnl_factor = 1.0 # isolate attenuation
params = self.DEFAULT_PARAMS.copy()
params.update(
"""Saturation test: pnl amplification factor should monotonically approach (1 + win_reward_factor)."""
win_reward_factor = 3.0 # asymptote = 4.0
beta = 0.5
- profit_target = TEST_PROFIT_TARGET
+ profit_target = self.TEST_PROFIT_TARGET
params = self.DEFAULT_PARAMS.copy()
params.update(
{
params.pop("base_factor", None)
# pnl values: slightly above target, 2x, 5x, 10x target
- pnl_values = [profit_target * m for m in (1.05, TEST_RR_HIGH, 5.0, 10.0)]
+ pnl_values = [profit_target * m for m in (1.05, 2.0, 5.0, 10.0)]
ratios_observed: list[float] = []
for pnl in pnl_values:
params.pop("base_factor", None)
base_factor = 80.0
k = 7.5
- profit_target = TEST_PROFIT_TARGET
+ profit_target = self.TEST_PROFIT_TARGET
rr = 1.5
contexts: list[RewardContext] = [
),
# Losing exit
RewardContext(
- pnl=-TEST_PNL_STD,
+ pnl=-self.TEST_PNL_STD,
trade_duration=60,
idle_duration=0,
max_trade_duration=100,
params.pop("base_factor", None)
base_factor = 120.0
profit_target = 0.04
- rr = TEST_RR_HIGH
+ rr = self.TEST_RR_HIGH
pnls = [0.018, -0.022]
for pnl in pnls:
ctx_long = RewardContext(
test_data = pd.DataFrame(
{
"reward_total": np.random.normal(0, 1, 100),
- "pnl": np.random.normal(0.01, TEST_PNL_STD, 100),
+ "pnl": np.random.normal(0.01, self.TEST_PNL_STD, 100),
}
)
~idle_mask, np.random.normal(-0.5, 0.2, 300), 0.0
),
"reward_exit": np.random.normal(0.8, 0.6, 300),
- "pnl": np.random.normal(0.01, TEST_PNL_STD, 300),
+ "pnl": np.random.normal(0.01, self.TEST_PNL_STD, 300),
"trade_duration": np.random.uniform(5, 150, 300),
"idle_duration": idle_duration,
"position": np.random.choice([0.0, 0.5, 1.0], 300),
seed=42,
params=self.DEFAULT_PARAMS,
max_trade_duration=50,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
max_duration_ratio=2.0,
trading_mode="margin",
- pnl_base_std=TEST_PNL_STD,
- pnl_duration_vol_scale=TEST_PNL_DUR_VOL_SCALE,
+ pnl_base_std=self.TEST_PNL_STD,
+ pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
)
# Critical invariant: Total PnL must equal sum of exit PnL
np.random.seed(42)
df1 = pd.DataFrame(
{
- "pnl": np.random.normal(0, TEST_PNL_STD, 500),
+ "pnl": np.random.normal(0, self.TEST_PNL_STD, 500),
"trade_duration": np.random.exponential(30, 500),
"idle_duration": np.random.gamma(2, 5, 500),
}
seed=123,
params=self.DEFAULT_PARAMS,
max_trade_duration=100,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
max_duration_ratio=2.0,
trading_mode="margin",
- pnl_base_std=TEST_PNL_STD,
- pnl_duration_vol_scale=TEST_PNL_DUR_VOL_SCALE,
+ pnl_base_std=self.TEST_PNL_STD,
+ pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
)
# Filter to exit actions only (where PnL is meaningful)
reward_power = calculate_reward(
context,
params,
- TEST_BASE_FACTOR,
- TEST_PROFIT_TARGET,
- TEST_RR,
+ self.TEST_BASE_FACTOR,
+ self.TEST_PROFIT_TARGET,
+ self.TEST_RR,
short_allowed=True,
action_masking=True,
)
reward_half_life = calculate_reward(
context,
params,
- TEST_BASE_FACTOR,
- TEST_PROFIT_TARGET,
- TEST_RR,
+ self.TEST_BASE_FACTOR,
+ self.TEST_PROFIT_TARGET,
+ self.TEST_RR,
short_allowed=True,
action_masking=True,
)
reward_linear = calculate_reward(
context,
params,
- TEST_BASE_FACTOR,
- TEST_PROFIT_TARGET,
- TEST_RR,
+ self.TEST_BASE_FACTOR,
+ self.TEST_PROFIT_TARGET,
+ self.TEST_RR,
short_allowed=True,
action_masking=True,
)
seed=123,
params=self.DEFAULT_PARAMS,
max_trade_duration=100,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
max_duration_ratio=2.0,
trading_mode="margin",
- pnl_base_std=TEST_PNL_STD,
- pnl_duration_vol_scale=TEST_PNL_DUR_VOL_SCALE,
+ pnl_base_std=self.TEST_PNL_STD,
+ pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
)
results_adj = statistical_hypothesis_tests(
seed=42,
params=self.DEFAULT_PARAMS,
max_trade_duration=100,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
max_duration_ratio=2.0,
trading_mode="spot",
- pnl_base_std=TEST_PNL_STD,
- pnl_duration_vol_scale=TEST_PNL_DUR_VOL_SCALE,
+ pnl_base_std=self.TEST_PNL_STD,
+ pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
)
# Should not have any short positions
seed=42,
params=self.DEFAULT_PARAMS,
max_trade_duration=100,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
max_duration_ratio=2.0,
trading_mode="margin",
- pnl_base_std=TEST_PNL_STD,
- pnl_duration_vol_scale=TEST_PNL_DUR_VOL_SCALE,
+ pnl_base_std=self.TEST_PNL_STD,
+ pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
)
# Should have required columns
breakdown = calculate_reward(
context,
self.DEFAULT_PARAMS,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
risk_reward_ratio=1.0,
short_allowed=True,
action_masking=True,
context,
extreme_params,
base_factor=10000.0,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
short_allowed=True,
action_masking=True,
)
breakdown = calculate_reward(
context,
test_params,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
short_allowed=True,
action_masking=True,
)
seed=42,
params={"action_masking": "true"},
max_trade_duration=50,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
max_duration_ratio=2.0,
trading_mode="spot",
- pnl_base_std=TEST_PNL_STD,
- pnl_duration_vol_scale=TEST_PNL_DUR_VOL_SCALE,
+ pnl_base_std=self.TEST_PNL_STD,
+ pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
)
self.assertIsInstance(df1, pd.DataFrame)
seed=42,
params={"action_masking": "false"},
max_trade_duration=50,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
max_duration_ratio=2.0,
trading_mode="spot",
- pnl_base_std=TEST_PNL_STD,
- pnl_duration_vol_scale=TEST_PNL_DUR_VOL_SCALE,
+ pnl_base_std=self.TEST_PNL_STD,
+ pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
)
self.assertIsInstance(df2, pd.DataFrame)
seed=42,
params=self.DEFAULT_PARAMS,
max_trade_duration=50,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
max_duration_ratio=2.0,
trading_mode="futures",
- pnl_base_std=TEST_PNL_STD,
- pnl_duration_vol_scale=TEST_PNL_DUR_VOL_SCALE,
+ pnl_base_std=self.TEST_PNL_STD,
+ pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
)
# Should have some short positions
seed=42,
params=self.DEFAULT_PARAMS,
max_trade_duration=100,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
max_duration_ratio=2.0,
trading_mode="margin",
- pnl_base_std=TEST_PNL_STD,
- pnl_duration_vol_scale=TEST_PNL_DUR_VOL_SCALE,
+ pnl_base_std=self.TEST_PNL_STD,
+ pnl_duration_vol_scale=self.TEST_PNL_DUR_VOL_SCALE,
)
with tempfile.TemporaryDirectory() as tmp_dir:
test_data,
output_path,
max_trade_duration=100,
- profit_target=TEST_PROFIT_TARGET,
+ profit_target=self.TEST_PROFIT_TARGET,
seed=42,
real_df=None,
)
breakdown = calculate_reward(
context,
self.DEFAULT_PARAMS,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
risk_reward_ratio=1.0,
short_allowed=True,
action_masking=True,
breakdown = calculate_reward(
context,
self.DEFAULT_PARAMS,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
short_allowed=True,
action_masking=True,
)
breakdown = calculate_reward(
context,
self.DEFAULT_PARAMS,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
risk_reward_ratio=1.0,
short_allowed=True,
action_masking=True,
breakdown = calculate_reward(
context,
self.DEFAULT_PARAMS,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
risk_reward_ratio=1.0,
short_allowed=True,
action_masking=False, # Disable masking to test invalid penalty
breakdown = calculate_reward(
context,
self.DEFAULT_PARAMS,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
risk_reward_ratio=1.0,
short_allowed=True,
action_masking=True,
breakdown = calculate_reward(
context,
self.DEFAULT_PARAMS,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
short_allowed=True,
action_masking=True,
)
context,
params,
base_factor=1e7, # exaggerated factor
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
short_allowed=True,
action_masking=True,
)
),
# Exit reward only (positive pnl)
dict(
- ctx=self._mk_context(pnl=TEST_PROFIT_TARGET, trade_duration=60),
+ ctx=self._mk_context(pnl=self.TEST_PROFIT_TARGET, trade_duration=60),
active="exit_component",
),
# Invalid action only
br = calculate_reward(
ctx_obj,
self.DEFAULT_PARAMS,
- base_factor=TEST_BASE_FACTOR,
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR,
+ base_factor=self.TEST_BASE_FACTOR,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR,
short_allowed=True,
action_masking=(active_label != "invalid_penalty"),
)
"""
modes = ["sqrt", "linear", "power", "half_life", "plateau_linear"]
- base_factor = TEST_BASE_FACTOR
+ base_factor = self.TEST_BASE_FACTOR
pnl = 0.05
pnl_factor = 1.0
for mode in modes:
"exit_linear_slope": 0.0,
}
)
- base_factor = TEST_BASE_FACTOR
+ base_factor = self.TEST_BASE_FACTOR
pnl = 0.04
pnl_factor = 1.2
ratios = [0.3, 0.6, 1.0, 1.4]
}
)
base_factor = 80.0
- pnl = TEST_PROFIT_TARGET
+ pnl = self.TEST_PROFIT_TARGET
pnl_factor = 1.1
# Ratios straddling 1.0 and below grace=1.5, plus one (1.6) beyond the grace boundary
ratios = [0.8, 1.0, 1.2, 1.4, 1.6]
params = self.DEFAULT_PARAMS.copy()
params["exit_attenuation_mode"] = "legacy"
params["exit_plateau"] = False
- base_factor = TEST_BASE_FACTOR
+ base_factor = self.TEST_BASE_FACTOR
pnl = 0.02
pnl_factor = 1.0
# ratio below 1 vs above 1
params = self.DEFAULT_PARAMS.copy()
# Try multiple modes / extreme params
modes = ["linear", "power", "half_life", "sqrt", "legacy", "linear_plateau"]
- base_factor = TEST_BASE_FACTOR
+ base_factor = self.TEST_BASE_FACTOR
pnl = 0.05
pnl_factor = 2.0 # amplified
for mode in modes:
params["idle_penalty_power"] = 2.0
params["max_idle_duration_candles"] = 100
base_factor = 90.0
- profit_target = TEST_PROFIT_TARGET
+ profit_target = self.TEST_PROFIT_TARGET
# Idle penalties for durations 20 vs 40 (quadratic → (40/100)^2 / (20/100)^2 = (0.4^2)/(0.2^2)=4)
ctx_a = RewardContext(
pnl=0.0,
params,
base_factor=base_factor,
profit_target=profit_target,
- risk_reward_ratio=TEST_RR,
+ risk_reward_ratio=self.TEST_RR,
short_allowed=True,
action_masking=True,
)
params,
base_factor=base_factor,
profit_target=profit_target,
- risk_reward_ratio=TEST_RR,
+ risk_reward_ratio=self.TEST_RR,
short_allowed=True,
action_masking=True,
)
force_action=None,
)
ctx_h2 = dataclasses.replace(ctx_h1, trade_duration=140)
+ # Compute baseline and comparison holding penalties
br_h1 = calculate_reward(
ctx_h1,
params,
base_factor=base_factor,
profit_target=profit_target,
- risk_reward_ratio=TEST_RR,
+ risk_reward_ratio=self.TEST_RR,
short_allowed=True,
action_masking=True,
)
context,
params,
base_factor=5000.0, # large enough to exceed threshold
- profit_target=TEST_PROFIT_TARGET,
- risk_reward_ratio=TEST_RR_HIGH,
+ profit_target=self.TEST_PROFIT_TARGET,
+ risk_reward_ratio=self.TEST_RR_HIGH,
short_allowed=True,
action_masking=True,
)
modes = ["sqrt", "linear", "power", "half_life"]
grace = 0.8
eps = 1e-4
- base_factor = TEST_BASE_FACTOR
+ base_factor = self.TEST_BASE_FACTOR
pnl = 0.01
pnl_factor = 1.0
tau = 0.5 # for power