_Idle penalty configuration:_
-- `idle_penalty_scale` (default: 0.75) - Scale of idle penalty
-- `idle_penalty_power` (default: 1.0) - Power applied to idle penalty scaling
+- `idle_penalty_scale` (default: 0.5) - Scale of idle penalty
+- `idle_penalty_power` (default: 1.025) - Power applied to idle penalty scaling
_Holding penalty configuration:_
-- `holding_penalty_scale` (default: 0.5) - Scale of holding penalty
-- `holding_penalty_power` (default: 1.0) - Power applied to holding penalty scaling
+- `holding_penalty_scale` (default: 0.25) - Scale of holding penalty
+- `holding_penalty_power` (default: 1.025) - Power applied to holding penalty scaling
_Exit attenuation configuration:_
"invalid_action": -2.0,
"base_factor": 100.0,
# Idle penalty (env defaults)
- "idle_penalty_power": 1.0,
- "idle_penalty_scale": 0.75,
+ "idle_penalty_scale": 0.5,
+ "idle_penalty_power": 1.025,
# Fallback semantics: if <=0 or unset → 2 * max_trade_duration_candles (grace window before full idle penalty)
"max_idle_duration_candles": 0,
# Holding penalty (env defaults)
- "holding_penalty_scale": 0.5,
- "holding_penalty_power": 1.0,
+ "holding_penalty_scale": 0.25,
+ "holding_penalty_power": 1.025,
# Exit attenuation configuration (env defaults)
"exit_attenuation_mode": "linear",
"exit_plateau": True,
context: RewardContext, idle_factor: float, params: Dict[str, float | str]
) -> float:
"""Mirror the environment's idle penalty behaviour."""
- idle_penalty_scale = _get_param_float(params, "idle_penalty_scale", 0.75)
- idle_penalty_power = _get_param_float(params, "idle_penalty_power", 1.0)
+ idle_penalty_scale = _get_param_float(
+ params,
+ "idle_penalty_scale",
+ DEFAULT_MODEL_REWARD_PARAMETERS.get("idle_penalty_scale", 0.5),
+ )
+ idle_penalty_power = _get_param_float(
+ params,
+ "idle_penalty_power",
+ DEFAULT_MODEL_REWARD_PARAMETERS.get("idle_penalty_power", 1.025),
+ )
max_trade_duration_candles = params.get("max_trade_duration_candles")
try:
if max_trade_duration_candles is not None:
context: RewardContext, holding_factor: float, params: Dict[str, float | str]
) -> float:
"""Mirror the environment's holding penalty behaviour."""
- holding_penalty_scale = _get_param_float(params, "holding_penalty_scale", 0.5)
- holding_penalty_power = _get_param_float(params, "holding_penalty_power", 1.0)
+ holding_penalty_scale = _get_param_float(
+ params,
+ "holding_penalty_scale",
+ DEFAULT_MODEL_REWARD_PARAMETERS.get("holding_penalty_scale", 0.25),
+ )
+ holding_penalty_power = _get_param_float(
+ params,
+ "holding_penalty_power",
+ DEFAULT_MODEL_REWARD_PARAMETERS.get("holding_penalty_power", 1.025),
+ )
duration_ratio = _compute_duration_ratio(
context.trade_duration, context.max_trade_duration
)
action_masking=True,
)
self.assertLess(br_mid.idle_penalty, 0.0)
- idle_penalty_scale = float(params.get("idle_penalty_scale", 0.75))
- idle_penalty_power = float(params.get("idle_penalty_power", 1.0))
+ idle_penalty_scale = float(params.get("idle_penalty_scale", 0.5))
+ idle_penalty_power = float(params.get("idle_penalty_power", 1.025))
# Internal factor may come from params (overrides provided base_factor argument)
factor_used = float(params.get("base_factor", base_factor))
idle_factor = factor_used * (profit_target * risk_reward_ratio) / 3.0
if max_idle_duration <= 0:
max_idle_duration = 2 * max_trade_duration
idle_penalty_scale = float(
- model_reward_parameters.get("idle_penalty_scale", 0.75)
+ model_reward_parameters.get("idle_penalty_scale", 0.5)
)
idle_penalty_power = float(
- model_reward_parameters.get("idle_penalty_power", 1.0)
+ model_reward_parameters.get("idle_penalty_power", 1.025)
)
idle_duration = self.get_idle_duration()
idle_duration_ratio = idle_duration / max(1, max_idle_duration)
and action == Actions.Neutral.value
):
holding_penalty_scale = float(
- model_reward_parameters.get("holding_penalty_scale", 0.5)
+ model_reward_parameters.get("holding_penalty_scale", 0.25)
)
holding_penalty_power = float(
- model_reward_parameters.get("holding_penalty_power", 1.0)
+ model_reward_parameters.get("holding_penalty_power", 1.025)
)
if duration_ratio < 1.0: