function will set them to proper values and warn the user
"""
if not isinstance(self.n_envs, int) or self.n_envs < 1:
- logger.warning("Config: invalid n_envs=%s, forcing n_envs=1", self.n_envs)
+ logger.warning("Config: n_envs=%s invalid, set to 1", self.n_envs)
self.n_envs = 1
if not isinstance(self.n_eval_envs, int) or self.n_eval_envs < 1:
logger.warning(
- "Config: invalid n_eval_envs=%s, forcing n_eval_envs=1",
+ "Config: n_eval_envs=%s invalid, set to 1",
self.n_eval_envs,
)
self.n_eval_envs = 1
if self.multiprocessing and self.n_envs <= 1:
logger.warning(
- "Config: multiprocessing requires n_envs>1, deactivating multiprocessing",
+ "Config: multiprocessing requires n_envs>1, set to False",
)
self.multiprocessing = False
if self.eval_multiprocessing and self.n_eval_envs <= 1:
logger.warning(
- "Config: eval_multiprocessing requires n_eval_envs>1, deactivating eval_multiprocessing",
+ "Config: eval_multiprocessing requires n_eval_envs>1, set to False",
)
self.eval_multiprocessing = False
if self.multiprocessing and self.plot_new_best:
logger.warning(
- "Config: plot_new_best incompatible with multiprocessing, deactivating plot_new_best",
+ "Config: plot_new_best incompatible with multiprocessing, set to False",
)
self.plot_new_best = False
if not isinstance(self.frame_stacking, int) or self.frame_stacking < 0:
logger.warning(
- "Config: invalid frame_stacking=%s, forcing frame_stacking=0",
+ "Config: frame_stacking=%s invalid, set to 0",
self.frame_stacking,
)
self.frame_stacking = 0
if self.frame_stacking == 1:
logger.warning(
- "Config: frame_stacking=1 is equivalent to no stacking, forcing frame_stacking=0",
+ "Config: frame_stacking=1 equivalent to no stacking, set to 0",
)
self.frame_stacking = 0
if not isinstance(self.n_eval_steps, int) or self.n_eval_steps <= 0:
logger.warning(
- "Config: invalid n_eval_steps=%s, forcing n_eval_steps=10_000",
+ "Config: n_eval_steps=%s invalid, set to 10000",
self.n_eval_steps,
)
self.n_eval_steps = 10_000
if not isinstance(self.n_eval_episodes, int) or self.n_eval_episodes <= 0:
logger.warning(
- "Config: invalid n_eval_episodes=%s, forcing n_eval_episodes=5",
+ "Config: n_eval_episodes=%s invalid, set to 5",
self.n_eval_episodes,
)
self.n_eval_episodes = 5
or self.optuna_purge_period < 0
):
logger.warning(
- "Config: invalid purge_period=%s, forcing purge_period=0",
+ "Config: purge_period=%s invalid, set to 0",
self.optuna_purge_period,
)
self.optuna_purge_period = 0
and self.optuna_purge_period > 0
):
logger.warning(
- "Config: purge_period has no effect when continuous=True, forcing purge_period=0",
+ "Config: purge_period has no effect when continuous=True, set to 0",
)
self.optuna_purge_period = 0
add_state_info = self.rl_config.get("add_state_info", False)
tensorboard_throttle = self.rl_config.get("tensorboard_throttle", 1)
if not isinstance(tensorboard_throttle, int) or tensorboard_throttle < 1:
logger.warning(
- "Config: invalid tensorboard_throttle=%s, forcing tensorboard_throttle=1",
+ "Config: tensorboard_throttle=%s invalid, set to 1",
tensorboard_throttle,
)
self.rl_config["tensorboard_throttle"] = 1
if self.continual_learning and bool(self.frame_stacking):
logger.warning(
- "Config: continual_learning incompatible with frame_stacking, deactivating continual_learning",
+ "Config: continual_learning incompatible with frame_stacking, set to False",
)
self.continual_learning = False
if gamma is not None:
model_reward_parameters["potential_gamma"] = gamma
else:
- logger.warning(
- "PBRS: %s no valid discount gamma resolved for environment", pair
- )
+ logger.warning("PBRS: no valid discount gamma resolved for environment")
return env_info
cast(ScheduleTypeKnown, ReforceXY._SCHEDULE_TYPES[0]), lr
)
logger.info(
- "Training: learning rate linear schedule enabled, initial=%s", lr
+ "Training: learning rate linear schedule enabled, initial=%.6f", lr
)
# "PPO"
cast(ScheduleTypeKnown, ReforceXY._SCHEDULE_TYPES[0]), cr
)
logger.info(
- "Training: clip range linear schedule enabled, initial=%s", cr
+ "Training: clip range linear schedule enabled, initial=%.2f", cr
)
# "DQN"
)
else:
logger.warning(
- "Config: invalid net_arch=%s, using default", net_arch
+ "Config: net_arch=%r invalid, set to %r",
+ net_arch,
+ {"pi": default_net_arch, "vf": default_net_arch},
)
model_params["policy_kwargs"]["net_arch"] = {
"pi": default_net_arch,
model_params["policy_kwargs"]["net_arch"] = {"pi": pi, "vf": vf}
else:
logger.warning(
- "Config: unexpected net_arch type=%s, using default",
- type(net_arch),
+ "Config: net_arch type=%s unexpected, set to %r",
+ type(net_arch).__name__,
+ {"pi": default_net_arch, "vf": default_net_arch},
)
model_params["policy_kwargs"]["net_arch"] = {
"pi": default_net_arch,
)
else:
logger.warning(
- "Config: invalid net_arch=%s, using default", net_arch
+ "Config: net_arch=%r invalid, set to %r",
+ net_arch,
+ default_net_arch,
)
model_params["policy_kwargs"]["net_arch"] = default_net_arch
elif isinstance(net_arch, list):
model_params["policy_kwargs"]["net_arch"] = net_arch
else:
logger.warning(
- "Config: unexpected net_arch type=%s, using default",
- type(net_arch),
+ "Config: net_arch type=%s unexpected, set to %r",
+ type(net_arch).__name__,
+ default_net_arch,
)
model_params["policy_kwargs"]["net_arch"] = default_net_arch
eval_days = steps_to_days(eval_timesteps, self.config.get("timeframe"))
total_days = steps_to_days(total_timesteps, self.config.get("timeframe"))
- logger.info("Model: %s", self.model_type)
+ logger.info("Model: type=%s", self.model_type)
logger.info(
"Training: %s steps (%s days), %s cycles, %s env(s) -> total %s steps (%s days)",
train_timesteps,
best_params = self.optimize(dk, total_timesteps)
if best_params is None:
logger.error(
- "Hyperopt: optimization failed, using default model params"
+ "Hyperopt %s: optimization failed, using default model params",
+ dk.pair,
)
best_params = self.get_model_params()
model_params = best_params
def _optuna_retrain_counters_path(self) -> Path:
return Path(self.full_path / "optuna-retrain-counters.json")
- def _load_optuna_retrain_counters(self) -> Dict[str, int]:
+ def _load_optuna_retrain_counters(self, pair: str) -> Dict[str, int]:
counters_path = self._optuna_retrain_counters_path()
if not counters_path.is_file():
return {}
return result
except Exception as e:
logger.warning(
- "Hyperopt: failed to load retrain counters from %s: %r",
+ "Hyperopt %s: failed to load retrain counters from %s: %r",
+ pair,
counters_path,
e,
)
return {}
- def _save_optuna_retrain_counters(self, counters: Dict[str, int]) -> None:
+ def _save_optuna_retrain_counters(
+ self, counters: Dict[str, int], pair: str
+ ) -> None:
counters_path = self._optuna_retrain_counters_path()
try:
with counters_path.open("w", encoding="utf-8") as write_file:
json.dump(counters, write_file, indent=4, sort_keys=True)
except Exception as e:
logger.warning(
- "Hyperopt: failed to save retrain counters to %s: %r", counters_path, e
+ "Hyperopt %s: failed to save retrain counters to %s: %r",
+ pair,
+ counters_path,
+ e,
)
def _increment_optuna_retrain_counter(self, pair: str) -> int:
- pair = ReforceXY._sanitize_pair(pair)
- counters = self._load_optuna_retrain_counters()
- pair_count = int(counters.get(pair, 0)) + 1
- counters[pair] = pair_count
- self._save_optuna_retrain_counters(counters)
+ sanitized_pair = ReforceXY._sanitize_pair(pair)
+ counters = self._load_optuna_retrain_counters(pair)
+ pair_count = int(counters.get(sanitized_pair, 0)) + 1
+ counters[sanitized_pair] = pair_count
+ self._save_optuna_retrain_counters(counters, pair)
return pair_count
def create_storage(self, pair: str) -> BaseStorage:
)
else:
raise ValueError(
- f"Hyperopt: unsupported storage backend '{storage_backend}'. "
+ f"Hyperopt {pair}: unsupported storage backend '{storage_backend}'. "
f"Expected one of: {list(ReforceXY._STORAGE_BACKENDS)}"
)
return storage
# "auto"
if sampler == ReforceXY._SAMPLER_TYPES[1]:
logger.info(
- "Hyperopt: using AutoSampler (seed=%s)",
+ "Hyperopt: using AutoSampler (seed=%d)",
seed,
)
return optunahub.load_module("samplers/auto_sampler").AutoSampler(seed=seed)
# "tpe"
elif sampler == ReforceXY._SAMPLER_TYPES[0]:
logger.info(
- "Hyperopt: using TPESampler (n_startup_trials=%s, multivariate=True, group=True, seed=%s)",
+ "Hyperopt: using TPESampler (n_startup_trials=%d, multivariate=True, group=True, seed=%d)",
self.optuna_n_startup_trials,
seed,
)
min_resource: int, max_resource: int, reduction_factor: int
) -> BasePruner:
logger.info(
- "Hyperopt: using HyperbandPruner (min_resource=%s, max_resource=%s, reduction_factor=%s)",
+ "Hyperopt: using HyperbandPruner (min_resource=%d, max_resource=%d, reduction_factor=%d)",
min_resource,
max_resource,
reduction_factor,
)
if study_has_best_trial:
logger.info(
- "Hyperopt %s: best trial #%s with score %s",
+ "Hyperopt %s: best trial #%d with score %s",
study_name,
study.best_trial.number,
study.best_trial.value,
return sample_params_dqn(trial)
else:
raise NotImplementedError(
- f"Hyperopt: model type '{self.model_type}' not supported"
+ f"Hyperopt {trial.study.study_name}: model type '{self.model_type}' not supported"
)
def objective(
params = deepmerge(self.get_model_params(), params)
params["seed"] = params.get("seed", 42) + trial.number
logger.info(
- "Hyperopt %s: trial #%s params %s", study_name, trial.number, params
+ "Hyperopt %s: trial #%d params %s", study_name, trial.number, params
)
# "PPO"
model.learn(total_timesteps=total_timesteps, callback=callbacks)
except AssertionError:
logger.warning(
- "Hyperopt %s: trial #%s encountered NaN (AssertionError)",
+ "Hyperopt %s: trial #%d encountered NaN (AssertionError)",
study_name,
trial.number,
)
except ValueError as e:
if any(x in str(e).lower() for x in ("nan", "inf")):
logger.warning(
- "Hyperopt %s: trial #%s encountered NaN/Inf (ValueError): %r",
+ "Hyperopt %s: trial #%d encountered NaN/Inf (ValueError): %r",
study_name,
trial.number,
e,
raise
except FloatingPointError as e:
logger.warning(
- "Hyperopt %s: trial #%s encountered NaN/Inf (FloatingPointError): %r",
+ "Hyperopt %s: trial #%d encountered NaN/Inf (FloatingPointError): %r",
study_name,
trial.number,
e,
except RuntimeError as e:
if any(x in str(e).lower() for x in ("nan", "inf")):
logger.warning(
- "Hyperopt %s: trial #%s encountered NaN/Inf (RuntimeError): %r",
+ "Hyperopt %s: trial #%d encountered NaN/Inf (RuntimeError): %r",
study_name,
trial.number,
e,
)
if self._exit_potential_mode not in set(ReforceXY._EXIT_POTENTIAL_MODES):
logger.warning(
- "PBRS: unknown exit_potential_mode %r; defaulting to %r. Valid modes: %s",
+ "PBRS: exit_potential_mode=%r invalid, set to %r. Valid: %s",
self._exit_potential_mode,
ReforceXY._EXIT_POTENTIAL_MODES[0],
", ".join(ReforceXY._EXIT_POTENTIAL_MODES),
if self._exit_potential_mode == ReforceXY._EXIT_POTENTIAL_MODES[0]:
if self._entry_additive_enabled or self._exit_additive_enabled:
logger.info(
- "PBRS: canonical mode, additive rewards disabled with Φ(terminal)=0. "
- "Invariance preserved. To use additive rewards, set exit_potential_mode=%s",
+ "PBRS: canonical mode, additive disabled (use exit_potential_mode=%s to enable)",
ReforceXY._EXIT_POTENTIAL_MODES[1],
)
self._entry_additive_enabled = False
# "non_canonical"
elif self._exit_potential_mode == ReforceXY._EXIT_POTENTIAL_MODES[1]:
if self._entry_additive_enabled or self._exit_additive_enabled:
- logger.info(
- "PBRS: non-canonical mode, additive rewards enabled with Φ(terminal)=0. "
- "Invariance intentionally broken."
- )
+ logger.info("PBRS: non-canonical mode, additive enabled")
if MyRLEnv.is_unsupported_pbrs_config(
self._hold_potential_enabled, getattr(self, "add_state_info", False)
):
logger.warning(
- "PBRS: hold_potential_enabled=True and add_state_info=False is unsupported. Automatically enabling add_state_info=True."
+ "PBRS: hold_potential_enabled=True requires add_state_info=True, enabling"
)
self.add_state_info = True
self._set_observation_space()
return min(max(-1.0, x), 1.0)
logger.warning(
- "PBRS: unknown potential transform '%s'; falling back to tanh. Valid transforms: %s",
+ "PBRS: potential_transform=%r invalid, set to 'tanh'. Valid: %s",
name,
", ".join(ReforceXY._TRANSFORM_FUNCTIONS),
)
)
)
if exit_plateau_grace < 0.0:
- logger.warning("PBRS: exit_plateau_grace < 0; falling back to 0.0")
+ logger.warning(
+ "PBRS: exit_plateau_grace=%.2f invalid, set to 0.0", exit_plateau_grace
+ )
exit_plateau_grace = 0.0
def _legacy(dr: float, p: Mapping[str, Any]) -> float:
p.get("exit_linear_slope", ReforceXY.DEFAULT_EXIT_LINEAR_SLOPE)
)
if slope < 0.0:
- logger.warning("PBRS: exit_linear_slope < 0; falling back to 1.0")
+ logger.warning(
+ "PBRS: exit_linear_slope=%.2f invalid, set to 1.0", slope
+ )
slope = 1.0
return 1.0 / (1.0 + slope * dr)
strategy_fn = strategies.get(exit_attenuation_mode, None)
if strategy_fn is None:
logger.warning(
- "PBRS: unknown exit_attenuation_mode '%s'; defaulting to %s. Valid modes: %s",
+ "PBRS: exit_attenuation_mode=%r invalid, set to %r. Valid: %s",
exit_attenuation_mode,
ReforceXY._EXIT_ATTENUATION_MODES[2], # "linear"
", ".join(ReforceXY._EXIT_ATTENUATION_MODES),
)
except Exception as e:
logger.warning(
- "PBRS: exit_attenuation_mode '%s' failed (%r); fallback to %s (effective_dr=%.5f)",
+ "PBRS: exit_attenuation_mode=%r failed (%r), set to %r (effective_dr=%.5f)",
exit_attenuation_mode,
e,
ReforceXY._EXIT_ATTENUATION_MODES[2], # "linear"
if check_invariants:
if not np.isfinite(exit_factor):
logger.debug(
- "PBRS: _get_exit_factor produced non-finite factor; resetting to 0.0"
+ "PBRS: exit_factor=%.5f non-finite, set to 0.0", exit_factor
)
return 0.0
if efficiency_coefficient < 0.0:
logger.debug(
- "PBRS: _compute_efficiency_coefficient produced negative coefficient %.5f",
- efficiency_coefficient,
+ "PBRS: efficiency_coefficient=%.5f negative", efficiency_coefficient
)
if exit_factor < 0.0 and pnl >= 0.0:
logger.debug(
- "PBRS: _get_exit_factor produced negative factor with positive pnl (exit_factor=%.5f, pnl=%.5f); clamping to 0.0",
+ "PBRS: exit_factor=%.5f negative with pnl=%.5f positive, clamped to 0.0",
exit_factor,
pnl,
)
)
if exit_factor_threshold > 0 and abs(exit_factor) > exit_factor_threshold:
logger.warning(
- "PBRS: _get_exit_factor |exit_factor|=%.5f exceeds threshold %.5f",
- exit_factor,
+ "PBRS: |exit_factor|=%.5f exceeds exit_factor_threshold=%.5f",
+ abs(exit_factor),
exit_factor_threshold,
)
)
except Exception as e:
logger.error(
- "Tensorboard: logger.record failed at best/train_env%s: %r",
+ "Tensorboard: logger.record failed at best/train_env%d: %r",
i,
e,
exc_info=True,
last_mean_reward = float(getattr(self, "last_mean_reward", np.nan))
except Exception as e:
logger.warning(
- "Hyperopt %s: trial #%s invalid last_mean_reward (eval_idx=%s, timesteps=%s): %r",
+ "Hyperopt %s: trial #%d invalid last_mean_reward (eval_idx=%s, timesteps=%s): %r",
self.trial.study.study_name,
self.trial.number,
self.eval_idx,
if not np.isfinite(last_mean_reward):
logger.warning(
- "Hyperopt %s: trial #%s non-finite last_mean_reward (eval_idx=%s, timesteps=%s)",
+ "Hyperopt %s: trial #%d non-finite last_mean_reward (eval_idx=%s, timesteps=%s)",
self.trial.study.study_name,
self.trial.number,
self.eval_idx,
self.trial.report(last_mean_reward, self.num_timesteps)
except Exception as e:
logger.warning(
- "Hyperopt %s: trial #%s trial.report failed (eval_idx=%s, timesteps=%s): %r",
+ "Hyperopt %s: trial #%d trial.report failed (eval_idx=%s, timesteps=%s): %r",
self.trial.study.study_name,
self.trial.number,
self.eval_idx,
best_mean_reward = float(getattr(self, "best_mean_reward", np.nan))
except Exception as e:
logger.warning(
- "Hyperopt %s: trial #%s invalid best_mean_reward (eval_idx=%s, timesteps=%s): %r",
+ "Hyperopt %s: trial #%d invalid best_mean_reward (eval_idx=%s, timesteps=%s): %r",
self.trial.study.study_name,
self.trial.number,
self.eval_idx,
)
else:
logger.warning(
- "Hyperopt %s: trial #%s non-finite best_mean_reward (eval_idx=%s, timesteps=%s)",
+ "Hyperopt %s: trial #%d non-finite best_mean_reward (eval_idx=%s, timesteps=%s)",
self.trial.study.study_name,
self.trial.number,
self.eval_idx,
)
except Exception as e:
logger.error(
- "Hyperopt %s: trial #%s logger.record failed (eval_idx=%s, timesteps=%s): %r",
+ "Hyperopt %s: trial #%d logger.record failed (eval_idx=%s, timesteps=%s): %r",
self.trial.study.study_name,
self.trial.number,
self.eval_idx,
try:
if self.trial.should_prune():
logger.info(
- "Hyperopt %s: trial #%s pruned (eval_idx=%s, timesteps=%s, score=%.5f)",
+ "Hyperopt %s: trial #%d pruned (eval_idx=%s, timesteps=%s, score=%.5f)",
self.trial.study.study_name,
self.trial.number,
self.eval_idx,
return False
except Exception as e:
logger.warning(
- "Hyperopt %s: trial #%s should_prune failed (eval_idx=%s, timesteps=%s): %r",
+ "Hyperopt %s: trial #%d should_prune failed (eval_idx=%s, timesteps=%s): %r",
self.trial.study.study_name,
self.trial.number,
self.eval_idx,
lr = optuna_params.get("learning_rate")
if lr is None:
- raise ValueError(f"Optuna: missing 'learning_rate' in params for {model_type}")
+ raise ValueError(
+ f"Hyperopt: missing 'learning_rate' in params for {model_type}"
+ )
lr = get_schedule(
optuna_params.get("lr_schedule", ReforceXY._SCHEDULE_TYPES[1]), float(lr)
) # default: "constant"
for param in required_ppo_params:
if optuna_params.get(param) is None:
raise ValueError(
- f"Optuna: missing '{param}' in params for {model_type}"
+ f"Hyperopt: missing '{param}' in params for {model_type}"
)
cr = optuna_params.get("clip_range")
cr = get_schedule(
for param in required_dqn_params:
if optuna_params.get(param) is None:
raise ValueError(
- f"Optuna: missing '{param}' in params for {model_type}"
+ f"Hyperopt: missing '{param}' in params for {model_type}"
)
train_freq = optuna_params.get("train_freq")
subsample_steps = optuna_params.get("subsample_steps")
): # "QRDQN"
policy_kwargs["n_quantiles"] = int(optuna_params["n_quantiles"])
else:
- raise ValueError(f"Optuna: model type '{model_type}' not supported")
+ raise ValueError(f"Hyperopt: model type '{model_type}' not supported")
if optuna_params.get("net_arch"):
net_arch_value = str(optuna_params["net_arch"])