"QRDQN",
)
# Schedule names. _SCHEDULE_TYPES carries the extra "unknown" entry, while
# _SCHEDULE_TYPES_KNOWN is the same list without it; the Optuna search spaces
# pass the latter to trial.suggest_categorical for "lr_schedule" and "cr_schedule".
_SCHEDULE_TYPES: Final[tuple[ScheduleType, ...]] = ("linear", "constant", "unknown")
+ _SCHEDULE_TYPES_KNOWN: Final[tuple[ScheduleTypeKnown, ...]] = ("linear", "constant")
_EXIT_POTENTIAL_MODES: Final[tuple[ExitPotentialMode, ...]] = (
"canonical",
"non_canonical",
"leaky_relu",
)
# Optimizer names. _OPTIMIZER_CLASSES_OPTUNA leaves out "adam"; it is the list
# the hyperparameter search offers for "optimizer_class", replacing the earlier
# index-based selection of entries [1] and [2] from _OPTIMIZER_CLASSES.
_OPTIMIZER_CLASSES: Final[tuple[OptimizerClass, ...]] = ("adam", "adamw", "rmsprop")
+ _OPTIMIZER_CLASSES_OPTUNA: Final[tuple[OptimizerClass, ...]] = ("adamw", "rmsprop")
_NET_ARCH_SIZES: Final[tuple[NetArchSize, ...]] = (
"small",
"medium",
"max_grad_norm": trial.suggest_float("max_grad_norm", 0.3, 1.0, step=0.05),
"vf_coef": trial.suggest_float("vf_coef", 0.0, 1.0, step=0.05),
"lr_schedule": trial.suggest_categorical(
- "lr_schedule", list(ReforceXY._SCHEDULE_TYPES)
+ "lr_schedule", list(ReforceXY._SCHEDULE_TYPES_KNOWN)
),
"cr_schedule": trial.suggest_categorical(
- "cr_schedule", list(ReforceXY._SCHEDULE_TYPES)
+ "cr_schedule", list(ReforceXY._SCHEDULE_TYPES_KNOWN)
),
"target_kl": trial.suggest_categorical(
"target_kl", [None, 0.01, 0.015, 0.02, 0.03, 0.04]
"activation_fn", list(ReforceXY._ACTIVATION_FUNCTIONS)
),
"optimizer_class": trial.suggest_categorical(
- "optimizer_class",
- [ReforceXY._OPTIMIZER_CLASSES[1], ReforceXY._OPTIMIZER_CLASSES[2]],
+ "optimizer_class", list(ReforceXY._OPTIMIZER_CLASSES_OPTUNA)
),
}
),
"learning_rate": trial.suggest_float("learning_rate", 1e-5, 3e-3, log=True),
"lr_schedule": trial.suggest_categorical(
- "lr_schedule", list(ReforceXY._SCHEDULE_TYPES[:2])
- ), # ["linear", "constant"]
+ "lr_schedule", list(ReforceXY._SCHEDULE_TYPES_KNOWN)
+ ),
"buffer_size": trial.suggest_categorical(
"buffer_size", [int(1e4), int(5e4), int(1e5), int(2e5)]
),
"activation_fn", list(ReforceXY._ACTIVATION_FUNCTIONS)
),
"optimizer_class": trial.suggest_categorical(
- "optimizer_class",
- [ReforceXY._OPTIMIZER_CLASSES[1], ReforceXY._OPTIMIZER_CLASSES[2]],
+ "optimizer_class", list(ReforceXY._OPTIMIZER_CLASSES_OPTUNA)
),
}
from enum import IntEnum
from functools import lru_cache
from logging import Logger
-from typing import Any, Callable, Literal, Optional, TypeVar, Union
+from typing import Any, Callable, Final, Literal, Optional, TypeVar, Union
import numpy as np
import optuna
# Each option is declared twice: a Literal alias for type checking and a tuple
# holding the same strings for runtime use. Final marks the tuples as constants
# that must not be rebound (checked by type checkers, not enforced at runtime).
WeightStrategy = Literal["none", "pivot_threshold"]
-WEIGHT_STRATEGIES: tuple[WeightStrategy, ...] = ("none", "pivot_threshold")
+WEIGHT_STRATEGIES: Final[tuple[WeightStrategy, ...]] = ("none", "pivot_threshold")
NormalizationType = Literal["minmax", "l1", "none"]
-NORMALIZATION_TYPES: tuple[NormalizationType, ...] = ("minmax", "l1", "none")
+NORMALIZATION_TYPES: Final[tuple[NormalizationType, ...]] = ("minmax", "l1", "none")
SmoothingKernel = Literal["gaussian", "kaiser", "triang"]
# A smoothing method is any kernel name above, or one of "smm" / "sma".
SmoothingMethod = Union[SmoothingKernel, Literal["smm", "sma"]]
-SMOOTHING_METHODS: tuple[SmoothingMethod, ...] = (
+SMOOTHING_METHODS: Final[tuple[SmoothingMethod, ...]] = (
"gaussian",
"kaiser",
"triang",
)
# Default settings for extrema smoothing; "method" is taken from the first
# SMOOTHING_METHODS entry so the default stays in sync with that tuple.
# NOTE(review): Final only forbids rebinding the name - the dict itself remains
# mutable, so confirm that consumers copy it rather than modify it in place.
-DEFAULTS_EXTREMA_SMOOTHING: dict[str, Any] = {
+DEFAULTS_EXTREMA_SMOOTHING: Final[dict[str, Any]] = {
"method": SMOOTHING_METHODS[0], # "gaussian"
"window": 5,
"beta": 8.0,
}
-DEFAULTS_EXTREMA_WEIGHTING: dict[str, Any] = {
+DEFAULTS_EXTREMA_WEIGHTING: Final[dict[str, Any]] = {
"normalization": NORMALIZATION_TYPES[0], # "minmax"
"gamma": 1.0,
"strategy": WEIGHT_STRATEGIES[0], # "none"