Piment Noir Git Repositories - freqai-strategies.git/commitdiff
refactor(reforcexy): PBRS requires per pair HPO
author: Jérôme Benoit <jerome.benoit@piment-noir.org>
Tue, 14 Oct 2025 20:47:34 +0000 (22:47 +0200)
committer: Jérôme Benoit <jerome.benoit@piment-noir.org>
Tue, 14 Oct 2025 20:47:34 +0000 (22:47 +0200)
Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
ReforceXY/reward_space_analysis/README.md
ReforceXY/user_data/freqaimodels/ReforceXY.py

index 506a105ec47c3b6999b5e99daaa0c746eeb806f8..35b5f1cb255fee1cffea8de2fd02ccee81ae36d2 100644 (file)
@@ -284,7 +284,7 @@ _Exit attenuation configuration:_
 - `exit_half_life` (default: 0.5) - Half-life parameter for the half_life kernel.
 - `exit_factor_threshold` (default: 10000.0) - Warning-only soft threshold (emits RuntimeWarning; no capping).
 
-Attenuation kernels:
+**Attenuation kernels**:
 
 Let r be the raw duration ratio and grace = `exit_plateau_grace`.
 
@@ -336,6 +336,18 @@ _PBRS (Potential-Based Reward Shaping) configuration:_
 - `exit_additive_transform_pnl` (default: tanh) - Transform function for PnL in exit additive
 - `exit_additive_transform_duration` (default: tanh) - Transform function for duration ratio in exit additive
 
+**Transform functions**:
+
+| Transform | Formula | Range | Characteristics | Use Case |
+|-----------|---------|-------|-----------------|----------|
+| `tanh` | tanh(x) | (-1, 1) | Smooth sigmoid, symmetric around 0 | Balanced PnL/duration transforms (default) |
+| `softsign` | x / (1 + \|x\|) | (-1, 1) | Smoother than tanh, linear near 0 | Less aggressive saturation |
+| `softsign_sharp` | x / (sharpness + \|x\|) | (-1, 1) | Tunable sharpness via `potential_softsign_sharpness` | Custom saturation control |
+| `arctan` | (2/π) × arctan(x) | (-1, 1) | Slower saturation than tanh | Wide dynamic range |
+| `logistic` | 2 / (1 + e^(-x)) - 1 | (-1, 1) | Equivalent to tanh(x/2), gentler curve | Mild non-linearity |
+| `asinh_norm` | asinh(x) / asinh(10) | (-1, 1) | Normalized asinh, handles large values | Extreme outlier robustness |
+| `clip` | clip(x, -1, 1) | [-1, 1] | Hard clipping at ±1 | Preserve linearity within bounds |
+
 _Invariant / safety controls:_
 
 - `check_invariants` (default: true) - Enable/disable runtime invariant & safety validations (simulation invariants, mathematical bounds, distribution checks). Set to `false` only for performance experiments; not recommended for production validation.
index 685fea3abbb58ce80678059986dd78b08b1fce23..bdf005f752136dced106e6a078cd7fbe58f83b32 100644 (file)
@@ -96,11 +96,10 @@ class ReforceXY(BaseReinforcementLearningModel):
                 "plot_window": 2000,                // Environment history window used for tensorboard rollout plot
             },
             "rl_config_optuna": {
-                "enabled": false,                   // Enable optuna hyperopt
-                "per_pair": false,                  // Enable per pair hyperopt
-                "n_trials": 100,
-                "n_startup_trials": 15,
-                "timeout_hours": 0,
+                "enabled": false,                   // Enable hyperopt
+                "n_trials": 100,                    // Number of trials
+                "n_startup_trials": 15,             // Number of initial random trials for TPESampler
+                "timeout_hours": 0,                 // Maximum time in hours for hyperopt (0 = no timeout)
                 "continuous": false,                // If true, perform continuous optimization
                 "warm_start": false,                // If true, enqueue previous best params if exists
                 "seed": 42,                         // RNG seed
@@ -872,12 +871,12 @@ class ReforceXY(BaseReinforcementLearningModel):
         except Exception:
             pass
 
-    def get_storage(self, pair: Optional[str] = None) -> BaseStorage:
+    def get_storage(self, pair: str) -> BaseStorage:
         """
         Get the storage for Optuna
         """
         storage_dir = self.full_path
-        storage_filename = f"optuna-{pair.split('/')[0]}" if pair else "optuna"
+        storage_filename = f"optuna-{pair.split('/')[0]}"
         storage_backend = self.rl_config_optuna.get("storage", "sqlite")
         if storage_backend == "sqlite":
             storage = RDBStorage(
@@ -912,16 +911,8 @@ class ReforceXY(BaseReinforcementLearningModel):
         Runs hyperparameter optimization using Optuna and returns the best hyperparameters found merged with the user defined parameters
         """
         identifier = self.freqai_info.get("identifier", "no_id_provided")
-        study_name = (
-            f"{identifier}-{dk.pair}"
-            if self.rl_config_optuna.get("per_pair", False)
-            else identifier
-        )
-        storage = (
-            self.get_storage(dk.pair)
-            if self.rl_config_optuna.get("per_pair", False)
-            else self.get_storage()
-        )
+        study_name = f"{identifier}-{dk.pair}"
+        storage = self.get_storage(dk.pair)
         continuous = self.rl_config_optuna.get("continuous", False)
         if continuous:
             ReforceXY.study_delete(study_name, storage)
@@ -952,9 +943,7 @@ class ReforceXY(BaseReinforcementLearningModel):
             load_if_exists=not continuous,
         )
         if self.rl_config_optuna.get("warm_start", False):
-            best_trial_params = self.load_best_trial_params(
-                dk.pair if self.rl_config_optuna.get("per_pair", False) else None
-            )
+            best_trial_params = self.load_best_trial_params(dk.pair)
             if best_trial_params:
                 study.enqueue_trial(best_trial_params)
         hyperopt_failed = False
@@ -991,9 +980,7 @@ class ReforceXY(BaseReinforcementLearningModel):
             hyperopt_failed = True
 
         if hyperopt_failed:
-            best_trial_params = self.load_best_trial_params(
-                dk.pair if self.rl_config_optuna.get("per_pair", False) else None
-            )
+            best_trial_params = self.load_best_trial_params(dk.pair)
             if best_trial_params is None:
                 logger.error(
                     f"Hyperopt {study_name} failed ({time_spent:.2f} secs): no previously saved best trial params found"
@@ -1016,10 +1003,7 @@ class ReforceXY(BaseReinforcementLearningModel):
         logger.info("Best trial params: %s", best_trial_params)
         logger.info("-------------------------------------------------------")
 
-        self.save_best_trial_params(
-            best_trial_params,
-            dk.pair if self.rl_config_optuna.get("per_pair", False) else None,
-        )
+        self.save_best_trial_params(best_trial_params, dk.pair)
 
         return deepmerge(
             self.get_model_params(),
@@ -1027,25 +1011,18 @@ class ReforceXY(BaseReinforcementLearningModel):
         )
 
     def save_best_trial_params(
-        self, best_trial_params: Dict[str, Any], pair: Optional[str] = None
+        self, best_trial_params: Dict[str, Any], pair: str
     ) -> None:
         """
         Save the best trial hyperparameters found during hyperparameter optimization
         """
-        best_trial_params_filename = (
-            f"hyperopt-best-params-{pair.split('/')[0]}"
-            if pair
-            else "hyperopt-best-params"
-        )
+        best_trial_params_filename = f"hyperopt-best-params-{pair.split('/')[0]}"
         best_trial_params_path = Path(
             self.full_path / f"{best_trial_params_filename}.json"
         )
-        log_msg: str = (
-            f"{pair}: saving best params to {best_trial_params_path} JSON file"
-            if pair
-            else f"Saving best params to {best_trial_params_path} JSON file"
+        logger.info(
+            "%s: saving best params to %s JSON file", pair, best_trial_params_path
         )
-        logger.info(log_msg)
         try:
             with best_trial_params_path.open("w", encoding="utf-8") as write_file:
                 json.dump(best_trial_params, write_file, indent=4)
@@ -1056,27 +1033,20 @@ class ReforceXY(BaseReinforcementLearningModel):
             )
             raise
 
-    def load_best_trial_params(
-        self, pair: Optional[str] = None
-    ) -> Optional[Dict[str, Any]]:
+    def load_best_trial_params(self, pair: str) -> Optional[Dict[str, Any]]:
         """
         Load the best trial hyperparameters found and saved during hyperparameter optimization
         """
-        best_trial_params_filename = (
-            f"hyperopt-best-params-{pair.split('/')[0]}"
-            if pair
-            else "hyperopt-best-params"
-        )
+        best_trial_params_filename = f"hyperopt-best-params-{pair.split('/')[0]}"
         best_trial_params_path = Path(
             self.full_path / f"{best_trial_params_filename}.json"
         )
-        log_msg: str = (
-            f"{pair}: loading best params from {best_trial_params_path} JSON file"
-            if pair
-            else f"Loading best params from {best_trial_params_path} JSON file"
-        )
         if best_trial_params_path.is_file():
-            logger.info(log_msg)
+            logger.info(
+                "%s: loading best params from %s JSON file",
+                pair,
+                best_trial_params_path,
+            )
             with best_trial_params_path.open("r", encoding="utf-8") as read_file:
                 best_trial_params = json.load(read_file)
             return best_trial_params
@@ -1115,9 +1085,7 @@ class ReforceXY(BaseReinforcementLearningModel):
         gamma: Optional[float] = None
         best_trial_params: Optional[Dict[str, Any]] = None
         if self.hyperopt:
-            best_trial_params = self.load_best_trial_params(
-                dk.pair if self.rl_config_optuna.get("per_pair", False) else None
-            )
+            best_trial_params = self.load_best_trial_params(dk.pair)
         if model_params and isinstance(model_params.get("gamma"), (int, float)):
             gamma = model_params.get("gamma")
         elif best_trial_params: