From ea33c959292be8827b13355f2c087662f9e3dbc1 Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?=
Date: Wed, 17 Sep 2025 15:38:15 +0200
Subject: [PATCH] fix: assorted bug fixes
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jérôme Benoit
---
 ReforceXY/user_data/freqaimodels/ReforceXY.py | 214 ++++++++++++++++--
 .../freqaimodels/QuickAdapterRegressorV3.py   |   9 +-
 .../user_data/strategies/QuickAdapterV3.py    |   4 +-
 3 files changed, 201 insertions(+), 26 deletions(-)
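
Notes:

The trade_count changes below assume `trade_history` appends one record on
entry and one on exit, so a completed round trip contributes two records and
`len(self.trade_history) // 2` counts completed trades. A minimal standalone
sketch of that assumption (the record fields are illustrative, not the actual
environment schema):

    # Two completed round trips produce four history records.
    trade_history = [
        {"type": "enter_long", "price": 100.0},
        {"type": "exit_long", "price": 101.0},
        {"type": "enter_short", "price": 101.5},
        {"type": "exit_short", "price": 100.5},
    ]
    assert int(len(trade_history) // 2) == 2
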
diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py
index 1d2d4d7..7b8790e 100644
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -180,7 +180,9 @@ class ReforceXY(BaseReinforcementLearningModel):
         """
         Set training and evaluation environments
         """
-        self.close_envs()
+        if self.train_env is not None or self.eval_env is not None:
+            logger.info("Closing environments")
+            self.close_envs()
         train_df = data_dictionary.get("train_features")
         test_df = data_dictionary.get("test_features")
 
@@ -836,11 +838,20 @@ class ReforceXY(BaseReinforcementLearningModel):
             logger.warning("Optuna encountered NaN (AssertionError)")
             nan_encountered = True
         except ValueError as e:
-            if "NaN" in str(e):
+            if "nan" in str(e).lower():
                 logger.warning("Optuna encountered NaN (ValueError)")
                 nan_encountered = True
             else:
                 raise
+        except FloatingPointError as e:
+            logger.warning("Optuna encountered NaN/Inf (FloatingPointError): %s", e)
+            nan_encountered = True
+        except RuntimeError as e:
+            if "nan" in str(e).lower() or "inf" in str(e).lower():
+                logger.warning("Optuna encountered NaN/Inf (RuntimeError): %s", e)
+                nan_encountered = True
+            else:
+                raise
         finally:
             if self.progressbar_callback:
                 self.progressbar_callback.on_training_end()
@@ -1173,6 +1184,8 @@ class ReforceXY(BaseReinforcementLearningModel):
         """
         self._current_tick += 1
         self._update_unrealized_total_profit()
+        pnl = self.get_unrealized_profit()
+        self._update_portfolio_log_returns()
         self._force_action = self._get_force_action()
         reward = self.calculate_reward(action)
         self.total_reward += reward
@@ -1186,13 +1199,13 @@
             "force_action": (
                 self._force_action.name if self._force_action else None
             ),
-            "pnl": self.get_unrealized_profit(),
+            "pnl": round(pnl, 5),
             "reward": round(reward, 5),
             "total_reward": round(self.total_reward, 5),
             "total_profit": round(self._total_profit, 5),
             "idle_duration": self.get_idle_duration(),
             "trade_duration": self.get_trade_duration(),
-            "trade_count": len(self.trade_history),
+            "trade_count": int(len(self.trade_history) // 2),
         }
         self._update_history(info)
         return (
@@ -1313,7 +1326,7 @@
             return np.log(previous_price) - np.log(current_price)
         return 0.0
 
-    def update_portfolio_log_returns(self):
+    def _update_portfolio_log_returns(self):
         self.portfolio_log_returns[self._current_tick] = (
             self.get_most_recent_return()
         )
@@ -1454,7 +1467,7 @@
         fig.suptitle(
             f"Total Reward: {self.total_reward:.2f} ~ "
             + f"Total Profit: {self._total_profit:.2f} ~ "
-            + f"Trades: {len(self.trade_history)}"
+            + f"Trades: {int(len(self.trade_history) // 2)}",
         )
         fig.tight_layout()
         return fig
@@ -1549,7 +1562,23 @@ class InfoMetricsCallback(TensorboardCallback):
 
         if isinstance(infos_list, list) and infos_list:
             numeric_acc: Dict[str, list[float]] = defaultdict(list)
-            non_numeric_acc: Dict[str, set[Any]] = defaultdict(set)
+            non_numeric_counts: Dict[str, Dict[Any, int]] = defaultdict(
+                lambda: defaultdict(int)
+            )
+            filtered_values: int = 0
+
+            def _is_numeric_non_bool(x: Any) -> bool:
+                return isinstance(
+                    x, (int, float, np.integer, np.floating)
+                ) and not isinstance(x, bool)
+
+            def _is_finite_number(x: Any) -> bool:
+                if not _is_numeric_non_bool(x):
+                    return False
+                try:
+                    return np.isfinite(float(x))
+                except Exception:
+                    return False
 
             for info_dict in infos_list:
                 if not isinstance(info_dict, dict):
@@ -1557,16 +1586,18 @@
                 for k, v in info_dict.items():
                     if k in {"episode", "terminal_observation", "TimeLimit.truncated"}:
                         continue
-                    if isinstance(v, (int, float)) and not isinstance(v, bool):
+                    if _is_finite_number(v):
                         numeric_acc[k].append(float(v))
+                    elif _is_numeric_non_bool(v):
+                        filtered_values += 1
                     else:
-                        non_numeric_acc[k].add(v)
+                        non_numeric_counts[k][v] += 1
 
         for k, values in numeric_acc.items():
             if not values:
                 continue
-            values_mean = sum(values) / len(values)
-            aggregated_info[k] = values_mean
+            mean = sum(values) / len(values)
+            aggregated_info[k] = mean
             if len(values) > 1:
                 try:
                     aggregated_info[f"{k}_std"] = stdev(values)
@@ -1576,30 +1607,167 @@
         for key in ("reward", "pnl"):
             values = numeric_acc.get(key)
             if values:
-                aggregated_info[f"{key}_min"] = float(min(values))
-                aggregated_info[f"{key}_max"] = float(max(values))
+                try:
+                    aggregated_info[f"{key}_min"] = float(min(values))
+                    aggregated_info[f"{key}_max"] = float(max(values))
+                    percentiles = np.percentile(values, [25, 50, 75, 90])
+                    aggregated_info[f"{key}_p25"] = float(percentiles[0])
+                    aggregated_info[f"{key}_p50"] = float(percentiles[1])
+                    aggregated_info[f"{key}_p75"] = float(percentiles[2])
+                    aggregated_info[f"{key}_p90"] = float(percentiles[3])
+                    med = float(percentiles[1])
+                    mad = float(np.median(np.abs(np.array(values) - med)))
+                    aggregated_info[f"{key}_mad"] = mad
+                except Exception:
+                    pass
 
-        for k, values in non_numeric_acc.items():
-            aggregated_info[k] = next(iter(values)) if len(values) == 1 else "mixed"
+        for k, counts in non_numeric_counts.items():
+            if not counts:
+                continue
+            if len(counts) == 1:
+                try:
+                    aggregated_info[f"{k}_mode"] = next(iter(counts.keys()))
+                except Exception:
+                    pass
+            else:
+                aggregated_info[f"{k}_mode"] = "mixed"
+
+        try:
+            self.logger.record("info/n_envs", int(len(infos_list)))
+        except Exception:
+            pass
+
+        if filtered_values > 0:
+            try:
+                self.logger.record("info/filtered_values", int(filtered_values))
+            except Exception:
+                try:
+                    self.logger.record(
+                        "info/filtered_values",
+                        int(filtered_values),
+                        exclude=("tensorboard",),
+                    )
+                except Exception:
+                    pass
 
         if self.training_env is None:
             return True
         try:
             tensorboard_metrics_list = self.training_env.get_attr("tensorboard_metrics")
-            tensorboard_metrics = (
-                tensorboard_metrics_list[0] if tensorboard_metrics_list else {}
-            )
         except Exception:
-            tensorboard_metrics = {}
+            tensorboard_metrics_list = []
+
+        aggregated_tensorboard_metrics: Dict[str, Dict[str, Any]] = defaultdict(dict)
+        aggregate_tensorboard_counts: Dict[str, Dict[str, int]] = defaultdict(dict)
+        for env_metrics in tensorboard_metrics_list or []:
+            if not isinstance(env_metrics, dict):
+                continue
+            for category, metrics in env_metrics.items():
+                if not isinstance(metrics, dict):
+                    continue
+                cat_dict = aggregated_tensorboard_metrics.setdefault(category, {})
+                cnt_dict = aggregate_tensorboard_counts.setdefault(category, {})
+                for metric, value in metrics.items():
+                    if _is_finite_number(value):
+                        v = float(value)
+                        try:
+                            base = float(cat_dict.get(metric, 0.0))
+                        except Exception:
+                            base = 0.0
+                        cat_dict[metric] = base + v
+                        cnt_dict[metric] = cnt_dict.get(metric, 0) + 1
+                    else:
+                        if (
+                            aggregate_tensorboard_counts.get(category, {}).get(
+                                metric, 0
+                            )
+                            == 0
+                        ):
+                            cat_dict[metric] = value
 
         for metric, value in aggregated_info.items():
-            self.logger.record(f"info/{metric}", value)
+            try:
+                self.logger.record(f"info/{metric}", value)
+            except Exception:
+                try:
+                    self.logger.record(
+                        f"info/{metric}", value, exclude=("tensorboard",)
+                    )
+                except Exception:
+                    pass
 
-        for category, metrics in tensorboard_metrics.items():
+        if isinstance(infos_list, list) and infos_list:
+            cat_keys = ("force_action", "action", "position")
+            cat_counts: Dict[str, Dict[Any, int]] = {
+                k: defaultdict(int) for k in cat_keys
+            }
+            cat_totals: Dict[str, int] = {k: 0 for k in cat_keys}
+            for info_dict in infos_list:
+                if not isinstance(info_dict, dict):
+                    continue
+                for k in cat_keys:
+                    if k in info_dict:
+                        v = info_dict.get(k)
+                        cat_counts[k][v] += 1
+                        cat_totals[k] += 1
+
+            for k, counts in cat_counts.items():
+                cat_total = max(1, int(cat_totals.get(k, 0)))
+                for name, cnt in counts.items():
+                    try:
+                        self.logger.record(f"info/{k}/{name}_count", int(cnt))
+                        self.logger.record(
+                            f"info/{k}/{name}_ratio", float(cnt) / float(cat_total)
+                        )
+                    except Exception:
+                        try:
+                            self.logger.record(
+                                f"info/{k}/{name}_count",
+                                int(cnt),
+                                exclude=("tensorboard",),
+                            )
+                            self.logger.record(
+                                f"info/{k}/{name}_ratio",
+                                float(cnt) / float(cat_total),
+                                exclude=("tensorboard",),
+                            )
+                        except Exception:
+                            pass
+
+        for category, metrics in aggregated_tensorboard_metrics.items():
             if isinstance(metrics, dict):
                 for metric, value in metrics.items():
-                    self.logger.record(f"{category}/{metric}", value)
+                    try:
+                        self.logger.record(f"{category}/{metric}", value)
+                    except Exception:
+                        try:
+                            self.logger.record(
+                                f"{category}/{metric}", value, exclude=("tensorboard",)
+                            )
+                        except Exception:
+                            pass
+                    try:
+                        count = aggregate_tensorboard_counts.get(category, {}).get(
+                            metric
+                        )
+                        if isinstance(value, (int, float)) and count and count > 0:
+                            mean = float(value) / float(count)
+                            self.logger.record(f"{category}/{metric}_mean", mean)
+                    except Exception:
+                        try:
+                            count = aggregate_tensorboard_counts.get(category, {}).get(
+                                metric
+                            )
+                            if isinstance(value, (int, float)) and count and count > 0:
+                                mean = float(value) / float(count)
+                                self.logger.record(
+                                    f"{category}/{metric}_mean",
+                                    mean,
+                                    exclude=("tensorboard",),
+                                )
+                        except Exception:
+                            pass
 
         return True
 
@@ -1613,7 +1781,7 @@ class RolloutPlotCallback(BaseCallback):
 
         for i, fig in enumerate(figures):
             figure = Figure(fig, close=True)
             self.logger.record(
-                f"best/train_env_{i}", figure, exclude=("stdout", "log", "json", "csv")
+                f"best/train_env{i}", figure, exclude=("stdout", "log", "json", "csv")
             )
         return True
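
For reference, the per-key summary statistics added to InfoMetricsCallback
above reduce to the following standalone sketch; `aggregate` and its sample
input are illustrative names, not part of the patch:

    import numpy as np

    def aggregate(values: list[float]) -> dict[str, float]:
        # Keep only finite numeric samples, then compute the statistics the
        # callback records: min/max, quartiles, p90, and the median absolute
        # deviation (MAD) around the median.
        arr = np.asarray(
            [v for v in values if isinstance(v, (int, float)) and np.isfinite(v)],
            dtype=float,
        )
        p25, p50, p75, p90 = np.percentile(arr, [25, 50, 75, 90])
        return {
            "min": float(arr.min()),
            "max": float(arr.max()),
            "p25": float(p25),
            "p50": float(p50),
            "p75": float(p75),
            "p90": float(p90),
            "mad": float(np.median(np.abs(arr - p50))),
        }

    print(aggregate([0.1, -0.2, 0.4, float("nan"), 0.3]))  # NaN is dropped
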
diff --git a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
index 7cf1e0f..6878654 100644
--- a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
+++ b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py
@@ -1130,7 +1130,14 @@ class QuickAdapterRegressorV3(BaseRegressionModel):
             }
             metric_log_msg = ""
         else:
-            best_trial = self.get_multi_objective_study_best_trial("label", study)
+            try:
+                best_trial = self.get_multi_objective_study_best_trial("label", study)
+            except Exception as e:
+                logger.error(
+                    f"Optuna {pair} {namespace} {objective_type} objective hyperopt failed ({time_spent:.2f} secs): {repr(e)}",
+                    exc_info=True,
+                )
+                best_trial = None
         if not best_trial:
             logger.error(
                 f"Optuna {pair} {namespace} {objective_type} objective hyperopt failed ({time_spent:.2f} secs): no study best trial found"
diff --git a/quickadapter/user_data/strategies/QuickAdapterV3.py b/quickadapter/user_data/strategies/QuickAdapterV3.py
index 39542b9..87b4a91 100644
--- a/quickadapter/user_data/strategies/QuickAdapterV3.py
+++ b/quickadapter/user_data/strategies/QuickAdapterV3.py
@@ -1117,8 +1117,8 @@ class QuickAdapterV3(IStrategy):
         side: str,
         order: Literal["entry", "exit"],
         rate: float,
-        min_natr_ratio_percent: float = 0.009,
-        max_natr_ratio_percent: float = 0.09,
+        min_natr_ratio_percent: float = 0.0085,
+        max_natr_ratio_percent: float = 0.085,
         lookback_period: int = 1,
         decay_ratio: float = 0.5,
     ) -> bool:
-- 
2.43.0
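
For reference, the broadened NaN/Inf handling in ReforceXY.py follows the
pattern sketched below. `train_once` stands in for the actual model.learn(...)
call, and the np.errstate context is only one way a FloatingPointError can
surface during training; the patch itself merely widens the except clauses:

    import logging

    import numpy as np

    logger = logging.getLogger(__name__)

    def run_trial_safely(train_once) -> bool:
        # Returns True when training hit a NaN/Inf condition that should
        # prune the Optuna trial instead of crashing the whole study.
        try:
            with np.errstate(all="raise"):  # escalate FP warnings to errors
                train_once()
        except AssertionError:
            logger.warning("Optuna encountered NaN (AssertionError)")
            return True
        except FloatingPointError as e:
            logger.warning("Optuna encountered NaN/Inf (FloatingPointError): %s", e)
            return True
        except (ValueError, RuntimeError) as e:
            # Case-insensitive matching covers both "NaN" and "nan" spellings.
            msg = str(e).lower()
            if "nan" in msg or "inf" in msg:
                logger.warning("Optuna encountered NaN/Inf: %s", e)
                return True
            raise
        return False
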