From: Jérôme Benoit <jerome.benoit@piment-noir.org>
Date: Mon, 15 Sep 2025 00:09:01 +0000 (+0200)
Subject: fix(reforcexy): proper frame stacking implementation
X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=6c0c5e5e29d4b2219b48ced7ba54cc13f78f38d2;p=freqai-strategies.git

fix(reforcexy): proper frame stacking implementation

Signed-off-by: Jérôme Benoit <jerome.benoit@piment-noir.org>
---

diff --git a/ReforceXY/user_data/freqaimodels/ReforceXY.py b/ReforceXY/user_data/freqaimodels/ReforceXY.py
index 5bb3080..a6deefc 100644
--- a/ReforceXY/user_data/freqaimodels/ReforceXY.py
+++ b/ReforceXY/user_data/freqaimodels/ReforceXY.py
@@ -514,15 +514,22 @@ class ReforceXY(BaseReinforcementLearningModel):
 
             return True
 
-        def _action_masks(position: float):
+        def _action_masks(position: float) -> list[bool]:
             return [_is_valid(action.value, position) for action in Actions]
 
-        def _predict(window):
+        def _predict(window) -> int:
             observation: DataFrame = dataframe.iloc[window.index]
             action_masks_param: Dict[str, Any] = {}
 
-            if self.live and self.rl_config.get("add_state_info", False):
-                position, pnl, trade_duration = self.get_state_info(dk.pair)
+            if self.rl_config.get("add_state_info", False):
+                if self.live:
+                    position, pnl, trade_duration = self.get_state_info(dk.pair)
+                else:
+                    # TODO: state info handling for backtests
+                    position = Positions.Neutral
+                    pnl = 0.0
+                    trade_duration = 0
+
                 # STATE_INFO
                 observation["pnl"] = pnl
                 observation["position"] = position
@@ -531,23 +538,33 @@ class ReforceXY(BaseReinforcementLearningModel):
                 if self.is_maskable:
                     action_masks_param = {"action_masks": _action_masks(position)}
 
-            observation = observation.to_numpy(dtype=np.float32)
-
-            if self.frame_stacking:
-                # FIXME: proper observation stacking needs more work
-                observations = np.repeat(
-                    observation, axis=1, repeats=self.frame_stacking
-                )
+            np_observation = observation.to_numpy(dtype=np.float32)
+
+            frame_stacking = self.frame_stacking
+            if frame_stacking and frame_stacking > 1:
+                if not hasattr(_predict, "_frame_buffer"):
+                    _predict._frame_buffer = []
+                fb: list[np.ndarray] = getattr(_predict, "_frame_buffer")
+                fb.append(np_observation)
+                if len(fb) > frame_stacking:
+                    del fb[0 : len(fb) - frame_stacking]
+                if len(fb) < frame_stacking:
+                    pad_needed = frame_stacking - len(fb)
+                    fb_padded = [fb[0]] * pad_needed + fb
+                else:
+                    fb_padded = fb
+                stacked_observations = np.concatenate(fb_padded, axis=0)
+                observations = stacked_observations.flatten()
             else:
-                observations = observation
+                observations = np_observation.flatten()
 
             action, _ = model.predict(
                 observations, deterministic=True, **action_masks_param
             )
-            return action
+            return int(action)
 
-        output = DataFrame(np.zeros(len(dataframe)), columns=dk.label_list)
-        output = output.rolling(window=self.CONV_WIDTH).apply(_predict)
+        actions = dataframe.iloc[:, 0].rolling(window=self.CONV_WIDTH).apply(_predict)
+        output = DataFrame({label: actions for label in dk.label_list})
         return output
 
     def get_storage(self, pair: Optional[str] = None) -> BaseStorage: