From a951ede0fc1f2d4020ee1393074c2e80ff379610 Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?=
Date: Tue, 7 Oct 2025 13:20:25 +0200
Subject: [PATCH] docs(README.md): add structure and quick start
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jérôme Benoit
---
 README.md                                     | 82 ++++++++++++++++++-
 .../test_reward_space_analysis.py             | 16 ++--
 2 files changed, 91 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index c7134db..af07e45 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,83 @@
 # FreqAI strategies
 
-Do not expect any support of any kind on Internet. Nevertheless, PRs implementing documentation, bug fixes, cleanups or sensible features will be discussed and might get merged.
+## QuickAdapter
+
+### Quick start
+
+Change the timezone according to your location in [`docker-compose.yml`](./quickadapter/docker-compose.yml).
+
+From the repository root, configure, build and start the QuickAdapter container:
+
+```shell
+cd quickadapter
+cp user_data/config-template.json user_data/config.json
+```
+
+Adapt the configuration to your needs: edit `user_data/config.json` to set your exchange API keys and tune the `freqai` section.
+
+Then build and start the container:
+
+```shell
+docker compose up -d --build
+```
+
+## ReforceXY
+
+### Quick start
+
+Change the timezone according to your location in [`docker-compose.yml`](./ReforceXY/docker-compose.yml).
+
+From the repository root, configure, build and start the ReforceXY container:
+
+```shell
+cd ReforceXY
+cp user_data/config-template.json user_data/config.json
+```
+
+Adapt the configuration to your needs: edit `user_data/config.json` to set your exchange API keys and tune the `freqai` section.
+
+Then build and start the container:
+
+```shell
+docker compose up -d --build
+```
+
+[Reward Space Analysis](./ReforceXY/reward_space_analysis/README.md)
+
+## Common workflows
+
+List running compose services and the containers they created:
+
+```shell
+docker compose ps
+```
+
+Enter a running service:
+
+```shell
+# use the compose service name (e.g. "freqtrade")
+docker compose exec freqtrade /bin/sh
+```
+
+View logs:
+
+```shell
+# service logs (compose maps service -> container(s))
+docker compose logs -f freqtrade
+
+# or follow a specific container's logs
+docker logs -f freqtrade-quickadapter
+```
+
+Stop and remove the compose stack:
+
+```shell
+docker compose down
+```
+
+---
+
+## Note
+
+> Do not expect any support of any kind on the Internet. Nevertheless, PRs implementing documentation, bug fixes, cleanups or sensible features will be discussed and might get merged.
+
diff --git a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py
index b4ac70b..460f4ed 100644
--- a/ReforceXY/reward_space_analysis/test_reward_space_analysis.py
+++ b/ReforceXY/reward_space_analysis/test_reward_space_analysis.py
@@ -271,8 +271,8 @@ class TestStatisticalCoherence(RewardSpaceTestBase):
         idle_data = df[df["idle_duration"] > 0]
         if len(idle_data) > 10:
             # Simple correlation check: idle duration should correlate negatively with idle reward
-            idle_dur = idle_data["idle_duration"].values
-            idle_rew = idle_data["reward_idle"].values
+            idle_dur = idle_data["idle_duration"].to_numpy()
+            idle_rew = idle_data["reward_idle"].to_numpy()
 
             # Basic validation that data makes sense
             self.assertTrue(
@@ -874,7 +874,7 @@ class TestRewardAlignment(RewardSpaceTestBase):
 
         # pnl values: slightly above target, 2x, 5x, 10x target
         pnl_values = [profit_target * m for m in (1.05, TEST_RR_HIGH, 5.0, 10.0)]
-        ratios_observed = []
+        ratios_observed: list[float] = []
 
         for pnl in pnl_values:
             context = RewardContext(
@@ -899,7 +899,7 @@ class TestRewardAlignment(RewardSpaceTestBase):
             )
             # br.exit_component = pnl * (base_factor * pnl_factor) => with base_factor=1, attenuation=1 => ratio = exit_component / pnl = pnl_factor
             ratio = br.exit_component / pnl if pnl != 0 else 0.0
-            ratios_observed.append(ratio)
+            ratios_observed.append(float(ratio))
 
         # Monotonic non-decreasing (allow tiny float noise)
         for a, b in zip(ratios_observed, ratios_observed[1:]):
@@ -910,6 +910,8 @@ class TestRewardAlignment(RewardSpaceTestBase):
         asymptote = 1.0 + win_reward_factor
         final_ratio = ratios_observed[-1]
         # Expect to be very close to asymptote (tanh(0.5*(10-1)) ≈ 0.9997)
+        if not math.isfinite(final_ratio):
+            self.fail(f"Final ratio is not finite: {final_ratio}")
         self.assertLess(
             abs(final_ratio - asymptote),
             1e-3,
@@ -917,13 +919,15 @@ class TestRewardAlignment(RewardSpaceTestBase):
         )
 
         # Analytical expected ratios for comparison (not strict assertions except final)
-        expected_ratios = []
+        expected_ratios: list[float] = []
         for pnl in pnl_values:
             pnl_ratio = pnl / profit_target
             expected = 1.0 + win_reward_factor * math.tanh(beta * (pnl_ratio - 1.0))
             expected_ratios.append(expected)
         # Compare each observed to expected within loose tolerance (model parity)
         for obs, exp in zip(ratios_observed, expected_ratios):
+            if not (math.isfinite(obs) and math.isfinite(exp)):
+                self.fail(f"Non-finite observed/expected ratio: obs={obs}, exp={exp}")
             self.assertLess(
                 abs(obs - exp),
                 5e-6,
@@ -2327,7 +2331,7 @@ class TestPrivateFunctions(RewardSpaceTestBase):
        """Test that holding penalty scales progressively after max_duration."""
         max_duration = 100
         durations = [150, 200, 300]  # All > max_duration
-        penalties = []
+        penalties: list[float] = []
 
         for duration in durations:
             context = RewardContext(
-- 
2.43.0