From 1503c17b6788a70854562baaa24b32ad32ca80a6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Mon, 22 Jun 2026 14:35:27 +0200 Subject: [PATCH] fix(quickadapter): catch UnicodeDecodeError in journal tail probe MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Codex inline review (P2) on PR #102 flagged that `_optuna_journal_has_corrupt_tail` catches only `json.JSONDecodeError` from `json.loads(last_line)`, but `json.loads` raises `UnicodeDecodeError` (subclass of `ValueError`, NOT of `JSONDecodeError`) when the trailing record contains invalid UTF-8 bytes — a common crash pattern when `fsync` is interrupted partway through a multibyte sequence. The exception escapes the helper, propagates out of `optuna_create_storage` (the helper runs BEFORE the recoverable try/except), reaches `optuna_create_study`'s broad outer handler, and reproduces the silent-HPO-skip symptom under a different corruption class. Empirically reproduced: >>> json.loads(b'\xc3\x28') UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc3 in position 0: invalid continuation byte Broaden the except clause from `json.JSONDecodeError` to `ValueError` — the common parent of both `JSONDecodeError` and `UnicodeDecodeError` — so the helper treats any `json.loads`-unparseable trailing record as corruption and routes it through the same quarantine path. Other `ValueError` subclasses are not plausibly raised by `json.loads` on bytes input. Reproducer at `/tmp/quickadapter-tests/test_optuna_journal_quarantine.py` extended from 17 to 19 scenarios (Class C4 detection + end-to-end quarantine on invalid UTF-8). All pass. 
--- .../user_data/freqaimodels/QuickAdapterRegressorV3.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py index 6a0311e..399120f 100644 --- a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py +++ b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py @@ -4274,8 +4274,10 @@ class QuickAdapterRegressorV3(BaseRegressionModel): Bounded tail probe (last ``_OPTUNA_JOURNAL_TAIL_PROBE_BYTES``). Return True iff the file is non-empty AND its trailing record is (a) missing the newline, (b) empty (bare ``\\n``), or - (c) malformed JSON. Fail-open when the probe window cuts a - single line larger than the window; defer to the + (c) ``json.loads``-unparseable (malformed JSON, ``JSONDecodeError``, + or invalid UTF-8, ``UnicodeDecodeError`` — both are + ``ValueError`` subclasses). Fail-open when the probe window + cuts a single line larger than the window; defer to the post-construction handler. """ if not journal_path.exists(): @@ -4307,7 +4309,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): return True try: json.loads(last_line) - except json.JSONDecodeError: + except ValueError: return True return False -- 2.53.0