feat(sandcastle): make strategy dispatch registry-driven (#1862)

author Jérôme Benoit <jerome.benoit@piment-noir.org>

Sun, 17 May 2026 22:45:10 +0000 (00:45 +0200)

committer GitHub <noreply@github.com>

Sun, 17 May 2026 22:45:10 +0000 (00:45 +0200)
author Jérôme Benoit <jerome.benoit@piment-noir.org>
Sun, 17 May 2026 22:45:10 +0000 (00:45 +0200)
committer GitHub <noreply@github.com>
Sun, 17 May 2026 22:45:10 +0000 (00:45 +0200)
diff --git a/.github/workflows/sandcastle.yml b/.github/workflows/sandcastle.yml

index 2d173f66b8a33609dfff37065ce4ce4cbdaef772..1c6d7c20b2f0769e777d493b9b90963155ce2f4c 100644 (file)
--- a/.github/workflows/sandcastle.yml
+++ b/.github/workflows/sandcastle.yml
@@ -1,3 +1,13 @@
+# Cutover note (one-time, can be removed after the first post-rename run):
+# The previous setup used label `sandcastle` and branch prefix `agent/issue`.
+# Before the first run on the new naming, the operator MUST:
+#   1. Rename the GitHub label:
+#      gh label edit sandcastle --name sandcastle-implement
+#   2. Close or merge any open PR whose head branch starts with `agent/issue-`,
+#      since the new pattern `agent/<key>-<n>-` does not match legacy branches
+#      and they are no longer recognised as covering an issue (which would
+#      otherwise produce duplicate PRs on the next nightly).
+
  name: Sandcastle
  
  on:
diff --git a/.sandcastle/constants.ts b/.sandcastle/constants.ts

index 5212910babff69dbd7c4d5cc3b7502faf7104b8e..bfc3334e642126f2445d8f0f45d5e42aad4d4853 100644 (file)
--- a/.sandcastle/constants.ts
+++ b/.sandcastle/constants.ts
@@ -35,8 +35,6 @@ export const MAX_PARALLEL = 5
  
  export const GIT_BASE_BRANCH = 'main'
  
-export const GIT_BRANCH_PREFIX = 'agent/issue'
-
  export const GIT_PUSH_TIMEOUT_MS = 60_000
  
  export const GIT_TIMEOUT_MS = 30_000
@@ -97,12 +95,12 @@ function resolvePnpmStorePath (): string | undefined {
  
  // ── GitHub ───────────────────────────────────────────────────────────────────
  
-export const GITHUB_ISSUE_LABEL = 'sandcastle'
-
  export const GITHUB_MAX_ISSUES_FETCH = 50
  
  export const GITHUB_MAX_PRS_FETCH = 200
  
+export const MAX_SLUG_CHARS = 40
+
  export const MAX_TITLE_CHARS = 200
  
  // ── Validation ───────────────────────────────────────────────────────────────
diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts

index bfed3f7ba4b00060f746a662e79e7be0e2ecd6b5..8bad05bb607310fed8aa25cbe7ba5822a1846830 100644 (file)
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -10,19 +10,16 @@ import {
    AGENT_TASK_TIMEOUT_MS,
    DOCKER_IMAGE,
    DOCKER_MOUNTS,
-  GIT_BRANCH_PREFIX,
-  GITHUB_ISSUE_LABEL,
    MAX_PARALLEL,
    SANDBOX_BUILD_HOOKS,
  } from './constants.js'
  import { runRefinementLoop } from './refinement-loop.js'
-import { implementStrategy } from './strategies/implement/strategy.js'
+import { STRATEGY_BY_KEY, STRATEGY_REGISTRY } from './strategies/index.js'
  import { GithubIssueSource } from './task-source.js'
  
  const source = new GithubIssueSource({
-  branchPrefix: GIT_BRANCH_PREFIX,
    dockerImage: DOCKER_IMAGE,
-  label: GITHUB_ISSUE_LABEL,
+  strategies: STRATEGY_REGISTRY,
  })
  
  let tasks: TaskSpec[]
@@ -42,6 +39,12 @@ if (tasks.length === 0) {
    const settled = await Promise.allSettled(
      tasks.map(spec =>
        pool.run(async () => {
+        const entry = STRATEGY_BY_KEY.get(spec.strategyKey)
+        if (!entry) {
+          throw new Error(
+            `Task #${spec.id}: unknown strategy '${spec.strategyKey}' (not in registry).`
+          )
+        }
          const ac = new AbortController()
          const timer = setTimeout(() => {
            ac.abort(new Error(`Task #${spec.id} timed out after ${String(AGENT_TASK_TIMEOUT_MS)}ms`))
@@ -55,7 +58,7 @@ if (tasks.length === 0) {
              sandbox: docker({ imageName: DOCKER_IMAGE, mounts: [...DOCKER_MOUNTS] }),
            })
  
-          const loopResult = await runRefinementLoop(spec, sandbox, implementStrategy, {
+          const loopResult = await runRefinementLoop(spec, sandbox, entry.strategy, {
              iterationBudget: AGENT_ITERATION_BUDGET,
              maxRounds: AGENT_MAX_CRITIC_ROUNDS,
              postLoopValidationRetry: true,
@@ -64,8 +67,8 @@ if (tasks.length === 0) {
  
            let workSuccess = false
            if (loopResult.totalCommits > 0) {
-            const finalizeResult = await implementStrategy.finalize(spec, loopResult, sandbox)
-            workSuccess = implementStrategy.isWorkComplete(finalizeResult)
+            const finalizeResult = await entry.strategy.finalize(spec, loopResult, sandbox)
+            workSuccess = entry.strategy.isWorkComplete(finalizeResult)
            }
  
            return { spec, success: workSuccess }
diff --git a/.sandcastle/plan-prompt.md b/.sandcastle/plan-prompt.md

index 547951ded0d4775e8ccf62b9cff96c2b23f611b7..9325cfd0f427eb76c9490675af9f114087c76cd3 100644 (file)
--- a/.sandcastle/plan-prompt.md
+++ b/.sandcastle/plan-prompt.md
@@ -1,59 +1,40 @@
-# Plan Agent
+# Plan
  
-Read open GitHub issues and produce a parallelizable execution plan with implementation context.
+Read open GitHub issues and produce a parallelizable execution plan.
  
-## Context
+## Inputs
  
-This is a Node.js TypeScript monorepo (pnpm workspace) simulating OCPP charging stations.
-Structure: root simulator (`src/`), `ui/common`, `ui/cli`, `ui/web`, `tests/ocpp-server` (Python).
-Test runner: Node.js native (`node:test`). Build tool: esbuild. Linter: ESLint (neostandard).
-Read `AGENTS.md` and `.serena/memories/project_overview`.
-
-## Open Issues
+- `ISSUES_JSON` — JSON array of open issues, each entry `{ number, title, body, labels }`.
  
  {{ISSUES_JSON}}
  
-## Steps
-
-1. Analyze the issues above. For each, determine:
-   - Can it be implemented independently (no blocking dependency on another open issue)?
-   - Is the scope clear enough to implement without further clarification?
+## Task
  
-2. Select all issues that are independent and actionable.
+1. Read `AGENTS.md` and `.serena/memories/project_overview` for repository context.
+2. For each issue, decide whether it is independently actionable: scope is clear and there is no blocking dependency on another open issue.
+3. Drop issues labelled `wontfix`, `duplicate`, or `question`, or blocked by another open issue. If every issue is blocked, keep only the highest-priority candidate.
+4. For each kept issue, derive:
+   - `id`: the issue number as a string.
+   - `slug`: a short kebab-case summary of the change matching `^[a-z0-9]+(?:-[a-z0-9]+)*$` (≤ 40 chars).
+   - `title`: the issue title.
+   - `issueType`: `bug-fix`, `feature`, or `refactor`.
+   - `confidence`: `high` (clear scope), `medium` (some ambiguity), or `low` (unclear scope).
+   - `rootCauseHypothesis`: a specific hypothesis (modules, patterns, behaviours) for the implementer to validate — not a restatement of the title.
+   - `acceptanceCriteria`: 2–4 conditions verifiable by static inspection of the diff (code structure, logic, algorithms — not runtime behaviour).
+5. Do not implement anything; output only the plan.
  
-3. For each selected issue:
-   - Assign a branch name: `{{BRANCH_PREFIX}}-<number>-<slug>` where slug is a short kebab-case summary (e.g., `{{BRANCH_PREFIX}}-42-fix-streaming-id`).
-   - Classify the issue type: `bug-fix`, `feature`, or `refactor`.
-   - Assess your confidence: `high` (clear scope, obvious approach), `medium` (some ambiguity), or `low` (unclear scope, multiple valid approaches).
-   - Formulate a root cause hypothesis: what is broken or missing, and why. This is a hypothesis for the implementer to validate — not a directive.
-   - Define 2-4 acceptance criteria: concrete, verifiable conditions that must be true when the implementation is complete. Focus on code structure, algorithmic and logic, not runtime behavior.
+## Output
  
-4. Output the plan in this exact format:
+```text
+<plan>{"issues":[{"id":"<number>","slug":"<kebab-slug>","title":"<title>","issueType":"bug-fix|feature|refactor","confidence":"high|medium|low","rootCauseHypothesis":"...","acceptanceCriteria":["..."]}]}</plan>
+```
  
-   ```text
-   <plan>{"issues":[{"id":"<number>","title":"<title>","branch":"{{BRANCH_PREFIX}}-<number>-<slug>","issueType":"bug-fix|feature|refactor","confidence":"high|medium|low","rootCauseHypothesis":"...","acceptanceCriteria":["..."]}]}</plan>
-   ```
+When no issue is actionable: `<plan>{"issues":[]}</plan>`.
  
  ## Rules
  
-- Exclude issues labeled `wontfix`, `duplicate`, or `question`.
-- Exclude issues that depend on another open issue (mention "blocked by #N" or similar).
-- Prefer issues where scope fits a single-file change over cross-cutting refactors.
-- If every issue is blocked, include the single highest-priority candidate (fewest/weakest dependencies).
-- If no actionable issues exist, output:
-
-  ```text
-  <plan>{ "issues": [] }</plan>
-  ```
+- Prefer single-file scope over cross-cutting refactors.
  
-- Do not implement anything. Only produce the plan.
-- Acceptance criteria must be verifiable by static code inspection of the diff.
-- Root cause hypothesis should be specific (mention modules, patterns, or behaviors) — not a restatement of the issue title.
+## Done
  
-## Completion
-
-After outputting the plan, output:
-
-```text
  <promise>COMPLETE</promise>
-```
diff --git a/.sandcastle/strategies/implement/actor-prompt.md b/.sandcastle/strategies/implement/actor-prompt.md

index d0c1edfc4b1c7542b27aa4596901496590c8a6ff..903e0a74e67f3db9676913af8c80965dfe4cb9bc 100644 (file)
--- a/.sandcastle/strategies/implement/actor-prompt.md
+++ b/.sandcastle/strategies/implement/actor-prompt.md
@@ -1,79 +1,52 @@
-# Actor Agent: Implementer
+# Actor
  
-Implement issue **#{{TASK_ID}}** ("{{ISSUE_TITLE}}") on branch `{{BRANCH}}`.
+Implement issue **#{{ISSUE_NUMBER}}** ("{{ISSUE_TITLE}}") on branch `{{BRANCH}}`, or address review findings if present.
  
-## Issue Details
+## Inputs
+
+- `ISSUE_NUMBER` — GitHub issue number.
+- `ISSUE_TITLE` — issue title.
+- `BRANCH` — working branch, already checked out.
+- `ISSUE_BODY` — sanitised issue body.
+- `PLAN_CONTEXT` — optional planner analysis (hypothesis, acceptance criteria); empty when absent.
+- `FINDINGS` — optional JSON array of critic findings from the previous round; empty on the first round.
  
  {{ISSUE_BODY}}
  
  {{PLAN_CONTEXT}}
  
-## Review Findings
-
  {{FINDINGS}}
  
-## Exploration
-
-Explore the repo to understand the architecture before coding. Pay attention to:
-
-- Files related to the issue
-- Test files touching relevant modules
-- Existing patterns in similar code
-
-Read `AGENTS.md`, `CONTRIBUTING.md`, `.serena/memories/code_style_conventions` and `.serena/memories/task_completion_checklist`.
+## Task
  
-## Implementation
-
-1. If review findings are provided above, cross-validate each one against the code. Fix findings you agree with. Ignore findings that are incorrect or not applicable.
-
-2. If no findings are provided, implement the issue from scratch following existing patterns:
-   - Strict TypeScript, no `any`/`@ts-ignore`
-   - Use existing error classes (BaseError, OCPPError)
-   - Follow existing naming conventions (camelCase functions, PascalCase classes/types)
-   - Tests use Node.js native test runner (`node:test` + `node:assert`)
-
-3. Before every commit, run the quality gates for the affected sub-project(s):
+1. Read `AGENTS.md`, `CONTRIBUTING.md`, `.serena/memories/code_style_conventions`, and `.serena/memories/task_completion_checklist`. Explore files surrounding the issue and similar patterns in the repo.
+2. If `FINDINGS` is non-empty, cross-validate each finding against the code; fix the ones you agree with, ignore the rest.
+3. Otherwise implement the issue end-to-end, including matching tests using `node:test` + `node:assert`.
+4. Before every commit, run the full quality-gate chain in every affected sub-project. Root-level chain:
  
     ```bash
     pnpm format && pnpm typecheck && pnpm lint && pnpm build && pnpm test
     ```
  
-   If changes affect `ui/web`:
+   For changes inside `ui/web` or `ui/cli`, run the same chain in that directory (substitute `pnpm test:coverage` for `pnpm test` in `ui/web`).
  
-   ```bash
-   cd ui/web && pnpm format && pnpm typecheck && pnpm lint && pnpm build && pnpm test:coverage
-   ```
-
-   If changes affect `ui/cli`:
+5. Commit one logical change at a time using Conventional Commits (`fix:`, `feat:`, `refactor:`, `chore:`).
+6. Push the branch:
  
     ```bash
-   cd ui/cli && pnpm format && pnpm typecheck && pnpm lint && pnpm build && pnpm test
+   git push -u origin {{BRANCH}}
     ```
  
-4. Commit with conventional commits:
-   - `fix: <description>` — bug fix
-   - `feat: <description>` — new feature
-   - `refactor: <description>` — restructuring
-   - `chore: <description>` — tooling/config
-
-5. Push the branch:
+## Output
  
-   ```bash
-   git push -u origin {{BRANCH}}
-   ```
+Commits on `{{BRANCH}}` pushed to `origin`. No structured stdout payload.
  
  ## Rules
  
-- One logical change per commit.
-- Tests must pass before pushing. Zero type errors, zero test failures.
-- Do not modify unrelated files.
-- Do not bump version numbers.
-- Push BEFORE signaling completion.
-
-## Completion
+- Strict TypeScript: no `any`, no `@ts-ignore`, no non-null `!`; use the existing typed errors (`BaseError`, `OCPPError`).
+- Do not modify unrelated files; do not bump version numbers.
+- Push before signaling completion; HEAD must have zero type errors and zero test failures.
  
-When validation passes and the branch is pushed, output:
+## Done
  
-```text
  <promise>COMPLETE</promise>
-```
diff --git a/.sandcastle/strategies/implement/critic-prompt.md b/.sandcastle/strategies/implement/critic-prompt.md

index d7caf81d3d3f1a36d62a60e6704f869eb5ad4be9..c337318ac79a7e183027cc96b099d8fb15e0ad4b 100644 (file)
--- a/.sandcastle/strategies/implement/critic-prompt.md
+++ b/.sandcastle/strategies/implement/critic-prompt.md
@@ -1,70 +1,50 @@
-# Critic Agent: Reviewer
+# Critic
  
-Analyze the implementation on branch `{{BRANCH}}` and produce structured findings.
+Review the diff on `{{BRANCH}}` against `{{BASE_BRANCH}}` and emit structured findings.
  
-## Task
-
-Run `git diff {{BASE_BRANCH}}...{{BRANCH}}` to see all changes. Examine the diff carefully. For each issue found, produce a structured finding.
+## Inputs
  
-Read `AGENTS.md`, `CONTRIBUTING.md` and `.serena/memories/code_style_conventions`.
-
-## Acceptance Criteria
+- `BRANCH` — branch under review.
+- `BASE_BRANCH` — branch to diff against.
+- `NONCE` — unique tag id used to delimit the findings payload.
+- `ACCEPTANCE_CRITERIA` — numbered acceptance criteria from the planner; empty when absent.
  
  {{ACCEPTANCE_CRITERIA}}
  
-If acceptance criteria are listed above, assess from the diff whether the implementation satisfies each one. Report a HIGH finding for any unmet criterion. Only judge observable outcomes, not implementation approach. If no criteria are listed, skip this section.
+## Task
  
-## Output Format
+1. Read `AGENTS.md`, `CONTRIBUTING.md`, and `.serena/memories/code_style_conventions`.
+2. Run `git diff {{BASE_BRANCH}}...{{BRANCH}}` and inspect every changed line.
+3. For each acceptance criterion (if any), decide from the diff whether it is satisfied; report a `HIGH` finding for any unmet criterion, judged on observable diff content, not implementation approach.
+4. Surface other defects in the changed code: logic errors, missing edge cases, security issues, type-safety violations, test gaps.
  
-Output your findings as JSON wrapped in nonce-tagged delimiters. Use EXACTLY this tag format:
+## Output
  
  ```text
-<findings-{{NONCE}}>[...]</findings-{{NONCE}}>
+<findings-{{NONCE}}>[
+  {
+    "file": "path/to/file.ts",
+    "line": 42,
+    "title": "short description of the issue",
+    "severity": "CRITICAL|HIGH|MEDIUM|LOW",
+    "category": "security|logic|performance|architecture|style",
+    "confidence": "HIGH|MEDIUM|LOW",
+    "description": "why this is a problem",
+    "suggestion": "how to fix it"
+  }
+]</findings-{{NONCE}}>
  ```
  
-Each finding must have this structure:
-
-```json
-{
-  "file": "path/to/file.ts",
-  "line": 42,
-  "title": "short description of the issue",
-  "severity": "CRITICAL|HIGH|MEDIUM|LOW",
-  "category": "security|logic|performance|architecture|style",
-  "confidence": "HIGH|MEDIUM|LOW",
-  "description": "detailed explanation of why this is a problem",
-  "suggestion": "how to fix it"
-}
-```
-
-If no issues are found, output:
-
-```text
-<findings-{{NONCE}}>[]</findings-{{NONCE}}>
-```
+When nothing is wrong: `<findings-{{NONCE}}>[]</findings-{{NONCE}}>`.
  
  ## Rules
  
-- Report ≤5 findings. HIGH and CRITICAL only. Omit LOW/MEDIUM unless zero higher-severity issues exist.
-- If >5 HIGH/CRITICAL issues exist, report the top 5 and add a summary note in the last finding's description.
-- Do NOT modify any files. Do NOT commit. Do NOT push.
-- Only report issues in the CHANGED code (not pre-existing issues).
-- Use HIGH confidence only when you've verified the issue by reading the relevant code.
-- Use MEDIUM confidence for pattern-based detection.
-- Use LOW confidence for style preferences or uncertain issues.
-- Focus on: logic errors, missing edge cases, security issues, type safety violations, test gaps.
-- Do NOT report formatting issues (prettier handles those).
-
-## Known Design Decisions (do not flag)
-
-- Mid-loop validation convergence bypasses critic (ARCS pattern — deterministic tests > subjective review).
-- Agent runs use `idleTimeoutSeconds` and `completionSignal` for cooperative cancellation.
-- Content-addressed dedup hash includes line number (collision reduction tradeoff, bounded by hard cap).
+- Report at most 5 findings, `HIGH` or `CRITICAL` only; include `LOW`/`MEDIUM` only when no higher-severity issue exists. If more than 5 `HIGH`/`CRITICAL` issues exist, report the top 5 and add a summary line in the last finding's `description`.
+- Confidence: `HIGH` after reading the relevant code; `MEDIUM` for pattern-based detection; `LOW` for style preference or uncertainty.
+- Only flag changed code; ignore pre-existing issues. Do not flag formatting (Prettier handles it).
+- Do not modify, commit, or push files.
+- Do not flag the following intentional design decisions: mid-loop validation convergence bypassing the critic (ARCS pattern); cooperative cancellation via `idleTimeoutSeconds` + `completionSignal`; line-number-aware dedup hash.
  
-## Completion
+## Done
  
-After outputting the findings, output:
-
-```text
  <promise>COMPLETE</promise>
-```
diff --git a/.sandcastle/strategies/implement/strategy.ts b/.sandcastle/strategies/implement/strategy.ts

index 99b68054439c3987ec5c55bfd527d348e28a2f88..f1370b2313226a0169d7789e09c4c48bc71ef07c 100644 (file)
--- a/.sandcastle/strategies/implement/strategy.ts
+++ b/.sandcastle/strategies/implement/strategy.ts
@@ -21,8 +21,7 @@ function buildPlanContext (spec: TaskSpec): string {
        `Acceptance criteria:\n${spec.acceptanceCriteria.map((c, i) => `${String(i + 1)}. ${c}`).join('\n')}`
      )
    }
-  if (parts.length === 0) return ''
-  return `## Planner Analysis\n\n${parts.join('\n\n')}`
+  return parts.join('\n\n')
  }
  
  export const implementStrategy: FinalizationConfig & LoopStrategy = {
@@ -32,9 +31,9 @@ export const implementStrategy: FinalizationConfig & LoopStrategy = {
      BRANCH: spec.branch,
      FINDINGS: findings.length > 0 ? JSON.stringify(findings, null, 2) : '',
      ISSUE_BODY: spec.body,
+    ISSUE_NUMBER: spec.id,
      ISSUE_TITLE: spec.title,
      PLAN_CONTEXT: buildPlanContext(spec),
-    TASK_ID: spec.id,
    }),
  
    buildCriticArgs: (spec, baseBranch) => ({
diff --git a/.sandcastle/strategies/index.ts b/.sandcastle/strategies/index.ts

new file mode 100644 (file)

index 0000000..1067f3a
--- /dev/null
+++ b/.sandcastle/strategies/index.ts
@@ -0,0 +1,115 @@
+import type { FinalizationConfig, LoopStrategy } from '../types.js'
+
+import { implementStrategy } from './implement/strategy.js'
+
+/**
+ * A registered strategy: the canonical declaration that maps a key to its
+ * actor/critic loop and finalization configuration. Labels and branch prefixes
+ * are derived from the key to keep a single source of truth.
+ */
+export interface StrategyEntry {
+  /**
+   * Additional XML-like tag names (besides `key`) that this strategy uses
+   * inside prompts. They are stripped from issue text to harden against
+   * prompt injection. The strategy `key` is always added implicitly.
+   */
+  readonly controlTags?: readonly string[]
+  /** Strategy key (kebab-case). Used in TaskSpec.strategyKey and to derive label/branchPrefix. */
+  readonly key: string
+  /** The actor/critic loop and finalization configuration. */
+  readonly strategy: FinalizationConfig & LoopStrategy
+}
+
+/**
+ * Canonical registry of strategies. Order matters: when an issue carries
+ * several `sandcastle-*` labels, the first matching entry wins.
+ *
+ * Adding a new strategy is one line + one `strategies/<key>/` sub-directory.
+ */
+export const STRATEGY_REGISTRY: readonly StrategyEntry[] = [
+  { controlTags: ['review'], key: 'implement', strategy: implementStrategy },
+] as const
+
+/** Indexed view: strategy key → entry. Throws on duplicate keys at load time. */
+export const STRATEGY_BY_KEY: ReadonlyMap<string, StrategyEntry> = indexByKey(STRATEGY_REGISTRY)
+
+/**
+ * Derives the git branch prefix used for tasks of a strategy.
+ * @param key - Strategy key.
+ * @returns Branch prefix of the form `agent/<key>`.
+ */
+export function branchPrefixOf (key: string): string {
+  return `agent/${key}`
+}
+
+/**
+ * Derives the GitHub issue label that triggers a strategy.
+ * @param key - Strategy key.
+ * @returns Label of the form `sandcastle-<key>`.
+ */
+export function labelOf (key: string): string {
+  return `sandcastle-${key}`
+}
+
+/**
+ * Strict kebab-case: lowercase letters/digits, hyphen-separated, must start
+ * with a letter. Constrains `key` because it flows verbatim into the GitHub
+ * label `sandcastle-<key>` and the git branch prefix `agent/<key>`.
+ */
+const STRATEGY_KEY_PATTERN = /^[a-z][a-z0-9]*(?:-[a-z0-9]+)*$/
+
+/**
+ * XML-name-safe subset for `controlTags`: must start with a letter, followed
+ * by letters, digits, `_` or `-`. Looser than {@link STRATEGY_KEY_PATTERN}
+ * to accept agent vocabulary such as `tool_call` while still rejecting
+ * empty strings and angle-bracket characters that would corrupt the
+ * sanitizer regex.
+ */
+const CONTROL_TAG_PATTERN = /^[a-zA-Z][a-zA-Z0-9_-]*$/
+
+/**
+ * Builds the strategy-key index, validating each entry and throwing on
+ * malformed, duplicate, or prefix-overlapping keys so registry mistakes
+ * (typos, wrong casing, empty tag, key colliding with another key's branch
+ * prefix) fail loudly at module load instead of silently producing
+ * undiscoverable labels, invalid git branches, empty regex alternatives in
+ * the prompt sanitizer, or ambiguous open-PR dedup matches.
+ * @param entries - Canonical registry entries.
+ * @returns Map from key to entry.
+ * @throws {Error} when an entry has an invalid key, an invalid controlTag,
+ *   a duplicate key, or a key whose branch prefix overlaps a previously
+ *   registered one.
+ */
+function indexByKey (entries: readonly StrategyEntry[]): ReadonlyMap<string, StrategyEntry> {
+  const map = new Map<string, StrategyEntry>()
+  for (const entry of entries) {
+    if (!STRATEGY_KEY_PATTERN.test(entry.key)) {
+      throw new Error(
+        `Invalid strategy key '${entry.key}' in STRATEGY_REGISTRY: ` +
+          `must match ${STRATEGY_KEY_PATTERN.source} (kebab-case).`
+      )
+    }
+    for (const tag of entry.controlTags ?? []) {
+      if (!CONTROL_TAG_PATTERN.test(tag)) {
+        throw new Error(
+          `Invalid controlTag '${tag}' for strategy '${entry.key}' in STRATEGY_REGISTRY: ` +
+            `must match ${CONTROL_TAG_PATTERN.source}.`
+        )
+      }
+    }
+    if (map.has(entry.key)) {
+      throw new Error(`Duplicate strategy key in STRATEGY_REGISTRY: '${entry.key}'.`)
+    }
+    for (const existing of map.keys()) {
+      if (entry.key.startsWith(`${existing}-`) || existing.startsWith(`${entry.key}-`)) {
+        throw new Error(
+          `Strategy key '${entry.key}' overlaps with '${existing}' in STRATEGY_REGISTRY: ` +
+            `branch '${branchPrefixOf(existing)}-<n>-…' would also match the regex derived ` +
+            `from '${branchPrefixOf(entry.key)}-' (or vice versa), making open-PR dedup ambiguous.`
+        )
+      }
+    }
+    map.set(entry.key, entry)
+  }
+  return map
+}
diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts

index 36c6bb1bf3c37950a709db3a1c6992c568f5f091..96ee1774dc057f8c3d2df3a4f1491ded9969aee1 100644 (file)
--- a/.sandcastle/task-source.ts
+++ b/.sandcastle/task-source.ts
@@ -16,9 +16,11 @@ import {
    GIT_TIMEOUT_MS,
    GITHUB_MAX_ISSUES_FETCH,
    GITHUB_MAX_PRS_FETCH,
+  MAX_SLUG_CHARS,
    MAX_TITLE_CHARS,
    SANDBOX_AUTH_HOOKS,
  } from './constants.js'
+import { branchPrefixOf, labelOf, type StrategyEntry } from './strategies/index.js'
  import { agentProvider, execFileAsync, toErrorMessage } from './utils.js'
  
  const RawIssueSchema = z.object({
@@ -34,14 +36,12 @@ const RawIssuesSchema = z.array(RawIssueSchema)
  
  /** Configuration for the GitHub issue task source. */
  export interface GithubIssueSourceConfig {
-  /** Git branch prefix for issue branches. */
-  branchPrefix: string
    /** Docker image name for the sandbox. */
    dockerImage: string
-  /** GitHub issue label to filter by. */
-  label: string
    /** Maximum planner retries. */
    maxRetries?: number
+  /** Strategies to run, in priority order (first matching label wins). */
+  strategies: readonly StrategyEntry[]
  }
  
  /** Interface for task discovery sources. */
@@ -50,47 +50,69 @@ export interface TaskSource {
    discover(): Promise<TaskSpec[]>
  }
  
+/** A sanitized issue resolved to the strategy that will handle it. */
+interface ResolvedIssue {
+  body: string
+  branchPrefix: string
+  labels: string[]
+  number: number
+  strategyKey: string
+  title: string
+}
+
  /**
   * Task source that discovers work from GitHub issues via planner agent.
+ * Each strategy in the registry is associated with the GitHub label
+ * `sandcastle-<key>` and produces tasks on branches `agent/<key>-<n>-<slug>`.
   */
  export class GithubIssueSource implements TaskSource {
-  private readonly branchPattern: RegExp
-  private readonly branchPrefix: string
+  private readonly branchPatterns: readonly RegExp[]
+  private readonly controlTagPattern: RegExp
    private readonly dockerImage: string
-  private readonly escapedPrefix: string
-  private readonly label: string
    private readonly maxRetries: number
+  private readonly strategies: readonly StrategyEntry[]
  
    /**
     * @param config - Configuration for the GitHub issue source.
     */
    constructor (config: GithubIssueSourceConfig) {
-    this.branchPrefix = config.branchPrefix
+    if (config.strategies.length === 0) {
+      throw new Error('GithubIssueSource requires at least one strategy.')
+    }
      this.dockerImage = config.dockerImage
-    this.label = config.label
      this.maxRetries = config.maxRetries ?? 5
+    this.strategies = config.strategies
  
-    this.escapedPrefix = this.branchPrefix.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
-    this.branchPattern = new RegExp(`^${this.escapedPrefix}-\\d+-[\\w-]+$`)
+    this.branchPatterns = this.strategies.map(
+      entry => new RegExp(`^${escapeRegex(branchPrefixOf(entry.key))}-(\\d+)-${SLUG_PATTERN_BODY}$`)
+    )
+    this.controlTagPattern = buildControlTagPattern(this.strategies)
    }
  
    /**
-   * Discovers tasks by fetching GitHub issues, running the planner, and validating the plan.
+   * Discovers tasks by fetching GitHub issues per strategy, running the planner,
+   * and validating the plan.
     * @returns Array of task specifications to implement.
     */
    async discover (): Promise<TaskSpec[]> {
-    const issuesJson = await this.fetchAndSanitizeIssues()
+    const issues = await this.fetchAndSanitizeIssues()
  
-    if (issuesJson.length === 0) {
-      console.log("No issues with label '%s'. Exiting.", this.label)
+    if (issues.length === 0) {
+      console.log(
+        'No issues with labels [%s]. Exiting.',
+        this.strategies.map(s => labelOf(s.key)).join(', ')
+      )
        return []
      }
  
      const coveredIssues = await this.fetchIssuesWithOpenPRs()
-    const actionableIssues = issuesJson.filter(issue => !coveredIssues.has(issue.number))
+    const actionableIssues = issues.filter(issue => !coveredIssues.has(issue.number))
  
      if (actionableIssues.length === 0) {
-      console.log('All sandcastle issues already have open PRs. Exiting.')
+      console.log(
+        'All issues with labels [%s] already have open PRs. Exiting.',
+        this.strategies.map(s => labelOf(s.key)).join(', ')
+      )
        return []
      }
  
@@ -107,8 +129,16 @@ export class GithubIssueSource implements TaskSource {
            maxIterations: 5,
            name: 'Planner',
            promptArgs: {
-            BRANCH_PREFIX: this.branchPrefix,
-            ISSUES_JSON: JSON.stringify(actionableIssues, null, 2),
+            ISSUES_JSON: JSON.stringify(
+              actionableIssues.map(({ body, labels, number, title }) => ({
+                body,
+                labels,
+                number,
+                title,
+              })),
+              null,
+              2
+            ),
            },
            promptFile: './.sandcastle/plan-prompt.md',
            sandbox: docker({ imageName: this.dockerImage, mounts: [...DOCKER_MOUNTS] }),
@@ -140,7 +170,7 @@ export class GithubIssueSource implements TaskSource {
  
        console.log(`Plan: ${String(tasks.length)} issue(s) to work on:`)
        for (const task of tasks) {
-        console.log(`  #${task.id}: ${task.title} → ${task.branch}`)
+        console.log(`  #${task.id} [${task.strategyKey}]: ${task.title} → ${task.branch}`)
        }
  
        return tasks
@@ -149,14 +179,50 @@ export class GithubIssueSource implements TaskSource {
      throw new Error('Planner failed to produce a valid plan after all retries.')
    }
  
-  private async fetchAndSanitizeIssues (): Promise<
-    {
-      body: string
-      labels: string[]
-      number: number
-      title: string
-    }[]
-  > {
+  /**
+   * Fetches issues for each registered strategy in parallel, then deduplicates
+   * by issue number in registry order (first strategy registered wins). Each
+   * issue is annotated with the strategy that will handle it and the
+   * corresponding branch prefix.
+   * @returns Sanitized issues with their resolved strategy.
+   */
+  private async fetchAndSanitizeIssues (): Promise<ResolvedIssue[]> {
+    const fetched = await Promise.all(
+      this.strategies.map(async entry => ({
+        entry,
+        rawIssues: await this.fetchIssuesByLabel(labelOf(entry.key)),
+      }))
+    )
+    const seen = new Map<number, ResolvedIssue>()
+    for (const { entry, rawIssues } of fetched) {
+      for (const issue of rawIssues) {
+        const previous = seen.get(issue.number)
+        if (previous !== undefined) {
+          const winnerLabel = labelOf(previous.strategyKey)
+          const droppedLabel = labelOf(entry.key)
+          console.warn(
+            `Issue #${String(issue.number)} carries multiple strategy labels ` +
+              `('${winnerLabel}' and '${droppedLabel}'); processing as '${winnerLabel}' ` +
+              `(registered first), skipping '${droppedLabel}'. ` +
+              'To remove the unwanted label: ' +
+              `gh issue edit ${String(issue.number)} --remove-label ${droppedLabel}`
+          )
+          continue
+        }
+        seen.set(issue.number, {
+          body: this.sanitizeForPrompt(issue.body),
+          branchPrefix: branchPrefixOf(entry.key),
+          labels: issue.labels.map(l => l.name),
+          number: issue.number,
+          strategyKey: entry.key,
+          title: this.sanitizeForPrompt(issue.title),
+        })
+      }
+    }
+    return [...seen.values()]
+  }
+
+  private async fetchIssuesByLabel (label: string): Promise<z.infer<typeof RawIssuesSchema>> {
      let rawIssuesJson: string
      try {
        const { stdout } = await execFileAsync(
@@ -171,34 +237,26 @@ export class GithubIssueSource implements TaskSource {
            '--limit',
            String(GITHUB_MAX_ISSUES_FETCH),
            '--label',
-          this.label,
+          label,
          ],
          { encoding: 'utf-8', maxBuffer: 8 * 1024 * 1024, timeout: GIT_TIMEOUT_MS }
        )
        rawIssuesJson = stdout
      } catch (err: unknown) {
        throw new Error(
-        `Failed to fetch issues: ${toErrorMessage(err)}. Ensure gh is installed and authenticated.`,
+        `Failed to fetch issues with label '${label}': ${toErrorMessage(err)}. Ensure gh is installed and authenticated.`,
          { cause: err }
        )
      }
  
-    let rawIssues: z.infer<typeof RawIssuesSchema>
      try {
-      rawIssues = RawIssuesSchema.parse(JSON.parse(rawIssuesJson))
+      return RawIssuesSchema.parse(JSON.parse(rawIssuesJson))
      } catch (err: unknown) {
        throw new Error(
-        `Failed to parse issues JSON: ${toErrorMessage(err)}. Unexpected format from gh CLI.`,
+        `Failed to parse issues JSON for label '${label}': ${toErrorMessage(err)}. Unexpected format from gh CLI.`,
          { cause: err }
        )
      }
-
-    return rawIssues.map(issue => ({
-      body: sanitizeForPrompt(issue.body),
-      labels: issue.labels.map(label => label.name),
-      number: issue.number,
-      title: sanitizeForPrompt(issue.title),
-    }))
    }
  
    private async fetchIssuesWithOpenPRs (): Promise<Set<number>> {
@@ -219,11 +277,13 @@ export class GithubIssueSource implements TaskSource {
        )
        const prs = z.array(z.object({ headRefName: z.string() })).parse(JSON.parse(stdout))
        const issueNumbers = new Set<number>()
-      const pattern = new RegExp(`^${this.escapedPrefix}-(\\d+)-`)
        for (const pr of prs) {
-        const match = pattern.exec(pr.headRefName)
-        if (match) {
-          issueNumbers.add(Number(match[1]))
+        for (const pattern of this.branchPatterns) {
+          const match = pattern.exec(pr.headRefName)
+          if (match) {
+            issueNumbers.add(Number(match[1]))
+            break
+          }
          }
        }
        return issueNumbers
@@ -233,10 +293,17 @@ export class GithubIssueSource implements TaskSource {
      }
    }
  
-  private validatePlan (
-    planContent: string,
-    issuesJson: { body: string; labels: string[]; number: number; title: string }[]
-  ): null | TaskSpec[] {
+  /**
+   * Strips agent-control tags from text to reduce prompt-injection risk.
+   * The deny-list is derived once from the registry at construction time.
+   * @param text - Raw text to sanitize.
+   * @returns Text with all control tags removed.
+   */
+  private sanitizeForPrompt (text: string): string {
+    return text.normalize('NFKC').replace(this.controlTagPattern, '')
+  }
+
+  private validatePlan (planContent: string, actionableIssues: ResolvedIssue[]): null | TaskSpec[] {
      try {
        const PlanSchema = z.object({ issues: z.array(z.unknown()) })
        const parseResult = PlanSchema.safeParse(JSON.parse(planContent))
@@ -245,63 +312,103 @@ export class GithubIssueSource implements TaskSource {
          return null
        }
        const parsed = parseResult.data
-      const validated = parsed.issues.filter((entry): entry is Record<string, unknown> => {
-        if (typeof entry !== 'object' || entry === null) return false
-        const item = entry as Record<string, unknown>
-        if (typeof item.id !== 'string' || !/^\d+$/.test(item.id)) return false
-        if (typeof item.branch !== 'string' || !this.branchPattern.test(item.branch)) return false
-        if (typeof item.title !== 'string') return false
-        if (item.title.length > MAX_TITLE_CHARS) return false
-        // eslint-disable-next-line no-control-regex
-        if (/[\x00-\x1f]/.test(item.title)) return false
-        return true
-      })
-
-      const issueMap = new Map(issuesJson.map(issue => [String(issue.number), issue]))
+      const issueMap = new Map(actionableIssues.map(issue => [String(issue.number), issue]))
+
+      const seenIds = new Set<string>()
+      const validated: TaskSpec[] = []
+      for (const entry of parsed.issues) {
+        const spec = this.validatePlanEntry(entry, issueMap)
+        if (spec === null) continue
+        if (seenIds.has(spec.id)) {
+          console.warn(
+            `Planner produced duplicate id '${spec.id}'; keeping first occurrence and dropping the rest.`
+          )
+          continue
+        }
+        seenIds.add(spec.id)
+        validated.push(spec)
+      }
+
+      if (parsed.issues.length > 0 && validated.length === 0) {
+        console.error(
+          `Planner produced ${String(parsed.issues.length)} entries but none passed validation. Retrying.`
+        )
+        return null
+      }
        return validated
-        .map(entry => {
-          const source = issueMap.get(entry.id as string)
-          if (!source) return null
-          const spec: TaskSpec = {
-            body: source.body,
-            branch: entry.branch as string,
-            id: entry.id as string,
-            labels: source.labels,
-            title: entry.title as string,
-          }
-          if (isValidIssueType(entry.issueType)) {
-            spec.issueType = entry.issueType
-          }
-          if (isValidConfidence(entry.confidence)) {
-            spec.confidence = entry.confidence
-          }
-          if (
-            typeof entry.rootCauseHypothesis === 'string' &&
-            entry.rootCauseHypothesis.length > 0
-          ) {
-            spec.rootCauseHypothesis = sanitizeForPrompt(entry.rootCauseHypothesis).slice(0, 500)
-          }
-          if (Array.isArray(entry.acceptanceCriteria)) {
-            const criteria = entry.acceptanceCriteria
-              .filter((c): c is string => typeof c === 'string' && c.length > 0)
-              .map(c => sanitizeForPrompt(c).slice(0, 200))
-            if (criteria.length > 0) {
-              spec.acceptanceCriteria = criteria.slice(0, 5)
-            }
-          }
-          return spec
-        })
-        .filter((entry): entry is NonNullable<typeof entry> => entry !== null)
      } catch (err: unknown) {
        console.error(`Planner produced invalid JSON: ${toErrorMessage(err)}. Retrying.`)
        return null
      }
    }
+
+  private validatePlanEntry (entry: unknown, issueMap: Map<string, ResolvedIssue>): null | TaskSpec {
+    if (typeof entry !== 'object' || entry === null) return null
+    const item = entry as Record<string, unknown>
+    if (typeof item.id !== 'string' || !/^\d+$/.test(item.id)) return null
+    if (typeof item.slug !== 'string') return null
+    if (item.slug.length > MAX_SLUG_CHARS || !SLUG_PATTERN.test(item.slug)) return null
+    if (typeof item.title !== 'string') return null
+    if (item.title.length > MAX_TITLE_CHARS) return null
+    // eslint-disable-next-line no-control-regex -- guard against control characters in titles
+    if (/[\x00-\x1f]/.test(item.title)) return null
+    const sanitizedTitle = this.sanitizeForPrompt(item.title).trim()
+    if (sanitizedTitle.length === 0) return null
+
+    const source = issueMap.get(item.id)
+    if (!source) return null
+
+    const spec: TaskSpec = {
+      body: source.body,
+      branch: `${source.branchPrefix}-${item.id}-${item.slug}`,
+      id: item.id,
+      labels: source.labels,
+      strategyKey: source.strategyKey,
+      title: sanitizedTitle,
+    }
+    if (isValidIssueType(item.issueType)) {
+      spec.issueType = item.issueType
+    }
+    if (isValidConfidence(item.confidence)) {
+      spec.confidence = item.confidence
+    }
+    if (typeof item.rootCauseHypothesis === 'string' && item.rootCauseHypothesis.length > 0) {
+      spec.rootCauseHypothesis = this.sanitizeForPrompt(item.rootCauseHypothesis).slice(0, 500)
+    }
+    if (Array.isArray(item.acceptanceCriteria)) {
+      const criteria = item.acceptanceCriteria
+        .filter((c): c is string => typeof c === 'string' && c.length > 0)
+        .map(c => this.sanitizeForPrompt(c).slice(0, 200))
+      if (criteria.length > 0) {
+        spec.acceptanceCriteria = criteria.slice(0, 5)
+      }
+    }
+    return spec
+  }
  }
  
+/**
+ * Strict kebab-case slug body shared by plan validation and PR-coverage
+ * branch parsing: lowercase letters/digits, hyphen-separated, no leading,
+ * trailing or double hyphen, no underscore. Anchored consumers wrap it in
+ * `^...$`. Mirrors the strategy-key shape so the assembled branch
+ * `<branchPrefix>-<id>-<slug>` is uniformly kebab-cased.
+ */
+const SLUG_PATTERN_BODY = '[a-z0-9]+(?:-[a-z0-9]+)*'
+
+const SLUG_PATTERN = new RegExp(`^${SLUG_PATTERN_BODY}$`)
+
  const VALID_CONFIDENCE = new Set(['high', 'low', 'medium'])
  const VALID_ISSUE_TYPES = new Set(['bug-fix', 'feature', 'refactor'])
  
+/**
+ * @param value - Value to escape for safe interpolation in a regex.
+ * @returns The value with regex metacharacters escaped.
+ */
+function escapeRegex (value: string): string {
+  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
+}
+
  /**
   * @param value - Value to check.
   * @returns Whether value is a valid confidence level.
@@ -319,14 +426,38 @@ function isValidIssueType (value: unknown): value is 'bug-fix' | 'feature' | 're
  }
  
  /**
- * Strips agent-control tags from text to reduce prompt-injection risk.
- * @param text - Raw text to sanitize.
- * @returns Text with plan/findings/promise tags removed.
+ * Universal agent-control tags shared by every strategy: orchestrator-level
+ * vocabulary (planner output, completion signal) and common prompt-injection
+ * vectors. Strategy-specific tags come from `StrategyEntry.key` and
+ * `StrategyEntry.controlTags`.
+ */
+const UNIVERSAL_CONTROL_TAGS: readonly string[] = [
+  'code',
+  'findings',
+  'instructions',
+  'plan',
+  'promise',
+  'system',
+  'tool_call',
+] as const
+
+/**
+ * Builds the regex that strips agent-control tags. The deny-list is the union
+ * of {@link UNIVERSAL_CONTROL_TAGS} and, for every registered strategy, its
+ * `key` plus optional `controlTags`. Adding a strategy automatically extends
+ * the deny-list — no edit to the task source is required.
+ * @param strategies - Registered strategies whose vocabulary participates.
+ * @returns Compiled regex matching opening or closing tags of any control name.
   */
-function sanitizeForPrompt (text: string): string {
-  const normalized = text.normalize('NFKC')
-  return normalized.replace(
-    /<\/?(?:plan|findings|promise|system|code|instructions|implement|review|tool_call)[^>]*>/gi,
-    ''
-  )
+function buildControlTagPattern (strategies: readonly StrategyEntry[]): RegExp {
+  const tags = new Set<string>(UNIVERSAL_CONTROL_TAGS)
+  for (const entry of strategies) {
+    tags.add(entry.key)
+    for (const tag of entry.controlTags ?? []) tags.add(tag)
+  }
+  const alternation = [...tags].map(escapeRegex).join('|')
+  // Lookahead asserts the tag name ends at an XML-tag-name boundary
+  // (whitespace, `/` for self-closing, or `>`), preventing prefix collisions
+  // such as `<plant>` matching alternative `plan`.
+  return new RegExp(`</?(?:${alternation})(?=[\\s/>])[^>]*>`, 'gi')
  }
diff --git a/.sandcastle/types.ts b/.sandcastle/types.ts

index 90e787bd97579a21c7c254be569256b271301504..7ddc6f7720aa1f30aa0e99598ff0833862be994b 100644 (file)
--- a/.sandcastle/types.ts
+++ b/.sandcastle/types.ts
@@ -121,6 +121,8 @@ export interface TaskSpec {
    labels?: string[]
    /** Planner's hypothesis about what is broken/missing — for actor to validate, not follow blindly. */
    rootCauseHypothesis?: string
+  /** Strategy key from the registry that drives the actor/critic loop for this task. */
+  strategyKey: string
    /** Task title. */
    title: string
  }
author	Jérôme Benoit <jerome.benoit@piment-noir.org>
	Sun, 17 May 2026 22:45:10 +0000 (00:45 +0200)
committer	GitHub <noreply@github.com>
	Sun, 17 May 2026 22:45:10 +0000 (00:45 +0200)
.github/workflows/sandcastle.yml		patch \| blob \| blame \| history
.sandcastle/constants.ts		patch \| blob \| blame \| history
.sandcastle/main.ts		patch \| blob \| blame \| history
.sandcastle/plan-prompt.md		patch \| blob \| blame \| history
.sandcastle/strategies/implement/actor-prompt.md		patch \| blob \| blame \| history
.sandcastle/strategies/implement/critic-prompt.md		patch \| blob \| blame \| history
.sandcastle/strategies/implement/strategy.ts		patch \| blob \| blame \| history
.sandcastle/strategies/index.ts	[new file with mode: 0644]	patch \| blob
.sandcastle/task-source.ts		patch \| blob \| blame \| history
.sandcastle/types.ts		patch \| blob \| blame \| history