From 5d430ad4b09351c66730e58283efb0a1e9d7cd83 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Mon, 18 May 2026 00:45:10 +0200 Subject: [PATCH] feat(sandcastle): make strategy dispatch registry-driven (#1862) * feat(sandcastle): make strategy dispatch registry-driven Replace the hardcoded `sandcastle` label / single-strategy implementation with a registry mapping each strategy key to its actor/critic loop and finalization config. Labels (`sandcastle-`) and branch prefixes (`agent/`) are derived from the key, so adding a strategy is one folder + one registry line. - New `.sandcastle/strategies/index.ts` (registry, indexed view with load-time uniqueness assertion, `labelOf`/`branchPrefixOf` helpers). - `TaskSpec.strategyKey` propagates the chosen strategy through the pipeline. - `GithubIssueSource` fetches issues per registered label, deduplicates with a multi-label warning, validates planner output (strategy echo and branch number bound to issue id), and builds its prompt sanitizer once from the registry (universal core + per-strategy `controlTags`). - `GITHUB_ISSUE_LABEL` and `GIT_BRANCH_PREFIX` constants removed; no back-compat shim. Cutover (one-time, also noted in `.github/workflows/sandcastle.yml`): 1. `gh label edit sandcastle --name sandcastle-implement` 2. Close or merge open PRs whose head branches start with `agent/issue-`. * fix(sandcastle): validate strategy key and controlTags at registry load Extend `indexByKey` to enforce the kebab-case contract on `StrategyEntry.key` (documented but unchecked) and an XML-name-safe contract on `controlTags`. Failures throw at module load with a precise message, matching the existing duplicate-key behaviour, so registry mistakes (typos, wrong casing, empty tag) cannot silently produce undiscoverable GitHub labels, invalid git branches or empty regex alternatives in the prompt sanitizer. * fix(sandcastle): tighten control-tag sanitizer against prefix matches The deny-list regex `]*>` matched tag-name prefixes because `[^>]*` swallowed the trailing characters: `` matched alternative `plan`, `` matched `review`, and so on. Insert a zero-width lookahead `(?=[\\s/>])` after the alternation so only XML tag-name terminators (whitespace, `/`, or `>`) are accepted. The deny-list remains entirely registry-driven. * refactor(sandcastle): parallelise issue fetch and clarify multi-label warning - Fan out per-strategy `gh issue list` calls with `Promise.all`. The dedup pass still iterates the resolved array in registry order, so the "first registered wins" semantics is preserved. Failure semantics are unchanged: any rejected fetch propagates as before. - Render labels via `labelOf` (no more hard-coded `sandcastle-*` glob), name both winner and dropped GitHub labels, and append a ready-to-paste `gh issue edit --remove-label` remediation hint. * fix(sandcastle): reject prefix-overlapping strategy keys at registry load Two registered keys related by a kebab-prefix (e.g. 'foo' and 'foo-1') yield overlapping branch-detection regexes: a branch 'agent/foo-1-42-slug' is matched by both '^agent/foo-(\d+)-' (capturing '1' as the issue id) and '^agent/foo-1-(\d+)-' (capturing '42'), and the registry-order tie-break in `fetchIssuesWithOpenPRs` then attributes the open PR to the wrong issue. Extend `indexByKey` with a pairwise prefix-incomparable check that throws at module load with a precise message, in line with the existing `STRATEGY_KEY_PATTERN`, `CONTROL_TAG_PATTERN`, and duplicate-key checks introduced in 89be4bd3. Today's single-entry registry passes unchanged. * refactor(sandcastle): unify prompt taxonomy and decouple planner from orchestrator Adopt the Inputs/Task/Output/Rules/Done section structure across the plan, actor and critic prompts; drop `Agent` from the role headers; deduplicate intra-prompt boilerplate (verbatim instructions, repeated quality-gate blocks, multiply-explained confidence levels). Total prompt size: 217 \u2192 142 lines. Decouple the planner output from registry-known data: the planner now emits a kebab-case `slug` only; the orchestrator builds `branch = \`\${source.branchPrefix}-\${id}-\${slug}\`` and copies `strategyKey` from the registry-resolved source. The validator drops the `strategyKey` echo check and the full-branch regex, validates the slug shape via `SLUG_PATTERN` and a `MAX_SLUG_CHARS` cap. The planner JSON view is also projected to `{ body, labels, number, title }` so `branchPrefix`/`strategyKey` cannot leak back into the prompt. Rename actor variable `TASK_ID` \u2192 `ISSUE_NUMBER` for precision; drop the `## Planner Analysis` section header from `buildPlanContext` so `{{PLAN_CONTEXT}}` interpolates cleanly inside the actor's Inputs section. * fix(sandcastle): de-duplicate multi-line prompt placeholders The sandcastle library's PromptArgumentSubstitution (`@ai-hero/sandcastle/dist/PromptArgumentSubstitution.js`) applies the regex \`/\\{\\{\\s*([A-Za-z_][A-Za-z0-9_]*)\\s*\\}\\}/g\` globally with no Markdown awareness, so a placeholder appearing both inside an `## Inputs` bullet (`\`{{KEY}}\` \u2014 description.`) and on a bare line below was substituted twice. For multi-line values (`ISSUE_BODY`, `PLAN_CONTEXT`, `FINDINGS`, `ISSUES_JSON`, `ACCEPTANCE_CRITERIA`) this corrupted the inline-code span and doubled the prompt token cost on every actor and critic round. Reference variables by bare name (`\`KEY\``) in the `## Inputs` bullets and in narratives that mention multi-line values; keep the bare-line `{{KEY}}` injections downstream as the single substitution sites. Scalar variables (`ISSUE_NUMBER`, `ISSUE_TITLE`, `BRANCH`, `BASE_BRANCH`, `NONCE`) remain interpolated where their single-line value belongs in the rendered prose or commands. * refactor(sandcastle): centralize MAX_SLUG_CHARS in constants module `MAX_SLUG_CHARS` is a tunable peer of `MAX_TITLE_CHARS`, `CONTEXT_HASH_RADIUS`, and `HASH_PREFIX_LENGTH`, all already in `constants.ts`. Move it there to honour the AGENTS.md "single source of truth: canonical defaults" rule. `SLUG_PATTERN` stays local in `task-source.ts`: it is a structural validator regex, not a tunable. * fix(sandcastle): align PR-coverage branch regex with slug grammar The PR-coverage regex used to dedup open PRs in `fetchIssuesWithOpenPRs` omitted the trailing slug, while `validatePlanEntry` enforces a strict kebab-case slug shape via `SLUG_PATTERN`. A stale or malformed branch like `agent/implement-42-` would match the dedup regex and suppress retries forever. Lift the slug body into a shared `SLUG_PATTERN_BODY` constant reused by both `SLUG_PATTERN` (the plan-entry validator) and the per-strategy `branchPatterns` so the two definitions stay in lock-step. * fix(sandcastle): sanitize planner-supplied issue title at trust boundary `validatePlanEntry` validated typeof, length and control characters on `item.title` but stored the raw value verbatim in `TaskSpec`, exposing both the actor prompt (`{{ISSUE_TITLE}}`) and the GitHub PR title (via `finalizer.ts`) to control tags such as `` or `` that a prompt-injected planner could echo from an issue body. Route `item.title` through the same registry-derived `sanitizeForPrompt` already used for `body`, `rootCauseHypothesis` and `acceptanceCriteria`, trim whitespace, and reject the entry if the sanitised title is empty. Length and control-character checks remain on the raw input as a fast fail-fast path before sanitisation. * fix(sandcastle): retry on all-invalid plan and dedupe planner ids `validatePlan` returned `[]` both for legitimate empty plans and for plans where every entry failed validation. The caller treats `[]` identically ("No actionable issues. Exiting."), so a systemic planner mistake silently terminated the nightly run as if there were genuinely no work, wasting the entire retry budget without diagnostic. It also accepted duplicate ids: a planner emitting two entries for the same issue number produced two `TaskSpec`s sharing one branch, so `main.ts` would spawn two concurrent sandboxes and create duplicate PRs. Restructure `validatePlan` to: - Track `seenIds: Set`; on duplicate, warn and drop the second occurrence (first registered wins, mirrors the multi-label dedup pattern in `fetchAndSanitizeIssues`). - Detect `parsed.issues.length > 0 && validated.length === 0`: log an error and return `null` so the existing retry loop at lines 119-177 re-invokes the planner instead of exiting silently. --- .github/workflows/sandcastle.yml | 10 + .sandcastle/constants.ts | 6 +- .sandcastle/main.ts | 19 +- .sandcastle/plan-prompt.md | 67 ++-- .../strategies/implement/actor-prompt.md | 77 ++-- .../strategies/implement/critic-prompt.md | 84 ++--- .sandcastle/strategies/implement/strategy.ts | 5 +- .sandcastle/strategies/index.ts | 115 ++++++ .sandcastle/task-source.ts | 339 ++++++++++++------ .sandcastle/types.ts | 2 + 10 files changed, 458 insertions(+), 266 deletions(-) create mode 100644 .sandcastle/strategies/index.ts diff --git a/.github/workflows/sandcastle.yml b/.github/workflows/sandcastle.yml index 2d173f66..1c6d7c20 100644 --- a/.github/workflows/sandcastle.yml +++ b/.github/workflows/sandcastle.yml @@ -1,3 +1,13 @@ +# Cutover note (one-time, can be removed after the first post-rename run): +# The previous setup used label `sandcastle` and branch prefix `agent/issue`. +# Before the first run on the new naming, the operator MUST: +# 1. Rename the GitHub label: +# gh label edit sandcastle --name sandcastle-implement +# 2. Close or merge any open PR whose head branch starts with `agent/issue-`, +# since the new pattern `agent/--` does not match legacy branches +# and they are no longer recognised as covering an issue (which would +# otherwise produce duplicate PRs on the next nightly). + name: Sandcastle on: diff --git a/.sandcastle/constants.ts b/.sandcastle/constants.ts index 5212910b..bfc3334e 100644 --- a/.sandcastle/constants.ts +++ b/.sandcastle/constants.ts @@ -35,8 +35,6 @@ export const MAX_PARALLEL = 5 export const GIT_BASE_BRANCH = 'main' -export const GIT_BRANCH_PREFIX = 'agent/issue' - export const GIT_PUSH_TIMEOUT_MS = 60_000 export const GIT_TIMEOUT_MS = 30_000 @@ -97,12 +95,12 @@ function resolvePnpmStorePath (): string | undefined { // ── GitHub ─────────────────────────────────────────────────────────────────── -export const GITHUB_ISSUE_LABEL = 'sandcastle' - export const GITHUB_MAX_ISSUES_FETCH = 50 export const GITHUB_MAX_PRS_FETCH = 200 +export const MAX_SLUG_CHARS = 40 + export const MAX_TITLE_CHARS = 200 // ── Validation ─────────────────────────────────────────────────────────────── diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts index bfed3f7b..8bad05bb 100644 --- a/.sandcastle/main.ts +++ b/.sandcastle/main.ts @@ -10,19 +10,16 @@ import { AGENT_TASK_TIMEOUT_MS, DOCKER_IMAGE, DOCKER_MOUNTS, - GIT_BRANCH_PREFIX, - GITHUB_ISSUE_LABEL, MAX_PARALLEL, SANDBOX_BUILD_HOOKS, } from './constants.js' import { runRefinementLoop } from './refinement-loop.js' -import { implementStrategy } from './strategies/implement/strategy.js' +import { STRATEGY_BY_KEY, STRATEGY_REGISTRY } from './strategies/index.js' import { GithubIssueSource } from './task-source.js' const source = new GithubIssueSource({ - branchPrefix: GIT_BRANCH_PREFIX, dockerImage: DOCKER_IMAGE, - label: GITHUB_ISSUE_LABEL, + strategies: STRATEGY_REGISTRY, }) let tasks: TaskSpec[] @@ -42,6 +39,12 @@ if (tasks.length === 0) { const settled = await Promise.allSettled( tasks.map(spec => pool.run(async () => { + const entry = STRATEGY_BY_KEY.get(spec.strategyKey) + if (!entry) { + throw new Error( + `Task #${spec.id}: unknown strategy '${spec.strategyKey}' (not in registry).` + ) + } const ac = new AbortController() const timer = setTimeout(() => { ac.abort(new Error(`Task #${spec.id} timed out after ${String(AGENT_TASK_TIMEOUT_MS)}ms`)) @@ -55,7 +58,7 @@ if (tasks.length === 0) { sandbox: docker({ imageName: DOCKER_IMAGE, mounts: [...DOCKER_MOUNTS] }), }) - const loopResult = await runRefinementLoop(spec, sandbox, implementStrategy, { + const loopResult = await runRefinementLoop(spec, sandbox, entry.strategy, { iterationBudget: AGENT_ITERATION_BUDGET, maxRounds: AGENT_MAX_CRITIC_ROUNDS, postLoopValidationRetry: true, @@ -64,8 +67,8 @@ if (tasks.length === 0) { let workSuccess = false if (loopResult.totalCommits > 0) { - const finalizeResult = await implementStrategy.finalize(spec, loopResult, sandbox) - workSuccess = implementStrategy.isWorkComplete(finalizeResult) + const finalizeResult = await entry.strategy.finalize(spec, loopResult, sandbox) + workSuccess = entry.strategy.isWorkComplete(finalizeResult) } return { spec, success: workSuccess } diff --git a/.sandcastle/plan-prompt.md b/.sandcastle/plan-prompt.md index 547951de..9325cfd0 100644 --- a/.sandcastle/plan-prompt.md +++ b/.sandcastle/plan-prompt.md @@ -1,59 +1,40 @@ -# Plan Agent +# Plan -Read open GitHub issues and produce a parallelizable execution plan with implementation context. +Read open GitHub issues and produce a parallelizable execution plan. -## Context +## Inputs -This is a Node.js TypeScript monorepo (pnpm workspace) simulating OCPP charging stations. -Structure: root simulator (`src/`), `ui/common`, `ui/cli`, `ui/web`, `tests/ocpp-server` (Python). -Test runner: Node.js native (`node:test`). Build tool: esbuild. Linter: ESLint (neostandard). -Read `AGENTS.md` and `.serena/memories/project_overview`. - -## Open Issues +- `ISSUES_JSON` — JSON array of open issues, each entry `{ number, title, body, labels }`. {{ISSUES_JSON}} -## Steps - -1. Analyze the issues above. For each, determine: - - Can it be implemented independently (no blocking dependency on another open issue)? - - Is the scope clear enough to implement without further clarification? +## Task -2. Select all issues that are independent and actionable. +1. Read `AGENTS.md` and `.serena/memories/project_overview` for repository context. +2. For each issue, decide whether it is independently actionable: scope is clear and there is no blocking dependency on another open issue. +3. Drop issues labelled `wontfix`, `duplicate`, or `question`, or blocked by another open issue. If every issue is blocked, keep only the highest-priority candidate. +4. For each kept issue, derive: + - `id`: the issue number as a string. + - `slug`: a short kebab-case summary of the change matching `^[a-z0-9]+(?:-[a-z0-9]+)*$` (≤ 40 chars). + - `title`: the issue title. + - `issueType`: `bug-fix`, `feature`, or `refactor`. + - `confidence`: `high` (clear scope), `medium` (some ambiguity), or `low` (unclear scope). + - `rootCauseHypothesis`: a specific hypothesis (modules, patterns, behaviours) for the implementer to validate — not a restatement of the title. + - `acceptanceCriteria`: 2–4 conditions verifiable by static inspection of the diff (code structure, logic, algorithms — not runtime behaviour). +5. Do not implement anything; output only the plan. -3. For each selected issue: - - Assign a branch name: `{{BRANCH_PREFIX}}--` where slug is a short kebab-case summary (e.g., `{{BRANCH_PREFIX}}-42-fix-streaming-id`). - - Classify the issue type: `bug-fix`, `feature`, or `refactor`. - - Assess your confidence: `high` (clear scope, obvious approach), `medium` (some ambiguity), or `low` (unclear scope, multiple valid approaches). - - Formulate a root cause hypothesis: what is broken or missing, and why. This is a hypothesis for the implementer to validate — not a directive. - - Define 2-4 acceptance criteria: concrete, verifiable conditions that must be true when the implementation is complete. Focus on code structure, algorithmic and logic, not runtime behavior. +## Output -4. Output the plan in this exact format: +```text +{"issues":[{"id":"","slug":"","title":"","issueType":"bug-fix|feature|refactor","confidence":"high|medium|low","rootCauseHypothesis":"...","acceptanceCriteria":["..."]}]}</plan> +``` - ```text - <plan>{"issues":[{"id":"<number>","title":"<title>","branch":"{{BRANCH_PREFIX}}-<number>-<slug>","issueType":"bug-fix|feature|refactor","confidence":"high|medium|low","rootCauseHypothesis":"...","acceptanceCriteria":["..."]}]}</plan> - ``` +When no issue is actionable: `<plan>{"issues":[]}</plan>`. ## Rules -- Exclude issues labeled `wontfix`, `duplicate`, or `question`. -- Exclude issues that depend on another open issue (mention "blocked by #N" or similar). -- Prefer issues where scope fits a single-file change over cross-cutting refactors. -- If every issue is blocked, include the single highest-priority candidate (fewest/weakest dependencies). -- If no actionable issues exist, output: - - ```text - <plan>{ "issues": [] }</plan> - ``` +- Prefer single-file scope over cross-cutting refactors. -- Do not implement anything. Only produce the plan. -- Acceptance criteria must be verifiable by static code inspection of the diff. -- Root cause hypothesis should be specific (mention modules, patterns, or behaviors) — not a restatement of the issue title. +## Done -## Completion - -After outputting the plan, output: - -```text <promise>COMPLETE</promise> -``` diff --git a/.sandcastle/strategies/implement/actor-prompt.md b/.sandcastle/strategies/implement/actor-prompt.md index d0c1edfc..903e0a74 100644 --- a/.sandcastle/strategies/implement/actor-prompt.md +++ b/.sandcastle/strategies/implement/actor-prompt.md @@ -1,79 +1,52 @@ -# Actor Agent: Implementer +# Actor -Implement issue **#{{TASK_ID}}** ("{{ISSUE_TITLE}}") on branch `{{BRANCH}}`. +Implement issue **#{{ISSUE_NUMBER}}** ("{{ISSUE_TITLE}}") on branch `{{BRANCH}}`, or address review findings if present. -## Issue Details +## Inputs + +- `ISSUE_NUMBER` — GitHub issue number. +- `ISSUE_TITLE` — issue title. +- `BRANCH` — working branch, already checked out. +- `ISSUE_BODY` — sanitised issue body. +- `PLAN_CONTEXT` — optional planner analysis (hypothesis, acceptance criteria); empty when absent. +- `FINDINGS` — optional JSON array of critic findings from the previous round; empty on the first round. {{ISSUE_BODY}} {{PLAN_CONTEXT}} -## Review Findings - {{FINDINGS}} -## Exploration - -Explore the repo to understand the architecture before coding. Pay attention to: - -- Files related to the issue -- Test files touching relevant modules -- Existing patterns in similar code - -Read `AGENTS.md`, `CONTRIBUTING.md`, `.serena/memories/code_style_conventions` and `.serena/memories/task_completion_checklist`. +## Task -## Implementation - -1. If review findings are provided above, cross-validate each one against the code. Fix findings you agree with. Ignore findings that are incorrect or not applicable. - -2. If no findings are provided, implement the issue from scratch following existing patterns: - - Strict TypeScript, no `any`/`@ts-ignore` - - Use existing error classes (BaseError, OCPPError) - - Follow existing naming conventions (camelCase functions, PascalCase classes/types) - - Tests use Node.js native test runner (`node:test` + `node:assert`) - -3. Before every commit, run the quality gates for the affected sub-project(s): +1. Read `AGENTS.md`, `CONTRIBUTING.md`, `.serena/memories/code_style_conventions`, and `.serena/memories/task_completion_checklist`. Explore files surrounding the issue and similar patterns in the repo. +2. If `FINDINGS` is non-empty, cross-validate each finding against the code; fix the ones you agree with, ignore the rest. +3. Otherwise implement the issue end-to-end, including matching tests using `node:test` + `node:assert`. +4. Before every commit, run the full quality-gate chain in every affected sub-project. Root-level chain: ```bash pnpm format && pnpm typecheck && pnpm lint && pnpm build && pnpm test ``` - If changes affect `ui/web`: + For changes inside `ui/web` or `ui/cli`, run the same chain in that directory (substitute `pnpm test:coverage` for `pnpm test` in `ui/web`). - ```bash - cd ui/web && pnpm format && pnpm typecheck && pnpm lint && pnpm build && pnpm test:coverage - ``` - - If changes affect `ui/cli`: +5. Commit one logical change at a time using Conventional Commits (`fix:`, `feat:`, `refactor:`, `chore:`). +6. Push the branch: ```bash - cd ui/cli && pnpm format && pnpm typecheck && pnpm lint && pnpm build && pnpm test + git push -u origin {{BRANCH}} ``` -4. Commit with conventional commits: - - `fix: <description>` — bug fix - - `feat: <description>` — new feature - - `refactor: <description>` — restructuring - - `chore: <description>` — tooling/config - -5. Push the branch: +## Output - ```bash - git push -u origin {{BRANCH}} - ``` +Commits on `{{BRANCH}}` pushed to `origin`. No structured stdout payload. ## Rules -- One logical change per commit. -- Tests must pass before pushing. Zero type errors, zero test failures. -- Do not modify unrelated files. -- Do not bump version numbers. -- Push BEFORE signaling completion. - -## Completion +- Strict TypeScript: no `any`, no `@ts-ignore`, no non-null `!`; use the existing typed errors (`BaseError`, `OCPPError`). +- Do not modify unrelated files; do not bump version numbers. +- Push before signaling completion; HEAD must have zero type errors and zero test failures. -When validation passes and the branch is pushed, output: +## Done -```text <promise>COMPLETE</promise> -``` diff --git a/.sandcastle/strategies/implement/critic-prompt.md b/.sandcastle/strategies/implement/critic-prompt.md index d7caf81d..c337318a 100644 --- a/.sandcastle/strategies/implement/critic-prompt.md +++ b/.sandcastle/strategies/implement/critic-prompt.md @@ -1,70 +1,50 @@ -# Critic Agent: Reviewer +# Critic -Analyze the implementation on branch `{{BRANCH}}` and produce structured findings. +Review the diff on `{{BRANCH}}` against `{{BASE_BRANCH}}` and emit structured findings. -## Task - -Run `git diff {{BASE_BRANCH}}...{{BRANCH}}` to see all changes. Examine the diff carefully. For each issue found, produce a structured finding. +## Inputs -Read `AGENTS.md`, `CONTRIBUTING.md` and `.serena/memories/code_style_conventions`. - -## Acceptance Criteria +- `BRANCH` — branch under review. +- `BASE_BRANCH` — branch to diff against. +- `NONCE` — unique tag id used to delimit the findings payload. +- `ACCEPTANCE_CRITERIA` — numbered acceptance criteria from the planner; empty when absent. {{ACCEPTANCE_CRITERIA}} -If acceptance criteria are listed above, assess from the diff whether the implementation satisfies each one. Report a HIGH finding for any unmet criterion. Only judge observable outcomes, not implementation approach. If no criteria are listed, skip this section. +## Task -## Output Format +1. Read `AGENTS.md`, `CONTRIBUTING.md`, and `.serena/memories/code_style_conventions`. +2. Run `git diff {{BASE_BRANCH}}...{{BRANCH}}` and inspect every changed line. +3. For each acceptance criterion (if any), decide from the diff whether it is satisfied; report a `HIGH` finding for any unmet criterion, judged on observable diff content, not implementation approach. +4. Surface other defects in the changed code: logic errors, missing edge cases, security issues, type-safety violations, test gaps. -Output your findings as JSON wrapped in nonce-tagged delimiters. Use EXACTLY this tag format: +## Output ```text -<findings-{{NONCE}}>[...]</findings-{{NONCE}}> +<findings-{{NONCE}}>[ + { + "file": "path/to/file.ts", + "line": 42, + "title": "short description of the issue", + "severity": "CRITICAL|HIGH|MEDIUM|LOW", + "category": "security|logic|performance|architecture|style", + "confidence": "HIGH|MEDIUM|LOW", + "description": "why this is a problem", + "suggestion": "how to fix it" + } +]</findings-{{NONCE}}> ``` -Each finding must have this structure: - -```json -{ - "file": "path/to/file.ts", - "line": 42, - "title": "short description of the issue", - "severity": "CRITICAL|HIGH|MEDIUM|LOW", - "category": "security|logic|performance|architecture|style", - "confidence": "HIGH|MEDIUM|LOW", - "description": "detailed explanation of why this is a problem", - "suggestion": "how to fix it" -} -``` - -If no issues are found, output: - -```text -<findings-{{NONCE}}>[]</findings-{{NONCE}}> -``` +When nothing is wrong: `<findings-{{NONCE}}>[]</findings-{{NONCE}}>`. ## Rules -- Report ≤5 findings. HIGH and CRITICAL only. Omit LOW/MEDIUM unless zero higher-severity issues exist. -- If >5 HIGH/CRITICAL issues exist, report the top 5 and add a summary note in the last finding's description. -- Do NOT modify any files. Do NOT commit. Do NOT push. -- Only report issues in the CHANGED code (not pre-existing issues). -- Use HIGH confidence only when you've verified the issue by reading the relevant code. -- Use MEDIUM confidence for pattern-based detection. -- Use LOW confidence for style preferences or uncertain issues. -- Focus on: logic errors, missing edge cases, security issues, type safety violations, test gaps. -- Do NOT report formatting issues (prettier handles those). - -## Known Design Decisions (do not flag) - -- Mid-loop validation convergence bypasses critic (ARCS pattern — deterministic tests > subjective review). -- Agent runs use `idleTimeoutSeconds` and `completionSignal` for cooperative cancellation. -- Content-addressed dedup hash includes line number (collision reduction tradeoff, bounded by hard cap). +- Report at most 5 findings, `HIGH` or `CRITICAL` only; include `LOW`/`MEDIUM` only when no higher-severity issue exists. If more than 5 `HIGH`/`CRITICAL` issues exist, report the top 5 and add a summary line in the last finding's `description`. +- Confidence: `HIGH` after reading the relevant code; `MEDIUM` for pattern-based detection; `LOW` for style preference or uncertainty. +- Only flag changed code; ignore pre-existing issues. Do not flag formatting (Prettier handles it). +- Do not modify, commit, or push files. +- Do not flag the following intentional design decisions: mid-loop validation convergence bypassing the critic (ARCS pattern); cooperative cancellation via `idleTimeoutSeconds` + `completionSignal`; line-number-aware dedup hash. -## Completion +## Done -After outputting the findings, output: - -```text <promise>COMPLETE</promise> -``` diff --git a/.sandcastle/strategies/implement/strategy.ts b/.sandcastle/strategies/implement/strategy.ts index 99b68054..f1370b23 100644 --- a/.sandcastle/strategies/implement/strategy.ts +++ b/.sandcastle/strategies/implement/strategy.ts @@ -21,8 +21,7 @@ function buildPlanContext (spec: TaskSpec): string { `Acceptance criteria:\n${spec.acceptanceCriteria.map((c, i) => `${String(i + 1)}. ${c}`).join('\n')}` ) } - if (parts.length === 0) return '' - return `## Planner Analysis\n\n${parts.join('\n\n')}` + return parts.join('\n\n') } export const implementStrategy: FinalizationConfig & LoopStrategy = { @@ -32,9 +31,9 @@ export const implementStrategy: FinalizationConfig & LoopStrategy = { BRANCH: spec.branch, FINDINGS: findings.length > 0 ? JSON.stringify(findings, null, 2) : '', ISSUE_BODY: spec.body, + ISSUE_NUMBER: spec.id, ISSUE_TITLE: spec.title, PLAN_CONTEXT: buildPlanContext(spec), - TASK_ID: spec.id, }), buildCriticArgs: (spec, baseBranch) => ({ diff --git a/.sandcastle/strategies/index.ts b/.sandcastle/strategies/index.ts new file mode 100644 index 00000000..1067f3a8 --- /dev/null +++ b/.sandcastle/strategies/index.ts @@ -0,0 +1,115 @@ +import type { FinalizationConfig, LoopStrategy } from '../types.js' + +import { implementStrategy } from './implement/strategy.js' + +/** + * A registered strategy: the canonical declaration that maps a key to its + * actor/critic loop and finalization configuration. Labels and branch prefixes + * are derived from the key to keep a single source of truth. + */ +export interface StrategyEntry { + /** + * Additional XML-like tag names (besides `key`) that this strategy uses + * inside prompts. They are stripped from issue text to harden against + * prompt injection. The strategy `key` is always added implicitly. + */ + readonly controlTags?: readonly string[] + /** Strategy key (kebab-case). Used in TaskSpec.strategyKey and to derive label/branchPrefix. */ + readonly key: string + /** The actor/critic loop and finalization configuration. */ + readonly strategy: FinalizationConfig & LoopStrategy +} + +/** + * Canonical registry of strategies. Order matters: when an issue carries + * several `sandcastle-*` labels, the first matching entry wins. + * + * Adding a new strategy is one line + one `strategies/<key>/` sub-directory. + */ +export const STRATEGY_REGISTRY: readonly StrategyEntry[] = [ + { controlTags: ['review'], key: 'implement', strategy: implementStrategy }, +] as const + +/** Indexed view: strategy key → entry. Throws on duplicate keys at load time. */ +export const STRATEGY_BY_KEY: ReadonlyMap<string, StrategyEntry> = indexByKey(STRATEGY_REGISTRY) + +/** + * Derives the git branch prefix used for tasks of a strategy. + * @param key - Strategy key. + * @returns Branch prefix of the form `agent/<key>`. + */ +export function branchPrefixOf (key: string): string { + return `agent/${key}` +} + +/** + * Derives the GitHub issue label that triggers a strategy. + * @param key - Strategy key. + * @returns Label of the form `sandcastle-<key>`. + */ +export function labelOf (key: string): string { + return `sandcastle-${key}` +} + +/** + * Strict kebab-case: lowercase letters/digits, hyphen-separated, must start + * with a letter. Constrains `key` because it flows verbatim into the GitHub + * label `sandcastle-<key>` and the git branch prefix `agent/<key>`. + */ +const STRATEGY_KEY_PATTERN = /^[a-z][a-z0-9]*(?:-[a-z0-9]+)*$/ + +/** + * XML-name-safe subset for `controlTags`: must start with a letter, followed + * by letters, digits, `_` or `-`. Looser than {@link STRATEGY_KEY_PATTERN} + * to accept agent vocabulary such as `tool_call` while still rejecting + * empty strings and angle-bracket characters that would corrupt the + * sanitizer regex. + */ +const CONTROL_TAG_PATTERN = /^[a-zA-Z][a-zA-Z0-9_-]*$/ + +/** + * Builds the strategy-key index, validating each entry and throwing on + * malformed, duplicate, or prefix-overlapping keys so registry mistakes + * (typos, wrong casing, empty tag, key colliding with another key's branch + * prefix) fail loudly at module load instead of silently producing + * undiscoverable labels, invalid git branches, empty regex alternatives in + * the prompt sanitizer, or ambiguous open-PR dedup matches. + * @param entries - Canonical registry entries. + * @returns Map from key to entry. + * @throws {Error} when an entry has an invalid key, an invalid controlTag, + * a duplicate key, or a key whose branch prefix overlaps a previously + * registered one. + */ +function indexByKey (entries: readonly StrategyEntry[]): ReadonlyMap<string, StrategyEntry> { + const map = new Map<string, StrategyEntry>() + for (const entry of entries) { + if (!STRATEGY_KEY_PATTERN.test(entry.key)) { + throw new Error( + `Invalid strategy key '${entry.key}' in STRATEGY_REGISTRY: ` + + `must match ${STRATEGY_KEY_PATTERN.source} (kebab-case).` + ) + } + for (const tag of entry.controlTags ?? []) { + if (!CONTROL_TAG_PATTERN.test(tag)) { + throw new Error( + `Invalid controlTag '${tag}' for strategy '${entry.key}' in STRATEGY_REGISTRY: ` + + `must match ${CONTROL_TAG_PATTERN.source}.` + ) + } + } + if (map.has(entry.key)) { + throw new Error(`Duplicate strategy key in STRATEGY_REGISTRY: '${entry.key}'.`) + } + for (const existing of map.keys()) { + if (entry.key.startsWith(`${existing}-`) || existing.startsWith(`${entry.key}-`)) { + throw new Error( + `Strategy key '${entry.key}' overlaps with '${existing}' in STRATEGY_REGISTRY: ` + + `branch '${branchPrefixOf(existing)}-<n>-…' would also match the regex derived ` + + `from '${branchPrefixOf(entry.key)}-' (or vice versa), making open-PR dedup ambiguous.` + ) + } + } + map.set(entry.key, entry) + } + return map +} diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts index 36c6bb1b..96ee1774 100644 --- a/.sandcastle/task-source.ts +++ b/.sandcastle/task-source.ts @@ -16,9 +16,11 @@ import { GIT_TIMEOUT_MS, GITHUB_MAX_ISSUES_FETCH, GITHUB_MAX_PRS_FETCH, + MAX_SLUG_CHARS, MAX_TITLE_CHARS, SANDBOX_AUTH_HOOKS, } from './constants.js' +import { branchPrefixOf, labelOf, type StrategyEntry } from './strategies/index.js' import { agentProvider, execFileAsync, toErrorMessage } from './utils.js' const RawIssueSchema = z.object({ @@ -34,14 +36,12 @@ const RawIssuesSchema = z.array(RawIssueSchema) /** Configuration for the GitHub issue task source. */ export interface GithubIssueSourceConfig { - /** Git branch prefix for issue branches. */ - branchPrefix: string /** Docker image name for the sandbox. */ dockerImage: string - /** GitHub issue label to filter by. */ - label: string /** Maximum planner retries. */ maxRetries?: number + /** Strategies to run, in priority order (first matching label wins). */ + strategies: readonly StrategyEntry[] } /** Interface for task discovery sources. */ @@ -50,47 +50,69 @@ export interface TaskSource { discover(): Promise<TaskSpec[]> } +/** A sanitized issue resolved to the strategy that will handle it. */ +interface ResolvedIssue { + body: string + branchPrefix: string + labels: string[] + number: number + strategyKey: string + title: string +} + /** * Task source that discovers work from GitHub issues via planner agent. + * Each strategy in the registry is associated with the GitHub label + * `sandcastle-<key>` and produces tasks on branches `agent/<key>-<n>-<slug>`. */ export class GithubIssueSource implements TaskSource { - private readonly branchPattern: RegExp - private readonly branchPrefix: string + private readonly branchPatterns: readonly RegExp[] + private readonly controlTagPattern: RegExp private readonly dockerImage: string - private readonly escapedPrefix: string - private readonly label: string private readonly maxRetries: number + private readonly strategies: readonly StrategyEntry[] /** * @param config - Configuration for the GitHub issue source. */ constructor (config: GithubIssueSourceConfig) { - this.branchPrefix = config.branchPrefix + if (config.strategies.length === 0) { + throw new Error('GithubIssueSource requires at least one strategy.') + } this.dockerImage = config.dockerImage - this.label = config.label this.maxRetries = config.maxRetries ?? 5 + this.strategies = config.strategies - this.escapedPrefix = this.branchPrefix.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') - this.branchPattern = new RegExp(`^${this.escapedPrefix}-\\d+-[\\w-]+$`) + this.branchPatterns = this.strategies.map( + entry => new RegExp(`^${escapeRegex(branchPrefixOf(entry.key))}-(\\d+)-${SLUG_PATTERN_BODY}$`) + ) + this.controlTagPattern = buildControlTagPattern(this.strategies) } /** - * Discovers tasks by fetching GitHub issues, running the planner, and validating the plan. + * Discovers tasks by fetching GitHub issues per strategy, running the planner, + * and validating the plan. * @returns Array of task specifications to implement. */ async discover (): Promise<TaskSpec[]> { - const issuesJson = await this.fetchAndSanitizeIssues() + const issues = await this.fetchAndSanitizeIssues() - if (issuesJson.length === 0) { - console.log("No issues with label '%s'. Exiting.", this.label) + if (issues.length === 0) { + console.log( + 'No issues with labels [%s]. Exiting.', + this.strategies.map(s => labelOf(s.key)).join(', ') + ) return [] } const coveredIssues = await this.fetchIssuesWithOpenPRs() - const actionableIssues = issuesJson.filter(issue => !coveredIssues.has(issue.number)) + const actionableIssues = issues.filter(issue => !coveredIssues.has(issue.number)) if (actionableIssues.length === 0) { - console.log('All sandcastle issues already have open PRs. Exiting.') + console.log( + 'All issues with labels [%s] already have open PRs. Exiting.', + this.strategies.map(s => labelOf(s.key)).join(', ') + ) return [] } @@ -107,8 +129,16 @@ export class GithubIssueSource implements TaskSource { maxIterations: 5, name: 'Planner', promptArgs: { - BRANCH_PREFIX: this.branchPrefix, - ISSUES_JSON: JSON.stringify(actionableIssues, null, 2), + ISSUES_JSON: JSON.stringify( + actionableIssues.map(({ body, labels, number, title }) => ({ + body, + labels, + number, + title, + })), + null, + 2 + ), }, promptFile: './.sandcastle/plan-prompt.md', sandbox: docker({ imageName: this.dockerImage, mounts: [...DOCKER_MOUNTS] }), @@ -140,7 +170,7 @@ export class GithubIssueSource implements TaskSource { console.log(`Plan: ${String(tasks.length)} issue(s) to work on:`) for (const task of tasks) { - console.log(` #${task.id}: ${task.title} → ${task.branch}`) + console.log(` #${task.id} [${task.strategyKey}]: ${task.title} → ${task.branch}`) } return tasks @@ -149,14 +179,50 @@ export class GithubIssueSource implements TaskSource { throw new Error('Planner failed to produce a valid plan after all retries.') } - private async fetchAndSanitizeIssues (): Promise< - { - body: string - labels: string[] - number: number - title: string - }[] - > { + /** + * Fetches issues for each registered strategy in parallel, then deduplicates + * by issue number in registry order (first strategy registered wins). Each + * issue is annotated with the strategy that will handle it and the + * corresponding branch prefix. + * @returns Sanitized issues with their resolved strategy. + */ + private async fetchAndSanitizeIssues (): Promise<ResolvedIssue[]> { + const fetched = await Promise.all( + this.strategies.map(async entry => ({ + entry, + rawIssues: await this.fetchIssuesByLabel(labelOf(entry.key)), + })) + ) + const seen = new Map<number, ResolvedIssue>() + for (const { entry, rawIssues } of fetched) { + for (const issue of rawIssues) { + const previous = seen.get(issue.number) + if (previous !== undefined) { + const winnerLabel = labelOf(previous.strategyKey) + const droppedLabel = labelOf(entry.key) + console.warn( + `Issue #${String(issue.number)} carries multiple strategy labels ` + + `('${winnerLabel}' and '${droppedLabel}'); processing as '${winnerLabel}' ` + + `(registered first), skipping '${droppedLabel}'. ` + + 'To remove the unwanted label: ' + + `gh issue edit ${String(issue.number)} --remove-label ${droppedLabel}` + ) + continue + } + seen.set(issue.number, { + body: this.sanitizeForPrompt(issue.body), + branchPrefix: branchPrefixOf(entry.key), + labels: issue.labels.map(l => l.name), + number: issue.number, + strategyKey: entry.key, + title: this.sanitizeForPrompt(issue.title), + }) + } + } + return [...seen.values()] + } + + private async fetchIssuesByLabel (label: string): Promise<z.infer<typeof RawIssuesSchema>> { let rawIssuesJson: string try { const { stdout } = await execFileAsync( @@ -171,34 +237,26 @@ export class GithubIssueSource implements TaskSource { '--limit', String(GITHUB_MAX_ISSUES_FETCH), '--label', - this.label, + label, ], { encoding: 'utf-8', maxBuffer: 8 * 1024 * 1024, timeout: GIT_TIMEOUT_MS } ) rawIssuesJson = stdout } catch (err: unknown) { throw new Error( - `Failed to fetch issues: ${toErrorMessage(err)}. Ensure gh is installed and authenticated.`, + `Failed to fetch issues with label '${label}': ${toErrorMessage(err)}. Ensure gh is installed and authenticated.`, { cause: err } ) } - let rawIssues: z.infer<typeof RawIssuesSchema> try { - rawIssues = RawIssuesSchema.parse(JSON.parse(rawIssuesJson)) + return RawIssuesSchema.parse(JSON.parse(rawIssuesJson)) } catch (err: unknown) { throw new Error( - `Failed to parse issues JSON: ${toErrorMessage(err)}. Unexpected format from gh CLI.`, + `Failed to parse issues JSON for label '${label}': ${toErrorMessage(err)}. Unexpected format from gh CLI.`, { cause: err } ) } - - return rawIssues.map(issue => ({ - body: sanitizeForPrompt(issue.body), - labels: issue.labels.map(label => label.name), - number: issue.number, - title: sanitizeForPrompt(issue.title), - })) } private async fetchIssuesWithOpenPRs (): Promise<Set<number>> { @@ -219,11 +277,13 @@ export class GithubIssueSource implements TaskSource { ) const prs = z.array(z.object({ headRefName: z.string() })).parse(JSON.parse(stdout)) const issueNumbers = new Set<number>() - const pattern = new RegExp(`^${this.escapedPrefix}-(\\d+)-`) for (const pr of prs) { - const match = pattern.exec(pr.headRefName) - if (match) { - issueNumbers.add(Number(match[1])) + for (const pattern of this.branchPatterns) { + const match = pattern.exec(pr.headRefName) + if (match) { + issueNumbers.add(Number(match[1])) + break + } } } return issueNumbers @@ -233,10 +293,17 @@ export class GithubIssueSource implements TaskSource { } } - private validatePlan ( - planContent: string, - issuesJson: { body: string; labels: string[]; number: number; title: string }[] - ): null | TaskSpec[] { + /** + * Strips agent-control tags from text to reduce prompt-injection risk. + * The deny-list is derived once from the registry at construction time. + * @param text - Raw text to sanitize. + * @returns Text with all control tags removed. + */ + private sanitizeForPrompt (text: string): string { + return text.normalize('NFKC').replace(this.controlTagPattern, '') + } + + private validatePlan (planContent: string, actionableIssues: ResolvedIssue[]): null | TaskSpec[] { try { const PlanSchema = z.object({ issues: z.array(z.unknown()) }) const parseResult = PlanSchema.safeParse(JSON.parse(planContent)) @@ -245,63 +312,103 @@ export class GithubIssueSource implements TaskSource { return null } const parsed = parseResult.data - const validated = parsed.issues.filter((entry): entry is Record<string, unknown> => { - if (typeof entry !== 'object' || entry === null) return false - const item = entry as Record<string, unknown> - if (typeof item.id !== 'string' || !/^\d+$/.test(item.id)) return false - if (typeof item.branch !== 'string' || !this.branchPattern.test(item.branch)) return false - if (typeof item.title !== 'string') return false - if (item.title.length > MAX_TITLE_CHARS) return false - // eslint-disable-next-line no-control-regex - if (/[\x00-\x1f]/.test(item.title)) return false - return true - }) - - const issueMap = new Map(issuesJson.map(issue => [String(issue.number), issue])) + const issueMap = new Map(actionableIssues.map(issue => [String(issue.number), issue])) + + const seenIds = new Set<string>() + const validated: TaskSpec[] = [] + for (const entry of parsed.issues) { + const spec = this.validatePlanEntry(entry, issueMap) + if (spec === null) continue + if (seenIds.has(spec.id)) { + console.warn( + `Planner produced duplicate id '${spec.id}'; keeping first occurrence and dropping the rest.` + ) + continue + } + seenIds.add(spec.id) + validated.push(spec) + } + + if (parsed.issues.length > 0 && validated.length === 0) { + console.error( + `Planner produced ${String(parsed.issues.length)} entries but none passed validation. Retrying.` + ) + return null + } return validated - .map(entry => { - const source = issueMap.get(entry.id as string) - if (!source) return null - const spec: TaskSpec = { - body: source.body, - branch: entry.branch as string, - id: entry.id as string, - labels: source.labels, - title: entry.title as string, - } - if (isValidIssueType(entry.issueType)) { - spec.issueType = entry.issueType - } - if (isValidConfidence(entry.confidence)) { - spec.confidence = entry.confidence - } - if ( - typeof entry.rootCauseHypothesis === 'string' && - entry.rootCauseHypothesis.length > 0 - ) { - spec.rootCauseHypothesis = sanitizeForPrompt(entry.rootCauseHypothesis).slice(0, 500) - } - if (Array.isArray(entry.acceptanceCriteria)) { - const criteria = entry.acceptanceCriteria - .filter((c): c is string => typeof c === 'string' && c.length > 0) - .map(c => sanitizeForPrompt(c).slice(0, 200)) - if (criteria.length > 0) { - spec.acceptanceCriteria = criteria.slice(0, 5) - } - } - return spec - }) - .filter((entry): entry is NonNullable<typeof entry> => entry !== null) } catch (err: unknown) { console.error(`Planner produced invalid JSON: ${toErrorMessage(err)}. Retrying.`) return null } } + + private validatePlanEntry (entry: unknown, issueMap: Map<string, ResolvedIssue>): null | TaskSpec { + if (typeof entry !== 'object' || entry === null) return null + const item = entry as Record<string, unknown> + if (typeof item.id !== 'string' || !/^\d+$/.test(item.id)) return null + if (typeof item.slug !== 'string') return null + if (item.slug.length > MAX_SLUG_CHARS || !SLUG_PATTERN.test(item.slug)) return null + if (typeof item.title !== 'string') return null + if (item.title.length > MAX_TITLE_CHARS) return null + // eslint-disable-next-line no-control-regex -- guard against control characters in titles + if (/[\x00-\x1f]/.test(item.title)) return null + const sanitizedTitle = this.sanitizeForPrompt(item.title).trim() + if (sanitizedTitle.length === 0) return null + + const source = issueMap.get(item.id) + if (!source) return null + + const spec: TaskSpec = { + body: source.body, + branch: `${source.branchPrefix}-${item.id}-${item.slug}`, + id: item.id, + labels: source.labels, + strategyKey: source.strategyKey, + title: sanitizedTitle, + } + if (isValidIssueType(item.issueType)) { + spec.issueType = item.issueType + } + if (isValidConfidence(item.confidence)) { + spec.confidence = item.confidence + } + if (typeof item.rootCauseHypothesis === 'string' && item.rootCauseHypothesis.length > 0) { + spec.rootCauseHypothesis = this.sanitizeForPrompt(item.rootCauseHypothesis).slice(0, 500) + } + if (Array.isArray(item.acceptanceCriteria)) { + const criteria = item.acceptanceCriteria + .filter((c): c is string => typeof c === 'string' && c.length > 0) + .map(c => this.sanitizeForPrompt(c).slice(0, 200)) + if (criteria.length > 0) { + spec.acceptanceCriteria = criteria.slice(0, 5) + } + } + return spec + } } +/** + * Strict kebab-case slug body shared by plan validation and PR-coverage + * branch parsing: lowercase letters/digits, hyphen-separated, no leading, + * trailing or double hyphen, no underscore. Anchored consumers wrap it in + * `^...$`. Mirrors the strategy-key shape so the assembled branch + * `<branchPrefix>-<id>-<slug>` is uniformly kebab-cased. + */ +const SLUG_PATTERN_BODY = '[a-z0-9]+(?:-[a-z0-9]+)*' + +const SLUG_PATTERN = new RegExp(`^${SLUG_PATTERN_BODY}$`) + const VALID_CONFIDENCE = new Set(['high', 'low', 'medium']) const VALID_ISSUE_TYPES = new Set(['bug-fix', 'feature', 'refactor']) +/** + * @param value - Value to escape for safe interpolation in a regex. + * @returns The value with regex metacharacters escaped. + */ +function escapeRegex (value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') +} + /** * @param value - Value to check. * @returns Whether value is a valid confidence level. @@ -319,14 +426,38 @@ function isValidIssueType (value: unknown): value is 'bug-fix' | 'feature' | 're } /** - * Strips agent-control tags from text to reduce prompt-injection risk. - * @param text - Raw text to sanitize. - * @returns Text with plan/findings/promise tags removed. + * Universal agent-control tags shared by every strategy: orchestrator-level + * vocabulary (planner output, completion signal) and common prompt-injection + * vectors. Strategy-specific tags come from `StrategyEntry.key` and + * `StrategyEntry.controlTags`. + */ +const UNIVERSAL_CONTROL_TAGS: readonly string[] = [ + 'code', + 'findings', + 'instructions', + 'plan', + 'promise', + 'system', + 'tool_call', +] as const + +/** + * Builds the regex that strips agent-control tags. The deny-list is the union + * of {@link UNIVERSAL_CONTROL_TAGS} and, for every registered strategy, its + * `key` plus optional `controlTags`. Adding a strategy automatically extends + * the deny-list — no edit to the task source is required. + * @param strategies - Registered strategies whose vocabulary participates. + * @returns Compiled regex matching opening or closing tags of any control name. */ -function sanitizeForPrompt (text: string): string { - const normalized = text.normalize('NFKC') - return normalized.replace( - /<\/?(?:plan|findings|promise|system|code|instructions|implement|review|tool_call)[^>]*>/gi, - '' - ) +function buildControlTagPattern (strategies: readonly StrategyEntry[]): RegExp { + const tags = new Set<string>(UNIVERSAL_CONTROL_TAGS) + for (const entry of strategies) { + tags.add(entry.key) + for (const tag of entry.controlTags ?? []) tags.add(tag) + } + const alternation = [...tags].map(escapeRegex).join('|') + // Lookahead asserts the tag name ends at an XML-tag-name boundary + // (whitespace, `/` for self-closing, or `>`), preventing prefix collisions + // such as `<plant>` matching alternative `plan`. + return new RegExp(`</?(?:${alternation})(?=[\\s/>])[^>]*>`, 'gi') } diff --git a/.sandcastle/types.ts b/.sandcastle/types.ts index 90e787bd..7ddc6f77 100644 --- a/.sandcastle/types.ts +++ b/.sandcastle/types.ts @@ -121,6 +121,8 @@ export interface TaskSpec { labels?: string[] /** Planner's hypothesis about what is broken/missing — for actor to validate, not follow blindly. */ rootCauseHypothesis?: string + /** Strategy key from the registry that drives the actor/critic loop for this task. */ + strategyKey: string /** Task title. */ title: string } -- 2.43.0