From: Jérôme Benoit Date: Thu, 7 May 2026 23:04:27 +0000 (+0200) Subject: feat(sandcastle): enrich planner with acceptance criteria and root cause hypothesis X-Git-Tag: cli@v4.7.0~5 X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=92bffdfdd8472b5182f4e44c1276849b1bdafa12;p=e-mobility-charging-stations-simulator.git feat(sandcastle): enrich planner with acceptance criteria and root cause hypothesis The planner now produces structured analysis per issue: issueType, confidence, rootCauseHypothesis, and acceptanceCriteria. These flow into the actor prompt (confidence-gated hypothesis + criteria) and the critic prompt (criteria as verification checklist). - Confidence controls plan specificity: high → full context, medium/low → criteria only - All planner-generated fields are sanitized and length-bounded - Critic evaluates observable outcomes, never plan adherence - Backward-compatible: missing fields result in empty template variables --- diff --git a/.sandcastle/plan-prompt.md b/.sandcastle/plan-prompt.md index 2f4470fb..3108d244 100644 --- a/.sandcastle/plan-prompt.md +++ b/.sandcastle/plan-prompt.md @@ -1,6 +1,6 @@ # Plan Agent -Read open GitHub issues and produce a parallelizable execution plan. +Read open GitHub issues and produce a parallelizable execution plan with implementation context. ## Context @@ -21,12 +21,17 @@ Read `AGENTS.md` and `.serena/memories/project_overview`. 2. Select all issues that are independent and actionable. -3. For each selected issue, assign a branch name: `{{BRANCH_PREFIX}}-<number>-<slug>` where slug is a short kebab-case summary (e.g., `{{BRANCH_PREFIX}}-42-fix-streaming-id`). +3. For each selected issue: + - Assign a branch name: `{{BRANCH_PREFIX}}-<number>-<slug>` where slug is a short kebab-case summary (e.g., `{{BRANCH_PREFIX}}-42-fix-streaming-id`). + - Classify the issue type: `bug-fix`, `feature`, or `refactor`. 
+ - Assess your confidence: `high` (clear scope, obvious approach), `medium` (some ambiguity), or `low` (unclear scope, multiple valid approaches). + - Formulate a root cause hypothesis: what is broken or missing, and why. This is a hypothesis for the implementer to validate — not a directive. + - Define 2-4 acceptance criteria: concrete, verifiable conditions that must be true when the implementation is complete. Focus on observable behavior, not implementation details. 4. Output the plan in this exact format: ```text - <plan>{ "issues": [{ "id": "<number>", "title": "<title>", "branch": "{{BRANCH_PREFIX}}-<number>-<slug>" }] }</plan> + <plan>{"issues":[{"id":"<number>","title":"<title>","branch":"{{BRANCH_PREFIX}}-<number>-<slug>","issueType":"bug-fix|feature|refactor","confidence":"high|medium|low","rootCauseHypothesis":"...","acceptanceCriteria":["..."]}]}</plan> ``` ## Rules @@ -42,6 +47,8 @@ Read `AGENTS.md` and `.serena/memories/project_overview`. ``` - Do not implement anything. Only produce the plan. +- Acceptance criteria must be testable by reading code or running tests — no subjective assessments. +- Root cause hypothesis should be specific (mention modules, patterns, or behaviors) — not a restatement of the issue title. ## Completion diff --git a/.sandcastle/strategies/implement/actor-prompt.md b/.sandcastle/strategies/implement/actor-prompt.md index 8ca0f40f..d0c1edfc 100644 --- a/.sandcastle/strategies/implement/actor-prompt.md +++ b/.sandcastle/strategies/implement/actor-prompt.md @@ -6,6 +6,8 @@ Implement issue **#{{TASK_ID}}** ("{{ISSUE_TITLE}}") on branch `{{BRANCH}}`. 
{{ISSUE_BODY}} +{{PLAN_CONTEXT}} + ## Review Findings {{FINDINGS}} diff --git a/.sandcastle/strategies/implement/critic-prompt.md b/.sandcastle/strategies/implement/critic-prompt.md index f274ea38..2caa8bbc 100644 --- a/.sandcastle/strategies/implement/critic-prompt.md +++ b/.sandcastle/strategies/implement/critic-prompt.md @@ -8,6 +8,12 @@ Run `git diff {{BASE_BRANCH}}...{{BRANCH}}` to see all changes. Examine the diff Read `AGENTS.md`, `CONTRIBUTING.md` and `.serena/memories/code_style_conventions`. +## Acceptance Criteria + +{{ACCEPTANCE_CRITERIA}} + +If acceptance criteria are listed above, verify that the implementation satisfies each one. Report a HIGH finding for any criterion that is not met. Do NOT evaluate whether the actor followed a specific implementation approach — only whether the observable outcome matches the criteria. If no criteria are listed, skip this section. + ## Output Format Output your findings as JSON wrapped in nonce-tagged delimiters. Use EXACTLY this tag format: diff --git a/.sandcastle/strategies/implement/strategy.ts b/.sandcastle/strategies/implement/strategy.ts index 58900902..3d1fbecd 100644 --- a/.sandcastle/strategies/implement/strategy.ts +++ b/.sandcastle/strategies/implement/strategy.ts @@ -1,10 +1,30 @@ -import type { FinalizationConfig, LoopStrategy } from '../../types.js' +import type { FinalizationConfig, LoopStrategy, TaskSpec } from '../../types.js' import { GIT_TIMEOUT_MS } from '../../constants.js' import { attemptRebase, buildPrArgs, pushBranch } from '../../finalizer.js' import { execFileAsync, toErrorMessage } from '../../utils.js' import { runValidation } from '../../validation.js' +/** + * + * @param spec + */ +function buildPlanContext (spec: TaskSpec): string { + const parts: string[] = [] + const includeHypothesis = spec.confidence === 'high' || spec.confidence === undefined + + if (includeHypothesis && spec.rootCauseHypothesis) { + parts.push(`HYPOTHESIS (may be wrong — verify independently): 
${spec.rootCauseHypothesis}`) + } + if (spec.acceptanceCriteria && spec.acceptanceCriteria.length > 0) { + parts.push( + `Acceptance criteria:\n${spec.acceptanceCriteria.map((c, i) => `${String(i + 1)}. ${c}`).join('\n')}` + ) + } + if (parts.length === 0) return '' + return `## Planner Analysis\n\n${parts.join('\n\n')}` +} + export const implementStrategy: FinalizationConfig & LoopStrategy = { actorPromptFile: './.sandcastle/strategies/implement/actor-prompt.md', @@ -13,10 +33,13 @@ export const implementStrategy: FinalizationConfig & LoopStrategy = { FINDINGS: findings.length > 0 ? JSON.stringify(findings, null, 2) : '', ISSUE_BODY: spec.body, ISSUE_TITLE: spec.title, + PLAN_CONTEXT: buildPlanContext(spec), TASK_ID: spec.id, }), buildCriticArgs: (spec, baseBranch) => ({ + ACCEPTANCE_CRITERIA: + spec.acceptanceCriteria?.map((c, i) => `${String(i + 1)}. ${c}`).join('\n') ?? '', BASE_BRANCH: baseBranch, BRANCH: spec.branch, }), diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts index c7c9494e..05ea119a 100644 --- a/.sandcastle/task-source.ts +++ b/.sandcastle/task-source.ts @@ -246,30 +246,51 @@ export class GithubIssueSource implements TaskSource { return null } const parsed = parseResult.data - const validated = parsed.issues.filter( - (entry): entry is { branch: string; id: string; title: string } => { - if (typeof entry !== 'object' || entry === null) return false - const item = entry as Record<string, unknown> - if (typeof item.id !== 'string' || !/^\d+$/.test(item.id)) return false - if (typeof item.branch !== 'string' || !this.branchPattern.test(item.branch)) return false - if (typeof item.title !== 'string') return false - if (item.title.length > MAX_TITLE_CHARS) return false - // eslint-disable-next-line no-control-regex - if (/[\x00-\x1f]/.test(item.title)) return false - return true - } - ) + const validated = parsed.issues.filter((entry): entry is Record<string, unknown> => { + if (typeof entry !== 'object' || entry === null) return 
false + const item = entry as Record<string, unknown> + if (typeof item.id !== 'string' || !/^\d+$/.test(item.id)) return false + if (typeof item.branch !== 'string' || !this.branchPattern.test(item.branch)) return false + if (typeof item.title !== 'string') return false + if (item.title.length > MAX_TITLE_CHARS) return false + // eslint-disable-next-line no-control-regex + if (/[\x00-\x1f]/.test(item.title)) return false + return true + }) const issueMap = new Map(issuesJson.map(issue => [String(issue.number), issue])) return validated .map(entry => { - const source = issueMap.get(entry.id) + const source = issueMap.get(entry.id as string) if (!source) return null - return { - ...entry, + const spec: TaskSpec = { body: source.body, + branch: entry.branch as string, + id: entry.id as string, labels: source.labels, + title: entry.title as string, + } + if (isValidIssueType(entry.issueType)) { + spec.issueType = entry.issueType + } + if (isValidConfidence(entry.confidence)) { + spec.confidence = entry.confidence } + if ( + typeof entry.rootCauseHypothesis === 'string' && + entry.rootCauseHypothesis.length > 0 + ) { + spec.rootCauseHypothesis = sanitizeForPrompt(entry.rootCauseHypothesis).slice(0, 500) + } + if (Array.isArray(entry.acceptanceCriteria)) { + const criteria = entry.acceptanceCriteria + .filter((c): c is string => typeof c === 'string' && c.length > 0) + .map(c => sanitizeForPrompt(c).slice(0, 200)) + if (criteria.length > 0) { + spec.acceptanceCriteria = criteria.slice(0, 5) + } + } + return spec }) .filter((entry): entry is NonNullable<typeof entry> => entry !== null) } catch (err: unknown) { @@ -279,6 +300,25 @@ export class GithubIssueSource implements TaskSource { } } +const VALID_CONFIDENCE = new Set(['high', 'low', 'medium']) +const VALID_ISSUE_TYPES = new Set(['bug-fix', 'feature', 'refactor']) + +/** + * + * @param value + */ +function isValidConfidence (value: unknown): value is 'high' | 'low' | 'medium' { + return typeof value === 'string' && 
VALID_CONFIDENCE.has(value) +} + +/** + * + * @param value + */ +function isValidIssueType (value: unknown): value is 'bug-fix' | 'feature' | 'refactor' { + return typeof value === 'string' && VALID_ISSUE_TYPES.has(value) +} + /** * Strips agent-control tags from text to reduce prompt-injection risk. * @param text - Raw text to sanitize. diff --git a/.sandcastle/types.ts b/.sandcastle/types.ts index 4de46430..950660f7 100644 --- a/.sandcastle/types.ts +++ b/.sandcastle/types.ts @@ -106,16 +106,24 @@ export type SandboxInstance = Awaited<ReturnType<typeof sandcastle.createSandbox /** Specification for a task to be implemented. */ export interface TaskSpec { + /** Verifiable conditions that must hold when implementation is complete. */ + acceptanceCriteria?: string[] /** Sanitized issue body text. */ body: string /** Git branch name for this task. */ branch: string + /** Planner's confidence in its analysis: controls plan specificity injected into actor. */ + confidence?: 'high' | 'low' | 'medium' /** Task identifier (e.g. GitHub issue number as string). */ id: string + /** Classification of the issue. */ + issueType?: 'bug-fix' | 'feature' | 'refactor' /** Label names associated with the task (platform-specific, optional). */ labels?: string[] /** Raw planner agent output that produced this task selection. */ plannerOutput?: string + /** Planner's hypothesis about what is broken/missing — for actor to validate, not follow blindly. */ + rootCauseHypothesis?: string /** Task title. */ title: string }