From: Jérôme Benoit <jerome.benoit@sap.com>
Date: Thu, 7 May 2026 23:04:27 +0000 (+0200)
Subject: feat(sandcastle): enrich planner with acceptance criteria and root cause hypothesis
X-Git-Tag: cli@v4.7.0~5
X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=92bffdfdd8472b5182f4e44c1276849b1bdafa12;p=e-mobility-charging-stations-simulator.git

feat(sandcastle): enrich planner with acceptance criteria and root cause hypothesis

The planner now produces structured analysis per issue: issueType,
confidence, rootCauseHypothesis, and acceptanceCriteria. These flow
into the actor prompt (confidence-gated hypothesis + criteria) and the
critic prompt (criteria as verification checklist).

- Confidence controls plan specificity: high → full context, medium/low → criteria only
- All planner-generated fields are sanitized and length-bounded
- Critic evaluates observable outcomes, never plan adherence
- Backward-compatible: missing fields result in empty template variables
---

diff --git a/.sandcastle/plan-prompt.md b/.sandcastle/plan-prompt.md
index 2f4470fb..3108d244 100644
--- a/.sandcastle/plan-prompt.md
+++ b/.sandcastle/plan-prompt.md
@@ -1,6 +1,6 @@
 # Plan Agent
 
-Read open GitHub issues and produce a parallelizable execution plan.
+Read open GitHub issues and produce a parallelizable execution plan with implementation context.
 
 ## Context
 
@@ -21,12 +21,17 @@ Read `AGENTS.md` and `.serena/memories/project_overview`.
 
 2. Select all issues that are independent and actionable.
 
-3. For each selected issue, assign a branch name: `{{BRANCH_PREFIX}}-<number>-<slug>` where slug is a short kebab-case summary (e.g., `{{BRANCH_PREFIX}}-42-fix-streaming-id`).
+3. For each selected issue:
+   - Assign a branch name: `{{BRANCH_PREFIX}}-<number>-<slug>` where slug is a short kebab-case summary (e.g., `{{BRANCH_PREFIX}}-42-fix-streaming-id`).
+   - Classify the issue type: `bug-fix`, `feature`, or `refactor`.
+   - Assess your confidence: `high` (clear scope, obvious approach), `medium` (some ambiguity), or `low` (unclear scope, multiple valid approaches).
+   - Formulate a root cause hypothesis: what is broken or missing, and why. This is a hypothesis for the implementer to validate — not a directive.
+   - Define 2-4 acceptance criteria: concrete, verifiable conditions that must be true when the implementation is complete. Focus on observable behavior, not implementation details.
 
 4. Output the plan in this exact format:
 
    ```text
-   <plan>{ "issues": [{ "id": "<number>", "title": "<title>", "branch": "{{BRANCH_PREFIX}}-<number>-<slug>" }] }</plan>
+   <plan>{"issues":[{"id":"<number>","title":"<title>","branch":"{{BRANCH_PREFIX}}-<number>-<slug>","issueType":"bug-fix|feature|refactor","confidence":"high|medium|low","rootCauseHypothesis":"...","acceptanceCriteria":["..."]}]}</plan>
    ```
 
 ## Rules
@@ -42,6 +47,8 @@ Read `AGENTS.md` and `.serena/memories/project_overview`.
   ```
 
 - Do not implement anything. Only produce the plan.
+- Acceptance criteria must be testable by reading code or running tests — no subjective assessments.
+- Root cause hypothesis should be specific (mention modules, patterns, or behaviors) — not a restatement of the issue title.
 
 ## Completion
 
diff --git a/.sandcastle/strategies/implement/actor-prompt.md b/.sandcastle/strategies/implement/actor-prompt.md
index 8ca0f40f..d0c1edfc 100644
--- a/.sandcastle/strategies/implement/actor-prompt.md
+++ b/.sandcastle/strategies/implement/actor-prompt.md
@@ -6,6 +6,8 @@ Implement issue **#{{TASK_ID}}** ("{{ISSUE_TITLE}}") on branch `{{BRANCH}}`.
 
 {{ISSUE_BODY}}
 
+{{PLAN_CONTEXT}}
+
 ## Review Findings
 
 {{FINDINGS}}
diff --git a/.sandcastle/strategies/implement/critic-prompt.md b/.sandcastle/strategies/implement/critic-prompt.md
index f274ea38..2caa8bbc 100644
--- a/.sandcastle/strategies/implement/critic-prompt.md
+++ b/.sandcastle/strategies/implement/critic-prompt.md
@@ -8,6 +8,12 @@ Run `git diff {{BASE_BRANCH}}...{{BRANCH}}` to see all changes. Examine the diff
 
 Read `AGENTS.md`, `CONTRIBUTING.md` and `.serena/memories/code_style_conventions`.
 
+## Acceptance Criteria
+
+{{ACCEPTANCE_CRITERIA}}
+
+If acceptance criteria are listed above, verify that the implementation satisfies each one. Report a HIGH finding for any criterion that is not met. Do NOT evaluate whether the actor followed a specific implementation approach — only whether the observable outcome matches the criteria. If no criteria are listed, skip this section.
+
 ## Output Format
 
 Output your findings as JSON wrapped in nonce-tagged delimiters. Use EXACTLY this tag format:
diff --git a/.sandcastle/strategies/implement/strategy.ts b/.sandcastle/strategies/implement/strategy.ts
index 58900902..3d1fbecd 100644
--- a/.sandcastle/strategies/implement/strategy.ts
+++ b/.sandcastle/strategies/implement/strategy.ts
@@ -1,10 +1,30 @@
-import type { FinalizationConfig, LoopStrategy } from '../../types.js'
+import type { FinalizationConfig, LoopStrategy, TaskSpec } from '../../types.js'
 
 import { GIT_TIMEOUT_MS } from '../../constants.js'
 import { attemptRebase, buildPrArgs, pushBranch } from '../../finalizer.js'
 import { execFileAsync, toErrorMessage } from '../../utils.js'
 import { runValidation } from '../../validation.js'
 
+/**
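+ * Builds the "Planner Analysis" prompt section, or '' when there is nothing to show.
+ * @param spec - Task spec; hypothesis is used only if confidence is 'high' or unset.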
+ */
+function buildPlanContext (spec: TaskSpec): string {
+  const parts: string[] = []
+  const includeHypothesis = spec.confidence === 'high' || spec.confidence === undefined
+
+  if (includeHypothesis && spec.rootCauseHypothesis) {
+    parts.push(`HYPOTHESIS (may be wrong — verify independently): ${spec.rootCauseHypothesis}`)
+  }
+  if (spec.acceptanceCriteria && spec.acceptanceCriteria.length > 0) {
+    parts.push(
+      `Acceptance criteria:\n${spec.acceptanceCriteria.map((c, i) => `${String(i + 1)}. ${c}`).join('\n')}`
+    )
+  }
+  if (parts.length === 0) return ''
+  return `## Planner Analysis\n\n${parts.join('\n\n')}`
+}
+
 export const implementStrategy: FinalizationConfig & LoopStrategy = {
   actorPromptFile: './.sandcastle/strategies/implement/actor-prompt.md',
 
@@ -13,10 +33,13 @@ export const implementStrategy: FinalizationConfig & LoopStrategy = {
     FINDINGS: findings.length > 0 ? JSON.stringify(findings, null, 2) : '',
     ISSUE_BODY: spec.body,
     ISSUE_TITLE: spec.title,
+    PLAN_CONTEXT: buildPlanContext(spec),
     TASK_ID: spec.id,
   }),
 
   buildCriticArgs: (spec, baseBranch) => ({
+    ACCEPTANCE_CRITERIA:
+      spec.acceptanceCriteria?.map((c, i) => `${String(i + 1)}. ${c}`).join('\n') ?? '',
     BASE_BRANCH: baseBranch,
     BRANCH: spec.branch,
   }),
diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts
index c7c9494e..05ea119a 100644
--- a/.sandcastle/task-source.ts
+++ b/.sandcastle/task-source.ts
@@ -246,30 +246,51 @@ export class GithubIssueSource implements TaskSource {
         return null
       }
       const parsed = parseResult.data
-      const validated = parsed.issues.filter(
-        (entry): entry is { branch: string; id: string; title: string } => {
-          if (typeof entry !== 'object' || entry === null) return false
-          const item = entry as Record<string, unknown>
-          if (typeof item.id !== 'string' || !/^\d+$/.test(item.id)) return false
-          if (typeof item.branch !== 'string' || !this.branchPattern.test(item.branch)) return false
-          if (typeof item.title !== 'string') return false
-          if (item.title.length > MAX_TITLE_CHARS) return false
-          // eslint-disable-next-line no-control-regex
-          if (/[\x00-\x1f]/.test(item.title)) return false
-          return true
-        }
-      )
+      const validated = parsed.issues.filter((entry): entry is Record<string, unknown> => {
+        if (typeof entry !== 'object' || entry === null) return false
+        const item = entry as Record<string, unknown>
+        if (typeof item.id !== 'string' || !/^\d+$/.test(item.id)) return false
+        if (typeof item.branch !== 'string' || !this.branchPattern.test(item.branch)) return false
+        if (typeof item.title !== 'string') return false
+        if (item.title.length > MAX_TITLE_CHARS) return false
+        // eslint-disable-next-line no-control-regex
+        if (/[\x00-\x1f]/.test(item.title)) return false
+        return true
+      })
 
       const issueMap = new Map(issuesJson.map(issue => [String(issue.number), issue]))
       return validated
         .map(entry => {
-          const source = issueMap.get(entry.id)
+          const source = issueMap.get(entry.id as string)
           if (!source) return null
-          return {
-            ...entry,
+          const spec: TaskSpec = {
             body: source.body,
+            branch: entry.branch as string,
+            id: entry.id as string,
             labels: source.labels,
+            title: entry.title as string,
+          }
+          if (isValidIssueType(entry.issueType)) {
+            spec.issueType = entry.issueType
+          }
+          if (isValidConfidence(entry.confidence)) {
+            spec.confidence = entry.confidence
           }
+          if (
+            typeof entry.rootCauseHypothesis === 'string' &&
+            entry.rootCauseHypothesis.length > 0
+          ) {
+            spec.rootCauseHypothesis = sanitizeForPrompt(entry.rootCauseHypothesis).slice(0, 500)
+          }
+          if (Array.isArray(entry.acceptanceCriteria)) {
+            const criteria = entry.acceptanceCriteria
+              .filter((c): c is string => typeof c === 'string' && c.length > 0)
+              .map(c => sanitizeForPrompt(c).slice(0, 200))
+            if (criteria.length > 0) {
+              spec.acceptanceCriteria = criteria.slice(0, 5)
+            }
+          }
+          return spec
         })
         .filter((entry): entry is NonNullable<typeof entry> => entry !== null)
     } catch (err: unknown) {
@@ -279,6 +300,25 @@ export class GithubIssueSource implements TaskSource {
   }
 }
 
+const VALID_CONFIDENCE = new Set(['high', 'low', 'medium'])
+const VALID_ISSUE_TYPES = new Set(['bug-fix', 'feature', 'refactor'])
+
+/**
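+ * Type guard: true when the value is one of the accepted confidence levels.
+ * @param value - Unvalidated value taken from a parsed plan entry.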
+ */
+function isValidConfidence (value: unknown): value is 'high' | 'low' | 'medium' {
+  return typeof value === 'string' && VALID_CONFIDENCE.has(value)
+}
+
+/**
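+ * Type guard: true when the value is one of the accepted issue types.
+ * @param value - Unvalidated value taken from a parsed plan entry.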
+ */
+function isValidIssueType (value: unknown): value is 'bug-fix' | 'feature' | 'refactor' {
+  return typeof value === 'string' && VALID_ISSUE_TYPES.has(value)
+}
+
 /**
  * Strips agent-control tags from text to reduce prompt-injection risk.
  * @param text - Raw text to sanitize.
diff --git a/.sandcastle/types.ts b/.sandcastle/types.ts
index 4de46430..950660f7 100644
--- a/.sandcastle/types.ts
+++ b/.sandcastle/types.ts
@@ -106,16 +106,24 @@ export type SandboxInstance = Awaited<ReturnType<typeof sandcastle.createSandbox
 
 /** Specification for a task to be implemented. */
 export interface TaskSpec {
+  /** Verifiable conditions that must hold when implementation is complete. */
+  acceptanceCriteria?: string[]
   /** Sanitized issue body text. */
   body: string
   /** Git branch name for this task. */
   branch: string
+  /** Planner's confidence in its analysis: controls plan specificity injected into actor. */
+  confidence?: 'high' | 'low' | 'medium'
   /** Task identifier (e.g. GitHub issue number as string). */
   id: string
+  /** Classification of the issue. */
+  issueType?: 'bug-fix' | 'feature' | 'refactor'
   /** Label names associated with the task (platform-specific, optional). */
   labels?: string[]
   /** Raw planner agent output that produced this task selection. */
   plannerOutput?: string
+  /** Planner's hypothesis about what is broken/missing — for actor to validate, not follow blindly. */
+  rootCauseHypothesis?: string
   /** Task title. */
   title: string
 }