feat(sandcastle): enrich planner with acceptance criteria and root cause hypothesis

author Jérôme Benoit <jerome.benoit@sap.com>

Thu, 7 May 2026 23:04:27 +0000 (01:04 +0200)

committer Jérôme Benoit <jerome.benoit@sap.com>

Thu, 7 May 2026 23:04:27 +0000 (01:04 +0200)
author Jérôme Benoit <jerome.benoit@sap.com>
Thu, 7 May 2026 23:04:27 +0000 (01:04 +0200)
committer Jérôme Benoit <jerome.benoit@sap.com>
Thu, 7 May 2026 23:04:27 +0000 (01:04 +0200)
diff --git a/.sandcastle/plan-prompt.md b/.sandcastle/plan-prompt.md

index 2f4470fb182043501208758bfd770f8ae90d8be8..3108d2444d1b6fcfe5ead3c0884eed9697c53a95 100644 (file)
--- a/.sandcastle/plan-prompt.md
+++ b/.sandcastle/plan-prompt.md
@@ -1,6 +1,6 @@
  # Plan Agent
  
-Read open GitHub issues and produce a parallelizable execution plan.
+Read open GitHub issues and produce a parallelizable execution plan with implementation context.
  
  ## Context
  
@@ -21,12 +21,17 @@ Read `AGENTS.md` and `.serena/memories/project_overview`.
  
  2. Select all issues that are independent and actionable.
  
-3. For each selected issue, assign a branch name: `{{BRANCH_PREFIX}}-<number>-<slug>` where slug is a short kebab-case summary (e.g., `{{BRANCH_PREFIX}}-42-fix-streaming-id`).
+3. For each selected issue:
+   - Assign a branch name: `{{BRANCH_PREFIX}}-<number>-<slug>` where slug is a short kebab-case summary (e.g., `{{BRANCH_PREFIX}}-42-fix-streaming-id`).
+   - Classify the issue type: `bug-fix`, `feature`, or `refactor`.
+   - Assess your confidence: `high` (clear scope, obvious approach), `medium` (some ambiguity), or `low` (unclear scope, multiple valid approaches).
+   - Formulate a root cause hypothesis: what is broken or missing, and why. This is a hypothesis for the implementer to validate — not a directive.
+   - Define 2-4 acceptance criteria: concrete, verifiable conditions that must be true when the implementation is complete. Focus on observable behavior, not implementation details.
  
  4. Output the plan in this exact format:
  
     ```text
-   <plan>{ "issues": [{ "id": "<number>", "title": "<title>", "branch": "{{BRANCH_PREFIX}}-<number>-<slug>" }] }</plan>
+   <plan>{"issues":[{"id":"<number>","title":"<title>","branch":"{{BRANCH_PREFIX}}-<number>-<slug>","issueType":"bug-fix|feature|refactor","confidence":"high|medium|low","rootCauseHypothesis":"...","acceptanceCriteria":["..."]}]}</plan>
     ```
  
  ## Rules
@@ -42,6 +47,8 @@ Read `AGENTS.md` and `.serena/memories/project_overview`.
    ```
  
  - Do not implement anything. Only produce the plan.
+- Acceptance criteria must be testable by reading code or running tests — no subjective assessments.
+- Root cause hypothesis should be specific (mention modules, patterns, or behaviors) — not a restatement of the issue title.
  
  ## Completion
  
diff --git a/.sandcastle/strategies/implement/actor-prompt.md b/.sandcastle/strategies/implement/actor-prompt.md

index 8ca0f40fbea848f420e5b341898cdd0dabaa8a2d..d0c1edfc4b1c7542b27aa4596901496590c8a6ff 100644 (file)
--- a/.sandcastle/strategies/implement/actor-prompt.md
+++ b/.sandcastle/strategies/implement/actor-prompt.md
@@ -6,6 +6,8 @@ Implement issue **#{{TASK_ID}}** ("{{ISSUE_TITLE}}") on branch `{{BRANCH}}`.
  
  {{ISSUE_BODY}}
  
+{{PLAN_CONTEXT}}
+
  ## Review Findings
  
  {{FINDINGS}}
diff --git a/.sandcastle/strategies/implement/critic-prompt.md b/.sandcastle/strategies/implement/critic-prompt.md

index f274ea38280e04a8505652bf61fc4973b6d5298d..2caa8bbc8d9e2af573dbf49c3487d37446ed69c8 100644 (file)
--- a/.sandcastle/strategies/implement/critic-prompt.md
+++ b/.sandcastle/strategies/implement/critic-prompt.md
@@ -8,6 +8,12 @@ Run `git diff {{BASE_BRANCH}}...{{BRANCH}}` to see all changes. Examine the diff
  
  Read `AGENTS.md`, `CONTRIBUTING.md` and `.serena/memories/code_style_conventions`.
  
+## Acceptance Criteria
+
+{{ACCEPTANCE_CRITERIA}}
+
+If acceptance criteria are listed above, verify that the implementation satisfies each one. Report a HIGH finding for any criterion that is not met. Do NOT evaluate whether the actor followed a specific implementation approach — only whether the observable outcome matches the criteria. If no criteria are listed, skip this section.
+
  ## Output Format
  
  Output your findings as JSON wrapped in nonce-tagged delimiters. Use EXACTLY this tag format:
diff --git a/.sandcastle/strategies/implement/strategy.ts b/.sandcastle/strategies/implement/strategy.ts

index 58900902934b031a891e5b2356845bf7a827ea89..3d1fbecdb25e55ac2d0570f3693121edadefcc74 100644 (file)
--- a/.sandcastle/strategies/implement/strategy.ts
+++ b/.sandcastle/strategies/implement/strategy.ts
@@ -1,10 +1,30 @@
-import type { FinalizationConfig, LoopStrategy } from '../../types.js'
+import type { FinalizationConfig, LoopStrategy, TaskSpec } from '../../types.js'
  
  import { GIT_TIMEOUT_MS } from '../../constants.js'
  import { attemptRebase, buildPrArgs, pushBranch } from '../../finalizer.js'
  import { execFileAsync, toErrorMessage } from '../../utils.js'
  import { runValidation } from '../../validation.js'
  
+/**
+ * Builds the PLAN_CONTEXT prompt text: a "## Planner Analysis" section, or an empty string when nothing qualifies.
+ * @param spec - Task spec; its confidence, rootCauseHypothesis and acceptanceCriteria are read.
+ */
+function buildPlanContext (spec: TaskSpec): string {
+  const parts: string[] = []
+  const includeHypothesis = spec.confidence === 'high' || spec.confidence === undefined // withheld for medium/low confidence
+
+  if (includeHypothesis && spec.rootCauseHypothesis) {
+    parts.push(`HYPOTHESIS (may be wrong — verify independently): ${spec.rootCauseHypothesis}`)
+  }
+  if (spec.acceptanceCriteria && spec.acceptanceCriteria.length > 0) {
+    parts.push(
+      `Acceptance criteria:\n${spec.acceptanceCriteria.map((c, i) => `${String(i + 1)}. ${c}`).join('\n')}`
+    )
+  }
+  if (parts.length === 0) return ''
+  return `## Planner Analysis\n\n${parts.join('\n\n')}`
+}
+
  export const implementStrategy: FinalizationConfig & LoopStrategy = {
    actorPromptFile: './.sandcastle/strategies/implement/actor-prompt.md',
  
@@ -13,10 +33,13 @@ export const implementStrategy: FinalizationConfig & LoopStrategy = {
      FINDINGS: findings.length > 0 ? JSON.stringify(findings, null, 2) : '',
      ISSUE_BODY: spec.body,
      ISSUE_TITLE: spec.title,
+    PLAN_CONTEXT: buildPlanContext(spec),
      TASK_ID: spec.id,
    }),
  
    buildCriticArgs: (spec, baseBranch) => ({
+    ACCEPTANCE_CRITERIA:
+      spec.acceptanceCriteria?.map((c, i) => `${String(i + 1)}. ${c}`).join('\n') ?? '',
      BASE_BRANCH: baseBranch,
      BRANCH: spec.branch,
    }),
diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts

index c7c9494eaf9e98dce6430226a7bc852d233afa92..05ea119ae23ba457cded0fc86d458bd04bcf75c5 100644 (file)
--- a/.sandcastle/task-source.ts
+++ b/.sandcastle/task-source.ts
@@ -246,30 +246,51 @@ export class GithubIssueSource implements TaskSource {
          return null
        }
        const parsed = parseResult.data
-      const validated = parsed.issues.filter(
-        (entry): entry is { branch: string; id: string; title: string } => {
-          if (typeof entry !== 'object' || entry === null) return false
-          const item = entry as Record<string, unknown>
-          if (typeof item.id !== 'string' || !/^\d+$/.test(item.id)) return false
-          if (typeof item.branch !== 'string' || !this.branchPattern.test(item.branch)) return false
-          if (typeof item.title !== 'string') return false
-          if (item.title.length > MAX_TITLE_CHARS) return false
-          // eslint-disable-next-line no-control-regex
-          if (/[\x00-\x1f]/.test(item.title)) return false
-          return true
-        }
-      )
+      const validated = parsed.issues.filter((entry): entry is Record<string, unknown> => {
+        if (typeof entry !== 'object' || entry === null) return false
+        const item = entry as Record<string, unknown>
+        if (typeof item.id !== 'string' || !/^\d+$/.test(item.id)) return false
+        if (typeof item.branch !== 'string' || !this.branchPattern.test(item.branch)) return false
+        if (typeof item.title !== 'string') return false
+        if (item.title.length > MAX_TITLE_CHARS) return false
+        // eslint-disable-next-line no-control-regex
+        if (/[\x00-\x1f]/.test(item.title)) return false
+        return true
+      })
  
        const issueMap = new Map(issuesJson.map(issue => [String(issue.number), issue]))
        return validated
          .map(entry => {
-          const source = issueMap.get(entry.id)
+          const source = issueMap.get(entry.id as string)
            if (!source) return null
-          return {
-            ...entry,
+          const spec: TaskSpec = {
              body: source.body,
+            branch: entry.branch as string,
+            id: entry.id as string,
              labels: source.labels,
+            title: entry.title as string,
+          }
+          if (isValidIssueType(entry.issueType)) {
+            spec.issueType = entry.issueType
+          }
+          if (isValidConfidence(entry.confidence)) {
+            spec.confidence = entry.confidence
            }
+          if (
+            typeof entry.rootCauseHypothesis === 'string' &&
+            entry.rootCauseHypothesis.length > 0
+          ) {
+            spec.rootCauseHypothesis = sanitizeForPrompt(entry.rootCauseHypothesis).slice(0, 500)
+          }
+          if (Array.isArray(entry.acceptanceCriteria)) {
+            const criteria = entry.acceptanceCriteria
+              .filter((c): c is string => typeof c === 'string' && c.length > 0)
+              .map(c => sanitizeForPrompt(c).slice(0, 200))
+            if (criteria.length > 0) {
+              spec.acceptanceCriteria = criteria.slice(0, 5)
+            }
+          }
+          return spec
          })
          .filter((entry): entry is NonNullable<typeof entry> => entry !== null)
      } catch (err: unknown) {
@@ -279,6 +300,25 @@ export class GithubIssueSource implements TaskSource {
    }
  }
  
+const VALID_CONFIDENCE = new Set(['high', 'low', 'medium']) // strings accepted by isValidConfidence
+const VALID_ISSUE_TYPES = new Set(['bug-fix', 'feature', 'refactor']) // strings accepted by isValidIssueType
+
+/**
+ * Type guard: true only when value is a string contained in VALID_CONFIDENCE.
+ * @param value - Value of unknown type to check.
+ */
+function isValidConfidence (value: unknown): value is 'high' | 'low' | 'medium' {
+  return typeof value === 'string' && VALID_CONFIDENCE.has(value)
+}
+
+/**
+ * Type guard: true only when value is a string contained in VALID_ISSUE_TYPES.
+ * @param value - Value of unknown type to check.
+ */
+function isValidIssueType (value: unknown): value is 'bug-fix' | 'feature' | 'refactor' {
+  return typeof value === 'string' && VALID_ISSUE_TYPES.has(value)
+}
+
  /**
   * Strips agent-control tags from text to reduce prompt-injection risk.
   * @param text - Raw text to sanitize.
diff --git a/.sandcastle/types.ts b/.sandcastle/types.ts

index 4de464304cdfe90c25c189af7bdb7e383b19450f..950660f761008dda186732e4c38a0bfc10588ec3 100644 (file)
--- a/.sandcastle/types.ts
+++ b/.sandcastle/types.ts
@@ -106,16 +106,24 @@ export type SandboxInstance = Awaited<ReturnType<typeof sandcastle.createSandbox
  
  /** Specification for a task to be implemented. */
  export interface TaskSpec {
+  /** Verifiable conditions that must hold when implementation is complete. */
+  acceptanceCriteria?: string[]
    /** Sanitized issue body text. */
    body: string
    /** Git branch name for this task. */
    branch: string
+  /** Planner's confidence; the implement strategy forwards the hypothesis only when 'high' or unset. */
+  confidence?: 'high' | 'low' | 'medium'
    /** Task identifier (e.g. GitHub issue number as string). */
    id: string
+  /** Classification of the issue. */
+  issueType?: 'bug-fix' | 'feature' | 'refactor'
    /** Label names associated with the task (platform-specific, optional). */
    labels?: string[]
    /** Raw planner agent output that produced this task selection. */
    plannerOutput?: string
+  /** Planner's hypothesis about what is broken/missing — for actor to validate, not follow blindly. */
+  rootCauseHypothesis?: string
    /** Task title. */
    title: string
  }
author	Jérôme Benoit <jerome.benoit@sap.com>
	Thu, 7 May 2026 23:04:27 +0000 (01:04 +0200)
committer	Jérôme Benoit <jerome.benoit@sap.com>
	Thu, 7 May 2026 23:04:27 +0000 (01:04 +0200)
.sandcastle/plan-prompt.md		patch \| blob \| blame \| history
.sandcastle/strategies/implement/actor-prompt.md		patch \| blob \| blame \| history
.sandcastle/strategies/implement/critic-prompt.md		patch \| blob \| blame \| history
.sandcastle/strategies/implement/strategy.ts		patch \| blob \| blame \| history
.sandcastle/task-source.ts		patch \| blob \| blame \| history
.sandcastle/types.ts		patch \| blob \| blame \| history