feat(sandcastle): add roundHistory to LoopResult and plannerOutput to TaskSpec

author Jérôme Benoit <jerome.benoit@sap.com>

Thu, 7 May 2026 22:36:27 +0000 (00:36 +0200)

committer Jérôme Benoit <jerome.benoit@sap.com>

Thu, 7 May 2026 22:36:27 +0000 (00:36 +0200)
author Jérôme Benoit <jerome.benoit@sap.com>
Thu, 7 May 2026 22:36:27 +0000 (00:36 +0200)
committer Jérôme Benoit <jerome.benoit@sap.com>
Thu, 7 May 2026 22:36:27 +0000 (00:36 +0200)
diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts

index 2cdcc8dae9b4fefd95c9bd65fac00779cfedafc1..dcb2c89a03c0c54d50ced74800607194592baea2 100644 (file)
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -8,6 +8,7 @@ import type {
    LoopResult,
    LoopStatus,
    LoopStrategy,
+  RoundSnapshot,
    SandboxInstance,
    TaskSpec,
  } from './types.js'
@@ -35,8 +36,6 @@ export interface RefinementLoopOptions {
    iterationBudget?: number
    /** Maximum number of implement↔critic rounds. */
    maxRounds?: number
-  /** Optional callback invoked after each round completes. */
-  onRoundComplete?: (round: number, findings: Finding[]) => void
    /** When true, run one extra actor attempt if post-loop validation fails. */
    postLoopValidationRetry?: boolean
    /** Abort signal for cooperative cancellation (kills in-flight agent subprocesses). */
@@ -88,8 +87,6 @@ interface ResolvedLoopOptions {
    budget: number
    /** Maximum number of rounds. */
    maxRounds: number
-  /** Optional round-complete callback (no-op if not provided). */
-  onRoundComplete: (round: number, findings: Finding[]) => void
  }
  
  /** Result of a single implement↔critic round. */
@@ -116,13 +113,14 @@ export async function runRefinementLoop (
    strategy: LoopStrategy,
    opts?: RefinementLoopOptions
  ): Promise<LoopResult> {
-  const { baseBranch, budget, maxRounds, onRoundComplete } = resolveLoopOptions(opts)
+  const { baseBranch, budget, maxRounds } = resolveLoopOptions(opts)
    const signal = opts?.signal
    const validate = strategy.validate ?? ((cwd: string, s: TaskSpec) => runValidation(cwd, s))
  
    const ctx: LoopContext = { baseBranch, sandbox, signal, spec, strategy }
  
    const seenKeys = new Set<string>()
+  const roundHistory: RoundSnapshot[] = []
    let failureReason: string | undefined
    let lastFindings: Finding[] = []
    let status: LoopStatus = 'exhausted'
@@ -142,6 +140,8 @@ export async function runRefinementLoop (
  
      const result = await executeRound(ctx, round, budget, lastFindings)
  
+    roundHistory.push(buildRoundSnapshot(result, round))
+
      const earlyExit = checkEarlyExit(spec, round, result, totalCommits)
      if (earlyExit !== null) {
        totalCommits = earlyExit.totalCommits
@@ -188,7 +188,6 @@ export async function runRefinementLoop (
  
      totalCommits += result.commits
      previousFindingsCount = nonLowFindings.length
-    onRoundComplete(round, findings)
  
      if (strategy.shouldConverge?.(findings, round, totalCommits)) {
        lastFindings = findings
@@ -215,6 +214,7 @@ export async function runRefinementLoop (
        status = 'converged'
      } else if (roundsCompleted < maxRounds) {
        const result = await executeRound(ctx, roundsCompleted + 1, budget, lastFindings)
+      roundHistory.push(buildRoundSnapshot(result, roundsCompleted + 1))
        if (result.commits > 0) {
          totalCommits += result.commits
          if (await validate(sandbox.worktreePath, spec)) {
@@ -228,7 +228,34 @@ export async function runRefinementLoop (
      totalCommits = await resetToBestState(sandbox.worktreePath, bestSha, totalCommits, baseBranch)
    }
  
-  return { baseBranch, failureReason, lastFindings, roundsCompleted, status, totalCommits }
+  return {
+    baseBranch,
+    failureReason,
+    lastFindings,
+    roundHistory,
+    roundsCompleted,
+    status,
+    totalCommits,
+  }
+}
+
+/**
+ * Builds the `RoundSnapshot` for one round; `null` findings become an empty list with status `critic_errored`.
+ * @param result - Round outcome supplying the commit count and the critic findings (`null` when unavailable).
+ * @param round - Round number stored on the snapshot.
+ */
+function buildRoundSnapshot (result: RoundResult, round: number): RoundSnapshot {
+  return {
+    commits: result.commits,
+    findings: result.findings ?? [],
+    round,
+    status:
+      result.findings === null
+        ? 'critic_errored'
+        : result.findings.length > 0
+          ? 'has_findings'
+          : 'no_findings',
+  }
  }
  
  /**
@@ -578,7 +605,6 @@ function resolveLoopOptions (opts: RefinementLoopOptions | undefined): ResolvedL
      baseBranch: opts?.baseBranch ?? GIT_BASE_BRANCH,
      budget: opts?.iterationBudget ?? AGENT_ITERATION_BUDGET,
      maxRounds: opts?.maxRounds ?? AGENT_MAX_CRITIC_ROUNDS,
-    onRoundComplete: opts?.onRoundComplete ?? (() => undefined),
    }
  }
  
diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts

index c22c247c1a8dd73f3a28847758014997f85756bc..c7c9494eaf9e98dce6430226a7bc852d233afa92 100644 (file)
--- a/.sandcastle/task-source.ts
+++ b/.sandcastle/task-source.ts
@@ -135,6 +135,10 @@ export class GithubIssueSource implements TaskSource {
          return []
        }
  
+      for (const task of tasks) {
+        task.plannerOutput = plan.stdout
+      }
+
        console.log(`Plan: ${String(tasks.length)} issue(s) to work on:`)
        for (const task of tasks) {
          console.log(`  #${task.id}: ${task.title} → ${task.branch}`)
diff --git a/.sandcastle/types.ts b/.sandcastle/types.ts

index e7633e486d54357e2d8d0ee6995f1f1d7fa909a3..4de464304cdfe90c25c189af7bdb7e383b19450f 100644 (file)
--- a/.sandcastle/types.ts
+++ b/.sandcastle/types.ts
@@ -49,7 +49,12 @@ export interface LoopResult {
    failureReason?: string
    /** Outstanding findings from the last round. */
    lastFindings: Finding[]
-  /** Number of rounds completed. */
+  /**
+   * Per-round history of every executed round, including the post-loop validation retry (so it may be longer than `roundsCompleted`).
+   * Authoritative source — `lastFindings` is kept for backward compatibility.
+   */
+  roundHistory: RoundSnapshot[]
+  /** Number of main-loop rounds completed (excludes post-loop validation retry). */
    roundsCompleted: number
    /** Termination status. */
    status: LoopStatus
@@ -84,6 +89,18 @@ export type LoopStrategy = {
    validate?: (cwd: string, spec: TaskSpec) => Promise<boolean>
  }
  
+/** Snapshot of a single implement↔critic round, as stored in `LoopResult.roundHistory`. */
+export interface RoundSnapshot {
+  /** Number of commits the actor produced this round. */
+  commits: number
+  /** Findings from the critic (empty array if critic errored). */
+  findings: Finding[]
+  /** 1-indexed round number. */
+  round: number
+  /** Critic outcome: `critic_errored` when no findings were available, otherwise whether any were reported. */
+  status: 'critic_errored' | 'has_findings' | 'no_findings'
+}
+
  /** Type alias for a sandcastle sandbox instance. */
  export type SandboxInstance = Awaited<ReturnType<typeof sandcastle.createSandbox>>
  
@@ -97,6 +114,8 @@ export interface TaskSpec {
    id: string
    /** Label names associated with the task (platform-specific, optional). */
    labels?: string[]
+  /** Raw planner agent output that produced this task selection. */
+  plannerOutput?: string
    /** Task title. */
    title: string
  }
author	Jérôme Benoit <jerome.benoit@sap.com>
	Thu, 7 May 2026 22:36:27 +0000 (00:36 +0200)
committer	Jérôme Benoit <jerome.benoit@sap.com>
	Thu, 7 May 2026 22:36:27 +0000 (00:36 +0200)
.sandcastle/refinement-loop.ts		patch \| blob \| blame \| history
.sandcastle/task-source.ts		patch \| blob \| blame \| history
.sandcastle/types.ts		patch \| blob \| blame \| history