From: Jérôme Benoit Date: Thu, 7 May 2026 22:36:27 +0000 (+0200) Subject: feat(sandcastle): add roundHistory to LoopResult and plannerOutput to TaskSpec X-Git-Tag: cli@v4.7.0~6 X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=32e774c7ca15f2ad965cb244c85cd3bb0af9b055;p=e-mobility-charging-stations-simulator.git feat(sandcastle): add roundHistory to LoopResult and plannerOutput to TaskSpec Replace the unused onRoundComplete callback with a structured roundHistory array that accumulates RoundSnapshot per round (including post-loop validation retry). Attach raw planner stdout to TaskSpec.plannerOutput for downstream verification use. This enables a future planner-verification step to receive the full findings history alongside the original plan context. --- diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts index 2cdcc8da..dcb2c89a 100644 --- a/.sandcastle/refinement-loop.ts +++ b/.sandcastle/refinement-loop.ts @@ -8,6 +8,7 @@ import type { LoopResult, LoopStatus, LoopStrategy, + RoundSnapshot, SandboxInstance, TaskSpec, } from './types.js' @@ -35,8 +36,6 @@ export interface RefinementLoopOptions { iterationBudget?: number /** Maximum number of implement↔critic rounds. */ maxRounds?: number - /** Optional callback invoked after each round completes. */ - onRoundComplete?: (round: number, findings: Finding[]) => void /** When true, run one extra actor attempt if post-loop validation fails. */ postLoopValidationRetry?: boolean /** Abort signal for cooperative cancellation (kills in-flight agent subprocesses). */ @@ -88,8 +87,6 @@ interface ResolvedLoopOptions { budget: number /** Maximum number of rounds. */ maxRounds: number - /** Optional round-complete callback (no-op if not provided). */ - onRoundComplete: (round: number, findings: Finding[]) => void } /** Result of a single implement↔critic round. 
*/ @@ -116,13 +113,14 @@ export async function runRefinementLoop ( strategy: LoopStrategy, opts?: RefinementLoopOptions ): Promise { - const { baseBranch, budget, maxRounds, onRoundComplete } = resolveLoopOptions(opts) + const { baseBranch, budget, maxRounds } = resolveLoopOptions(opts) const signal = opts?.signal const validate = strategy.validate ?? ((cwd: string, s: TaskSpec) => runValidation(cwd, s)) const ctx: LoopContext = { baseBranch, sandbox, signal, spec, strategy } const seenKeys = new Set() + const roundHistory: RoundSnapshot[] = [] let failureReason: string | undefined let lastFindings: Finding[] = [] let status: LoopStatus = 'exhausted' @@ -142,6 +140,8 @@ export async function runRefinementLoop ( const result = await executeRound(ctx, round, budget, lastFindings) + roundHistory.push(buildRoundSnapshot(result, round)) + const earlyExit = checkEarlyExit(spec, round, result, totalCommits) if (earlyExit !== null) { totalCommits = earlyExit.totalCommits @@ -188,7 +188,6 @@ export async function runRefinementLoop ( totalCommits += result.commits previousFindingsCount = nonLowFindings.length - onRoundComplete(round, findings) if (strategy.shouldConverge?.(findings, round, totalCommits)) { lastFindings = findings @@ -215,6 +214,7 @@ export async function runRefinementLoop ( status = 'converged' } else if (roundsCompleted < maxRounds) { const result = await executeRound(ctx, roundsCompleted + 1, budget, lastFindings) + roundHistory.push(buildRoundSnapshot(result, roundsCompleted + 1)) if (result.commits > 0) { totalCommits += result.commits if (await validate(sandbox.worktreePath, spec)) { @@ -228,7 +228,34 @@ export async function runRefinementLoop ( totalCommits = await resetToBestState(sandbox.worktreePath, bestSha, totalCommits, baseBranch) } - return { baseBranch, failureReason, lastFindings, roundsCompleted, status, totalCommits } + return { + baseBranch, + failureReason, + lastFindings, + roundHistory, + roundsCompleted, + status, + totalCommits, + } 
+} + +/** + * Builds the snapshot appended to roundHistory for one executed round. + * @param result - Round outcome; null findings yield status 'critic_errored' and an empty findings array. + * @param round - 1-indexed round number recorded on the snapshot. + */ +function buildRoundSnapshot (result: RoundResult, round: number): RoundSnapshot { + return { + commits: result.commits, + findings: result.findings ?? [], + round, + status: + result.findings === null + ? 'critic_errored' + : result.findings.length > 0 + ? 'has_findings' + : 'no_findings', + } } /** @@ -578,7 +605,6 @@ function resolveLoopOptions (opts: RefinementLoopOptions | undefined): ResolvedL baseBranch: opts?.baseBranch ?? GIT_BASE_BRANCH, budget: opts?.iterationBudget ?? AGENT_ITERATION_BUDGET, maxRounds: opts?.maxRounds ?? AGENT_MAX_CRITIC_ROUNDS, - onRoundComplete: opts?.onRoundComplete ?? (() => undefined), } } diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts index c22c247c..c7c9494e 100644 --- a/.sandcastle/task-source.ts +++ b/.sandcastle/task-source.ts @@ -135,6 +135,10 @@ export class GithubIssueSource implements TaskSource { return [] } + for (const task of tasks) { + task.plannerOutput = plan.stdout + } + console.log(`Plan: ${String(tasks.length)} issue(s) to work on:`) for (const task of tasks) { console.log(` #${task.id}: ${task.title} → ${task.branch}`) diff --git a/.sandcastle/types.ts b/.sandcastle/types.ts index e7633e48..4de46430 100644 --- a/.sandcastle/types.ts +++ b/.sandcastle/types.ts @@ -49,7 +49,12 @@ export interface LoopResult { failureReason?: string /** Outstanding findings from the last round. */ lastFindings: Finding[] - /** Number of rounds completed. */ + /** + * Complete findings history across all rounds. + * Authoritative source — `lastFindings` is kept for backward compatibility. + */ + roundHistory: RoundSnapshot[] + /** Number of main-loop rounds completed (excludes post-loop validation retry). */ roundsCompleted: number /** Termination status. */ status: LoopStatus @@ -84,6 +89,18 @@ export type LoopStrategy = { validate?: (cwd: string, spec: TaskSpec) => Promise } +/** Snapshot of a single implement↔critic round. 
*/ +export interface RoundSnapshot { + /** Number of commits the actor produced this round. */ + commits: number + /** Findings from the critic (empty array if critic errored). */ + findings: Finding[] + /** 1-indexed round number. */ + round: number + /** Outcome of the critic phase for this round. */ + status: 'critic_errored' | 'has_findings' | 'no_findings' +} + /** Type alias for a sandcastle sandbox instance. */ export type SandboxInstance = Awaited> @@ -97,6 +114,8 @@ export interface TaskSpec { id: string /** Label names associated with the task (platform-specific, optional). */ labels?: string[] + /** Raw planner agent output that produced this task selection. */ + plannerOutput?: string /** Task title. */ title: string }