From 16acd0da8c4ddf3f954f7fdafe5d39762de81b8c Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Thu, 7 May 2026 10:25:26 +0200 Subject: [PATCH] refactor(sandcastle): generalize actor/critic loop with LoopContext and coherent API - Introduce LoopContext to group invariant params (spec, sandbox, strategy, baseBranch, signal) - Extract GIT_BASE_BRANCH constant, thread through loop + finalization + prompts - Rename 'implementer' to 'actor' in generic loop code - Add extensibility: validate?, actorModel?, criticModel? on LoopStrategy - Add baseBranch to RefinementLoopOptions and LoopResult - Make Finding.category extensible (z.string()) - Make TaskSpec.labels optional - Fix param ordering: pushBranch(spec, cwd, ...), deduplicateFindings(findings, cwd, ...) - Simplify root lint-staged to cover all TS files --- .sandcastle/constants.ts | 2 + .sandcastle/finalizer.ts | 34 +++-- .sandcastle/refinement-loop.ts | 116 +++++++++--------- .../strategies/implement/critic-prompt.md | 2 +- .sandcastle/strategies/implement/strategy.ts | 15 ++- .sandcastle/types.ts | 29 ++++- 6 files changed, 115 insertions(+), 83 deletions(-) diff --git a/.sandcastle/constants.ts b/.sandcastle/constants.ts index eb647597..1ebfee6d 100644 --- a/.sandcastle/constants.ts +++ b/.sandcastle/constants.ts @@ -19,6 +19,8 @@ export const AGENT_TASK_TIMEOUT_MS = 6_000_000 // ── Git ────────────────────────────────────────────────────────────────────── +export const GIT_BASE_BRANCH = 'main' + export const GIT_BRANCH_PREFIX = 'agent/issue' export const GIT_PUSH_TIMEOUT_MS = 60_000 diff --git a/.sandcastle/finalizer.ts b/.sandcastle/finalizer.ts index 7ff3f89d..9d7465d1 100644 --- a/.sandcastle/finalizer.ts +++ b/.sandcastle/finalizer.ts @@ -3,6 +3,7 @@ import crypto from 'node:crypto' import type { LoopResult, TaskSpec } from './types.js' import { + GIT_BASE_BRANCH, GIT_PUSH_TIMEOUT_MS, GIT_TIMEOUT_MS, MAX_STDERR_CHARS, @@ -12,18 +13,22 @@ import { import { execFileAsync, toErrorMessage } from './utils.js' /** - * Fetches origin/main and rebases the current branch onto it. + * Fetches the base branch and rebases the current branch onto it. * On failure, aborts the rebase cleanly. * @param cwd - Working directory (worktree path). + * @param baseBranch - Target branch for rebase. * @returns `true` if rebase succeeded, `false` otherwise. */ -export async function attemptRebase (cwd: string): Promise { +export async function attemptRebase (cwd: string, baseBranch = GIT_BASE_BRANCH): Promise { try { - await execFileAsync('git', ['fetch', 'origin', 'main'], { + await execFileAsync('git', ['fetch', 'origin', baseBranch], { + cwd, + timeout: GIT_TIMEOUT_MS, + }) + await execFileAsync('git', ['rebase', `origin/${baseBranch}`], { cwd, timeout: GIT_TIMEOUT_MS, }) - await execFileAsync('git', ['rebase', 'origin/main'], { cwd, timeout: GIT_TIMEOUT_MS }) return true } catch { try { @@ -40,14 +45,16 @@ export async function attemptRebase (cwd: string): Promise { * @param spec - The task specification. * @param loopResult - The result from the refinement loop. * @param validationPassed - Whether the validation suite passed. - * @param rebaseSucceeded - Whether the rebase onto main succeeded. + * @param rebaseSucceeded - Whether the rebase onto the base branch succeeded. + * @param baseBranch - Target branch for PR base. * @returns Object with `isDraft` flag and `prArgs` string array. */ export function buildPrArgs ( spec: TaskSpec, loopResult: LoopResult, validationPassed: boolean, - rebaseSucceeded: boolean + rebaseSucceeded: boolean, + baseBranch = GIT_BASE_BRANCH ): { isDraft: boolean; prArgs: string[] } { const converged = loopResult.status === 'converged' const isDraft = !converged || !validationPassed @@ -59,13 +66,14 @@ export function buildPrArgs ( ? '\n\n⚠️ Validation did not pass. Manual review required.' : '' const rebaseNote = !rebaseSucceeded - ? '\n\n⚠️ Rebase failed. Branch is not rebased onto main.' + ? `\n\n⚠️ Rebase failed. Branch is not rebased onto ${baseBranch}.` : '' const validationCheck = validationPassed ? '- [x]' : '- [ ]' - const commitPrefix = spec.labels.includes('enhancement') + const labels = spec.labels ?? [] + const commitPrefix = labels.includes('enhancement') ? 'feat' - : spec.labels.includes('bug') + : labels.includes('bug') ? 'fix' : 'chore' const cleanTitle = spec.title.replace(/^\[(?:FEATURE|BUG|FIX|CHORE)\]\s*/i, '') @@ -85,12 +93,12 @@ export function buildPrArgs ( '--head', spec.branch, '--base', - 'main', + baseBranch, '--title', prTitle, '--body', prBody, - ...spec.labels.flatMap(label => ['--label', label]), + ...labels.flatMap(label => ['--label', label]), ] return { isDraft, prArgs } @@ -110,14 +118,14 @@ export function extractStderr (err: unknown): string { /** * Pushes the branch to origin. When rebase succeeded, uses force-with-lease * with a rescue-branch fallback. When rebase was aborted, does a plain push. - * @param cwd - Working directory (worktree path). * @param spec - The task specification. + * @param cwd - Working directory (worktree path). * @param rebaseSucceeded - Whether the preceding rebase completed successfully. * @returns `true` if the primary push succeeded, `false` otherwise. */ export async function pushBranch ( - cwd: string, spec: TaskSpec, + cwd: string, rebaseSucceeded: boolean ): Promise { if (rebaseSucceeded) { diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts index 32456337..a81f35b2 100644 --- a/.sandcastle/refinement-loop.ts +++ b/.sandcastle/refinement-loop.ts @@ -5,6 +5,7 @@ import { join, sep } from 'node:path' import type { Finding, + LoopContext, LoopResult, LoopStatus, LoopStrategy, @@ -20,6 +21,7 @@ import { AGENT_MAX_CRITIC_ROUNDS, COMPLETION_SIGNAL, CONTEXT_HASH_RADIUS, + GIT_BASE_BRANCH, HASH_PREFIX_LENGTH, } from './constants.js' import { runValidation } from './finalizer.js' @@ -28,13 +30,15 @@ import { execFileAsync } from './utils.js' /** Options for configuring the refinement loop. */ export interface RefinementLoopOptions { + /** Base branch for commit counting (default: 'main'). */ + baseBranch?: string /** Budget of iterations per round (flat constant applied to every round). */ iterationBudget?: number /** Maximum number of implement↔critic rounds. */ maxRounds?: number /** Optional callback invoked after each round completes. */ onRoundComplete?: (round: number, findings: Finding[]) => void - /** When true, run one extra implementer attempt if post-loop validation fails. */ + /** When true, run one extra actor attempt if post-loop validation fails. */ postLoopValidationRetry?: boolean /** Abort signal for cooperative cancellation (kills in-flight agent subprocesses). */ signal?: AbortSignal @@ -67,7 +71,7 @@ interface HashInput { * Groups the per-round identifiers needed for regression detection and rollback. */ interface RatchetContext { - /** SHA of HEAD before the implementer ran (used for rollback). */ + /** SHA of HEAD before the actor ran (used for rollback). */ readonly beforeSha: string /** Working directory for git operations. */ readonly cwd: string @@ -79,6 +83,8 @@ interface RatchetContext { /** Resolved loop options with defaults applied. */ interface ResolvedLoopOptions { + /** Base branch for commit counting. */ + baseBranch: string /** Iteration budget per round. */ budget: number /** Maximum number of rounds. */ @@ -89,9 +95,9 @@ interface ResolvedLoopOptions { /** Result of a single implement↔critic round. */ interface RoundResult { - /** SHA of HEAD before the implementer ran. */ + /** SHA of HEAD before the actor ran. */ beforeSha: string - /** Number of commits made by the implementer. */ + /** Number of commits made by the actor. */ commits: number /** Parsed findings from the critic, or null on critic failure. */ findings: Finding[] | null @@ -111,8 +117,11 @@ export async function runRefinementLoop ( strategy: LoopStrategy, opts?: RefinementLoopOptions ): Promise { - const { budget, maxRounds, onRoundComplete } = resolveLoopOptions(opts) + const { baseBranch, budget, maxRounds, onRoundComplete } = resolveLoopOptions(opts) const signal = opts?.signal + const validate = strategy.validate ?? ((cwd: string, s: TaskSpec) => runValidation(cwd, s)) + + const ctx: LoopContext = { baseBranch, sandbox, signal, spec, strategy } const seenKeys = new Set() let lastFindings: Finding[] = [] @@ -131,7 +140,7 @@ export async function runRefinementLoop ( ` #${spec.id} round ${String(round)}/${String(maxRounds)} (budget: ${String(budget)})` ) - const result = await executeRound(spec, sandbox, round, budget, lastFindings, strategy, signal) + const result = await executeRound(ctx, round, budget, lastFindings) const earlyExit = checkEarlyExit(spec, round, result, totalCommits) if (earlyExit !== null) { @@ -143,14 +152,14 @@ export async function runRefinementLoop ( if (result.findings === null) break const findings: Finding[] = result.findings - if (result.commits > 0 && (await runValidation(sandbox.worktreePath, spec))) { + if (result.commits > 0 && (await validate(sandbox.worktreePath, spec))) { totalCommits += result.commits status = 'converged' break } const cwd = sandbox.worktreePath - const newFindings = await deduplicateFindings(findings, seenKeys, cwd) + const newFindings = await deduplicateFindings(findings, cwd, seenKeys) console.log( ` #${spec.id}: ${String(findings.length)} findings, ${String(newFindings.length)} new` @@ -197,22 +206,14 @@ export async function runRefinementLoop ( // Post-loop validation retry (if enabled) if (opts?.postLoopValidationRetry && totalCommits > 0 && status !== 'converged') { signal?.throwIfAborted() - const validationPassed = await runValidation(sandbox.worktreePath, spec) + const validationPassed = await validate(sandbox.worktreePath, spec) if (validationPassed) { status = 'converged' } else if (roundsCompleted < maxRounds) { - const result = await executeRound( - spec, - sandbox, - roundsCompleted + 1, - budget, - lastFindings, - strategy, - signal - ) + const result = await executeRound(ctx, roundsCompleted + 1, budget, lastFindings) if (result.commits > 0) { totalCommits += result.commits - if (await runValidation(sandbox.worktreePath, spec)) { + if (await validate(sandbox.worktreePath, spec)) { status = 'converged' } } @@ -220,10 +221,10 @@ export async function runRefinementLoop ( } if (shouldResetToBest(status, bestSha)) { - totalCommits = await resetToBestState(sandbox.worktreePath, bestSha, totalCommits) + totalCommits = await resetToBestState(sandbox.worktreePath, bestSha, totalCommits, baseBranch) } - return { lastFindings, roundsCompleted, status, totalCommits } + return { baseBranch, lastFindings, roundsCompleted, status, totalCommits } } /** @@ -375,14 +376,14 @@ async function computeFindingKey ( /** * Filters findings by confidence and deduplicates against previously seen keys. * @param findings - Raw findings from the critic. - * @param seenKeys - Set of previously seen dedup keys (mutated: new keys are added). * @param cwd - Working directory for context hashing. + * @param seenKeys - Set of previously seen dedup keys (mutated: new keys are added). * @returns Array of new, non-LOW-confidence findings. */ async function deduplicateFindings ( findings: Finding[], - seenKeys: Set, - cwd: string + cwd: string, + seenKeys: Set ): Promise { const fileCache = new Map() const keys = await Promise.all(findings.map(f => computeFindingKey(f, cwd, fileCache))) @@ -400,25 +401,21 @@ async function deduplicateFindings ( /** * Executes a single implement↔critic round. - * @param spec - The task specification. - * @param sandbox - The sandcastle sandbox instance. + * @param ctx - Loop context containing spec, sandbox, strategy, baseBranch, and signal. * @param round - Current round number (1-indexed). - * @param budget - Iteration budget for the implementer. - * @param lastFindings - Findings from the previous round to feed to the implementer. - * @param strategy - Strategy config for prompt/arg customization. - * @param signal - Abort signal for cooperative cancellation. + * @param budget - Iteration budget for the actor. + * @param lastFindings - Findings from the previous round to feed to the actor. * @returns The round result containing commits, findings, and the pre-round SHA. */ async function executeRound ( - spec: TaskSpec, - sandbox: SandboxInstance, + ctx: LoopContext, round: number, budget: number, - lastFindings: Finding[], - strategy: LoopStrategy, - signal?: AbortSignal + lastFindings: Finding[] ): Promise { - // Capture SHA before implementer runs (for quality ratchet rollback) + const { sandbox, signal, spec, strategy } = ctx + + // Capture SHA before actor runs (for quality ratchet rollback) let beforeSha = '' try { const { stdout } = await execFileAsync('git', ['rev-parse', 'HEAD'], { @@ -429,15 +426,15 @@ async function executeRound ( console.warn(` #${spec.id}: Failed to capture HEAD SHA before round ${String(round)}.`) } - // Implementer - let implementerResult: Awaited> + // Actor + let actorResult: Awaited> try { - implementerResult = await sandbox.run({ - agent: sandcastle.opencode(AGENT_ACTOR_MODEL), + actorResult = await sandbox.run({ + agent: sandcastle.opencode(strategy.actorModel ?? AGENT_ACTOR_MODEL), completionSignal: COMPLETION_SIGNAL, idleTimeoutSeconds: AGENT_IDLE_TIMEOUT_S, maxIterations: budget, - name: `Implementer #${spec.id} R${String(round)}`, + name: `Actor #${spec.id} R${String(round)}`, promptArgs: strategy.buildActorArgs(spec, lastFindings), promptFile: strategy.actorPromptFile, signal, @@ -447,7 +444,7 @@ async function executeRound ( throw err } const msg = err instanceof Error ? (err.stack ?? err.message) : String(err) - console.error(` #${spec.id} R${String(round)}: Implementer threw: ${msg}`) + console.error(` #${spec.id} R${String(round)}: Actor threw: ${msg}`) return { beforeSha, commits: 0, findings: null } } @@ -455,7 +452,7 @@ async function executeRound ( const nonce = crypto.randomBytes(4).toString('hex') let findings: Finding[] | null try { - findings = await runCritic(sandbox, spec, round, nonce, strategy, signal) + findings = await runCritic(ctx, round, nonce) } catch (err: unknown) { if (signal?.aborted === true) { throw err @@ -465,7 +462,7 @@ async function executeRound ( findings = null } - return { beforeSha, commits: implementerResult.commits.length, findings } + return { beforeSha, commits: actorResult.commits.length, findings } } /** @@ -544,17 +541,21 @@ function parseFindings (stdout: string, nonce: string): Finding[] | null { * @param cwd - Working directory for git operations. * @param bestSha - The SHA to reset to. * @param currentCommits - Current total commits (fallback if recount fails). + * @param baseBranch - Base branch for commit counting. * @returns Updated total commit count. */ async function resetToBestState ( cwd: string, bestSha: string, - currentCommits: number + currentCommits: number, + baseBranch: string ): Promise { if (!/^[0-9a-f]{40}$/.test(bestSha)) return currentCommits try { await execFileAsync('git', ['reset', '--hard', bestSha], { cwd }) - const { stdout } = await execFileAsync('git', ['rev-list', '--count', 'main..HEAD'], { cwd }) + const { stdout } = await execFileAsync('git', ['rev-list', '--count', `${baseBranch}..HEAD`], { + cwd, + }) return parseInt(stdout.trim(), 10) || 0 } catch { return currentCommits @@ -568,6 +569,7 @@ async function resetToBestState ( */ function resolveLoopOptions (opts: RefinementLoopOptions | undefined): ResolvedLoopOptions { return { + baseBranch: opts?.baseBranch ?? GIT_BASE_BRANCH, budget: opts?.iterationBudget ?? AGENT_ITERATION_BUDGET, maxRounds: opts?.maxRounds ?? AGENT_MAX_CRITIC_ROUNDS, onRoundComplete: opts?.onRoundComplete ?? (() => undefined), @@ -576,29 +578,25 @@ function resolveLoopOptions (opts: RefinementLoopOptions | undefined): ResolvedL /** * Runs the critic agent, retrying once on parse failure. - * @param sandbox - The sandcastle sandbox instance. - * @param spec - The task specification. + * @param ctx - Loop context containing spec, sandbox, strategy, baseBranch, and signal. * @param round - Current round number. * @param nonce - Unique nonce for parsing. - * @param strategy - Strategy config for prompt/arg customization. - * @param signal - Abort signal for cooperative cancellation. * @returns Parsed findings or null if both attempts failed. */ async function runCritic ( - sandbox: SandboxInstance, - spec: TaskSpec, + ctx: LoopContext, round: number, - nonce: string, - strategy: LoopStrategy, - signal?: AbortSignal + nonce: string ): Promise { + const { baseBranch, sandbox, signal, spec, strategy } = ctx + let critic = await sandbox.run({ - agent: sandcastle.opencode(AGENT_CRITIC_MODEL), + agent: sandcastle.opencode(strategy.criticModel ?? AGENT_CRITIC_MODEL), completionSignal: COMPLETION_SIGNAL, idleTimeoutSeconds: AGENT_IDLE_TIMEOUT_S, maxIterations: 1, name: `Critic #${spec.id} R${String(round)}`, - promptArgs: strategy.buildCriticArgs(spec, nonce), + promptArgs: strategy.buildCriticArgs(spec, nonce, baseBranch), promptFile: strategy.criticPromptFile, signal, }) @@ -608,12 +606,12 @@ async function runCritic ( if (findings === null) { console.warn(` #${spec.id}: Critic parse failed. Retrying.`) critic = await sandbox.run({ - agent: sandcastle.opencode(AGENT_CRITIC_MODEL), + agent: sandcastle.opencode(strategy.criticModel ?? AGENT_CRITIC_MODEL), completionSignal: COMPLETION_SIGNAL, idleTimeoutSeconds: AGENT_IDLE_TIMEOUT_S, maxIterations: 1, name: `Critic #${spec.id} R${String(round)} retry`, - promptArgs: strategy.buildCriticArgs(spec, nonce), + promptArgs: strategy.buildCriticArgs(spec, nonce, baseBranch), promptFile: strategy.criticPromptFile, signal, }) diff --git a/.sandcastle/strategies/implement/critic-prompt.md b/.sandcastle/strategies/implement/critic-prompt.md index ea2c76b1..1ca9ec90 100644 --- a/.sandcastle/strategies/implement/critic-prompt.md +++ b/.sandcastle/strategies/implement/critic-prompt.md @@ -4,7 +4,7 @@ Analyze the implementation on branch `{{BRANCH}}` and produce structured finding ## Task -Run `git diff main...{{BRANCH}}` to see all changes. Examine the diff carefully. For each issue found, produce a structured finding. +Run `git diff {{BASE_BRANCH}}...{{BRANCH}}` to see all changes. Examine the diff carefully. For each issue found, produce a structured finding. Read `AGENTS.md`, `CONTRIBUTING.md` and `.serena/memories/code_style_conventions`. diff --git a/.sandcastle/strategies/implement/strategy.ts b/.sandcastle/strategies/implement/strategy.ts index 27f36ea5..0a3a9d13 100644 --- a/.sandcastle/strategies/implement/strategy.ts +++ b/.sandcastle/strategies/implement/strategy.ts @@ -15,7 +15,8 @@ export const implementStrategy: FinalizationConfig & LoopStrategy = { TASK_ID: spec.id, }), - buildCriticArgs: (spec, nonce) => ({ + buildCriticArgs: (spec, nonce, baseBranch) => ({ + BASE_BRANCH: baseBranch, BRANCH: spec.branch, NONCE: nonce, }), @@ -26,20 +27,26 @@ export const implementStrategy: FinalizationConfig & LoopStrategy = { const cwd = sandbox.worktreePath let validationPassed = await runValidation(cwd, spec) - const rebaseSucceeded = await attemptRebase(cwd) + const rebaseSucceeded = await attemptRebase(cwd, loopResult.baseBranch) if (rebaseSucceeded && validationPassed) { if (!(await runValidation(cwd, spec))) { validationPassed = false } } - const pushSucceeded = await pushBranch(cwd, spec, rebaseSucceeded) + const pushSucceeded = await pushBranch(spec, cwd, rebaseSucceeded) if (!pushSucceeded) { console.error(` #${spec.id}: Push failed; cannot create PR without remote branch.`) return { success: false } } - const { isDraft, prArgs } = buildPrArgs(spec, loopResult, validationPassed, rebaseSucceeded) + const { isDraft, prArgs } = buildPrArgs( + spec, + loopResult, + validationPassed, + rebaseSucceeded, + loopResult.baseBranch + ) let prCreated = false try { diff --git a/.sandcastle/types.ts b/.sandcastle/types.ts index 7a7cd940..c51deeb2 100644 --- a/.sandcastle/types.ts +++ b/.sandcastle/types.ts @@ -4,7 +4,7 @@ import { z } from 'zod' /** Zod schema for a single critic finding. */ export const FindingSchema = z.object({ - category: z.enum(['security', 'logic', 'performance', 'architecture', 'style']), + category: z.string(), confidence: z.enum(['HIGH', 'MEDIUM', 'LOW']), description: z.string(), file: z.string(), @@ -32,8 +32,19 @@ export type FinalizationConfig = { /** A single critic finding parsed from agent output. */ export type Finding = z.infer +/** Invariant context for a refinement loop run. */ +export interface LoopContext { + readonly baseBranch: string + readonly sandbox: SandboxInstance + readonly signal?: AbortSignal + readonly spec: TaskSpec + readonly strategy: LoopStrategy +} + /** Result returned by the refinement loop. */ export interface LoopResult { + /** Base branch used for this loop run. */ + baseBranch: string /** Outstanding findings from the last round. */ lastFindings: Finding[] /** Number of rounds completed. */ @@ -53,16 +64,22 @@ export type LoopStatus = 'converged' | 'exhausted' | 'failed' | 'skipped' */ // eslint-disable-next-line @typescript-eslint/consistent-type-definitions export type LoopStrategy = { - /** Path to the actor (implementer) prompt file. */ + /** Model for the actor agent. Defaults to AGENT_ACTOR_MODEL constant. */ + actorModel?: string + /** Path to the actor prompt file. */ actorPromptFile: string /** Builds promptArgs for the actor run from task spec and previous findings. */ buildActorArgs: (spec: TaskSpec, findings: Finding[]) => Record - /** Builds promptArgs for the critic run from task spec and nonce. */ - buildCriticArgs: (spec: TaskSpec, nonce: string) => Record + /** Builds promptArgs for the critic run from task spec, nonce, and base branch. */ + buildCriticArgs: (spec: TaskSpec, nonce: string, baseBranch: string) => Record + /** Model for the critic agent. Defaults to AGENT_CRITIC_MODEL constant. */ + criticModel?: string /** Path to the critic prompt file. */ criticPromptFile: string /** Optional custom convergence check. When omitted, default loop logic applies. */ shouldConverge?: (findings: Finding[], round: number, totalCommits: number) => boolean + /** Optional mid-loop validation. Return true if work passes. When omitted, uses default validation command. */ + validate?: (cwd: string, spec: TaskSpec) => Promise } /** Type alias for a sandcastle sandbox instance. */ @@ -76,8 +93,8 @@ export interface TaskSpec { branch: string /** Task identifier (e.g. GitHub issue number as string). */ id: string - /** Label names associated with the task. */ - labels: string[] + /** Label names associated with the task (platform-specific, optional). */ + labels?: string[] /** Task title. */ title: string } -- 2.53.0