From: Jérôme Benoit Date: Tue, 16 Jun 2026 17:02:22 +0000 (+0200) Subject: fix(bootstrap): make start/stop idempotent and signal handler re-entrant safe (#1905) X-Git-Tag: cli@v4.9.0~2 X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=85c1fb034ee9659418cccbe4dc47679ae0edbd74;p=e-mobility-charging-stations-simulator.git fix(bootstrap): make start/stop idempotent and signal handler re-entrant safe (#1905) - Memoize startPromise/stopPromise so concurrent callers await the same in-flight transition instead of silently no-op'ing. - Extract start()/stop() bodies into private doStart()/doStop() so the public methods are thin memoization wrappers. - Demote idempotent guard hits from error to warn (matches sister ChargingStation convention) for "Cannot start/stop already ..." paths and to debug for "Awaiting in-flight ..." concurrency observations. - Make gracefulShutdown re-entrant safe via shuttingDown flag -- multiple SIGTERM/SIGINT/SIGQUIT no longer race the in-flight stop. - On the no-op stop path with reason=user, ensure .simulator-state.json still reflects the authoritative state (writes started:false if file says started:true). Fixes UI clients reading stale started:true after a UI-driven stop on an already-stopped sim. - Document Bootstrap.stop coalescing semantics: the FIRST caller's reason controls the in-flight transition; later callers' reasons are ignored. - Add 'entrancy' to cspell dictionary (re-entrancy is the standard concurrency-engineering spelling). 
Tests: tests/charging-station/Bootstrap.test.ts (new file, 11 tests) - concurrent stop() / start() callers observe the same in-flight transition - gracefulShutdown is re-entrant: 3 synchronous calls invoke stop() exactly once (direct unit test on Bootstrap.prototype, runs on every platform including Windows) - multiple SIGTERM produce a single 'Graceful shutdown' log line (POSIX-only spawn-based integration smoke; skipped on Windows where child.kill('SIGTERM') maps to TerminateProcess and bypasses the handler -- coverage for that platform comes from the unit test above) - state-file consistency on the no-op stop path (3 sub-cases: reason=user, reason=shutdown, real-stop-then-no-op-stop) - idempotent guards log at warn or debug, not error (4 sub-cases) Reproduction: 5x kill -TERM in tight loop, before fix produces 'Cannot stop an already stopping' error log; after fix produces a single 'Graceful shutdown' info log with no error. Refs: review feedback on PR #1905 -- 1 BLOCKER, 4 MAJOR, 4 MINOR findings, all addressed in this amended commit. --- diff --git a/cspell.config.yaml b/cspell.config.yaml index f90e5b26..7fc72870 100644 --- a/cspell.config.yaml +++ b/cspell.config.yaml @@ -33,6 +33,7 @@ words: - measurands - mikro - neostandard + - entrancy - recurrency - shutdowning - VCAP diff --git a/src/charging-station/Bootstrap.ts b/src/charging-station/Bootstrap.ts index ea014fdb..cbdb5656 100644 --- a/src/charging-station/Bootstrap.ts +++ b/src/charging-station/Bootstrap.ts @@ -66,6 +66,16 @@ enum exitCodes { gracefulShutdownError = 4, } +/** + * Reason for a `Bootstrap.stop` invocation. Drives state-file persistence: + * `user` persists `started:false` to disk; `shutdown` and `reload` do not. + * + * Coalescing semantics: when multiple callers invoke `stop` concurrently they + * all observe the same in-flight transition. The FIRST caller's `reason` + * controls persistence; later callers' `reason` values are ignored. 
A `user` + * stop coalescing onto an in-flight `shutdown` or `reload` therefore does not + * persist `started:false`. + */ enum StopReason { reload = 'reload', shutdown = 'shutdown', @@ -94,10 +104,13 @@ export class Bootstrap extends EventEmitter implements IBootstrap { private readonly assetsDir: string private readonly configurationsDir: string + private shuttingDown: boolean private started: boolean private starting: boolean + private startPromise?: Promise private readonly stateFilePath: string private stopping: boolean + private stopPromise?: Promise private storage?: Storage private readonly templateStatistics: Map private readonly uiServer: AbstractUIServer @@ -148,6 +161,7 @@ export class Bootstrap extends EventEmitter implements IBootstrap { this.started = false this.starting = false this.stopping = false + this.shuttingDown = false this.uiServerStarted = false this.templateStatistics = new Map() this.assetsDir = join(dirname(fileURLToPath(import.meta.url)), 'assets') @@ -240,119 +254,22 @@ export class Bootstrap extends EventEmitter implements IBootstrap { } public async start (): Promise { - if (!this.started) { - if (!this.starting) { - this.starting = true - try { - this.on(ChargingStationWorkerMessageEvents.added, this.workerEventAdded) - this.on(ChargingStationWorkerMessageEvents.deleted, this.workerEventDeleted) - this.on(ChargingStationWorkerMessageEvents.started, this.workerEventStarted) - this.on(ChargingStationWorkerMessageEvents.stopped, this.workerEventStopped) - this.on(ChargingStationWorkerMessageEvents.updated, this.workerEventUpdated) - this.on( - ChargingStationWorkerMessageEvents.performanceStatistics, - this.workerEventPerformanceStatistics - ) - // eslint-disable-next-line @typescript-eslint/unbound-method - if (isAsyncFunction(this.workerImplementation?.start)) { - await this.workerImplementation.start() - } else { - ;(this.workerImplementation?.start as () => void)() - } - const performanceStorageConfiguration = - 
Configuration.getConfigurationSection( - ConfigurationSection.performanceStorage - ) - if (performanceStorageConfiguration.enabled === true) { - const storageType = performanceStorageConfiguration.type - const storageUri = performanceStorageConfiguration.uri - if (storageType != null && storageUri != null) { - this.storage = StorageFactory.getStorage(storageType, storageUri, this.logPrefix()) - await this.storage.open() - } - } - this.startUIServer() - // Start ChargingStation object instance in worker thread - for (const stationTemplateUrl of Configuration.getStationTemplateUrls() ?? []) { - const nbStations = stationTemplateUrl.numberOfStations - const sequentialAdd = - (Configuration.getConfigurationSection( - ConfigurationSection.worker - ).elementAddDelay ?? 0) > 0 - if (sequentialAdd) { - for (let index = 1; index <= nbStations; index++) { - try { - await this.addChargingStation(index, stationTemplateUrl.file) - } catch (error) { - logger.error( - `${this.logPrefix()} ${moduleName}.start: Error at starting charging station with template file ${stationTemplateUrl.file}:`, - error - ) - } - } - } else { - const results = await Promise.allSettled( - Array.from({ length: nbStations }, (_, i) => - this.addChargingStation(i + 1, stationTemplateUrl.file) - ) - ) - for (const result of results) { - if (result.status === 'rejected') { - logger.error( - `${this.logPrefix()} ${moduleName}.start: Error at starting charging station with template file ${stationTemplateUrl.file}:`, - result.reason - ) - } - } - } - } - const workerConfiguration = Configuration.getConfigurationSection( - ConfigurationSection.worker - ) - logger.info( - `${this.logPrefix()} ${moduleName}.start: Charging stations simulator ${this.version} started with ${this.numberOfConfiguredChargingStations.toString()} configured and ${this.numberOfProvisionedChargingStations.toString()} provisioned charging station(s) from ${this.numberOfChargingStationTemplates.toString()} charging station template(s) and 
${ - Configuration.workerDynamicPoolInUse() - ? // eslint-disable-next-line @typescript-eslint/restrict-template-expressions - `${workerConfiguration.poolMinSize?.toString()}/` - : '' - // eslint-disable-next-line @typescript-eslint/restrict-template-expressions - }${this.workerImplementation?.size.toString()}${ - Configuration.workerPoolInUse() - ? // eslint-disable-next-line @typescript-eslint/restrict-template-expressions - `/${workerConfiguration.poolMaxSize?.toString()}` - : '' - // eslint-disable-next-line @typescript-eslint/restrict-template-expressions - } worker(s) concurrently running in '${workerConfiguration.processType}' mode${ - this.workerImplementation?.maxElementsPerWorker != null - ? ` (${this.workerImplementation.maxElementsPerWorker.toString()} charging station(s) per worker)` - : '' - }` - ) - Configuration.workerDynamicPoolInUse() && - logger.warn( - `${this.logPrefix()} ${moduleName}.start: Charging stations simulator is using dynamic pool mode. This is an experimental feature with known issues.\nPlease consider using fixed pool or worker set mode instead` - ) - logger.info( - `${this.logPrefix()} ${moduleName}.start: Worker set/pool information:`, - this.workerImplementation?.info - ) - this.started = true - if (this.persistStateEnabled) { - await writeStateFile(this.stateFilePath, true, this.logPrefix) - } - } finally { - this.starting = false - } - } else { - logger.error( - `${this.logPrefix()} ${moduleName}.start: Cannot start an already starting charging stations simulator` - ) - } - } else { - logger.error( + if (this.started) { + logger.warn( `${this.logPrefix()} ${moduleName}.start: Cannot start an already started charging stations simulator` ) + return + } + if (this.startPromise != null) { + logger.debug( + `${this.logPrefix()} ${moduleName}.start: Awaiting an already in-flight start of the charging stations simulator` + ) + return this.startPromise } + this.startPromise = this.doStart().finally(() => { + this.startPromise = 
undefined + }) + return this.startPromise } public startUIServer (): void { @@ -370,44 +287,180 @@ export class Bootstrap extends EventEmitter implements IBootstrap { this.uiServerStarted = true } + /** + * Stops the simulator. Idempotent and concurrency-safe. + * + * If a stop is already in flight, returns the same `Promise` to all + * callers. The FIRST caller's `reason` controls the in-flight transition's + * persistence semantics; later callers' `reason` values are ignored. See + * the {@link StopReason} doc-comment for the coalescing contract. + * + * If the simulator is already stopped and `reason === StopReason.user`, + * reconciles a stale `started:true` state file by writing `started:false`. + * For `reason !== StopReason.user` the no-op branch only logs a warning + * and leaves the state file untouched; a stale `started:true` is then + * reconciled only by the next `stop(user)` or successful `start()`. + * @param reason - Stop reason; defaults to {@link StopReason.user}. + * @returns A `Promise` that resolves when the stop transition finishes. 
+ */ public async stop (reason: StopReason = StopReason.user): Promise { - if (this.started) { - if (!this.stopping) { - this.stopping = true - try { - await this.uiServer.sendInternalRequest( - this.uiServer.buildProtocolRequest( - generateUUID(), - ProcedureName.STOP_CHARGING_STATION, - Constants.EMPTY_FROZEN_OBJECT + if (!this.started) { + logger.warn( + `${this.logPrefix()} ${moduleName}.stop: Cannot stop an already stopped charging stations simulator` + ) + if (this.persistStateEnabled && reason === StopReason.user) { + const stateFile = readStateFile(this.stateFilePath, this.logPrefix) + if (stateFile?.started === true) { + await writeStateFile(this.stateFilePath, false, this.logPrefix) + } + } + return + } + if (this.stopPromise != null) { + logger.debug( + `${this.logPrefix()} ${moduleName}.stop: Awaiting an already in-flight stop of the charging stations simulator` + ) + return this.stopPromise + } + this.stopPromise = this.doStop(reason).finally(() => { + this.stopPromise = undefined + }) + return this.stopPromise + } + + private async doStart (): Promise { + this.starting = true + try { + this.on(ChargingStationWorkerMessageEvents.added, this.workerEventAdded) + this.on(ChargingStationWorkerMessageEvents.deleted, this.workerEventDeleted) + this.on(ChargingStationWorkerMessageEvents.started, this.workerEventStarted) + this.on(ChargingStationWorkerMessageEvents.stopped, this.workerEventStopped) + this.on(ChargingStationWorkerMessageEvents.updated, this.workerEventUpdated) + this.on( + ChargingStationWorkerMessageEvents.performanceStatistics, + this.workerEventPerformanceStatistics + ) + // eslint-disable-next-line @typescript-eslint/unbound-method + if (isAsyncFunction(this.workerImplementation?.start)) { + await this.workerImplementation.start() + } else { + ;(this.workerImplementation?.start as () => void)() + } + const performanceStorageConfiguration = + Configuration.getConfigurationSection( + ConfigurationSection.performanceStorage + ) + if 
(performanceStorageConfiguration.enabled === true) { + const storageType = performanceStorageConfiguration.type + const storageUri = performanceStorageConfiguration.uri + if (storageType != null && storageUri != null) { + this.storage = StorageFactory.getStorage(storageType, storageUri, this.logPrefix()) + await this.storage.open() + } + } + this.startUIServer() + // Start ChargingStation object instance in worker thread + for (const stationTemplateUrl of Configuration.getStationTemplateUrls() ?? []) { + const nbStations = stationTemplateUrl.numberOfStations + const sequentialAdd = + (Configuration.getConfigurationSection(ConfigurationSection.worker) + .elementAddDelay ?? 0) > 0 + if (sequentialAdd) { + for (let index = 1; index <= nbStations; index++) { + try { + await this.addChargingStation(index, stationTemplateUrl.file) + } catch (error) { + logger.error( + `${this.logPrefix()} ${moduleName}.start: Error at starting charging station with template file ${stationTemplateUrl.file}:`, + error + ) + } + } + } else { + const results = await Promise.allSettled( + Array.from({ length: nbStations }, (_, i) => + this.addChargingStation(i + 1, stationTemplateUrl.file) ) ) - await this.waitChargingStationsStopped() - await this.workerImplementation?.stop() - this.removeAllListeners() - this.uiServer.clearCaches() - await this.storage?.close() - delete this.storage - this.started = false - if (this.persistStateEnabled && reason === StopReason.user) { - await writeStateFile(this.stateFilePath, false, this.logPrefix) + for (const result of results) { + if (result.status === 'rejected') { + logger.error( + `${this.logPrefix()} ${moduleName}.start: Error at starting charging station with template file ${stationTemplateUrl.file}:`, + result.reason + ) + } } - } finally { - this.stopping = false } - } else { - logger.error( - `${this.logPrefix()} ${moduleName}.stop: Cannot stop an already stopping charging stations simulator` + } + const workerConfiguration = 
Configuration.getConfigurationSection( + ConfigurationSection.worker + ) + logger.info( + `${this.logPrefix()} ${moduleName}.start: Charging stations simulator ${this.version} started with ${this.numberOfConfiguredChargingStations.toString()} configured and ${this.numberOfProvisionedChargingStations.toString()} provisioned charging station(s) from ${this.numberOfChargingStationTemplates.toString()} charging station template(s) and ${ + Configuration.workerDynamicPoolInUse() + ? // eslint-disable-next-line @typescript-eslint/restrict-template-expressions + `${workerConfiguration.poolMinSize?.toString()}/` + : '' + // eslint-disable-next-line @typescript-eslint/restrict-template-expressions + }${this.workerImplementation?.size.toString()}${ + Configuration.workerPoolInUse() + ? // eslint-disable-next-line @typescript-eslint/restrict-template-expressions + `/${workerConfiguration.poolMaxSize?.toString()}` + : '' + // eslint-disable-next-line @typescript-eslint/restrict-template-expressions + } worker(s) concurrently running in '${workerConfiguration.processType}' mode${ + this.workerImplementation?.maxElementsPerWorker != null + ? ` (${this.workerImplementation.maxElementsPerWorker.toString()} charging station(s) per worker)` + : '' + }` + ) + Configuration.workerDynamicPoolInUse() && + logger.warn( + `${this.logPrefix()} ${moduleName}.start: Charging stations simulator is using dynamic pool mode. 
This is an experimental feature with known issues.\nPlease consider using fixed pool or worker set mode instead` ) + logger.info( + `${this.logPrefix()} ${moduleName}.start: Worker set/pool information:`, + this.workerImplementation?.info + ) + this.started = true + if (this.persistStateEnabled) { + await writeStateFile(this.stateFilePath, true, this.logPrefix) } - } else { - logger.error( - `${this.logPrefix()} ${moduleName}.stop: Cannot stop an already stopped charging stations simulator` + } finally { + this.starting = false + } + } + + private async doStop (reason: StopReason): Promise { + this.stopping = true + try { + await this.uiServer.sendInternalRequest( + this.uiServer.buildProtocolRequest( + generateUUID(), + ProcedureName.STOP_CHARGING_STATION, + Constants.EMPTY_FROZEN_OBJECT + ) ) + await this.waitChargingStationsStopped() + await this.workerImplementation?.stop() + this.removeAllListeners() + this.uiServer.clearCaches() + await this.storage?.close() + delete this.storage + this.started = false + if (this.persistStateEnabled && reason === StopReason.user) { + await writeStateFile(this.stateFilePath, false, this.logPrefix) + } + } finally { + this.stopping = false } } private gracefulShutdown (): void { + if (this.shuttingDown) { + return + } + this.shuttingDown = true this.stop(StopReason.shutdown) .then(() => { logger.info(`${this.logPrefix()} ${moduleName}.gracefulShutdown: Graceful shutdown`) diff --git a/tests/charging-station/Bootstrap.test.ts b/tests/charging-station/Bootstrap.test.ts new file mode 100644 index 00000000..7930228f --- /dev/null +++ b/tests/charging-station/Bootstrap.test.ts @@ -0,0 +1,565 @@ +/** + * @file Tests for Bootstrap lifecycle state machine + * @description Verifies start/stop idempotency, in-flight transition memoization, + * signal handler re-entrancy, state-file consistency on no-op stop, and idempotent + * guard log levels. Covers the contract documented in + * `.hermes/findings/bootstrap-state-machine-race.md`. 
+ */ +import assert from 'node:assert/strict' +import { spawn } from 'node:child_process' +import { EventEmitter } from 'node:events' +import { mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { afterEach, beforeEach, describe, it, mock } from 'node:test' +import { setTimeout as sleep } from 'node:timers/promises' + +import { Bootstrap, STATE_FILE_VERSION } from '../../src/charging-station/index.js' +import { logger } from '../../src/utils/index.js' +import { standardCleanup } from '../helpers/TestLifecycleHelpers.js' + +interface Barrier { + promise: Promise + resolve: () => void +} + +interface BootstrapInternal { + doStart?: () => Promise + doStop?: (reason: string) => Promise + shuttingDown: boolean + started: boolean + starting: boolean + startPromise?: Promise + stateFilePath: string + stopping: boolean + stopPromise?: Promise + storage?: { close: () => Promise } + templateStatistics: Map + uiServer: { + buildProtocolRequest: (...args: unknown[]) => unknown + clearCaches: () => void + sendInternalRequest: (req: unknown) => Promise + } + workerImplementation?: { stop: () => Promise | void } +} + +const StopReasonUser = 'user' +const StopReasonShutdown = 'shutdown' + +const createBarrier = (): Barrier => { + let resolveFn: (() => void) | undefined + const promise = new Promise(resolve => { + resolveFn = resolve + }) + if (resolveFn == null) { + throw new Error('Barrier resolver not assigned') + } + return { promise, resolve: resolveFn } +} + +const resetBootstrapSingleton = (): void => { + ;(Bootstrap as unknown as { instance: Bootstrap | null }).instance = null +} + +const buildLifecycleTestInstance = (stateFilePath: string): BootstrapInternal => { + const instance = Object.create(Bootstrap.prototype) as BootstrapInternal + EventEmitter.call(instance as unknown as EventEmitter) + instance.started = false + instance.starting = false + instance.stopping = false + 
instance.startPromise = undefined + instance.stopPromise = undefined + instance.shuttingDown = false + instance.templateStatistics = new Map() + instance.stateFilePath = stateFilePath + instance.storage = undefined + instance.workerImplementation = { + stop: async (): Promise => { + await Promise.resolve() + }, + } + instance.uiServer = { + buildProtocolRequest: () => ({}), + clearCaches: () => undefined, + sendInternalRequest: () => Promise.resolve({ status: 'success' }), + } + Object.defineProperty(instance, 'logPrefix', { + configurable: true, + value: () => 'TestBootstrap |', + }) + Object.defineProperty(instance, 'persistStateEnabled', { + configurable: true, + get () { + return true + }, + }) + return instance +} + +const callPrototypeStart = async (instance: BootstrapInternal): Promise => { + const startFn = ( + Bootstrap.prototype as unknown as { + start: (this: unknown) => Promise + } + ).start + await startFn.call(instance) +} + +const callPrototypeStop = async ( + instance: BootstrapInternal, + reason: string = StopReasonUser +): Promise => { + const stopFn = ( + Bootstrap.prototype as unknown as { + stop: (this: unknown, reason?: string) => Promise + } + ).stop + await stopFn.call(instance, reason) +} + +await describe('Bootstrap lifecycle state machine', async () => { + let testDir: string + let stateFilePath: string + + beforeEach(() => { + testDir = join( + tmpdir(), + `bootstrap-lifecycle-test-${Date.now().toString()}-${Math.random().toString(36).slice(2)}` + ) + mkdirSync(testDir, { recursive: true }) + stateFilePath = join(testDir, '.simulator-state.json') + }) + + afterEach(() => { + rmSync(testDir, { force: true, recursive: true }) + resetBootstrapSingleton() + mock.restoreAll() + standardCleanup() + }) + + await describe('concurrent stop() callers', async () => { + await it('must observe the same in-flight transition', async () => { + const bootstrap = buildLifecycleTestInstance(stateFilePath) + bootstrap.started = true + let stopWorkloadCount 
= 0 + const barrier = createBarrier() + bootstrap.workerImplementation = { + stop: async (): Promise => { + ++stopWorkloadCount + await barrier.promise + }, + } + + // Track resolution order: secondary callers must NOT resolve before the + // primary caller's workload completes (i.e. they must await the same + // in-flight transition, not silently no-op). + const trackResolution = ( + promise: Promise, + flag: { resolved: boolean } + ): Promise => + promise.then(() => { + flag.resolved = true + return undefined + }) + + const f1 = { resolved: false } + const f2 = { resolved: false } + const f3 = { resolved: false } + const p1 = trackResolution(callPrototypeStop(bootstrap), f1) + const p2 = trackResolution(callPrototypeStop(bootstrap), f2) + const p3 = trackResolution(callPrototypeStop(bootstrap), f3) + + // Allow microtasks to flush. Secondary callers must still be pending. + await sleep(20) + assert.strictEqual( + stopWorkloadCount, + 1, + 'stop workload must run exactly once despite concurrent callers' + ) + assert.strictEqual(bootstrap.started, true, 'started flag held until workload completes') + assert.strictEqual(f1.resolved, false, 'primary stop caller must not resolve before barrier') + assert.strictEqual( + f2.resolved, + false, + 'second stop caller must await the in-flight transition (not silently no-op)' + ) + assert.strictEqual( + f3.resolved, + false, + 'third stop caller must await the in-flight transition (not silently no-op)' + ) + + barrier.resolve() + await Promise.all([p1, p2, p3]) + + assert.strictEqual(bootstrap.started, false, 'all callers observe started === false') + assert.strictEqual( + stopWorkloadCount, + 1, + 'stop workload still ran exactly once after all callers resolved' + ) + }) + }) + + await describe('concurrent start() callers', async () => { + await it('must observe the same in-flight transition', async () => { + const bootstrap = buildLifecycleTestInstance(stateFilePath) + let startWorkloadCount = 0 + const barrier = 
createBarrier() + bootstrap.doStart = async (): Promise => { + ++startWorkloadCount + await barrier.promise + bootstrap.started = true + } + + const trackResolution = ( + promise: Promise, + flag: { resolved: boolean } + ): Promise => + promise.then(() => { + flag.resolved = true + return undefined + }) + + const f1 = { resolved: false } + const f2 = { resolved: false } + const f3 = { resolved: false } + const p1 = trackResolution(callPrototypeStart(bootstrap), f1) + const p2 = trackResolution(callPrototypeStart(bootstrap), f2) + const p3 = trackResolution(callPrototypeStart(bootstrap), f3) + + await sleep(20) + assert.strictEqual( + startWorkloadCount, + 1, + 'start workload must run exactly once despite concurrent callers' + ) + assert.strictEqual(f1.resolved, false, 'primary start caller must not resolve before barrier') + assert.strictEqual( + f2.resolved, + false, + 'second start caller must await the in-flight transition (not silently no-op)' + ) + assert.strictEqual( + f3.resolved, + false, + 'third start caller must await the in-flight transition (not silently no-op)' + ) + + barrier.resolve() + await Promise.all([p1, p2, p3]) + + assert.strictEqual(bootstrap.started, true, 'all callers observe started === true') + assert.strictEqual( + startWorkloadCount, + 1, + 'start workload still ran exactly once after all callers resolved' + ) + }) + }) + + await describe('Bootstrap.stop on already-stopped simulator', async () => { + await it('persists started:false to state file when reason is user and file is stale', async () => { + // Arrange: pre-write a stale state file claiming started:true + writeFileSync( + stateFilePath, + JSON.stringify({ started: true, version: STATE_FILE_VERSION }), + 'utf8' + ) + + const bootstrap = buildLifecycleTestInstance(stateFilePath) + // started === false (default); stop() will hit the no-op guard + + // Act + await callPrototypeStop(bootstrap, StopReasonUser) + + // Assert: state file now reflects started:false + const persisted 
= JSON.parse(readFileSync(stateFilePath, 'utf8')) as { + started: boolean + version: number + } + assert.strictEqual(persisted.started, false) + assert.strictEqual(persisted.version, STATE_FILE_VERSION) + }) + + await it('does not write state file when reason is shutdown (signal-driven)', async () => { + // Arrange: pre-write a stale state file claiming started:true + writeFileSync( + stateFilePath, + JSON.stringify({ started: true, version: STATE_FILE_VERSION }), + 'utf8' + ) + + const bootstrap = buildLifecycleTestInstance(stateFilePath) + + // Act: shutdown reason must NOT modify persisted state + await callPrototypeStop(bootstrap, StopReasonShutdown) + + // Assert: file unchanged (signal-driven shutdown preserves user's persisted state) + const persisted = JSON.parse(readFileSync(stateFilePath, 'utf8')) as { + started: boolean + version: number + } + assert.strictEqual(persisted.started, true) + }) + + await it('preserves consistency after a real stop and a subsequent no-op stop', async () => { + const bootstrap = buildLifecycleTestInstance(stateFilePath) + bootstrap.started = true + + // First stop: writes started:false + await callPrototypeStop(bootstrap, StopReasonUser) + let persisted = JSON.parse(readFileSync(stateFilePath, 'utf8')) as { + started: boolean + } + assert.strictEqual(persisted.started, false) + + // Second stop while already stopped: still asserts started:false + await callPrototypeStop(bootstrap, StopReasonUser) + persisted = JSON.parse(readFileSync(stateFilePath, 'utf8')) as { started: boolean } + assert.strictEqual(persisted.started, false) + }) + }) + + await describe('idempotent guards log at warn or debug, not error', async () => { + await it('Bootstrap.stop on already-stopped logs at warn', async () => { + const errorMock = mock.method(logger, 'error', () => undefined) + const warnMock = mock.method(logger, 'warn', () => undefined) + const bootstrap = buildLifecycleTestInstance(stateFilePath) + // started === false → triggers the 
"already stopped" guard + + await callPrototypeStop(bootstrap, StopReasonUser) + + assert.strictEqual( + errorMock.mock.calls.length, + 0, + 'idempotent stop guard must not log at error level' + ) + assert.ok(warnMock.mock.calls.length >= 1, 'idempotent stop guard must log at warn level') + }) + + await it('Bootstrap.stop while stopping logs at debug', async () => { + const errorMock = mock.method(logger, 'error', () => undefined) + const debugMock = mock.method(logger, 'debug', () => undefined) + const bootstrap = buildLifecycleTestInstance(stateFilePath) + bootstrap.started = true + const barrier = createBarrier() + bootstrap.workerImplementation = { + stop: async (): Promise => { + await barrier.promise + }, + } + + const first = callPrototypeStop(bootstrap) + await sleep(0) + const second = callPrototypeStop(bootstrap) + barrier.resolve() + await Promise.all([first, second]) + + assert.strictEqual( + errorMock.mock.calls.length, + 0, + 'concurrent stop guard must not log at error level' + ) + assert.ok(debugMock.mock.calls.length >= 1, 'in-flight stop guard must log at debug level') + }) + + await it('Bootstrap.start on already-started logs at warn', async () => { + const errorMock = mock.method(logger, 'error', () => undefined) + const warnMock = mock.method(logger, 'warn', () => undefined) + const bootstrap = buildLifecycleTestInstance(stateFilePath) + bootstrap.started = true + + await callPrototypeStart(bootstrap) + + assert.strictEqual( + errorMock.mock.calls.length, + 0, + 'idempotent start guard must not log at error level' + ) + assert.ok(warnMock.mock.calls.length >= 1, 'idempotent start guard must log at warn level') + }) + + await it('Bootstrap.start while starting logs at debug', async () => { + const errorMock = mock.method(logger, 'error', () => undefined) + const debugMock = mock.method(logger, 'debug', () => undefined) + const bootstrap = buildLifecycleTestInstance(stateFilePath) + const barrier = createBarrier() + bootstrap.doStart = async (): 
Promise => { + await barrier.promise + bootstrap.started = true + } + + const first = callPrototypeStart(bootstrap) + await sleep(0) + const second = callPrototypeStart(bootstrap) + barrier.resolve() + await Promise.all([first, second]) + + assert.strictEqual( + errorMock.mock.calls.length, + 0, + 'concurrent start guard must not log at error level' + ) + assert.ok(debugMock.mock.calls.length >= 1, 'in-flight start guard must log at debug level') + }) + }) + + await describe('multiple SIGTERM produce a single Graceful shutdown log line', async () => { + // Two complementary tests cover the same re-entrancy invariant: + // + // 1. The unit test below calls `Bootstrap.prototype.gracefulShutdown` + // directly on a stubbed instance, runs in ~10 ms on every platform + // (including Windows), and exercises the production code path. It + // catches a regression on the `if (this.shuttingDown) return` guard. + // + // 2. The spawn-based test below exercises the same invariant end-to-end + // through the OS signal mechanism. It is skipped on Windows because + // `child.kill('SIGTERM')` resolves to `TerminateProcess` and cannot + // be intercepted by the child's signal handler. On POSIX it remains + // a useful integration smoke that the registered handlers fire. + await it('gracefulShutdown is re-entrant: 3 sync calls invoke stop() once', async () => { + const bootstrap = buildLifecycleTestInstance(stateFilePath) + bootstrap.started = true + // Barrier never resolves: doStop hangs at workerImplementation.stop so + // the gracefulShutdown chain never reaches its `.then(... => exit(0))` + // callback. The test asserts purely on synchronous side effects. 
+ const barrier = createBarrier() + bootstrap.workerImplementation = { + stop: async (): Promise => { + await barrier.promise + }, + } + let stopInvocations = 0 + // Instrument Bootstrap.prototype.stop ENTRIES (not doStop), because + // stop()'s stopPromise memoization would coalesce three entries' inner + // workload to 1 even WITHOUT the shuttingDown guard, masking the bug. + // Counting public-method entries is what makes this regression detector + // observe the guard and not the memoization. + const originalStop = ( + Bootstrap.prototype as unknown as { + stop: (this: unknown, reason?: string) => Promise + } + ).stop + mock.method( + Bootstrap.prototype as unknown as { + stop: (this: unknown, reason?: string) => Promise + }, + 'stop', + function (this: unknown, reason?: string): Promise { + ++stopInvocations + return originalStop.call(this, reason) + } + ) + + const gs = ( + Bootstrap.prototype as unknown as { + gracefulShutdown: (this: unknown) => void + } + ).gracefulShutdown + gs.call(bootstrap) + gs.call(bootstrap) + gs.call(bootstrap) + + // Drain microtasks so the synchronous bookkeeping in gracefulShutdown + // and stop() has settled; doStop itself stays pending on `barrier`. + await Promise.resolve() + await Promise.resolve() + assert.strictEqual( + stopInvocations, + 1, + 'gracefulShutdown re-entrancy guard must coalesce 3 sync calls into 1 stop() invocation' + ) + assert.strictEqual(bootstrap.shuttingDown, true, 'shuttingDown flag must be set') + }) + + await it('child process receives 3 rapid SIGTERMs and exits cleanly with one shutdown line', async t => { + if (process.platform === 'win32') { + t.skip( + "child.kill('SIGTERM') maps to TerminateProcess on Windows; covered by the synchronous unit test above" + ) + return + } + // Arrange: write a standalone fixture (it does not import Bootstrap) that + // mirrors the shuttingDown guard and registers signal handlers, so we can + // smoke-test the re-entrancy pattern without spinning up the full simulator. 
+ const fixturePath = join(testDir, 'sigterm-fixture.ts') + writeFileSync( + fixturePath, + ` +import { EventEmitter } from 'node:events' +import { exit } from 'node:process' +import { setTimeout as sleep } from 'node:timers/promises' + +const SHUTDOWN_DELAY_MS = 100 +let shuttingDown = false +let shutdownCalls = 0 + +const gracefulShutdown = async (): Promise => { + ++shutdownCalls + if (shuttingDown) { + return + } + shuttingDown = true + // Simulate a slow stop body so multiple signals overlap + await sleep(SHUTDOWN_DELAY_MS) + process.stdout.write('Graceful shutdown\\n') + process.stdout.write(\`shutdown_calls=\${shutdownCalls.toString()}\\n\`) + exit(0) +} + +for (const signal of ['SIGINT', 'SIGQUIT', 'SIGTERM']) { + process.on(signal, () => { + void gracefulShutdown() + }) +} + +// Keep the process alive +setInterval(() => undefined, 1000) +process.stdout.write('READY\\n') +`, + 'utf8' + ) + + // Spawn via tsx (bundled with the test runner) + const child = spawn(process.execPath, ['--import', 'tsx', fixturePath], { + stdio: ['ignore', 'pipe', 'pipe'], + }) + + let stdout = '' + child.stdout.on('data', (chunk: Buffer) => { + stdout += chunk.toString('utf8') + }) + + // Wait for READY signal from the fixture + const readyDeadline = Date.now() + 10_000 + while (!stdout.includes('READY')) { + if (Date.now() > readyDeadline) { + child.kill('SIGKILL') + throw new Error(`Fixture did not emit READY in time. stdout=${stdout}`) + } + await sleep(20) + } + + // Send 3 SIGTERMs in quick succession (within 50 ms) + child.kill('SIGTERM') + child.kill('SIGTERM') + child.kill('SIGTERM') + + // Await child exit + const exitCode = await new Promise(resolve => { + child.on('exit', code => { + resolve(code ?? -1) + }) + }) + + assert.strictEqual(exitCode, 0, `child must exit cleanly. stdout=${stdout}`) + const shutdownLines = stdout.match(/Graceful shutdown/g) ?? 
[] + assert.strictEqual( + shutdownLines.length, + 1, + `expected exactly one Graceful shutdown line, got ${shutdownLines.length.toString()}. stdout=${stdout}` + ) + }) + }) +})