Cron: apply timeout to startup catch-up runs (#23966)

* Cron: apply timeout to startup catch-up runs

* Changelog: add cron startup timeout catch-up note
This commit is contained in:
Tak Hoffman
2026-02-22 17:04:30 -06:00
committed by GitHub
parent 26644c4b89
commit 73e5bb7635
3 changed files with 42 additions and 2 deletions

View File

@@ -67,6 +67,7 @@ Docs: https://docs.openclaw.ai
- Cron/Delivery: route text-only announce jobs with explicit thread/topic targets through direct outbound delivery so forum/thread destinations do not get dropped by intermediary announce turns. (#23841) Thanks @AndrewArto.
- Cron: honor `cron.maxConcurrentRuns` in the timer loop so due jobs can execute up to the configured parallelism instead of always running serially. (#11595) Thanks @Takhoffman.
- Cron/Run: enforce the same per-job timeout guard for manual `cron.run` executions as timer-driven runs, including abort propagation for isolated agent jobs, so forced runs cannot wedge indefinitely. (#23704) Thanks @tkuehnl.
- Cron/Startup: enforce per-job timeout guards for startup catch-up replay runs so missed isolated jobs cannot hang indefinitely during gateway boot recovery.
- Cron/Schedule: for `every` jobs, prefer `lastRunAtMs + everyMs` when still in the future after restarts, then fall back to anchor scheduling for catch-up windows, so NEXT timing matches the last successful cadence. (#22895) Thanks @SidQin-cyber.
- Cron/Service: execute manual `cron.run` jobs outside the cron lock (while still persisting started/finished state atomically) so `cron.list` and `cron.status` remain responsive during long forced runs. (#23628) Thanks @dsgraves.
- Cron/Timer: keep a watchdog recheck timer armed while `onTimer` is actively executing so the scheduler continues polling even if a due-run tick stalls for an extended period. (#23628) Thanks @dsgraves.

View File

@@ -8,7 +8,7 @@ import { CronService } from "./service.js";
import { createDeferred, createRunningCronServiceState } from "./service.test-harness.js";
import { computeJobNextRunAtMs } from "./service/jobs.js";
import { createCronServiceState, type CronEvent } from "./service/state.js";
import { onTimer } from "./service/timer.js";
import { onTimer, runMissedJobs } from "./service/timer.js";
import type { CronJob, CronJobState } from "./types.js";
const noopLogger = {
@@ -820,6 +820,45 @@ describe("Cron issue regressions", () => {
cron.stop();
});
it("applies timeoutSeconds to startup catch-up isolated executions", async () => {
vi.useRealTimers();
const store = await makeStorePath();
const scheduledAt = Date.parse("2026-02-15T13:00:00.000Z");
const cronJob = createIsolatedRegressionJob({
id: "startup-timeout",
name: "startup timeout",
scheduledAt,
schedule: { kind: "at", at: new Date(scheduledAt).toISOString() },
payload: { kind: "agentTurn", message: "work", timeoutSeconds: 0.01 },
state: { nextRunAtMs: scheduledAt },
});
await writeCronJobs(store.storePath, [cronJob]);
let now = scheduledAt;
const abortAwareRunner = createAbortAwareIsolatedRunner();
const state = createCronServiceState({
cronEnabled: true,
storePath: store.storePath,
log: noopLogger,
nowMs: () => now,
enqueueSystemEvent: vi.fn(),
requestHeartbeatNow: vi.fn(),
runIsolatedAgentJob: vi.fn(async (params) => {
const result = await abortAwareRunner.runIsolatedAgentJob(params);
now += 5;
return result;
}),
});
await runMissedJobs(state);
expect(abortAwareRunner.getObservedAbortSignal()).toBeDefined();
expect(abortAwareRunner.getObservedAbortSignal()?.aborted).toBe(true);
const job = state.store?.jobs.find((entry) => entry.id === "startup-timeout");
expect(job?.state.lastStatus).toBe("error");
expect(job?.state.lastError).toContain("timed out");
});
it("retries cron schedule computation from the next second when the first attempt returns undefined (#17821)", () => {
const scheduledAt = Date.parse("2026-02-15T13:00:00.000Z");
const cronJob = createIsolatedRegressionJob({

View File

@@ -547,7 +547,7 @@ export async function runMissedJobs(
const startedAt = state.deps.nowMs();
emit(state, { jobId: candidate.job.id, action: "started", runAtMs: startedAt });
try {
const result = await executeJobCore(state, candidate.job);
const result = await executeJobCoreWithTimeout(state, candidate.job);
outcomes.push({
jobId: candidate.jobId,
status: result.status,