fix(cron): isolate maintenance schedule recompute errors

This commit is contained in:
sebslight
2026-02-16 08:22:54 -05:00
parent aed8b90a0a
commit 1ffe6a45af
3 changed files with 88 additions and 21 deletions

View File

@@ -65,6 +65,7 @@ Docs: https://docs.openclaw.ai
- Auto-reply/WhatsApp/TUI/Web: when a final assistant message is `NO_REPLY` and a messaging tool send succeeded, mirror the delivered messaging-tool text into session-visible assistant output so TUI/Web no longer show `NO_REPLY` placeholders. (#7010) Thanks @Morrowind-Xie.
- Auto-reply/TTS: keep tool-result media delivery enabled in group chats and native command sessions (while still suppressing tool summary text) so `NO_REPLY` follow-ups do not drop successful TTS audio. (#17991) Thanks @zerone0x.
- Cron: infer `payload.kind="agentTurn"` for model-only `cron.update` payload patches, so partial agent-turn updates do not fail validation when `kind` is omitted. (#15664) Thanks @rodrigouroz.
- Cron: preserve per-job schedule-error isolation in post-run maintenance recompute so malformed sibling jobs no longer abort persistence of successful runs. (#17852) Thanks @pierreeurope.
- TUI: make searchable-select filtering and highlight rendering ANSI-aware so queries ignore hidden escape codes and no longer corrupt ANSI styling sequences during match highlighting. (#4519) Thanks @bee4come.
- TUI/Windows: coalesce rapid single-line submit bursts in Git Bash into one multiline message as a fallback when bracketed paste is unavailable, preventing pasted multiline text from being split into multiple sends. (#4986) Thanks @adamkane.
- TUI: suppress false `(no output)` placeholders for non-local empty final events during concurrent runs, preventing external-channel replies from showing empty assistant bubbles while a local run is still streaming. (#5782) Thanks @LagWizard and @vignesh07.

View File

@@ -127,4 +127,45 @@ describe("issue #13992 regression - cron jobs skip execution", () => {
// But should NOT have changed nextRunAtMs (it's still future)
expect(job.state.nextRunAtMs).toBe(futureTime);
});
it("isolates schedule errors while filling missing nextRunAtMs", () => {
const now = Date.now();
const pastDue = now - 1_000;
const dueJob: CronJob = {
id: "due-job",
name: "due job",
enabled: true,
schedule: { kind: "cron", expr: "0 8 * * *", tz: "UTC" },
payload: { kind: "systemEvent", text: "due" },
sessionTarget: "main",
createdAtMs: now - 3600_000,
updatedAtMs: now - 3600_000,
state: {
nextRunAtMs: pastDue,
},
};
const malformedJob: CronJob = {
id: "bad-job",
name: "bad job",
enabled: true,
schedule: { kind: "cron", expr: "not a valid cron", tz: "UTC" },
payload: { kind: "systemEvent", text: "bad" },
sessionTarget: "main",
createdAtMs: now - 3600_000,
updatedAtMs: now - 3600_000,
state: {
// missing nextRunAtMs
},
};
const state = createMockState([dueJob, malformedJob]);
expect(() => recomputeNextRunsForMaintenance(state)).not.toThrow();
expect(dueJob.state.nextRunAtMs).toBe(pastDue);
expect(malformedJob.state.nextRunAtMs).toBeUndefined();
expect(malformedJob.state.scheduleErrorCount).toBe(1);
expect(malformedJob.state.lastError).toMatch(/^schedule error:/);
});
});

View File

@@ -102,6 +102,35 @@ export function computeJobNextRunAtMs(job: CronJob, nowMs: number): number | und
/** Maximum consecutive schedule errors before auto-disabling a job. */
const MAX_SCHEDULE_ERRORS = 3;
function recordScheduleComputeError(params: {
state: CronServiceState;
job: CronJob;
err: unknown;
}): boolean {
const { state, job, err } = params;
const errorCount = (job.state.scheduleErrorCount ?? 0) + 1;
const errText = String(err);
job.state.scheduleErrorCount = errorCount;
job.state.nextRunAtMs = undefined;
job.state.lastError = `schedule error: ${errText}`;
if (errorCount >= MAX_SCHEDULE_ERRORS) {
job.enabled = false;
state.deps.log.error(
{ jobId: job.id, name: job.name, errorCount, err: errText },
"cron: auto-disabled job after repeated schedule errors",
);
} else {
state.deps.log.warn(
{ jobId: job.id, name: job.name, errorCount, err: errText },
"cron: failed to compute next run for job (skipping)",
);
}
return true;
}
function normalizeJobTickState(params: { state: CronServiceState; job: CronJob; nowMs: number }): {
changed: boolean;
skip: boolean;
@@ -184,23 +213,8 @@ export function recomputeNextRuns(state: CronServiceState): boolean {
changed = true;
}
} catch (err) {
const errorCount = (job.state.scheduleErrorCount ?? 0) + 1;
job.state.scheduleErrorCount = errorCount;
job.state.nextRunAtMs = undefined;
job.state.lastError = `schedule error: ${String(err)}`;
changed = true;
if (errorCount >= MAX_SCHEDULE_ERRORS) {
job.enabled = false;
state.deps.log.error(
{ jobId: job.id, name: job.name, errorCount, err: String(err) },
"cron: auto-disabled job after repeated schedule errors",
);
} else {
state.deps.log.warn(
{ jobId: job.id, name: job.name, errorCount, err: String(err) },
"cron: failed to compute next run for job (skipping)",
);
if (recordScheduleComputeError({ state, job, err })) {
changed = true;
}
}
}
@@ -222,10 +236,21 @@ export function recomputeNextRunsForMaintenance(state: CronServiceState): boolea
// If a job was past-due but not found by findDueJobs, recomputing would
// cause it to be silently skipped.
if (job.state.nextRunAtMs === undefined) {
const newNext = computeJobNextRunAtMs(job, now);
if (newNext !== undefined) {
job.state.nextRunAtMs = newNext;
changed = true;
try {
const newNext = computeJobNextRunAtMs(job, now);
if (job.state.nextRunAtMs !== newNext) {
job.state.nextRunAtMs = newNext;
changed = true;
}
// Clear schedule error count on successful computation.
if (job.state.scheduleErrorCount) {
job.state.scheduleErrorCount = undefined;
changed = true;
}
} catch (err) {
if (recordScheduleComputeError({ state, job, err })) {
changed = true;
}
}
}
return changed;