From 1ffe6a45afac27fd5b0a8c4fd087f7a8fadd1143 Mon Sep 17 00:00:00 2001 From: sebslight <19554889+sebslight@users.noreply.github.com> Date: Mon, 16 Feb 2026 08:22:54 -0500 Subject: [PATCH] fix(cron): isolate maintenance schedule recompute errors --- CHANGELOG.md | 1 + .../service.issue-13992-regression.test.ts | 41 ++++++++++++ src/cron/service/jobs.ts | 67 +++++++++++++------ 3 files changed, 88 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b2644017117..5b540aa70fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -65,6 +65,7 @@ Docs: https://docs.openclaw.ai - Auto-reply/WhatsApp/TUI/Web: when a final assistant message is `NO_REPLY` and a messaging tool send succeeded, mirror the delivered messaging-tool text into session-visible assistant output so TUI/Web no longer show `NO_REPLY` placeholders. (#7010) Thanks @Morrowind-Xie. - Auto-reply/TTS: keep tool-result media delivery enabled in group chats and native command sessions (while still suppressing tool summary text) so `NO_REPLY` follow-ups do not drop successful TTS audio. (#17991) Thanks @zerone0x. - Cron: infer `payload.kind="agentTurn"` for model-only `cron.update` payload patches, so partial agent-turn updates do not fail validation when `kind` is omitted. (#15664) Thanks @rodrigouroz. +- Cron: preserve per-job schedule-error isolation in post-run maintenance recompute so malformed sibling jobs no longer abort persistence of successful runs. (#17852) Thanks @pierreeurope. - TUI: make searchable-select filtering and highlight rendering ANSI-aware so queries ignore hidden escape codes and no longer corrupt ANSI styling sequences during match highlighting. (#4519) Thanks @bee4come. - TUI/Windows: coalesce rapid single-line submit bursts in Git Bash into one multiline message as a fallback when bracketed paste is unavailable, preventing pasted multiline text from being split into multiple sends. (#4986) Thanks @adamkane. - TUI: suppress false `(no output)` placeholders for non-local empty final events during concurrent runs, preventing external-channel replies from showing empty assistant bubbles while a local run is still streaming. (#5782) Thanks @LagWizard and @vignesh07. diff --git a/src/cron/service.issue-13992-regression.test.ts b/src/cron/service.issue-13992-regression.test.ts index 256060093fa..5ae776b0106 100644 --- a/src/cron/service.issue-13992-regression.test.ts +++ b/src/cron/service.issue-13992-regression.test.ts @@ -127,4 +127,45 @@ describe("issue #13992 regression - cron jobs skip execution", () => { // But should NOT have changed nextRunAtMs (it's still future) expect(job.state.nextRunAtMs).toBe(futureTime); }); + + it("isolates schedule errors while filling missing nextRunAtMs", () => { + const now = Date.now(); + const pastDue = now - 1_000; + + const dueJob: CronJob = { + id: "due-job", + name: "due job", + enabled: true, + schedule: { kind: "cron", expr: "0 8 * * *", tz: "UTC" }, + payload: { kind: "systemEvent", text: "due" }, + sessionTarget: "main", + createdAtMs: now - 3600_000, + updatedAtMs: now - 3600_000, + state: { + nextRunAtMs: pastDue, + }, + }; + + const malformedJob: CronJob = { + id: "bad-job", + name: "bad job", + enabled: true, + schedule: { kind: "cron", expr: "not a valid cron", tz: "UTC" }, + payload: { kind: "systemEvent", text: "bad" }, + sessionTarget: "main", + createdAtMs: now - 3600_000, + updatedAtMs: now - 3600_000, + state: { + // missing nextRunAtMs + }, + }; + + const state = createMockState([dueJob, malformedJob]); + + expect(() => recomputeNextRunsForMaintenance(state)).not.toThrow(); + expect(dueJob.state.nextRunAtMs).toBe(pastDue); + expect(malformedJob.state.nextRunAtMs).toBeUndefined(); + expect(malformedJob.state.scheduleErrorCount).toBe(1); + expect(malformedJob.state.lastError).toMatch(/^schedule error:/); + }); }); diff --git a/src/cron/service/jobs.ts b/src/cron/service/jobs.ts index 4185987f5a7..f5e96e2b4b7 100644 --- a/src/cron/service/jobs.ts +++ b/src/cron/service/jobs.ts @@ -102,6 +102,35 @@ export function computeJobNextRunAtMs(job: CronJob, nowMs: number): number | und /** Maximum consecutive schedule errors before auto-disabling a job. */ const MAX_SCHEDULE_ERRORS = 3; +function recordScheduleComputeError(params: { + state: CronServiceState; + job: CronJob; + err: unknown; +}): boolean { + const { state, job, err } = params; + const errorCount = (job.state.scheduleErrorCount ?? 0) + 1; + const errText = String(err); + + job.state.scheduleErrorCount = errorCount; + job.state.nextRunAtMs = undefined; + job.state.lastError = `schedule error: ${errText}`; + + if (errorCount >= MAX_SCHEDULE_ERRORS) { + job.enabled = false; + state.deps.log.error( + { jobId: job.id, name: job.name, errorCount, err: errText }, + "cron: auto-disabled job after repeated schedule errors", + ); + } else { + state.deps.log.warn( + { jobId: job.id, name: job.name, errorCount, err: errText }, + "cron: failed to compute next run for job (skipping)", + ); + } + + return true; +} + function normalizeJobTickState(params: { state: CronServiceState; job: CronJob; nowMs: number }): { changed: boolean; skip: boolean; @@ -184,23 +213,8 @@ export function recomputeNextRuns(state: CronServiceState): boolean { changed = true; } } catch (err) { - const errorCount = (job.state.scheduleErrorCount ?? 0) + 1; - job.state.scheduleErrorCount = errorCount; - job.state.nextRunAtMs = undefined; - job.state.lastError = `schedule error: ${String(err)}`; - changed = true; - - if (errorCount >= MAX_SCHEDULE_ERRORS) { - job.enabled = false; - state.deps.log.error( - { jobId: job.id, name: job.name, errorCount, err: String(err) }, - "cron: auto-disabled job after repeated schedule errors", - ); - } else { - state.deps.log.warn( - { jobId: job.id, name: job.name, errorCount, err: String(err) }, - "cron: failed to compute next run for job (skipping)", - ); + if (recordScheduleComputeError({ state, job, err })) { + changed = true; } } } @@ -222,10 +236,21 @@ export function recomputeNextRunsForMaintenance(state: CronServiceState): boolea // If a job was past-due but not found by findDueJobs, recomputing would // cause it to be silently skipped. if (job.state.nextRunAtMs === undefined) { - const newNext = computeJobNextRunAtMs(job, now); - if (newNext !== undefined) { - job.state.nextRunAtMs = newNext; - changed = true; + try { + const newNext = computeJobNextRunAtMs(job, now); + if (job.state.nextRunAtMs !== newNext) { + job.state.nextRunAtMs = newNext; + changed = true; + } + // Clear schedule error count on successful computation. + if (job.state.scheduleErrorCount) { + job.state.scheduleErrorCount = undefined; + changed = true; + } + } catch (err) { + if (recordScheduleComputeError({ state, job, err })) { + changed = true; + } } } return changed;