fix: repair model media probes

This commit is contained in:
Peter Steinberger
2026-05-10 04:41:11 +01:00
parent f81cfc0501
commit e07febd075
9 changed files with 199 additions and 61 deletions

View File

@@ -17,6 +17,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Telegram: handle managed select button callbacks before the raw callback fallback while preserving delimiter-containing option values such as `env|prod`. (#79816) Thanks @moeedahmed.
- CLI/media: let explicit image description model refs use bundled static provider catalogs and generic model-backed image hooks, so `openclaw infer image describe --model zai/glm-4.6v` works like direct model runs and Anthropic auth probes avoid stale Claude 3 Haiku catalog entries.
- Browser: wait longer for existing-session Chrome MCP status and non-deep doctor probes so slow first attaches do not falsely report offline while keeping raw CDP status probes short. (#77473) Thanks @rubencu.
- Exec approvals: keep `exec.approval.list` on the lightweight policy-summary path so listing pending approvals no longer loads the rich tree-sitter command explainer. (#76943) Thanks @rubencu.
- Agents: surface concise default-visible warnings when `exec`/`bash` tool calls fail after the assistant claims success, while keeping raw stderr hidden unless verbose details are enabled. Fixes #60497. (#80003) Thanks @jbetala7.

View File

@@ -334,6 +334,52 @@ describe("buildProbeTargets reason codes", () => {
});
});
// With no explicit model candidates, the Anthropic probe target must come from the
// catalog, and the dated Haiku 4.5 entry must win even though Claude 3 Haiku is listed first.
it("prefers live Anthropic Haiku 4.5 catalog entries over stale Claude 3 probes", async () => {
  mockStore = { version: 1, profiles: {}, order: {} };

  // Catalog order is deliberate: the stale Claude 3 entry comes first.
  const catalogEntries = [
    { provider: "anthropic", id: "claude-3-haiku-20240307", name: "Claude Haiku 3" },
    { provider: "anthropic", id: "claude-haiku-4-5-20251001", name: "Claude Haiku 4.5" },
    { provider: "anthropic", id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6" },
  ];
  loadModelCatalogMock.mockResolvedValueOnce(catalogEntries);

  const cfg = {
    models: {
      providers: {
        anthropic: {
          baseUrl: "https://api.anthropic.com/v1",
          api: "anthropic-messages",
          apiKey: "sk-ant-test",
          models: [],
        },
      },
    },
  } as OpenClawConfig;

  const { results, targets } = await buildProbeTargets({
    cfg,
    providers: ["anthropic"],
    modelCandidates: [],
    options: { timeoutMs: 5_000, concurrency: 1, maxTokens: 16 },
  });

  expect(results).toStrictEqual([]);
  expect(targets).toHaveLength(1);
  expect(targets[0]?.model).toStrictEqual({
    provider: "anthropic",
    model: "claude-haiku-4-5-20251001",
  });
});
it("uses workspace-scoped auth evidence when building env probe targets", async () => {
mockStore = {
version: 1,

View File

@@ -152,6 +152,29 @@ function buildCandidateMap(modelCandidates: string[]): Map<string, string[]> {
return map;
}
/**
 * Scores a catalog model id as a probe target for a provider; lower scores sort first.
 *
 * Only the "anthropic" provider is ranked. The id is trimmed and lower-cased before
 * matching, and the first matching rule wins:
 *   - `claude-haiku-4-5-` followed by exactly eight digits -> 0
 *   - `claude-haiku-4-5`                                    -> 1
 *   - `claude-sonnet-4-6` or `claude-sonnet-4-6-*`          -> 2
 *   - any other `claude-sonnet-4-*`                         -> 3
 *   - `claude-3-*`                                          -> 100
 * Every other id, and every id of any other provider, scores 50.
 *
 * @param provider - Provider id, compared verbatim against "anthropic".
 * @param modelId - Catalog model id to score.
 * @returns The priority score for the id.
 */
function catalogProbePriority(provider: string, modelId: string): number {
  const neutralPriority = 50;
  const normalizedId = modelId.trim().toLowerCase();
  if (provider !== "anthropic") {
    return neutralPriority;
  }

  // Ordered by precedence: the broader sonnet-4 prefix must come after the sonnet-4-6 rule.
  const rankedRules: ReadonlyArray<readonly [boolean, number]> = [
    [/^claude-haiku-4-5-\d{8}$/.test(normalizedId), 0],
    [normalizedId === "claude-haiku-4-5", 1],
    [normalizedId === "claude-sonnet-4-6" || normalizedId.startsWith("claude-sonnet-4-6-"), 2],
    [normalizedId.startsWith("claude-sonnet-4-"), 3],
    [normalizedId.startsWith("claude-3-"), 100],
  ];

  const firstMatch = rankedRules.find(([matched]) => matched);
  return firstMatch ? firstMatch[1] : neutralPriority;
}
function selectProbeModel(params: {
provider: string;
candidates: Map<string, string[]>;
@@ -162,7 +185,15 @@ function selectProbeModel(params: {
if (direct && direct.length > 0) {
return { provider, model: direct[0] };
}
const fromCatalog = catalog.find((entry) => normalizeProviderId(entry.provider) === provider);
const fromCatalog = catalog
.map((entry, index) => ({ entry, index }))
.filter(({ entry }) => normalizeProviderId(entry.provider) === provider)
.toSorted((left, right) => {
const priority =
catalogProbePriority(provider, left.entry.id) -
catalogProbePriority(provider, right.entry.id);
return priority || left.index - right.index;
})[0]?.entry;
if (fromCatalog) {
return { provider, model: fromCatalog.id };
}

View File

@@ -19,6 +19,7 @@ const hoisted = vi.hoisted(() => ({
fetchMock: vi.fn(),
registerProviderStreamForModelMock: vi.fn(),
prepareProviderDynamicModelMock: vi.fn(async () => {}),
resolveModelAsyncMock: vi.fn(),
resolveModelWithRegistryMock: vi.fn(),
}));
const {
@@ -32,6 +33,7 @@ const {
fetchMock,
registerProviderStreamForModelMock,
prepareProviderDynamicModelMock,
resolveModelAsyncMock,
resolveModelWithRegistryMock,
} = hoisted;
@@ -86,7 +88,7 @@ vi.mock("../plugins/provider-runtime.js", async () => ({
}));
vi.mock("../agents/pi-embedded-runner/model.js", () => ({
resolveModelWithRegistry: resolveModelWithRegistryMock,
resolveModelAsync: resolveModelAsyncMock,
}));
const { describeImageWithModel } = await import("./image.js");
@@ -126,6 +128,22 @@ describe("describeImageWithModel", () => {
({ modelRegistry, provider, modelId }: ResolveModelWithRegistryTestParams) =>
modelRegistry.find(provider, modelId),
);
resolveModelAsyncMock.mockImplementation(
async (provider: string, modelId: string, agentDir?: string, cfg?: unknown) => {
const authStorage = {
setRuntimeApiKey: setRuntimeApiKeyMock,
};
const modelRegistry = discoverModelsMock(authStorage, agentDir);
const model = resolveModelWithRegistryMock({
provider,
modelId,
modelRegistry,
cfg,
agentDir,
});
return { authStorage, model, modelRegistry };
},
);
});
function getApiKeyForModelCall(index = 0): AuthRequestCall {

View File

@@ -6,16 +6,15 @@ import {
requireApiKey,
resolveApiKeyForProvider,
} from "../agents/model-auth.js";
import { findNormalizedProviderValue, normalizeModelRef } from "../agents/model-selection.js";
import { normalizeModelRef } from "../agents/model-selection.js";
import { ensureOpenClawModelsJson } from "../agents/models-config.js";
import { resolveModelWithRegistry } from "../agents/pi-embedded-runner/model.js";
import { resolveModelAsync } from "../agents/pi-embedded-runner/model.js";
import { resolveProviderRequestCapabilities } from "../agents/provider-attribution.js";
import { registerProviderStreamForModel } from "../agents/provider-stream.js";
import {
coerceImageAssistantText,
hasImageReasoningOnlyResponse,
} from "../agents/tools/image-tool.helpers.js";
import { prepareProviderDynamicModel } from "../plugins/provider-runtime.js";
import type {
ImageDescriptionRequest,
ImageDescriptionResult,
@@ -23,15 +22,6 @@ import type {
ImagesDescriptionResult,
} from "./types.js";
// Cached promise for the dynamically imported model discovery runtime; null until first use.
let piModelDiscoveryRuntimePromise: Promise<
  typeof import("../agents/pi-model-discovery-runtime.js")
> | null = null;

/**
 * Returns the promise for the model discovery runtime module, starting the dynamic
 * import on the first call and reusing the same promise on later calls.
 */
function loadPiModelDiscoveryRuntime() {
  if (piModelDiscoveryRuntimePromise == null) {
    piModelDiscoveryRuntimePromise = import("../agents/pi-model-discovery-runtime.js");
  }
  return piModelDiscoveryRuntimePromise;
}
function resolveImageToolMaxTokens(modelMaxTokens: number | undefined, requestedMaxTokens = 4096) {
if (
typeof modelMaxTokens !== "number" ||
@@ -143,48 +133,17 @@ async function resolveImageRuntime(params: {
authStore?: ImageDescriptionRequest["authStore"];
}): Promise<{ apiKey: string; model: Model<Api> }> {
await ensureOpenClawModelsJson(params.cfg, params.agentDir);
const { discoverAuthStorage, discoverModels } = await loadPiModelDiscoveryRuntime();
const authStorage = discoverAuthStorage(params.agentDir);
const modelRegistry = discoverModels(authStorage, params.agentDir);
const resolvedRef = normalizeModelRef(params.provider, params.model);
const configuredProviders = params.cfg.models?.providers;
const providerConfig =
configuredProviders?.[resolvedRef.provider] ??
findNormalizedProviderValue(configuredProviders, resolvedRef.provider);
// Fast path: resolve without dynamic model preparation first.
// This avoids unnecessary prepare hooks (e.g. OpenRouter catalog fetch)
// for models that are already explicitly resolvable.
let model = resolveModelWithRegistry({
provider: resolvedRef.provider,
modelId: resolvedRef.model,
modelRegistry,
cfg: params.cfg,
agentDir: params.agentDir,
}) as Model<Api> | null;
// If the model is not in the registry yet, prepare dynamic provider models
// and retry (needed for provider-runtime-backed dynamic models).
if (!model) {
await prepareProviderDynamicModel({
provider: resolvedRef.provider,
config: params.cfg,
context: {
config: params.cfg,
agentDir: params.agentDir,
provider: resolvedRef.provider,
modelId: resolvedRef.model,
modelRegistry,
providerConfig,
},
});
model = resolveModelWithRegistry({
provider: resolvedRef.provider,
modelId: resolvedRef.model,
modelRegistry,
cfg: params.cfg,
agentDir: params.agentDir,
}) as Model<Api> | null;
}
const resolved = await resolveModelAsync(
resolvedRef.provider,
resolvedRef.model,
params.agentDir,
params.cfg,
{
allowBundledStaticCatalogFallback: true,
},
);
const { authStorage, model } = resolved;
if (!model) {
throw new Error(`Unknown model: ${resolvedRef.provider}/${resolvedRef.model}`);
}

View File

@@ -44,6 +44,8 @@ describe("media-understanding provider registry", () => {
const registry = buildMediaUnderstandingRegistry();
expect(requireMediaProvider(registry, "groq").id).toBe("groq");
expect(typeof requireMediaProvider(registry, "groq").describeImage).toBe("function");
expect(typeof requireMediaProvider(registry, "groq").describeImages).toBe("function");
expect(requireMediaProvider(registry, "deepgram").id).toBe("deepgram");
expect(resolvePluginCapabilityProvidersMock).toHaveBeenCalledWith({
key: "mediaUnderstandingProviders",
@@ -51,6 +53,23 @@ describe("media-understanding provider registry", () => {
});
});
// A provider that declares the image capability but ships no hooks of its own should
// come out of the registry with both image hooks populated.
it("hydrates manifest-only image providers with model-backed image hooks", () => {
  const manifestOnlyProvider = createMediaProvider({
    id: "zai",
    capabilities: ["image"],
    defaultModels: { image: "glm-4.6v" },
  });
  resolvePluginCapabilityProvidersMock.mockReturnValue([manifestOnlyProvider]);

  const hydrated = requireMediaProvider(buildMediaUnderstandingRegistry(), "zai");

  expect(hydrated.defaultModels?.image).toBe("glm-4.6v");
  expect(hydrated.describeImage).toBeTypeOf("function");
  expect(hydrated.describeImages).toBeTypeOf("function");
});
it("keeps provider id normalization behavior for capability providers", () => {
resolvePluginCapabilityProvidersMock.mockReturnValue([
createMediaProvider({ id: "google", capabilities: ["image", "audio", "video"] }),

View File

@@ -22,7 +22,23 @@ function mergeProviderIntoRegistry(
nativeDocumentInputs: provider.nativeDocumentInputs ?? existing.nativeDocumentInputs,
}
: provider;
registry.set(normalizedKey, merged);
registry.set(normalizedKey, hydrateModelBackedMediaProvider(merged));
}
/**
 * Fills in missing image hooks on an image-capable provider.
 *
 * Providers that do not list the "image" capability, or that already define both
 * `describeImage` and `describeImages`, are returned as-is (same object). Otherwise a
 * shallow copy is returned in which each missing hook is replaced by
 * `describeImageWithModel` / `describeImagesWithModel`; hooks the provider already
 * defines are kept.
 *
 * @param provider - Provider entry about to be stored in the registry.
 * @returns The original provider, or a copy with both image hooks populated.
 */
function hydrateModelBackedMediaProvider(
  provider: MediaUnderstandingProvider,
): MediaUnderstandingProvider {
  const isImageCapable = provider.capabilities?.includes("image") === true;
  if (!isImageCapable) {
    return provider;
  }

  const { describeImage, describeImages } = provider;
  if (describeImage && describeImages) {
    return provider;
  }

  return {
    ...provider,
    describeImage: describeImage ?? describeImageWithModel,
    describeImages: describeImages ?? describeImagesWithModel,
  };
}
export { normalizeMediaProviderId } from "./provider-id.js";

View File

@@ -1,7 +1,11 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../config/types.js";
import type { MediaAttachment, MediaUnderstandingOutput } from "../media-understanding/types.js";
import { describeImageFile, runMediaUnderstandingFile } from "./runtime.js";
import {
describeImageFile,
describeImageFileWithModel,
runMediaUnderstandingFile,
} from "./runtime.js";
const mocks = vi.hoisted(() => {
const cleanup = vi.fn(async () => {});
@@ -10,6 +14,8 @@ const mocks = vi.hoisted(() => {
createMediaAttachmentCache: vi.fn(() => ({ cleanup })),
normalizeMediaAttachments: vi.fn<() => MediaAttachment[]>(() => []),
normalizeMediaProviderId: vi.fn((provider: string) => provider.trim().toLowerCase()),
readLocalFileSafely: vi.fn(async () => ({ buffer: Buffer.from("image") })),
describeImageWithModel: vi.fn(async () => ({ text: "generic image ok", model: "vision" })),
runCapability: vi.fn(),
cleanup,
};
@@ -26,12 +32,24 @@ vi.mock("./provider-registry.js", () => ({
normalizeMediaProviderId: mocks.normalizeMediaProviderId,
}));
vi.mock("../infra/fs-safe.js", () => ({
readLocalFileSafely: mocks.readLocalFileSafely,
}));
vi.mock("./image-runtime.js", () => ({
describeImageWithModel: mocks.describeImageWithModel,
}));
describe("media-understanding runtime", () => {
afterEach(() => {
mocks.buildProviderRegistry.mockReset();
mocks.createMediaAttachmentCache.mockReset();
mocks.normalizeMediaAttachments.mockReset();
mocks.normalizeMediaProviderId.mockReset();
mocks.readLocalFileSafely.mockReset();
mocks.readLocalFileSafely.mockResolvedValue({ buffer: Buffer.from("image") });
mocks.describeImageWithModel.mockReset();
mocks.describeImageWithModel.mockResolvedValue({ text: "generic image ok", model: "vision" });
mocks.runCapability.mockReset();
mocks.cleanup.mockReset();
mocks.cleanup.mockResolvedValue(undefined);
@@ -204,6 +222,37 @@ describe("media-understanding runtime", () => {
});
});
// The registry entry for "zai" has the image capability but no describeImage hook, so the
// call is expected to land on the mocked generic describeImageWithModel with the file's
// buffer, its base name, and the default 30s timeout.
it("uses the generic model-backed image runtime for explicit models without media hooks", async () => {
  mocks.buildProviderRegistry.mockReturnValue(
    new Map([["zai", { id: "zai", capabilities: ["image"] }]]),
  );

  const request = {
    filePath: "/tmp/sample.jpg",
    mime: "image/jpeg",
    provider: "zai",
    model: "glm-4.6v",
    prompt: "Describe it",
    cfg: {} as OpenClawConfig,
    agentDir: "/tmp/agent",
  };

  await expect(describeImageFileWithModel(request)).resolves.toEqual({
    text: "generic image ok",
    model: "vision",
  });

  expect(mocks.describeImageWithModel).toHaveBeenCalledWith({
    buffer: Buffer.from("image"),
    fileName: "sample.jpg",
    mime: "image/jpeg",
    provider: "zai",
    model: "glm-4.6v",
    prompt: "Describe it",
    maxTokens: undefined,
    timeoutMs: 30_000,
    cfg: {},
    agentDir: "/tmp/agent",
  });
});
it("surfaces the underlying provider failure when media understanding fails", async () => {
mocks.normalizeMediaAttachments.mockReturnValue([
{ index: 0, path: "/tmp/sample.ogg", mime: "audio/ogg" },

View File

@@ -1,5 +1,6 @@
import path from "node:path";
import { readLocalFileSafely } from "../infra/fs-safe.js";
import { describeImageWithModel } from "./image-runtime.js";
import { normalizeMediaProviderId } from "./provider-registry.js";
import { findDecisionReason, normalizeDecisionReason } from "./runner.entries.js";
import {
@@ -153,11 +154,9 @@ export async function describeImageFileWithModel(params: DescribeImageFileWithMo
const timeoutMs = params.timeoutMs ?? 30_000;
const providerRegistry = buildProviderRegistry(undefined, params.cfg);
const provider = providerRegistry.get(normalizeMediaProviderId(params.provider));
if (!provider?.describeImage) {
throw new Error(`Provider does not support image analysis: ${params.provider}`);
}
const buffer = (await readLocalFileSafely({ filePath: params.filePath })).buffer;
return await provider.describeImage({
const describeImage = provider?.describeImage ?? describeImageWithModel;
return await describeImage({
buffer,
fileName: path.basename(params.filePath),
mime: params.mime,