mirror of
https://github.com/moltbot/moltbot.git
synced 2026-05-16 18:34:18 +00:00
fix: repair model media probes
This commit is contained in:
@@ -17,6 +17,7 @@ Docs: https://docs.openclaw.ai
|
||||
### Fixes
|
||||
|
||||
- Telegram: handle managed select button callbacks before the raw callback fallback while preserving delimiter-containing option values such as `env|prod`. (#79816) Thanks @moeedahmed.
|
||||
- CLI/media: let explicit image description model refs use bundled static provider catalogs and generic model-backed image hooks, so `openclaw infer image describe --model zai/glm-4.6v` works like direct model runs and Anthropic auth probes avoid stale Claude 3 Haiku catalog entries.
|
||||
- Browser: wait longer for existing-session Chrome MCP status and non-deep doctor probes so slow first attaches do not falsely report offline while keeping raw CDP status probes short. (#77473) Thanks @rubencu.
|
||||
- Exec approvals: keep `exec.approval.list` on the lightweight policy-summary path so listing pending approvals no longer loads the rich tree-sitter command explainer. (#76943) Thanks @rubencu.
|
||||
- Agents: surface concise default-visible warnings when `exec`/`bash` tool calls fail after the assistant claims success, while keeping raw stderr hidden unless verbose details are enabled. Fixes #60497. (#80003) Thanks @jbetala7.
|
||||
|
||||
@@ -334,6 +334,52 @@ describe("buildProbeTargets reason codes", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Regression test: with no explicit model candidates, the Anthropic probe target
// must come from the preferred Haiku 4.5 catalog entry even though the older
// Claude 3 Haiku entry appears first in the catalog.
it("prefers live Anthropic Haiku 4.5 catalog entries over stale Claude 3 probes", async () => {
  // Auth store with no profiles and no ordering overrides.
  mockStore = {
    version: 1,
    profiles: {},
    order: {},
  };
  // The stale Claude 3 entry is deliberately listed first so that a plain
  // first-match catalog lookup would select it.
  loadModelCatalogMock.mockResolvedValueOnce([
    { provider: "anthropic", id: "claude-3-haiku-20240307", name: "Claude Haiku 3" },
    {
      provider: "anthropic",
      id: "claude-haiku-4-5-20251001",
      name: "Claude Haiku 4.5",
    },
    { provider: "anthropic", id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6" },
  ]);

  const plan = await buildProbeTargets({
    cfg: {
      models: {
        providers: {
          anthropic: {
            baseUrl: "https://api.anthropic.com/v1",
            api: "anthropic-messages",
            apiKey: "sk-ant-test",
            // No configured models: selection has to fall back to the catalog.
            models: [],
          },
        },
      },
    } as OpenClawConfig,
    providers: ["anthropic"],
    modelCandidates: [],
    options: {
      timeoutMs: 5_000,
      concurrency: 1,
      maxTokens: 16,
    },
  });

  expect(plan.results).toStrictEqual([]);
  expect(plan.targets).toHaveLength(1);
  expect(plan.targets[0]?.model).toStrictEqual({
    provider: "anthropic",
    model: "claude-haiku-4-5-20251001",
  });
});
|
||||
|
||||
it("uses workspace-scoped auth evidence when building env probe targets", async () => {
|
||||
mockStore = {
|
||||
version: 1,
|
||||
|
||||
@@ -152,6 +152,29 @@ function buildCandidateMap(modelCandidates: string[]): Map<string, string[]> {
|
||||
return map;
|
||||
}
|
||||
|
||||
/** Matches dated Anthropic Haiku 4.5 snapshot ids, e.g. `claude-haiku-4-5-20251001`. */
const ANTHROPIC_DATED_HAIKU_45_PATTERN = /^claude-haiku-4-5-\d{8}$/;

/** Rank given to catalog entries that have no specific preference. */
const DEFAULT_CATALOG_PROBE_PRIORITY = 50;

/**
 * Ranks a catalog model as a probe candidate for `provider`.
 *
 * Lower numbers are preferred: `selectProbeModel` sorts catalog entries in
 * ascending order of this value and takes the first one, using the original
 * catalog position as the tie-breaker.
 *
 * Only the `anthropic` provider has a custom ordering; every other provider
 * gets the neutral default, so its catalog order is preserved.
 *
 * Anthropic tiers:
 * - 0: dated Haiku 4.5 snapshot (`claude-haiku-4-5-YYYYMMDD`)
 * - 1: undated `claude-haiku-4-5`
 * - 2: `claude-sonnet-4-6` and its suffixed variants
 * - 3: any other `claude-sonnet-4-*`
 * - 50: everything else
 * - 100: `claude-3-*` entries, ranked last
 *
 * @param provider Provider id, compared verbatim against `"anthropic"`.
 * @param modelId Catalog model id; surrounding whitespace and letter case are ignored.
 * @returns The priority rank (lower is better).
 */
function catalogProbePriority(provider: string, modelId: string): number {
  const id = modelId.trim().toLowerCase();
  if (provider !== "anthropic") {
    return DEFAULT_CATALOG_PROBE_PRIORITY;
  }
  if (ANTHROPIC_DATED_HAIKU_45_PATTERN.test(id)) {
    return 0;
  }
  if (id === "claude-haiku-4-5") {
    return 1;
  }
  if (id === "claude-sonnet-4-6" || id.startsWith("claude-sonnet-4-6-")) {
    return 2;
  }
  // Checked after the 4.6 case above, so this only catches the remaining Sonnet 4.x ids.
  if (id.startsWith("claude-sonnet-4-")) {
    return 3;
  }
  if (id.startsWith("claude-3-")) {
    return 100;
  }
  return DEFAULT_CATALOG_PROBE_PRIORITY;
}
|
||||
|
||||
function selectProbeModel(params: {
|
||||
provider: string;
|
||||
candidates: Map<string, string[]>;
|
||||
@@ -162,7 +185,15 @@ function selectProbeModel(params: {
|
||||
if (direct && direct.length > 0) {
|
||||
return { provider, model: direct[0] };
|
||||
}
|
||||
const fromCatalog = catalog.find((entry) => normalizeProviderId(entry.provider) === provider);
|
||||
const fromCatalog = catalog
|
||||
.map((entry, index) => ({ entry, index }))
|
||||
.filter(({ entry }) => normalizeProviderId(entry.provider) === provider)
|
||||
.toSorted((left, right) => {
|
||||
const priority =
|
||||
catalogProbePriority(provider, left.entry.id) -
|
||||
catalogProbePriority(provider, right.entry.id);
|
||||
return priority || left.index - right.index;
|
||||
})[0]?.entry;
|
||||
if (fromCatalog) {
|
||||
return { provider, model: fromCatalog.id };
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ const hoisted = vi.hoisted(() => ({
|
||||
fetchMock: vi.fn(),
|
||||
registerProviderStreamForModelMock: vi.fn(),
|
||||
prepareProviderDynamicModelMock: vi.fn(async () => {}),
|
||||
resolveModelAsyncMock: vi.fn(),
|
||||
resolveModelWithRegistryMock: vi.fn(),
|
||||
}));
|
||||
const {
|
||||
@@ -32,6 +33,7 @@ const {
|
||||
fetchMock,
|
||||
registerProviderStreamForModelMock,
|
||||
prepareProviderDynamicModelMock,
|
||||
resolveModelAsyncMock,
|
||||
resolveModelWithRegistryMock,
|
||||
} = hoisted;
|
||||
|
||||
@@ -86,7 +88,7 @@ vi.mock("../plugins/provider-runtime.js", async () => ({
|
||||
}));
|
||||
|
||||
vi.mock("../agents/pi-embedded-runner/model.js", () => ({
|
||||
resolveModelWithRegistry: resolveModelWithRegistryMock,
|
||||
resolveModelAsync: resolveModelAsyncMock,
|
||||
}));
|
||||
|
||||
const { describeImageWithModel } = await import("./image.js");
|
||||
@@ -126,6 +128,22 @@ describe("describeImageWithModel", () => {
|
||||
({ modelRegistry, provider, modelId }: ResolveModelWithRegistryTestParams) =>
|
||||
modelRegistry.find(provider, modelId),
|
||||
);
|
||||
resolveModelAsyncMock.mockImplementation(
|
||||
async (provider: string, modelId: string, agentDir?: string, cfg?: unknown) => {
|
||||
const authStorage = {
|
||||
setRuntimeApiKey: setRuntimeApiKeyMock,
|
||||
};
|
||||
const modelRegistry = discoverModelsMock(authStorage, agentDir);
|
||||
const model = resolveModelWithRegistryMock({
|
||||
provider,
|
||||
modelId,
|
||||
modelRegistry,
|
||||
cfg,
|
||||
agentDir,
|
||||
});
|
||||
return { authStorage, model, modelRegistry };
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
function getApiKeyForModelCall(index = 0): AuthRequestCall {
|
||||
|
||||
@@ -6,16 +6,15 @@ import {
|
||||
requireApiKey,
|
||||
resolveApiKeyForProvider,
|
||||
} from "../agents/model-auth.js";
|
||||
import { findNormalizedProviderValue, normalizeModelRef } from "../agents/model-selection.js";
|
||||
import { normalizeModelRef } from "../agents/model-selection.js";
|
||||
import { ensureOpenClawModelsJson } from "../agents/models-config.js";
|
||||
import { resolveModelWithRegistry } from "../agents/pi-embedded-runner/model.js";
|
||||
import { resolveModelAsync } from "../agents/pi-embedded-runner/model.js";
|
||||
import { resolveProviderRequestCapabilities } from "../agents/provider-attribution.js";
|
||||
import { registerProviderStreamForModel } from "../agents/provider-stream.js";
|
||||
import {
|
||||
coerceImageAssistantText,
|
||||
hasImageReasoningOnlyResponse,
|
||||
} from "../agents/tools/image-tool.helpers.js";
|
||||
import { prepareProviderDynamicModel } from "../plugins/provider-runtime.js";
|
||||
import type {
|
||||
ImageDescriptionRequest,
|
||||
ImageDescriptionResult,
|
||||
@@ -23,15 +22,6 @@ import type {
|
||||
ImagesDescriptionResult,
|
||||
} from "./types.js";
|
||||
|
||||
let piModelDiscoveryRuntimePromise: Promise<
|
||||
typeof import("../agents/pi-model-discovery-runtime.js")
|
||||
> | null = null;
|
||||
|
||||
function loadPiModelDiscoveryRuntime() {
|
||||
piModelDiscoveryRuntimePromise ??= import("../agents/pi-model-discovery-runtime.js");
|
||||
return piModelDiscoveryRuntimePromise;
|
||||
}
|
||||
|
||||
function resolveImageToolMaxTokens(modelMaxTokens: number | undefined, requestedMaxTokens = 4096) {
|
||||
if (
|
||||
typeof modelMaxTokens !== "number" ||
|
||||
@@ -143,48 +133,17 @@ async function resolveImageRuntime(params: {
|
||||
authStore?: ImageDescriptionRequest["authStore"];
|
||||
}): Promise<{ apiKey: string; model: Model<Api> }> {
|
||||
await ensureOpenClawModelsJson(params.cfg, params.agentDir);
|
||||
const { discoverAuthStorage, discoverModels } = await loadPiModelDiscoveryRuntime();
|
||||
const authStorage = discoverAuthStorage(params.agentDir);
|
||||
const modelRegistry = discoverModels(authStorage, params.agentDir);
|
||||
const resolvedRef = normalizeModelRef(params.provider, params.model);
|
||||
const configuredProviders = params.cfg.models?.providers;
|
||||
const providerConfig =
|
||||
configuredProviders?.[resolvedRef.provider] ??
|
||||
findNormalizedProviderValue(configuredProviders, resolvedRef.provider);
|
||||
// Fast path: resolve without dynamic model preparation first.
|
||||
// This avoids unnecessary prepare hooks (e.g. OpenRouter catalog fetch)
|
||||
// for models that are already explicitly resolvable.
|
||||
let model = resolveModelWithRegistry({
|
||||
provider: resolvedRef.provider,
|
||||
modelId: resolvedRef.model,
|
||||
modelRegistry,
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
}) as Model<Api> | null;
|
||||
|
||||
// If the model is not in the registry yet, prepare dynamic provider models
|
||||
// and retry (needed for provider-runtime-backed dynamic models).
|
||||
if (!model) {
|
||||
await prepareProviderDynamicModel({
|
||||
provider: resolvedRef.provider,
|
||||
config: params.cfg,
|
||||
context: {
|
||||
config: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
provider: resolvedRef.provider,
|
||||
modelId: resolvedRef.model,
|
||||
modelRegistry,
|
||||
providerConfig,
|
||||
},
|
||||
});
|
||||
model = resolveModelWithRegistry({
|
||||
provider: resolvedRef.provider,
|
||||
modelId: resolvedRef.model,
|
||||
modelRegistry,
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
}) as Model<Api> | null;
|
||||
}
|
||||
const resolved = await resolveModelAsync(
|
||||
resolvedRef.provider,
|
||||
resolvedRef.model,
|
||||
params.agentDir,
|
||||
params.cfg,
|
||||
{
|
||||
allowBundledStaticCatalogFallback: true,
|
||||
},
|
||||
);
|
||||
const { authStorage, model } = resolved;
|
||||
if (!model) {
|
||||
throw new Error(`Unknown model: ${resolvedRef.provider}/${resolvedRef.model}`);
|
||||
}
|
||||
|
||||
@@ -44,6 +44,8 @@ describe("media-understanding provider registry", () => {
|
||||
const registry = buildMediaUnderstandingRegistry();
|
||||
|
||||
expect(requireMediaProvider(registry, "groq").id).toBe("groq");
|
||||
expect(typeof requireMediaProvider(registry, "groq").describeImage).toBe("function");
|
||||
expect(typeof requireMediaProvider(registry, "groq").describeImages).toBe("function");
|
||||
expect(requireMediaProvider(registry, "deepgram").id).toBe("deepgram");
|
||||
expect(resolvePluginCapabilityProvidersMock).toHaveBeenCalledWith({
|
||||
key: "mediaUnderstandingProviders",
|
||||
@@ -51,6 +53,23 @@ describe("media-understanding provider registry", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// A provider that declares the "image" capability but supplies no describe
// hooks of its own must still come out of the registry with both hooks set.
it("hydrates manifest-only image providers with model-backed image hooks", () => {
  resolvePluginCapabilityProvidersMock.mockReturnValue([
    createMediaProvider({
      id: "zai",
      capabilities: ["image"],
      defaultModels: { image: "glm-4.6v" },
    }),
  ]);

  const registry = buildMediaUnderstandingRegistry();
  const provider = requireMediaProvider(registry, "zai");

  // Hydration must not drop the provider's own metadata.
  expect(provider.defaultModels?.image).toBe("glm-4.6v");
  expect(provider.describeImage).toBeTypeOf("function");
  expect(provider.describeImages).toBeTypeOf("function");
});
|
||||
|
||||
it("keeps provider id normalization behavior for capability providers", () => {
|
||||
resolvePluginCapabilityProvidersMock.mockReturnValue([
|
||||
createMediaProvider({ id: "google", capabilities: ["image", "audio", "video"] }),
|
||||
|
||||
@@ -22,7 +22,23 @@ function mergeProviderIntoRegistry(
|
||||
nativeDocumentInputs: provider.nativeDocumentInputs ?? existing.nativeDocumentInputs,
|
||||
}
|
||||
: provider;
|
||||
registry.set(normalizedKey, merged);
|
||||
registry.set(normalizedKey, hydrateModelBackedMediaProvider(merged));
|
||||
}
|
||||
|
||||
/**
 * Ensures an image-capable media provider exposes both image description hooks.
 *
 * The input is returned unchanged (same reference) when it does not list the
 * `"image"` capability, or when it already defines both `describeImage` and
 * `describeImages`. Otherwise a shallow copy is returned in which each missing
 * hook falls back to the generic `describeImageWithModel` /
 * `describeImagesWithModel` implementation; hooks the provider already defines
 * are kept.
 *
 * @param provider Provider entry about to be stored in the registry.
 * @returns The original provider, or a copy with the missing image hooks filled in.
 */
function hydrateModelBackedMediaProvider(
  provider: MediaUnderstandingProvider,
): MediaUnderstandingProvider {
  if (!provider.capabilities?.includes("image")) {
    return provider;
  }
  // Nothing to fill in: avoid allocating a copy.
  if (provider.describeImage && provider.describeImages) {
    return provider;
  }
  return {
    ...provider,
    describeImage: provider.describeImage ?? describeImageWithModel,
    describeImages: provider.describeImages ?? describeImagesWithModel,
  };
}
|
||||
|
||||
export { normalizeMediaProviderId } from "./provider-id.js";
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/types.js";
|
||||
import type { MediaAttachment, MediaUnderstandingOutput } from "../media-understanding/types.js";
|
||||
import { describeImageFile, runMediaUnderstandingFile } from "./runtime.js";
|
||||
import {
|
||||
describeImageFile,
|
||||
describeImageFileWithModel,
|
||||
runMediaUnderstandingFile,
|
||||
} from "./runtime.js";
|
||||
|
||||
const mocks = vi.hoisted(() => {
|
||||
const cleanup = vi.fn(async () => {});
|
||||
@@ -10,6 +14,8 @@ const mocks = vi.hoisted(() => {
|
||||
createMediaAttachmentCache: vi.fn(() => ({ cleanup })),
|
||||
normalizeMediaAttachments: vi.fn<() => MediaAttachment[]>(() => []),
|
||||
normalizeMediaProviderId: vi.fn((provider: string) => provider.trim().toLowerCase()),
|
||||
readLocalFileSafely: vi.fn(async () => ({ buffer: Buffer.from("image") })),
|
||||
describeImageWithModel: vi.fn(async () => ({ text: "generic image ok", model: "vision" })),
|
||||
runCapability: vi.fn(),
|
||||
cleanup,
|
||||
};
|
||||
@@ -26,12 +32,24 @@ vi.mock("./provider-registry.js", () => ({
|
||||
normalizeMediaProviderId: mocks.normalizeMediaProviderId,
|
||||
}));
|
||||
|
||||
vi.mock("../infra/fs-safe.js", () => ({
|
||||
readLocalFileSafely: mocks.readLocalFileSafely,
|
||||
}));
|
||||
|
||||
vi.mock("./image-runtime.js", () => ({
|
||||
describeImageWithModel: mocks.describeImageWithModel,
|
||||
}));
|
||||
|
||||
describe("media-understanding runtime", () => {
|
||||
afterEach(() => {
|
||||
mocks.buildProviderRegistry.mockReset();
|
||||
mocks.createMediaAttachmentCache.mockReset();
|
||||
mocks.normalizeMediaAttachments.mockReset();
|
||||
mocks.normalizeMediaProviderId.mockReset();
|
||||
mocks.readLocalFileSafely.mockReset();
|
||||
mocks.readLocalFileSafely.mockResolvedValue({ buffer: Buffer.from("image") });
|
||||
mocks.describeImageWithModel.mockReset();
|
||||
mocks.describeImageWithModel.mockResolvedValue({ text: "generic image ok", model: "vision" });
|
||||
mocks.runCapability.mockReset();
|
||||
mocks.cleanup.mockReset();
|
||||
mocks.cleanup.mockResolvedValue(undefined);
|
||||
@@ -204,6 +222,37 @@ describe("media-understanding runtime", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// When the registry entry for the requested provider has no `describeImage`
// hook, `describeImageFileWithModel` must delegate to the generic
// `describeImageWithModel` runtime instead of rejecting the request.
it("uses the generic model-backed image runtime for explicit models without media hooks", async () => {
  // Registry entry advertises image support but carries no describe hooks.
  mocks.buildProviderRegistry.mockReturnValue(
    new Map([["zai", { id: "zai", capabilities: ["image"] }]]),
  );

  await expect(
    describeImageFileWithModel({
      filePath: "/tmp/sample.jpg",
      mime: "image/jpeg",
      provider: "zai",
      model: "glm-4.6v",
      prompt: "Describe it",
      cfg: {} as OpenClawConfig,
      agentDir: "/tmp/agent",
    }),
  ).resolves.toEqual({ text: "generic image ok", model: "vision" });

  // The buffer comes from the mocked `readLocalFileSafely`; no timeout was
  // passed above, so the 30_000 ms value is the runtime's default.
  expect(mocks.describeImageWithModel).toHaveBeenCalledWith({
    buffer: Buffer.from("image"),
    fileName: "sample.jpg",
    mime: "image/jpeg",
    provider: "zai",
    model: "glm-4.6v",
    prompt: "Describe it",
    maxTokens: undefined,
    timeoutMs: 30_000,
    cfg: {},
    agentDir: "/tmp/agent",
  });
});
|
||||
|
||||
it("surfaces the underlying provider failure when media understanding fails", async () => {
|
||||
mocks.normalizeMediaAttachments.mockReturnValue([
|
||||
{ index: 0, path: "/tmp/sample.ogg", mime: "audio/ogg" },
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import path from "node:path";
|
||||
import { readLocalFileSafely } from "../infra/fs-safe.js";
|
||||
import { describeImageWithModel } from "./image-runtime.js";
|
||||
import { normalizeMediaProviderId } from "./provider-registry.js";
|
||||
import { findDecisionReason, normalizeDecisionReason } from "./runner.entries.js";
|
||||
import {
|
||||
@@ -153,11 +154,9 @@ export async function describeImageFileWithModel(params: DescribeImageFileWithMo
|
||||
const timeoutMs = params.timeoutMs ?? 30_000;
|
||||
const providerRegistry = buildProviderRegistry(undefined, params.cfg);
|
||||
const provider = providerRegistry.get(normalizeMediaProviderId(params.provider));
|
||||
if (!provider?.describeImage) {
|
||||
throw new Error(`Provider does not support image analysis: ${params.provider}`);
|
||||
}
|
||||
const buffer = (await readLocalFileSafely({ filePath: params.filePath })).buffer;
|
||||
return await provider.describeImage({
|
||||
const describeImage = provider?.describeImage ?? describeImageWithModel;
|
||||
return await describeImage({
|
||||
buffer,
|
||||
fileName: path.basename(params.filePath),
|
||||
mime: params.mime,
|
||||
|
||||
Reference in New Issue
Block a user