fix(models): support minimax-portal coding plan vlm routing for image tool (openclaw#33953)

Verified:
- pnpm install --frozen-lockfile
- pnpm build
- pnpm check
- pnpm test:macmini

Co-authored-by: tars90percent <252094836+tars90percent@users.noreply.github.com>
This commit is contained in:
Tars
2026-03-08 04:30:53 +08:00
committed by GitHub
parent e554c59aac
commit dab0e97c22
15 changed files with 246 additions and 23 deletions

View File

@@ -134,6 +134,7 @@ Docs: https://docs.openclaw.ai
- Routing/legacy route guard tightening: require legacy session-key channel hints to match the saved delivery channel before inheriting external routing metadata, preventing custom namespaced keys like `agent:<agent>:work:<ticket>` from inheriting stale non-webchat routes.
- Gateway/internal client routing continuity: prevent webchat/TUI/UI turns from inheriting stale external reply routes by requiring explicit `deliver: true` for external delivery, keeping main-session external inheritance scoped to non-Webchat/UI clients, and honoring configured `session.mainKey` when identifying main-session continuity. (from #35321, #34635, #35356) Thanks @alexyyyander and @Octane0411.
- Security/auth labels: remove token and API-key snippets from user-facing auth status labels so `/status` and `/models` do not expose credential fragments. (#33262) thanks @cu1ch3n.
- Models/MiniMax portal vision routing: add `MiniMax-VL-01` to the `minimax-portal` provider, route portal image understanding through the MiniMax VLM endpoint, and align media auto-selection plus Telegram sticker description with the shared portal image provider path. (#33953) Thanks @tars90percent.
- Auth/credential semantics: align profile eligibility + probe diagnostics with SecretRef/expiry rules and harden browser download atomic writes. (#33733) thanks @joshavant.
- Security/audit denyCommands guidance: suggest likely exact node command IDs for unknown `gateway.nodes.denyCommands` entries so ineffective denylist entries are easier to correct. (#29713) thanks @liquidhorizon88-bot.
- Agents/overload failover handling: classify overloaded provider failures separately from rate limits/status timeouts, add short overload backoff before retry/failover, record overloaded prompt/assistant failures as transient auth-profile cooldowns (with probeable same-provider fallback) instead of treating them like persistent auth/billing failures, and keep one-shot cron retry classification aligned so overloaded fallback summaries still count as transient retries.

View File

@@ -45,3 +45,14 @@ describe("minimaxUnderstandImage apiKey normalization", () => {
await runNormalizationCase("minimax-\u0417\u2502test-key");
});
});
// Pins which (provider, model id) pairs the isMinimaxVlmModel predicate accepts.
describe("isMinimaxVlmModel", () => {
  it("only matches the canonical MiniMax VLM model id", async () => {
    const vlmModule = await import("./minimax-vlm.js");
    // Each row: provider id, model id, expected predicate result.
    const expectations: Array<[string, string, boolean]> = [
      ["minimax", "MiniMax-VL-01", true],
      ["minimax-portal", "MiniMax-VL-01", true],
      ["minimax-portal", "custom-vision", false],
      ["openai", "MiniMax-VL-01", false],
    ];
    for (const [provider, modelId, expected] of expectations) {
      expect(vlmModule.isMinimaxVlmModel(provider, modelId)).toBe(expected);
    }
  });
});

View File

@@ -6,6 +6,14 @@ type MinimaxBaseResp = {
status_msg?: string;
};
/**
 * Reports whether `provider` is one of the MiniMax provider ids recognised here.
 *
 * The comparison is exact: no trimming or case folding is applied.
 *
 * @param provider - Provider id to check.
 * @returns `true` only for `"minimax"` or `"minimax-portal"`.
 */
export function isMinimaxVlmProvider(provider: string): boolean {
  switch (provider) {
    case "minimax":
    case "minimax-portal":
      return true;
    default:
      return false;
  }
}
/**
 * Reports whether a provider/model pair names the canonical MiniMax vision model.
 *
 * The provider must satisfy `isMinimaxVlmProvider`; the model id is compared
 * case-sensitively against `"MiniMax-VL-01"` after surrounding whitespace is trimmed.
 *
 * @param provider - Provider id the model belongs to.
 * @param modelId - Model id to check.
 * @returns `true` only when both the provider and the trimmed model id match.
 */
export function isMinimaxVlmModel(provider: string, modelId: string): boolean {
  // Bail out first so the model id is only inspected for MiniMax providers.
  if (!isMinimaxVlmProvider(provider)) {
    return false;
  }
  const normalizedModelId = modelId.trim();
  return normalizedModelId === "MiniMax-VL-01";
}
function coerceApiHost(params: {
apiHost?: string;
modelBaseUrl?: string;

View File

@@ -71,10 +71,9 @@ describe("MiniMax implicit provider (#15275)", () => {
"minimax-portal:default": {
type: "oauth",
provider: "minimax-portal",
oauth: {
access: "token",
expires: Date.now() + 60_000,
},
access: "token",
refresh: "refresh-token",
expires: Date.now() + 60_000,
},
},
},
@@ -87,6 +86,18 @@ describe("MiniMax implicit provider (#15275)", () => {
const providers = await resolveImplicitProviders({ agentDir });
expect(providers?.["minimax-portal"]?.authHeader).toBe(true);
});
// With only MINIMAX_OAUTH_TOKEN set and a fresh, empty agent dir, the portal provider
// should still be resolved and should list the MiniMax-VL-01 model.
it("should include minimax portal provider when MINIMAX_OAUTH_TOKEN is configured", async () => {
  const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
  await withEnvAsync({ MINIMAX_OAUTH_TOKEN: "portal-token" }, async () => {
    const resolved = await resolveImplicitProviders({ agentDir });
    const portal = resolved?.["minimax-portal"];
    expect(portal).toBeDefined();
    expect(portal?.authHeader).toBe(true);
    const listsVisionModel = portal?.models?.some((entry) => entry.id === "MiniMax-VL-01");
    expect(listsVisionModel).toBe(true);
  });
});
});
describe("vLLM provider", () => {

View File

@@ -771,6 +771,12 @@ function buildMinimaxPortalProvider(): ProviderConfig {
api: "anthropic-messages",
authHeader: true,
models: [
buildMinimaxModel({
id: MINIMAX_DEFAULT_VISION_MODEL_ID,
name: "MiniMax VL 01",
reasoning: false,
input: ["text", "image"],
}),
buildMinimaxTextModel({
id: MINIMAX_DEFAULT_MODEL_ID,
name: "MiniMax M2.5",
@@ -1116,8 +1122,9 @@ export async function resolveImplicitProviders(params: {
providers.minimax = { ...buildMinimaxProvider(), apiKey: minimaxKey };
}
const minimaxPortalEnvKey = resolveEnvApiKeyVarName("minimax-portal");
const minimaxOauthProfile = listProfilesForProvider(authStore, "minimax-portal");
if (minimaxOauthProfile.length > 0) {
if (minimaxPortalEnvKey || minimaxOauthProfile.length > 0) {
providers["minimax-portal"] = {
...buildMinimaxPortalProvider(),
apiKey: MINIMAX_OAUTH_MARKER,

View File

@@ -273,6 +273,32 @@ describe("image tool implicit imageModel config", () => {
});
});
it("pairs minimax-portal primary with MiniMax-VL-01 (and fallbacks) when auth exists", async () => {
  await withTempAgentDir(async (agentDir) => {
    // Seed an OAuth profile for minimax-portal that expires one minute from now.
    await writeAuthProfiles(agentDir, {
      version: 1,
      profiles: {
        "minimax-portal:default": {
          type: "oauth",
          provider: "minimax-portal",
          access: "oauth-test",
          refresh: "refresh-test",
          expires: Date.now() + 60_000,
        },
      },
    });
    // Keys for the other providers are stubbed alongside the portal profile.
    vi.stubEnv("OPENAI_API_KEY", "openai-test");
    vi.stubEnv("ANTHROPIC_API_KEY", "anthropic-test");

    const cfg: OpenClawConfig = {
      agents: { defaults: { model: { primary: "minimax-portal/MiniMax-M2.5" } } },
    };

    const resolvedImageModel = resolveImageModelConfigForTool({ cfg, agentDir });
    const expectedImageModel = createDefaultImageFallbackExpectation(
      "minimax-portal/MiniMax-VL-01",
    );
    expect(resolvedImageModel).toEqual(expectedImageModel);

    const imageTool = createImageTool({ config: cfg, agentDir });
    expect(imageTool).not.toBeNull();
  });
});
it("pairs zai primary with glm-4.6v (and fallbacks) when auth exists", async () => {
await withTempAgentDir(async (agentDir) => {
vi.stubEnv("ZAI_API_KEY", "zai-test");

View File

@@ -3,7 +3,7 @@ import { Type } from "@sinclair/typebox";
import type { OpenClawConfig } from "../../config/config.js";
import { resolveUserPath } from "../../utils.js";
import { loadWebMedia } from "../../web/media.js";
import { minimaxUnderstandImage } from "../minimax-vlm.js";
import { isMinimaxVlmModel, isMinimaxVlmProvider, minimaxUnderstandImage } from "../minimax-vlm.js";
import {
coerceImageAssistantText,
coerceImageModelConfig,
@@ -110,8 +110,8 @@ export function resolveImageModelConfigForTool(params: {
let preferred: string | null = null;
// MiniMax users: always try the canonical vision model first when auth exists.
if (primary.provider === "minimax" && providerOk) {
preferred = "minimax/MiniMax-VL-01";
if (isMinimaxVlmProvider(primary.provider) && providerOk) {
preferred = `${primary.provider}/MiniMax-VL-01`;
} else if (providerOk && providerVisionFromConfig) {
preferred = providerVisionFromConfig;
} else if (primary.provider === "zai" && providerOk) {
@@ -229,7 +229,7 @@ async function runImagePrompt(params: {
});
// MiniMax VLM only supports a single image; use the first one.
if (model.provider === "minimax") {
if (isMinimaxVlmModel(model.provider, model.id)) {
const first = params.images[0];
const imageDataUrl = `data:${first.mimeType};base64,${first.base64}`;
const text = await minimaxUnderstandImage({

View File

@@ -1,8 +1,10 @@
import { describe, expect, it } from "vitest";
import {
AUTO_AUDIO_KEY_PROVIDERS,
AUTO_IMAGE_KEY_PROVIDERS,
AUTO_VIDEO_KEY_PROVIDERS,
DEFAULT_AUDIO_MODELS,
DEFAULT_IMAGE_MODELS,
} from "./defaults.js";
describe("DEFAULT_AUDIO_MODELS", () => {
@@ -22,3 +24,15 @@ describe("AUTO_VIDEO_KEY_PROVIDERS", () => {
expect(AUTO_VIDEO_KEY_PROVIDERS).toContain("moonshot");
});
});
// Guards the presence of the portal provider id in the auto image key provider list.
describe("AUTO_IMAGE_KEY_PROVIDERS", () => {
  it("includes minimax-portal auto key resolution", () => {
    const providerIds: readonly string[] = AUTO_IMAGE_KEY_PROVIDERS;
    expect(providerIds).toContain("minimax-portal");
  });
});
// Guards the default image model id registered for the portal provider.
describe("DEFAULT_IMAGE_MODELS", () => {
  it("includes the MiniMax portal vision default", () => {
    const { "minimax-portal": portalDefault } = DEFAULT_IMAGE_MODELS;
    expect(portalDefault).toBe("MiniMax-VL-01");
  });
});

View File

@@ -46,6 +46,7 @@ export const AUTO_IMAGE_KEY_PROVIDERS = [
"anthropic",
"google",
"minimax",
"minimax-portal",
"zai",
] as const;
export const AUTO_VIDEO_KEY_PROVIDERS = ["google", "moonshot"] as const;
@@ -54,6 +55,7 @@ export const DEFAULT_IMAGE_MODELS: Record<string, string> = {
anthropic: "claude-opus-4-6",
google: "gemini-3-flash-preview",
minimax: "MiniMax-VL-01",
"minimax-portal": "MiniMax-VL-01",
zai: "glm-4.6v",
};
export const CLI_OUTPUT_MAX_BUFFER = 5 * MB;

View File

@@ -0,0 +1,133 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
// Spies shared by the vi.mock factories in this file; tests assert on them directly.
// Stands in for `complete` from "@mariozechner/pi-ai".
const completeMock = vi.fn();
// Stands in for `minimaxUnderstandImage` from the minimax-vlm module.
const minimaxUnderstandImageMock = vi.fn();
// No-op replacement for `ensureOpenClawModelsJson`.
const ensureOpenClawModelsJsonMock = vi.fn(async () => {});
// Always resolves to the same fixed OAuth-mode credential.
const getApiKeyForModelMock = vi.fn(async () => ({
apiKey: "oauth-test",
source: "test",
mode: "oauth",
}));
// Returns the credential's apiKey, or an empty string when it is missing.
const requireApiKeyMock = vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "");
// Exposed as `setRuntimeApiKey` on the mocked auth storage.
const setRuntimeApiKeyMock = vi.fn();
// Return value is configured per test (see the describe block's beforeEach).
const discoverModelsMock = vi.fn();
// Partial mock: keep every real export of pi-ai and override only `complete`.
vi.mock("@mariozechner/pi-ai", async (importOriginal) => {
const actual = await importOriginal<typeof import("@mariozechner/pi-ai")>();
return {
...actual,
complete: completeMock,
};
});
// Partial mock of the MiniMax VLM module: every real export is kept and only
// `minimaxUnderstandImage` is replaced by a spy. The provider/model predicates
// are deliberately NOT re-implemented here, so the routing decision under test
// always uses the production logic and cannot drift from it.
vi.mock("../../agents/minimax-vlm.js", async (importOriginal) => {
  const actual = await importOriginal<typeof import("../../agents/minimax-vlm.js")>();
  return {
    ...actual,
    minimaxUnderstandImage: minimaxUnderstandImageMock,
  };
});
// Replace models-config with a module exposing only the no-op `ensureOpenClawModelsJson` spy.
vi.mock("../../agents/models-config.js", () => ({
ensureOpenClawModelsJson: ensureOpenClawModelsJsonMock,
}));
// Replace model-auth so credential lookup yields the fixed "oauth-test" key.
vi.mock("../../agents/model-auth.js", () => ({
getApiKeyForModel: getApiKeyForModelMock,
requireApiKey: requireApiKeyMock,
}));
// Replace model discovery: auth storage only exposes `setRuntimeApiKey`, and
// `discoverModels` returns whatever each test configures on discoverModelsMock.
vi.mock("../../agents/pi-model-discovery-runtime.js", () => ({
discoverAuthStorage: () => ({
setRuntimeApiKey: setRuntimeApiKeyMock,
}),
discoverModels: discoverModelsMock,
}));
// Pins which backend describeImageWithModel calls for minimax-portal image models:
// the MiniMax VLM helper for the canonical model id, generic `complete` otherwise.
describe("describeImageWithModel", () => {
  const portalBaseUrl = "https://api.minimax.io/anthropic";

  // Makes the mocked model lookup return a minimax-portal image model with the given id.
  const stubDiscoveredModel = (modelId: string): void => {
    discoverModelsMock.mockReturnValue({
      find: vi.fn(() => ({
        provider: "minimax-portal",
        id: modelId,
        input: ["text", "image"],
        baseUrl: portalBaseUrl,
      })),
    });
  };

  // Dynamically imports the module under test and describes a small fake PNG payload.
  const describePortalImage = async (modelId: string) => {
    const { describeImageWithModel } = await import("./image.js");
    return describeImageWithModel({
      cfg: {},
      agentDir: "/tmp/openclaw-agent",
      provider: "minimax-portal",
      model: modelId,
      buffer: Buffer.from("png-bytes"),
      fileName: "image.png",
      mime: "image/png",
      prompt: "Describe the image.",
      timeoutMs: 1000,
    });
  };

  beforeEach(() => {
    vi.clearAllMocks();
    minimaxUnderstandImageMock.mockResolvedValue("portal ok");
    stubDiscoveredModel("MiniMax-VL-01");
  });

  it("routes minimax-portal image models through the MiniMax VLM endpoint", async () => {
    const outcome = await describePortalImage("MiniMax-VL-01");

    expect(outcome).toEqual({
      text: "portal ok",
      model: "MiniMax-VL-01",
    });
    expect(ensureOpenClawModelsJsonMock).toHaveBeenCalled();
    expect(getApiKeyForModelMock).toHaveBeenCalled();
    expect(requireApiKeyMock).toHaveBeenCalled();
    expect(setRuntimeApiKeyMock).toHaveBeenCalledWith("minimax-portal", "oauth-test");

    const expectedDataUrl = `data:image/png;base64,${Buffer.from("png-bytes").toString("base64")}`;
    expect(minimaxUnderstandImageMock).toHaveBeenCalledWith({
      apiKey: "oauth-test",
      prompt: "Describe the image.",
      imageDataUrl: expectedDataUrl,
      modelBaseUrl: portalBaseUrl,
    });
    expect(completeMock).not.toHaveBeenCalled();
  });

  it("uses generic completion for non-canonical minimax-portal image models", async () => {
    stubDiscoveredModel("custom-vision");
    completeMock.mockResolvedValue({
      role: "assistant",
      api: "anthropic-messages",
      provider: "minimax-portal",
      model: "custom-vision",
      stopReason: "stop",
      timestamp: Date.now(),
      content: [{ type: "text", text: "generic ok" }],
    });

    const outcome = await describePortalImage("custom-vision");

    expect(outcome).toEqual({
      text: "generic ok",
      model: "custom-vision",
    });
    expect(completeMock).toHaveBeenCalledOnce();
    expect(minimaxUnderstandImageMock).not.toHaveBeenCalled();
  });
});

View File

@@ -1,6 +1,6 @@
import type { Api, Context, Model } from "@mariozechner/pi-ai";
import { complete } from "@mariozechner/pi-ai";
import { minimaxUnderstandImage } from "../../agents/minimax-vlm.js";
import { isMinimaxVlmModel, minimaxUnderstandImage } from "../../agents/minimax-vlm.js";
import { getApiKeyForModel, requireApiKey } from "../../agents/model-auth.js";
import { ensureOpenClawModelsJson } from "../../agents/models-config.js";
import { coerceImageAssistantText } from "../../agents/tools/image-tool.helpers.js";
@@ -40,7 +40,7 @@ export async function describeImageWithModel(
authStorage.setRuntimeApiKey(model.provider, apiKey);
const base64 = params.buffer.toString("base64");
if (model.provider === "minimax") {
if (isMinimaxVlmModel(model.provider, model.id)) {
const text = await minimaxUnderstandImage({
apiKey,
prompt: params.prompt ?? "Describe the image.",

View File

@@ -24,4 +24,12 @@ describe("media-understanding provider registry", () => {
expect(provider?.id).toBe("moonshot");
expect(provider?.capabilities).toEqual(["image", "video"]);
});
// The registry built from defaults must expose an image-only "minimax-portal" entry.
it("registers the minimax portal provider", () => {
  const portal = getMediaUnderstandingProvider(
    "minimax-portal",
    buildMediaUnderstandingRegistry(),
  );

  expect(portal?.id).toBe("minimax-portal");
  expect(portal?.capabilities).toEqual(["image"]);
});
});

View File

@@ -4,7 +4,7 @@ import { anthropicProvider } from "./anthropic/index.js";
import { deepgramProvider } from "./deepgram/index.js";
import { googleProvider } from "./google/index.js";
import { groqProvider } from "./groq/index.js";
import { minimaxProvider } from "./minimax/index.js";
import { minimaxPortalProvider, minimaxProvider } from "./minimax/index.js";
import { mistralProvider } from "./mistral/index.js";
import { moonshotProvider } from "./moonshot/index.js";
import { openaiProvider } from "./openai/index.js";
@@ -16,6 +16,7 @@ const PROVIDERS: MediaUnderstandingProvider[] = [
googleProvider,
anthropicProvider,
minimaxProvider,
minimaxPortalProvider,
moonshotProvider,
mistralProvider,
zaiProvider,

View File

@@ -6,3 +6,9 @@ export const minimaxProvider: MediaUnderstandingProvider = {
capabilities: ["image"],
describeImage: describeImageWithModel,
};
/**
 * Media-understanding provider entry for the `minimax-portal` provider id.
 * Declares image capability only and delegates image description to
 * `describeImageWithModel`.
 */
export const minimaxPortalProvider: MediaUnderstandingProvider = {
id: "minimax-portal",
capabilities: ["image"],
describeImage: describeImageWithModel,
};

View File

@@ -12,6 +12,7 @@ import type { OpenClawConfig } from "../config/config.js";
import { STATE_DIR } from "../config/paths.js";
import { logVerbose } from "../globals.js";
import { loadJsonFile, saveJsonFile } from "../infra/json-file.js";
import { AUTO_IMAGE_KEY_PROVIDERS, DEFAULT_IMAGE_MODELS } from "../media-understanding/defaults.js";
import { resolveAutoImageModel } from "../media-understanding/runner.js";
const CACHE_FILE = path.join(STATE_DIR, "telegram", "sticker-cache.json");
@@ -142,7 +143,6 @@ export function getCacheStats(): { count: number; oldestAt?: string; newestAt?:
const STICKER_DESCRIPTION_PROMPT =
"Describe this sticker image in 1-2 sentences. Focus on what the sticker depicts (character, object, action, emotion). Be concise and objective.";
const VISION_PROVIDERS = ["openai", "anthropic", "google", "minimax"] as const;
let imageRuntimePromise: Promise<
typeof import("../media-understanding/providers/image-runtime.js")
> | null = null;
@@ -198,14 +198,7 @@ export async function describeStickerImage(params: DescribeStickerParams): Promi
if (entries.length === 0) {
return undefined;
}
const defaultId =
provider === "openai"
? "gpt-5-mini"
: provider === "anthropic"
? "claude-opus-4-6"
: provider === "google"
? "gemini-3-flash-preview"
: "MiniMax-VL-01";
const defaultId = DEFAULT_IMAGE_MODELS[provider];
const preferred = entries.find((entry) => entry.id === defaultId);
return preferred ?? entries[0];
};
@@ -213,14 +206,16 @@ export async function describeStickerImage(params: DescribeStickerParams): Promi
let resolved = null as { provider: string; model?: string } | null;
if (
activeModel &&
VISION_PROVIDERS.includes(activeModel.provider as (typeof VISION_PROVIDERS)[number]) &&
AUTO_IMAGE_KEY_PROVIDERS.includes(
activeModel.provider as (typeof AUTO_IMAGE_KEY_PROVIDERS)[number],
) &&
(await hasProviderKey(activeModel.provider))
) {
resolved = activeModel;
}
if (!resolved) {
for (const provider of VISION_PROVIDERS) {
for (const provider of AUTO_IMAGE_KEY_PROVIDERS) {
if (!(await hasProviderKey(provider))) {
continue;
}