Merge pull request #756 from leaph/check-ai-thinking-settings

feat(iflow): add model-specific thinking configs for GLM-4.7 and Mini…
Luis Pater authored 2025-12-28 02:08:27 +08:00 · committed by GitHub
3 changed files with 98 additions and 11 deletions

View File

@@ -7,6 +7,13 @@ assignees: ''
 ---
+**Is it a request payload issue?**
+[ ] Yes, this is a request payload issue. I am using a client/cURL to send a request payload, but I received an unexpected error.
+[ ] No, it's another issue.
+**If it's a request payload issue, you MUST know**
+Our team doesn't have any GODs or ORACLEs or MIND READERs. Please make sure to attach the request log or curl payload.
 **Describe the bug**
 A clear and concise description of what the bug is.

View File

@@ -741,7 +741,7 @@ func GetIFlowModels() []*ModelInfo {
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600}, {ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600}, {ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000}, {ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000}, {ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
} }
models := make([]*ModelInfo, 0, len(entries)) models := make([]*ModelInfo, 0, len(entries))
for _, entry := range entries { for _, entry := range entries {

View File

@@ -67,6 +67,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 		return resp, errValidate
 	}
 	body = applyIFlowThinkingConfig(body)
+	body = preserveReasoningContentInMessages(body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -159,6 +160,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		return nil, errValidate
 	}
 	body = applyIFlowThinkingConfig(body)
+	body = preserveReasoningContentInMessages(body)
 	// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
 	toolsResult := gjson.GetBytes(body, "tools")
 	if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
@@ -445,20 +447,98 @@ func ensureToolsArray(body []byte) []byte {
 	return updated
 }
 
-// applyIFlowThinkingConfig converts normalized reasoning_effort to iFlow chat_template_kwargs.enable_thinking.
-// This should be called after NormalizeThinkingConfig has processed the payload.
-// iFlow only supports boolean enable_thinking, so any non-"none" effort enables thinking.
-func applyIFlowThinkingConfig(body []byte) []byte {
-	effort := gjson.GetBytes(body, "reasoning_effort")
-	if !effort.Exists() {
-		return body
-	}
-	val := strings.ToLower(strings.TrimSpace(effort.String()))
-	enableThinking := val != "none" && val != ""
-	body, _ = sjson.DeleteBytes(body, "reasoning_effort")
-	body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
+// preserveReasoningContentInMessages ensures reasoning_content from assistant messages in the
+// conversation history is preserved when sending to iFlow models that support thinking.
+// This is critical for multi-turn conversations where the model needs to see its previous
+// reasoning to maintain coherent thought chains across tool calls and conversation turns.
+//
+// For GLM-4.7 and MiniMax-M2.1, the full assistant response (including reasoning) must be
+// appended back into message history before the next call.
+func preserveReasoningContentInMessages(body []byte) []byte {
+	model := strings.ToLower(gjson.GetBytes(body, "model").String())
+	// Only apply to models that support thinking with history preservation
+	needsPreservation := strings.HasPrefix(model, "glm-4.7") ||
+		strings.HasPrefix(model, "glm-4-7") ||
+		strings.HasPrefix(model, "minimax-m2.1") ||
+		strings.HasPrefix(model, "minimax-m2-1")
+	if !needsPreservation {
+		return body
+	}
+	messages := gjson.GetBytes(body, "messages")
+	if !messages.Exists() || !messages.IsArray() {
+		return body
+	}
+	// Check if any assistant message already has reasoning_content preserved
+	hasReasoningContent := false
+	messages.ForEach(func(_, msg gjson.Result) bool {
+		role := msg.Get("role").String()
+		if role == "assistant" {
+			rc := msg.Get("reasoning_content")
+			if rc.Exists() && rc.String() != "" {
+				hasReasoningContent = true
+				return false // stop iteration
+			}
+		}
+		return true
+	})
+	// If reasoning content is already present, the messages are properly formatted.
+	// No need to modify - the client has correctly preserved reasoning in history.
+	if hasReasoningContent {
+		log.Debugf("iflow executor: reasoning_content found in message history for %s", model)
+	}
+	return body
+}
+
+// applyIFlowThinkingConfig converts normalized reasoning_effort to model-specific thinking configurations.
+// This should be called after NormalizeThinkingConfig has processed the payload.
+//
+// Model-specific handling:
+//   - GLM-4.7: Uses extra_body={"thinking": {"type": "enabled"}, "clear_thinking": false}
+//   - MiniMax-M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
+//   - Other iFlow models: Uses chat_template_kwargs.enable_thinking (boolean)
+func applyIFlowThinkingConfig(body []byte) []byte {
+	effort := gjson.GetBytes(body, "reasoning_effort")
+	model := strings.ToLower(gjson.GetBytes(body, "model").String())
+	// Check if thinking should be enabled
+	val := ""
+	if effort.Exists() {
+		val = strings.ToLower(strings.TrimSpace(effort.String()))
+	}
+	enableThinking := effort.Exists() && val != "none" && val != ""
+	// Remove reasoning_effort as we'll convert to model-specific format
+	if effort.Exists() {
+		body, _ = sjson.DeleteBytes(body, "reasoning_effort")
+	}
+	// GLM-4.7: Use extra_body with thinking config and clear_thinking: false
+	if strings.HasPrefix(model, "glm-4.7") || strings.HasPrefix(model, "glm-4-7") {
+		if enableThinking {
+			body, _ = sjson.SetBytes(body, "extra_body.thinking.type", "enabled")
+			body, _ = sjson.SetBytes(body, "extra_body.clear_thinking", false)
+		}
+		return body
+	}
+	// MiniMax-M2.1: Use reasoning_split=true for interleaved thinking
+	if strings.HasPrefix(model, "minimax-m2.1") || strings.HasPrefix(model, "minimax-m2-1") {
+		if enableThinking {
+			body, _ = sjson.SetBytes(body, "reasoning_split", true)
+		}
+		return body
+	}
+	// Other iFlow models (including GLM-4.6): Use chat_template_kwargs.enable_thinking
+	if effort.Exists() {
+		body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
+	}
 	return body
 }
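
Editor's note: the reasoning_effort mapping above can be exercised in isolation. The following is a minimal sketch, not code from this repository: the sample payloads are invented, and it uses sjson's string-based Set/Delete, which behave like the SetBytes/DeleteBytes calls in the executor.

package main

import (
	"fmt"

	"github.com/tidwall/sjson"
)

func main() {
	// GLM-4.7: reasoning_effort is dropped; extra_body thinking fields are set.
	glm := `{"model":"glm-4.7","reasoning_effort":"high","messages":[]}`
	glm, _ = sjson.Delete(glm, "reasoning_effort")
	glm, _ = sjson.Set(glm, "extra_body.thinking.type", "enabled")
	glm, _ = sjson.Set(glm, "extra_body.clear_thinking", false)
	fmt.Println(glm)
	// {"model":"glm-4.7","messages":[],"extra_body":{"thinking":{"type":"enabled"},"clear_thinking":false}}

	// MiniMax-M2.1: reasoning_effort is dropped; reasoning_split is set.
	mm := `{"model":"minimax-m2.1","reasoning_effort":"medium","messages":[]}`
	mm, _ = sjson.Delete(mm, "reasoning_effort")
	mm, _ = sjson.Set(mm, "reasoning_split", true)
	fmt.Println(mm)
	// {"model":"minimax-m2.1","messages":[],"reasoning_split":true}

	// Any other iFlow model: boolean enable_thinking, false only for
	// effort "none" (or empty).
	other := `{"model":"glm-4.6","reasoning_effort":"none","messages":[]}`
	other, _ = sjson.Delete(other, "reasoning_effort")
	other, _ = sjson.Set(other, "chat_template_kwargs.enable_thinking", false)
	fmt.Println(other)
	// {"model":"glm-4.6","messages":[],"chat_template_kwargs":{"enable_thinking":false}}
}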
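
Note also that preserveReasoningContentInMessages, as written, only detects and logs whether the client kept reasoning in history; it does not rewrite the payload. The shape it scans for is sketched below with a hypothetical multi-turn body (the field names follow the diff; the conversation content is invented).

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
)

func main() {
	// Hypothetical second-turn payload: the assistant message is replayed
	// with its reasoning_content intact, which is what the executor's
	// check looks for on thinking-capable models.
	body := `{
		"model": "minimax-m2.1",
		"messages": [
			{"role": "user", "content": "What is 17*23?"},
			{"role": "assistant",
			 "reasoning_content": "17*23 = 17*20 + 17*3 = 340 + 51 = 391.",
			 "content": "391"},
			{"role": "user", "content": "And plus 9?"}
		]
	}`

	found := false
	gjson.Get(body, "messages").ForEach(func(_, msg gjson.Result) bool {
		if msg.Get("role").String() == "assistant" && msg.Get("reasoning_content").String() != "" {
			found = true
			return false // stop iteration, mirroring the executor's check
		}
		return true
	})
	fmt.Println("reasoning preserved in history:", found) // true
}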