mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-03-14 01:34:40 +00:00
Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f957b8948c | ||
|
|
cd0b14dd2d | ||
|
|
894703a484 | ||
|
|
d08a2453f7 | ||
|
|
3f53eea1e0 | ||
|
|
5a812a1e93 | ||
|
|
5e624cc7b1 | ||
|
|
f3d1cc8dc1 | ||
|
|
e889efeda7 | ||
|
|
0a3a95521c | ||
|
|
e0be6c5786 |
@@ -28,3 +28,4 @@ bin/*
|
||||
.claude/*
|
||||
.vscode/*
|
||||
.serena/*
|
||||
.bmad/*
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -32,6 +32,7 @@ GEMINI.md
|
||||
.vscode/*
|
||||
.claude/*
|
||||
.serena/*
|
||||
.bmad/*
|
||||
.mcp/cache/
|
||||
|
||||
# macOS
|
||||
|
||||
@@ -25,6 +25,9 @@ remote-management:
|
||||
# Disable the bundled management control panel asset download and HTTP route when true.
|
||||
disable-control-panel: false
|
||||
|
||||
# GitHub repository for the management control panel. Accepts a repository URL or releases API URL.
|
||||
panel-github-repository: "https://github.com/router-for-me/Cli-Proxy-API-Management-Center"
|
||||
|
||||
# Authentication directory (supports ~ for home directory)
|
||||
auth-dir: "~/.cli-proxy-api"
|
||||
|
||||
|
||||
12
go.mod
12
go.mod
@@ -18,10 +18,10 @@ require (
|
||||
github.com/tidwall/gjson v1.18.0
|
||||
github.com/tidwall/sjson v1.2.5
|
||||
github.com/tiktoken-go/tokenizer v0.7.0
|
||||
golang.org/x/crypto v0.43.0
|
||||
golang.org/x/net v0.46.0
|
||||
golang.org/x/crypto v0.45.0
|
||||
golang.org/x/net v0.47.0
|
||||
golang.org/x/oauth2 v0.30.0
|
||||
golang.org/x/term v0.36.0
|
||||
golang.org/x/term v0.37.0
|
||||
gopkg.in/natefinch/lumberjack.v2 v2.2.1
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
@@ -69,9 +69,9 @@ require (
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
github.com/ugorji/go/codec v1.2.12 // indirect
|
||||
golang.org/x/arch v0.8.0 // indirect
|
||||
golang.org/x/sync v0.17.0 // indirect
|
||||
golang.org/x/sys v0.37.0 // indirect
|
||||
golang.org/x/text v0.30.0 // indirect
|
||||
golang.org/x/sync v0.18.0 // indirect
|
||||
golang.org/x/sys v0.38.0 // indirect
|
||||
golang.org/x/text v0.31.0 // indirect
|
||||
google.golang.org/protobuf v1.34.1 // indirect
|
||||
gopkg.in/ini.v1 v1.67.0 // indirect
|
||||
)
|
||||
|
||||
24
go.sum
24
go.sum
@@ -160,23 +160,23 @@ github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZ
|
||||
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
||||
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
|
||||
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
||||
golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
|
||||
golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
|
||||
golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
|
||||
golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
|
||||
golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
|
||||
golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
|
||||
golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
|
||||
golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
|
||||
golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
|
||||
golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU=
|
||||
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
|
||||
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
||||
golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
|
||||
golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
|
||||
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q=
|
||||
golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss=
|
||||
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
|
||||
golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
|
||||
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
|
||||
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU=
|
||||
golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254=
|
||||
golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
|
||||
golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
|
||||
|
||||
@@ -628,7 +628,7 @@ func (s *Server) serveManagementControlPanel(c *gin.Context) {
|
||||
|
||||
if _, err := os.Stat(filePath); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
go managementasset.EnsureLatestManagementHTML(context.Background(), managementasset.StaticDir(s.configFilePath), cfg.ProxyURL)
|
||||
go managementasset.EnsureLatestManagementHTML(context.Background(), managementasset.StaticDir(s.configFilePath), cfg.ProxyURL, cfg.RemoteManagement.PanelGitHubRepository)
|
||||
c.AbortWithStatus(http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
@@ -948,7 +948,7 @@ func (s *Server) UpdateClients(cfg *config.Config) {
|
||||
|
||||
if !cfg.RemoteManagement.DisableControlPanel {
|
||||
staticDir := managementasset.StaticDir(s.configFilePath)
|
||||
go managementasset.EnsureLatestManagementHTML(context.Background(), staticDir, cfg.ProxyURL)
|
||||
go managementasset.EnsureLatestManagementHTML(context.Background(), staticDir, cfg.ProxyURL, cfg.RemoteManagement.PanelGitHubRepository)
|
||||
}
|
||||
if s.mgmt != nil {
|
||||
s.mgmt.SetConfig(cfg)
|
||||
|
||||
@@ -17,6 +17,8 @@ import (
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
const DefaultPanelGitHubRepository = "https://github.com/router-for-me/Cli-Proxy-API-Management-Center"
|
||||
|
||||
// Config represents the application's configuration, loaded from a YAML file.
|
||||
type Config struct {
|
||||
config.SDKConfig `yaml:",inline"`
|
||||
@@ -116,6 +118,9 @@ type RemoteManagement struct {
|
||||
SecretKey string `yaml:"secret-key"`
|
||||
// DisableControlPanel skips serving and syncing the bundled management UI when true.
|
||||
DisableControlPanel bool `yaml:"disable-control-panel"`
|
||||
// PanelGitHubRepository overrides the GitHub repository used to fetch the management panel asset.
|
||||
// Accepts either a repository URL (https://github.com/org/repo) or an API releases endpoint.
|
||||
PanelGitHubRepository string `yaml:"panel-github-repository"`
|
||||
}
|
||||
|
||||
// QuotaExceeded defines the behavior when API quota limits are exceeded.
|
||||
@@ -369,6 +374,7 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
|
||||
cfg.UsageStatisticsEnabled = false
|
||||
cfg.DisableCooling = false
|
||||
cfg.AmpCode.RestrictManagementToLocalhost = false // Default to false: API key auth is sufficient
|
||||
cfg.RemoteManagement.PanelGitHubRepository = DefaultPanelGitHubRepository
|
||||
cfg.IncognitoBrowser = false // Default to normal browser (AWS uses incognito by force)
|
||||
if err = yaml.Unmarshal(data, &cfg); err != nil {
|
||||
if optional {
|
||||
@@ -405,6 +411,11 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
|
||||
_ = SaveConfigPreserveCommentsUpdateNestedScalar(configFile, []string{"remote-management", "secret-key"}, hashed)
|
||||
}
|
||||
|
||||
cfg.RemoteManagement.PanelGitHubRepository = strings.TrimSpace(cfg.RemoteManagement.PanelGitHubRepository)
|
||||
if cfg.RemoteManagement.PanelGitHubRepository == "" {
|
||||
cfg.RemoteManagement.PanelGitHubRepository = DefaultPanelGitHubRepository
|
||||
}
|
||||
|
||||
// Sync request authentication providers with inline API keys for backwards compatibility.
|
||||
syncInlineAccessProvider(&cfg)
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
@@ -23,10 +24,10 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
managementReleaseURL = "https://api.github.com/repos/router-for-me/Cli-Proxy-API-Management-Center/releases/latest"
|
||||
managementAssetName = "management.html"
|
||||
httpUserAgent = "CLIProxyAPI-management-updater"
|
||||
updateCheckInterval = 3 * time.Hour
|
||||
defaultManagementReleaseURL = "https://api.github.com/repos/router-for-me/Cli-Proxy-API-Management-Center/releases/latest"
|
||||
managementAssetName = "management.html"
|
||||
httpUserAgent = "CLIProxyAPI-management-updater"
|
||||
updateCheckInterval = 3 * time.Hour
|
||||
)
|
||||
|
||||
// ManagementFileName exposes the control panel asset filename.
|
||||
@@ -97,7 +98,7 @@ func runAutoUpdater(ctx context.Context) {
|
||||
|
||||
configPath, _ := schedulerConfigPath.Load().(string)
|
||||
staticDir := StaticDir(configPath)
|
||||
EnsureLatestManagementHTML(ctx, staticDir, cfg.ProxyURL)
|
||||
EnsureLatestManagementHTML(ctx, staticDir, cfg.ProxyURL, cfg.RemoteManagement.PanelGitHubRepository)
|
||||
}
|
||||
|
||||
runOnce()
|
||||
@@ -181,7 +182,7 @@ func FilePath(configFilePath string) string {
|
||||
// EnsureLatestManagementHTML checks the latest management.html asset and updates the local copy when needed.
|
||||
// The function is designed to run in a background goroutine and will never panic.
|
||||
// It enforces a 3-hour rate limit to avoid frequent checks on config/auth file changes.
|
||||
func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL string) {
|
||||
func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL string, panelRepository string) {
|
||||
if ctx == nil {
|
||||
ctx = context.Background()
|
||||
}
|
||||
@@ -214,6 +215,7 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
|
||||
return
|
||||
}
|
||||
|
||||
releaseURL := resolveReleaseURL(panelRepository)
|
||||
client := newHTTPClient(proxyURL)
|
||||
|
||||
localPath := filepath.Join(staticDir, managementAssetName)
|
||||
@@ -225,7 +227,7 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
|
||||
localHash = ""
|
||||
}
|
||||
|
||||
asset, remoteHash, err := fetchLatestAsset(ctx, client)
|
||||
asset, remoteHash, err := fetchLatestAsset(ctx, client, releaseURL)
|
||||
if err != nil {
|
||||
log.WithError(err).Warn("failed to fetch latest management release information")
|
||||
return
|
||||
@@ -254,8 +256,44 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
|
||||
log.Infof("management asset updated successfully (hash=%s)", downloadedHash)
|
||||
}
|
||||
|
||||
func fetchLatestAsset(ctx context.Context, client *http.Client) (*releaseAsset, string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, managementReleaseURL, nil)
|
||||
func resolveReleaseURL(repo string) string {
|
||||
repo = strings.TrimSpace(repo)
|
||||
if repo == "" {
|
||||
return defaultManagementReleaseURL
|
||||
}
|
||||
|
||||
parsed, err := url.Parse(repo)
|
||||
if err != nil || parsed.Host == "" {
|
||||
return defaultManagementReleaseURL
|
||||
}
|
||||
|
||||
host := strings.ToLower(parsed.Host)
|
||||
parsed.Path = strings.TrimSuffix(parsed.Path, "/")
|
||||
|
||||
if host == "api.github.com" {
|
||||
if !strings.HasSuffix(strings.ToLower(parsed.Path), "/releases/latest") {
|
||||
parsed.Path = parsed.Path + "/releases/latest"
|
||||
}
|
||||
return parsed.String()
|
||||
}
|
||||
|
||||
if host == "github.com" {
|
||||
parts := strings.Split(strings.Trim(parsed.Path, "/"), "/")
|
||||
if len(parts) >= 2 && parts[0] != "" && parts[1] != "" {
|
||||
repoName := strings.TrimSuffix(parts[1], ".git")
|
||||
return fmt.Sprintf("https://api.github.com/repos/%s/%s/releases/latest", parts[0], repoName)
|
||||
}
|
||||
}
|
||||
|
||||
return defaultManagementReleaseURL
|
||||
}
|
||||
|
||||
func fetchLatestAsset(ctx context.Context, client *http.Client, releaseURL string) (*releaseAsset, string, error) {
|
||||
if strings.TrimSpace(releaseURL) == "" {
|
||||
releaseURL = defaultManagementReleaseURL
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, releaseURL, nil)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("create release request: %w", err)
|
||||
}
|
||||
|
||||
@@ -166,16 +166,17 @@ type KiroExecutor struct {
|
||||
// This is critical because OpenAI and Claude formats have different tool structures:
|
||||
// - OpenAI: tools[].function.name, tools[].function.description
|
||||
// - Claude: tools[].name, tools[].description
|
||||
// headers parameter allows checking Anthropic-Beta header for thinking mode detection.
|
||||
// Returns the serialized JSON payload and a boolean indicating whether thinking mode was injected.
|
||||
func buildKiroPayloadForFormat(body []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, sourceFormat sdktranslator.Format) ([]byte, bool) {
|
||||
func buildKiroPayloadForFormat(body []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, sourceFormat sdktranslator.Format, headers http.Header) ([]byte, bool) {
|
||||
switch sourceFormat.String() {
|
||||
case "openai":
|
||||
log.Debugf("kiro: using OpenAI payload builder for source format: %s", sourceFormat.String())
|
||||
return kiroopenai.BuildKiroPayloadFromOpenAI(body, modelID, profileArn, origin, isAgentic, isChatOnly)
|
||||
return kiroopenai.BuildKiroPayloadFromOpenAI(body, modelID, profileArn, origin, isAgentic, isChatOnly, headers, nil)
|
||||
default:
|
||||
// Default to Claude format (also handles "claude", "kiro", etc.)
|
||||
log.Debugf("kiro: using Claude payload builder for source format: %s", sourceFormat.String())
|
||||
return kiroclaude.BuildKiroPayload(body, modelID, profileArn, origin, isAgentic, isChatOnly)
|
||||
return kiroclaude.BuildKiroPayload(body, modelID, profileArn, origin, isAgentic, isChatOnly, headers, nil)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -249,7 +250,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
|
||||
|
||||
// Rebuild payload with the correct origin for this endpoint
|
||||
// Each endpoint requires its matching Origin value in the request body
|
||||
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
|
||||
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
|
||||
|
||||
log.Debugf("kiro: trying endpoint %d/%d: %s (Name: %s, Origin: %s)",
|
||||
endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin)
|
||||
@@ -359,7 +360,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
|
||||
auth = refreshedAuth
|
||||
accessToken, profileArn = kiroCredentials(auth)
|
||||
// Rebuild payload with new profile ARN if changed
|
||||
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
|
||||
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
|
||||
log.Infof("kiro: token refreshed successfully, retrying request")
|
||||
continue
|
||||
}
|
||||
@@ -416,7 +417,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
|
||||
if refreshedAuth != nil {
|
||||
auth = refreshedAuth
|
||||
accessToken, profileArn = kiroCredentials(auth)
|
||||
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
|
||||
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
|
||||
log.Infof("kiro: token refreshed for 403, retrying request")
|
||||
continue
|
||||
}
|
||||
@@ -555,10 +556,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
|
||||
|
||||
// Rebuild payload with the correct origin for this endpoint
|
||||
// Each endpoint requires its matching Origin value in the request body
|
||||
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
|
||||
// Kiro API always returns <thinking> tags regardless of whether thinking mode was requested
|
||||
// So we always enable thinking parsing for Kiro responses
|
||||
thinkingEnabled := true
|
||||
kiroPayload, thinkingEnabled := buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
|
||||
|
||||
log.Debugf("kiro: stream trying endpoint %d/%d: %s (Name: %s, Origin: %s)",
|
||||
endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin)
|
||||
@@ -681,7 +679,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
|
||||
auth = refreshedAuth
|
||||
accessToken, profileArn = kiroCredentials(auth)
|
||||
// Rebuild payload with new profile ARN if changed
|
||||
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
|
||||
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
|
||||
log.Infof("kiro: token refreshed successfully, retrying stream request")
|
||||
continue
|
||||
}
|
||||
@@ -738,7 +736,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
|
||||
if refreshedAuth != nil {
|
||||
auth = refreshedAuth
|
||||
accessToken, profileArn = kiroCredentials(auth)
|
||||
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
|
||||
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
|
||||
log.Infof("kiro: token refreshed for 403, retrying stream request")
|
||||
continue
|
||||
}
|
||||
@@ -1702,6 +1700,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
pendingEndTagChars := 0 // Number of chars that might be start of </thinking>
|
||||
isThinkingBlockOpen := false // Track if thinking content block is open
|
||||
thinkingBlockIndex := -1 // Index of the thinking content block
|
||||
var accumulatedThinkingContent strings.Builder // Accumulate thinking content for signature generation
|
||||
|
||||
// Code block state tracking for heuristic thinking tag parsing
|
||||
// When inside a markdown code block, <thinking> tags should NOT be parsed
|
||||
@@ -1847,6 +1846,8 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
// Accumulate thinking content for signature generation
|
||||
accumulatedThinkingContent.WriteString(pendingText)
|
||||
} else {
|
||||
// Output as regular text
|
||||
if !isTextBlockOpen {
|
||||
@@ -2390,6 +2391,8 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
// Accumulate thinking content for signature generation
|
||||
accumulatedThinkingContent.WriteString(thinkContent)
|
||||
}
|
||||
|
||||
// Note: Partial tag handling is done via pendingEndTagChars
|
||||
@@ -2397,7 +2400,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
|
||||
// Close thinking block
|
||||
if isThinkingBlockOpen {
|
||||
blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(thinkingBlockIndex)
|
||||
blockStop := kiroclaude.BuildClaudeThinkingBlockStopEvent(thinkingBlockIndex)
|
||||
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
@@ -2405,6 +2408,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
}
|
||||
}
|
||||
isThinkingBlockOpen = false
|
||||
accumulatedThinkingContent.Reset() // Reset for potential next thinking block
|
||||
}
|
||||
|
||||
inThinkBlock = false
|
||||
@@ -2450,6 +2454,8 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
// Accumulate thinking content for signature generation
|
||||
accumulatedThinkingContent.WriteString(contentToEmit)
|
||||
}
|
||||
|
||||
remaining = ""
|
||||
@@ -2592,6 +2598,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
// Handle tool uses in response (with deduplication)
|
||||
for _, tu := range toolUses {
|
||||
toolUseID := kirocommon.GetString(tu, "toolUseId")
|
||||
toolName := kirocommon.GetString(tu, "name")
|
||||
|
||||
// Check for duplicate
|
||||
if processedIDs[toolUseID] {
|
||||
@@ -2615,7 +2622,6 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
|
||||
// Emit tool_use content block
|
||||
contentBlockIndex++
|
||||
toolName := kirocommon.GetString(tu, "name")
|
||||
|
||||
blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", toolUseID, toolName)
|
||||
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
|
||||
@@ -2888,7 +2894,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
if calculatedInputTokens > 0 {
|
||||
localEstimate := totalUsage.InputTokens
|
||||
totalUsage.InputTokens = calculatedInputTokens
|
||||
log.Infof("kiro: using contextUsagePercentage (%.2f%%) to calculate input tokens: %d (local estimate was: %d)",
|
||||
log.Debugf("kiro: using contextUsagePercentage (%.2f%%) to calculate input tokens: %d (local estimate was: %d)",
|
||||
upstreamContextPercentage, calculatedInputTokens, localEstimate)
|
||||
}
|
||||
}
|
||||
@@ -2897,7 +2903,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
|
||||
// Log upstream usage information if received
|
||||
if hasUpstreamUsage {
|
||||
log.Infof("kiro: upstream usage - credits: %.4f, context: %.2f%%, final tokens - input: %d, output: %d, total: %d",
|
||||
log.Debugf("kiro: upstream usage - credits: %.4f, context: %.2f%%, final tokens - input: %d, output: %d, total: %d",
|
||||
upstreamCreditUsage, upstreamContextPercentage,
|
||||
totalUsage.InputTokens, totalUsage.OutputTokens, totalUsage.TotalTokens)
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ package claude
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
@@ -33,6 +34,7 @@ type KiroInferenceConfig struct {
|
||||
TopP float64 `json:"topP,omitempty"`
|
||||
}
|
||||
|
||||
|
||||
// KiroConversationState holds the conversation context
|
||||
type KiroConversationState struct {
|
||||
ChatTriggerType string `json:"chatTriggerType"` // Required: "MANUAL" - must be first field
|
||||
@@ -134,9 +136,11 @@ func ConvertClaudeRequestToKiro(modelName string, inputRawJSON []byte, stream bo
|
||||
// origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE.
|
||||
// isAgentic parameter enables chunked write optimization prompt for -agentic model variants.
|
||||
// isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode).
|
||||
// Supports thinking mode - when Claude API thinking parameter is present, injects thinkingHint.
|
||||
// headers parameter allows checking Anthropic-Beta header for thinking mode detection.
|
||||
// metadata parameter is kept for API compatibility but no longer used for thinking configuration.
|
||||
// Supports thinking mode - when enabled, injects thinking tags into system prompt.
|
||||
// Returns the payload and a boolean indicating whether thinking mode was injected.
|
||||
func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) ([]byte, bool) {
|
||||
func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, headers http.Header, metadata map[string]any) ([]byte, bool) {
|
||||
// Extract max_tokens for potential use in inferenceConfig
|
||||
// Handle -1 as "use maximum" (Kiro max output is ~32000 tokens)
|
||||
const kiroMaxOutputTokens = 32000
|
||||
@@ -181,26 +185,9 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
|
||||
// Extract system prompt
|
||||
systemPrompt := extractSystemPrompt(claudeBody)
|
||||
|
||||
// Check for thinking mode using the comprehensive IsThinkingEnabled function
|
||||
// This supports Claude API format, OpenAI reasoning_effort, and AMP/Cursor format
|
||||
thinkingEnabled := IsThinkingEnabled(claudeBody)
|
||||
_, budgetTokens := checkThinkingMode(claudeBody) // Get budget tokens from Claude format if available
|
||||
if budgetTokens <= 0 {
|
||||
// Calculate budgetTokens based on max_tokens if available
|
||||
// Use 50% of max_tokens for thinking, with min 8000 and max 24000
|
||||
if maxTokens > 0 {
|
||||
budgetTokens = maxTokens / 2
|
||||
if budgetTokens < 8000 {
|
||||
budgetTokens = 8000
|
||||
}
|
||||
if budgetTokens > 24000 {
|
||||
budgetTokens = 24000
|
||||
}
|
||||
log.Debugf("kiro: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens)
|
||||
} else {
|
||||
budgetTokens = 16000 // Default budget tokens
|
||||
}
|
||||
}
|
||||
// Check for thinking mode using the comprehensive IsThinkingEnabledWithHeaders function
|
||||
// This supports Claude API format, OpenAI reasoning_effort, AMP/Cursor format, and Anthropic-Beta header
|
||||
thinkingEnabled := IsThinkingEnabledWithHeaders(claudeBody, headers)
|
||||
|
||||
// Inject timestamp context
|
||||
timestamp := time.Now().Format("2006-01-02 15:04:05 MST")
|
||||
@@ -231,19 +218,26 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
|
||||
log.Debugf("kiro: injected tool_choice hint into system prompt")
|
||||
}
|
||||
|
||||
// Inject thinking hint when thinking mode is enabled
|
||||
if thinkingEnabled {
|
||||
if systemPrompt != "" {
|
||||
systemPrompt += "\n"
|
||||
}
|
||||
dynamicThinkingHint := fmt.Sprintf("<thinking_mode>interleaved</thinking_mode><max_thinking_length>%d</max_thinking_length>", budgetTokens)
|
||||
systemPrompt += dynamicThinkingHint
|
||||
log.Debugf("kiro: injected dynamic thinking hint into system prompt, max_thinking_length: %d", budgetTokens)
|
||||
}
|
||||
|
||||
// Convert Claude tools to Kiro format
|
||||
kiroTools := convertClaudeToolsToKiro(tools)
|
||||
|
||||
// Thinking mode implementation:
|
||||
// Kiro API doesn't accept max_tokens for thinking. Instead, thinking mode is enabled
|
||||
// by injecting <thinking_mode> and <max_thinking_length> tags into the system prompt.
|
||||
// We use a fixed max_thinking_length value since Kiro handles the actual budget internally.
|
||||
if thinkingEnabled {
|
||||
thinkingHint := `<thinking_mode>interleaved</thinking_mode>
|
||||
<max_thinking_length>200000</max_thinking_length>
|
||||
|
||||
IMPORTANT: You MUST use <thinking>...</thinking> tags to show your reasoning process before providing your final response. Think step by step inside the thinking tags.`
|
||||
if systemPrompt != "" {
|
||||
systemPrompt = thinkingHint + "\n\n" + systemPrompt
|
||||
} else {
|
||||
systemPrompt = thinkingHint
|
||||
}
|
||||
log.Infof("kiro: injected thinking prompt, has_tools: %v", len(kiroTools) > 0)
|
||||
}
|
||||
|
||||
// Process messages and build history
|
||||
history, currentUserMsg, currentToolResults := processMessages(messages, modelID, origin)
|
||||
|
||||
@@ -280,6 +274,7 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
|
||||
}
|
||||
|
||||
// Build inferenceConfig if we have any inference parameters
|
||||
// Note: Kiro API doesn't actually use max_tokens for thinking budget
|
||||
var inferenceConfig *KiroInferenceConfig
|
||||
if maxTokens > 0 || hasTemperature || hasTopP {
|
||||
inferenceConfig = &KiroInferenceConfig{}
|
||||
@@ -350,7 +345,7 @@ func extractSystemPrompt(claudeBody []byte) string {
|
||||
// checkThinkingMode checks if thinking mode is enabled in the Claude request
|
||||
func checkThinkingMode(claudeBody []byte) (bool, int64) {
|
||||
thinkingEnabled := false
|
||||
var budgetTokens int64 = 16000
|
||||
var budgetTokens int64 = 24000
|
||||
|
||||
thinkingField := gjson.GetBytes(claudeBody, "thinking")
|
||||
if thinkingField.Exists() {
|
||||
@@ -373,6 +368,32 @@ func checkThinkingMode(claudeBody []byte) (bool, int64) {
|
||||
return thinkingEnabled, budgetTokens
|
||||
}
|
||||
|
||||
// hasThinkingTagInBody checks if the request body already contains thinking configuration tags.
|
||||
// This is used to prevent duplicate injection when client (e.g., AMP/Cursor) already includes thinking config.
|
||||
func hasThinkingTagInBody(body []byte) bool {
|
||||
bodyStr := string(body)
|
||||
return strings.Contains(bodyStr, "<thinking_mode>") || strings.Contains(bodyStr, "<max_thinking_length>")
|
||||
}
|
||||
|
||||
|
||||
// IsThinkingEnabledFromHeader checks if thinking mode is enabled via Anthropic-Beta header.
|
||||
// Claude CLI uses "Anthropic-Beta: interleaved-thinking-2025-05-14" to enable thinking.
|
||||
func IsThinkingEnabledFromHeader(headers http.Header) bool {
|
||||
if headers == nil {
|
||||
return false
|
||||
}
|
||||
betaHeader := headers.Get("Anthropic-Beta")
|
||||
if betaHeader == "" {
|
||||
return false
|
||||
}
|
||||
// Check for interleaved-thinking beta feature
|
||||
if strings.Contains(betaHeader, "interleaved-thinking") {
|
||||
log.Debugf("kiro: thinking mode enabled via Anthropic-Beta header: %s", betaHeader)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// IsThinkingEnabled is a public wrapper to check if thinking mode is enabled.
|
||||
// This is used by the executor to determine whether to parse <thinking> tags in responses.
|
||||
// When thinking is NOT enabled in the request, <thinking> tags in responses should be
|
||||
@@ -383,6 +404,21 @@ func checkThinkingMode(claudeBody []byte) (bool, int64) {
|
||||
// - OpenAI format: reasoning_effort parameter
|
||||
// - AMP/Cursor format: <thinking_mode>interleaved</thinking_mode> in system prompt
|
||||
func IsThinkingEnabled(body []byte) bool {
|
||||
return IsThinkingEnabledWithHeaders(body, nil)
|
||||
}
|
||||
|
||||
// IsThinkingEnabledWithHeaders checks if thinking mode is enabled from body or headers.
|
||||
// This is the comprehensive check that supports all thinking detection methods:
|
||||
// - Claude API format: thinking.type = "enabled"
|
||||
// - OpenAI format: reasoning_effort parameter
|
||||
// - AMP/Cursor format: <thinking_mode>interleaved</thinking_mode> in system prompt
|
||||
// - Anthropic-Beta header: interleaved-thinking-2025-05-14
|
||||
func IsThinkingEnabledWithHeaders(body []byte, headers http.Header) bool {
|
||||
// Check Anthropic-Beta header first (Claude Code uses this)
|
||||
if IsThinkingEnabledFromHeader(headers) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Check Claude API format first (thinking.type = "enabled")
|
||||
enabled, _ := checkThinkingMode(body)
|
||||
if enabled {
|
||||
@@ -771,4 +807,4 @@ func BuildAssistantMessageStruct(msg gjson.Result) KiroAssistantResponseMessage
|
||||
Content: contentBuilder.String(),
|
||||
ToolUses: toolUses,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
package claude
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"strings"
|
||||
|
||||
@@ -14,6 +16,18 @@ import (
|
||||
kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common"
|
||||
)
|
||||
|
||||
// generateThinkingSignature generates a signature for thinking content.
|
||||
// This is required by Claude API for thinking blocks in non-streaming responses.
|
||||
// The signature is a base64-encoded hash of the thinking content.
|
||||
func generateThinkingSignature(thinkingContent string) string {
|
||||
if thinkingContent == "" {
|
||||
return ""
|
||||
}
|
||||
// Generate a deterministic signature based on content hash
|
||||
hash := sha256.Sum256([]byte(thinkingContent))
|
||||
return base64.StdEncoding.EncodeToString(hash[:])
|
||||
}
|
||||
|
||||
// Local references to kirocommon constants for thinking block parsing
|
||||
var (
|
||||
thinkingStartTag = kirocommon.ThinkingStartTag
|
||||
@@ -149,9 +163,12 @@ func ExtractThinkingFromContent(content string) []map[string]interface{} {
|
||||
if endIdx == -1 {
|
||||
// No closing tag found, treat rest as thinking content (incomplete response)
|
||||
if strings.TrimSpace(remaining) != "" {
|
||||
// Generate signature for thinking content (required by Claude API)
|
||||
signature := generateThinkingSignature(remaining)
|
||||
blocks = append(blocks, map[string]interface{}{
|
||||
"type": "thinking",
|
||||
"thinking": remaining,
|
||||
"type": "thinking",
|
||||
"thinking": remaining,
|
||||
"signature": signature,
|
||||
})
|
||||
log.Warnf("kiro: extractThinkingFromContent - missing closing </thinking> tag")
|
||||
}
|
||||
@@ -161,9 +178,12 @@ func ExtractThinkingFromContent(content string) []map[string]interface{} {
|
||||
// Extract thinking content between tags
|
||||
thinkContent := remaining[:endIdx]
|
||||
if strings.TrimSpace(thinkContent) != "" {
|
||||
// Generate signature for thinking content (required by Claude API)
|
||||
signature := generateThinkingSignature(thinkContent)
|
||||
blocks = append(blocks, map[string]interface{}{
|
||||
"type": "thinking",
|
||||
"thinking": thinkContent,
|
||||
"type": "thinking",
|
||||
"thinking": thinkContent,
|
||||
"signature": signature,
|
||||
})
|
||||
log.Debugf("kiro: extractThinkingFromContent - extracted thinking block (len: %d)", len(thinkContent))
|
||||
}
|
||||
|
||||
@@ -99,6 +99,16 @@ func BuildClaudeContentBlockStopEvent(index int) []byte {
|
||||
return []byte("event: content_block_stop\ndata: " + string(result))
|
||||
}
|
||||
|
||||
// BuildClaudeThinkingBlockStopEvent creates a content_block_stop SSE event for thinking blocks.
|
||||
func BuildClaudeThinkingBlockStopEvent(index int) []byte {
|
||||
event := map[string]interface{}{
|
||||
"type": "content_block_stop",
|
||||
"index": index,
|
||||
}
|
||||
result, _ := json.Marshal(event)
|
||||
return []byte("event: content_block_stop\ndata: " + string(result))
|
||||
}
|
||||
|
||||
// BuildClaudeMessageDeltaEvent creates the message_delta event with stop_reason and usage
|
||||
func BuildClaudeMessageDeltaEvent(stopReason string, usageInfo usage.Detail) []byte {
|
||||
deltaEvent := map[string]interface{}{
|
||||
|
||||
@@ -187,6 +187,7 @@ func ConvertKiroNonStreamToOpenAI(ctx context.Context, model string, originalReq
|
||||
|
||||
// Extract content
|
||||
var content string
|
||||
var reasoningContent string
|
||||
var toolUses []KiroToolUse
|
||||
var stopReason string
|
||||
|
||||
@@ -202,7 +203,8 @@ func ConvertKiroNonStreamToOpenAI(ctx context.Context, model string, originalReq
|
||||
case "text":
|
||||
content += block.Get("text").String()
|
||||
case "thinking":
|
||||
// Skip thinking blocks for OpenAI format (or convert to reasoning_content if needed)
|
||||
// Convert thinking blocks to reasoning_content for OpenAI format
|
||||
reasoningContent += block.Get("thinking").String()
|
||||
case "tool_use":
|
||||
toolUseID := block.Get("id").String()
|
||||
toolName := block.Get("name").String()
|
||||
@@ -233,8 +235,8 @@ func ConvertKiroNonStreamToOpenAI(ctx context.Context, model string, originalReq
|
||||
}
|
||||
usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens
|
||||
|
||||
// Build OpenAI response
|
||||
openaiResponse := BuildOpenAIResponse(content, toolUses, model, usageInfo, stopReason)
|
||||
// Build OpenAI response with reasoning_content support
|
||||
openaiResponse := BuildOpenAIResponseWithReasoning(content, reasoningContent, toolUses, model, usageInfo, stopReason)
|
||||
return string(openaiResponse)
|
||||
}
|
||||
|
||||
|
||||
@@ -6,11 +6,13 @@ package openai
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/google/uuid"
|
||||
kiroclaude "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/claude"
|
||||
kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/tidwall/gjson"
|
||||
@@ -133,8 +135,10 @@ func ConvertOpenAIRequestToKiro(modelName string, inputRawJSON []byte, stream bo
|
||||
// origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE.
|
||||
// isAgentic parameter enables chunked write optimization prompt for -agentic model variants.
|
||||
// isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode).
|
||||
// headers parameter allows checking Anthropic-Beta header for thinking mode detection.
|
||||
// metadata parameter is kept for API compatibility but no longer used for thinking configuration.
|
||||
// Returns the payload and a boolean indicating whether thinking mode was injected.
|
||||
func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) ([]byte, bool) {
|
||||
func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, headers http.Header, metadata map[string]any) ([]byte, bool) {
|
||||
// Extract max_tokens for potential use in inferenceConfig
|
||||
// Handle -1 as "use maximum" (Kiro max output is ~32000 tokens)
|
||||
const kiroMaxOutputTokens = 32000
|
||||
@@ -219,35 +223,30 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
|
||||
log.Debugf("kiro-openai: injected response_format hint into system prompt")
|
||||
}
|
||||
|
||||
// Check for thinking mode and inject thinking hint
|
||||
// Supports OpenAI reasoning_effort parameter and model name hints
|
||||
thinkingEnabled, budgetTokens := checkThinkingModeFromOpenAI(openaiBody)
|
||||
if thinkingEnabled {
|
||||
// Adjust budgetTokens based on max_tokens if not explicitly set by reasoning_effort
|
||||
// Use 50% of max_tokens for thinking, with min 8000 and max 24000
|
||||
if maxTokens > 0 && budgetTokens == 16000 { // 16000 is the default, meaning not explicitly set
|
||||
calculatedBudget := maxTokens / 2
|
||||
if calculatedBudget < 8000 {
|
||||
calculatedBudget = 8000
|
||||
}
|
||||
if calculatedBudget > 24000 {
|
||||
calculatedBudget = 24000
|
||||
}
|
||||
budgetTokens = calculatedBudget
|
||||
log.Debugf("kiro-openai: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens)
|
||||
}
|
||||
|
||||
if systemPrompt != "" {
|
||||
systemPrompt += "\n"
|
||||
}
|
||||
dynamicThinkingHint := fmt.Sprintf("<thinking_mode>interleaved</thinking_mode><max_thinking_length>%d</max_thinking_length>", budgetTokens)
|
||||
systemPrompt += dynamicThinkingHint
|
||||
log.Debugf("kiro-openai: injected dynamic thinking hint into system prompt, max_thinking_length: %d", budgetTokens)
|
||||
}
|
||||
// Check for thinking mode
|
||||
// Supports OpenAI reasoning_effort parameter, model name hints, and Anthropic-Beta header
|
||||
thinkingEnabled := checkThinkingModeFromOpenAIWithHeaders(openaiBody, headers)
|
||||
|
||||
// Convert OpenAI tools to Kiro format
|
||||
kiroTools := convertOpenAIToolsToKiro(tools)
|
||||
|
||||
// Thinking mode implementation:
|
||||
// Kiro API doesn't accept max_tokens for thinking. Instead, thinking mode is enabled
|
||||
// by injecting <thinking_mode> and <max_thinking_length> tags into the system prompt.
|
||||
// We use a fixed max_thinking_length value since Kiro handles the actual budget internally.
|
||||
if thinkingEnabled {
|
||||
thinkingHint := `<thinking_mode>interleaved</thinking_mode>
|
||||
<max_thinking_length>200000</max_thinking_length>
|
||||
|
||||
IMPORTANT: You MUST use <thinking>...</thinking> tags to show your reasoning process before providing your final response. Think step by step inside the thinking tags.`
|
||||
if systemPrompt != "" {
|
||||
systemPrompt = thinkingHint + "\n\n" + systemPrompt
|
||||
} else {
|
||||
systemPrompt = thinkingHint
|
||||
}
|
||||
log.Debugf("kiro-openai: injected thinking prompt")
|
||||
}
|
||||
|
||||
// Process messages and build history
|
||||
history, currentUserMsg, currentToolResults := processOpenAIMessages(messages, modelID, origin)
|
||||
|
||||
@@ -284,6 +283,7 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
|
||||
}
|
||||
|
||||
// Build inferenceConfig if we have any inference parameters
|
||||
// Note: Kiro API doesn't actually use max_tokens for thinking budget
|
||||
var inferenceConfig *KiroInferenceConfig
|
||||
if maxTokens > 0 || hasTemperature || hasTopP {
|
||||
inferenceConfig = &KiroInferenceConfig{}
|
||||
@@ -682,13 +682,28 @@ func buildFinalContent(content, systemPrompt string, toolResults []KiroToolResul
|
||||
}
|
||||
|
||||
// checkThinkingModeFromOpenAI checks if thinking mode is enabled in the OpenAI request.
|
||||
// Returns (thinkingEnabled, budgetTokens).
|
||||
// Returns thinkingEnabled.
|
||||
// Supports:
|
||||
// - reasoning_effort parameter (low/medium/high/auto)
|
||||
// - Model name containing "thinking" or "reason"
|
||||
// - <thinking_mode> tag in system prompt (AMP/Cursor format)
|
||||
func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) {
|
||||
var budgetTokens int64 = 16000 // Default budget
|
||||
func checkThinkingModeFromOpenAI(openaiBody []byte) bool {
|
||||
return checkThinkingModeFromOpenAIWithHeaders(openaiBody, nil)
|
||||
}
|
||||
|
||||
// checkThinkingModeFromOpenAIWithHeaders checks if thinking mode is enabled in the OpenAI request.
|
||||
// Returns thinkingEnabled.
|
||||
// Supports:
|
||||
// - Anthropic-Beta header with interleaved-thinking (Claude CLI)
|
||||
// - reasoning_effort parameter (low/medium/high/auto)
|
||||
// - Model name containing "thinking" or "reason"
|
||||
// - <thinking_mode> tag in system prompt (AMP/Cursor format)
|
||||
func checkThinkingModeFromOpenAIWithHeaders(openaiBody []byte, headers http.Header) bool {
|
||||
// Check Anthropic-Beta header first (Claude CLI uses this)
|
||||
if kiroclaude.IsThinkingEnabledFromHeader(headers) {
|
||||
log.Debugf("kiro-openai: thinking mode enabled via Anthropic-Beta header")
|
||||
return true
|
||||
}
|
||||
|
||||
// Check OpenAI format: reasoning_effort parameter
|
||||
// Valid values: "low", "medium", "high", "auto" (not "none")
|
||||
@@ -697,18 +712,7 @@ func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) {
|
||||
effort := reasoningEffort.String()
|
||||
if effort != "" && effort != "none" {
|
||||
log.Debugf("kiro-openai: thinking mode enabled via reasoning_effort: %s", effort)
|
||||
// Adjust budget based on effort level
|
||||
switch effort {
|
||||
case "low":
|
||||
budgetTokens = 8000
|
||||
case "medium":
|
||||
budgetTokens = 16000
|
||||
case "high":
|
||||
budgetTokens = 32000
|
||||
case "auto":
|
||||
budgetTokens = 16000
|
||||
}
|
||||
return true, budgetTokens
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -725,17 +729,7 @@ func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) {
|
||||
thinkingMode := bodyStr[startIdx : startIdx+endIdx]
|
||||
if thinkingMode == "interleaved" || thinkingMode == "enabled" {
|
||||
log.Debugf("kiro-openai: thinking mode enabled via AMP/Cursor format: %s", thinkingMode)
|
||||
// Try to extract max_thinking_length if present
|
||||
if maxLenStart := strings.Index(bodyStr, "<max_thinking_length>"); maxLenStart >= 0 {
|
||||
maxLenStart += len("<max_thinking_length>")
|
||||
if maxLenEnd := strings.Index(bodyStr[maxLenStart:], "</max_thinking_length>"); maxLenEnd >= 0 {
|
||||
maxLenStr := bodyStr[maxLenStart : maxLenStart+maxLenEnd]
|
||||
if parsed, err := fmt.Sscanf(maxLenStr, "%d", &budgetTokens); err == nil && parsed == 1 {
|
||||
log.Debugf("kiro-openai: extracted max_thinking_length: %d", budgetTokens)
|
||||
}
|
||||
}
|
||||
}
|
||||
return true, budgetTokens
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -746,13 +740,21 @@ func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) {
|
||||
modelLower := strings.ToLower(model)
|
||||
if strings.Contains(modelLower, "thinking") || strings.Contains(modelLower, "-reason") {
|
||||
log.Debugf("kiro-openai: thinking mode enabled via model name hint: %s", model)
|
||||
return true, budgetTokens
|
||||
return true
|
||||
}
|
||||
|
||||
log.Debugf("kiro-openai: no thinking mode detected in OpenAI request")
|
||||
return false, budgetTokens
|
||||
return false
|
||||
}
|
||||
|
||||
// hasThinkingTagInBody checks if the request body already contains thinking configuration tags.
|
||||
// This is used to prevent duplicate injection when client (e.g., AMP/Cursor) already includes thinking config.
|
||||
func hasThinkingTagInBody(body []byte) bool {
|
||||
bodyStr := string(body)
|
||||
return strings.Contains(bodyStr, "<thinking_mode>") || strings.Contains(bodyStr, "<max_thinking_length>")
|
||||
}
|
||||
|
||||
|
||||
// extractToolChoiceHint extracts tool_choice from OpenAI request and returns a system prompt hint.
|
||||
// OpenAI tool_choice values:
|
||||
// - "none": Don't use any tools
|
||||
@@ -845,4 +847,4 @@ func deduplicateToolResults(toolResults []KiroToolResult) []KiroToolResult {
|
||||
}
|
||||
}
|
||||
return unique
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,12 +21,25 @@ var functionCallIDCounter uint64
|
||||
// Supports tool_calls when tools are present in the response.
|
||||
// stopReason is passed from upstream; fallback logic applied if empty.
|
||||
func BuildOpenAIResponse(content string, toolUses []KiroToolUse, model string, usageInfo usage.Detail, stopReason string) []byte {
|
||||
return BuildOpenAIResponseWithReasoning(content, "", toolUses, model, usageInfo, stopReason)
|
||||
}
|
||||
|
||||
// BuildOpenAIResponseWithReasoning constructs an OpenAI Chat Completions-compatible response with reasoning_content support.
|
||||
// Supports tool_calls when tools are present in the response.
|
||||
// reasoningContent is included as reasoning_content field in the message when present.
|
||||
// stopReason is passed from upstream; fallback logic applied if empty.
|
||||
func BuildOpenAIResponseWithReasoning(content, reasoningContent string, toolUses []KiroToolUse, model string, usageInfo usage.Detail, stopReason string) []byte {
|
||||
// Build the message object
|
||||
message := map[string]interface{}{
|
||||
"role": "assistant",
|
||||
"content": content,
|
||||
}
|
||||
|
||||
// Add reasoning_content if present (for thinking/reasoning models)
|
||||
if reasoningContent != "" {
|
||||
message["reasoning_content"] = reasoningContent
|
||||
}
|
||||
|
||||
// Add tool_calls if present
|
||||
if len(toolUses) > 0 {
|
||||
var toolCalls []map[string]interface{}
|
||||
|
||||
@@ -2091,6 +2091,11 @@ func buildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
|
||||
if oldCfg.RemoteManagement.DisableControlPanel != newCfg.RemoteManagement.DisableControlPanel {
|
||||
changes = append(changes, fmt.Sprintf("remote-management.disable-control-panel: %t -> %t", oldCfg.RemoteManagement.DisableControlPanel, newCfg.RemoteManagement.DisableControlPanel))
|
||||
}
|
||||
oldPanelRepo := strings.TrimSpace(oldCfg.RemoteManagement.PanelGitHubRepository)
|
||||
newPanelRepo := strings.TrimSpace(newCfg.RemoteManagement.PanelGitHubRepository)
|
||||
if oldPanelRepo != newPanelRepo {
|
||||
changes = append(changes, fmt.Sprintf("remote-management.panel-github-repository: %s -> %s", oldPanelRepo, newPanelRepo))
|
||||
}
|
||||
if oldCfg.RemoteManagement.SecretKey != newCfg.RemoteManagement.SecretKey {
|
||||
switch {
|
||||
case oldCfg.RemoteManagement.SecretKey == "" && newCfg.RemoteManagement.SecretKey != "":
|
||||
|
||||
Reference in New Issue
Block a user