mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-03-12 16:53:18 +00:00
Compare commits
82 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0606a7762c | ||
|
|
f887f9985d | ||
|
|
550da0cee8 | ||
|
|
e662c020a9 | ||
|
|
7ff3936efe | ||
|
|
29594086c0 | ||
|
|
b0433c9f2a | ||
|
|
b1204b1423 | ||
|
|
43ca112fff | ||
|
|
24cf7fa6a2 | ||
|
|
bf66bcad86 | ||
|
|
f36a5f5654 | ||
|
|
c1facdff67 | ||
|
|
0263f9d35b | ||
|
|
101498e737 | ||
|
|
4ee46bc9f2 | ||
|
|
c3e94a8277 | ||
|
|
fafef32b9e | ||
|
|
1e764de0a8 | ||
|
|
b3b8d71dfc | ||
|
|
ca29c42805 | ||
|
|
fcefa2c820 | ||
|
|
6b6d030ed3 | ||
|
|
fd5b669c87 | ||
|
|
30d832c9b1 | ||
|
|
2448691136 | ||
|
|
e7cd7b5243 | ||
|
|
33f89a2609 | ||
|
|
403a731e22 | ||
|
|
3631fab7e2 | ||
|
|
b3d292a5f9 | ||
|
|
9293c685e0 | ||
|
|
38094a2339 | ||
|
|
538039f583 | ||
|
|
ca796510e9 | ||
|
|
d0d66cdcb7 | ||
|
|
d7d54fa2cc | ||
|
|
31649325f0 | ||
|
|
3a43ecb19b | ||
|
|
a709e5a12d | ||
|
|
f0ac77197b | ||
|
|
da0bbf2a3f | ||
|
|
295f34d7f0 | ||
|
|
c41ce77eea | ||
|
|
876b86ff91 | ||
|
|
acdfa1c87f | ||
|
|
4eb1e6093f | ||
|
|
189a066807 | ||
|
|
d0bada7a43 | ||
|
|
9dc0e6d08b | ||
|
|
8510fc313e | ||
|
|
2666708c30 | ||
|
|
f2b0ce13d9 | ||
|
|
b8652b7387 | ||
|
|
b18b2ebe9f | ||
|
|
9e5b1d24e8 | ||
|
|
a7dae6ad52 | ||
|
|
e93e05ae25 | ||
|
|
c8c27325dc | ||
|
|
c3b6f3918c | ||
|
|
bbb55a8ab4 | ||
|
|
04b2290927 | ||
|
|
53920b0399 | ||
|
|
58290760a9 | ||
|
|
8f522eed43 | ||
|
|
3dc001a9d2 | ||
|
|
ee54ee8825 | ||
|
|
2395b7a180 | ||
|
|
7583193c2a | ||
|
|
7cc3bd4ba0 | ||
|
|
88a0f095e8 | ||
|
|
c65f64dce0 | ||
|
|
d18cd217e1 | ||
|
|
ba4a1ab433 | ||
|
|
decddb521e | ||
|
|
33ab3a99f0 | ||
|
|
de6b1ada5d | ||
|
|
e08f48c7a1 | ||
|
|
95096bc3fc | ||
|
|
f74a688fb9 | ||
|
|
f7bfa8a05c | ||
|
|
c8620d1633 |
110
.github/workflows/docker-image.yml
vendored
110
.github/workflows/docker-image.yml
vendored
@@ -10,13 +10,11 @@ env:
|
||||
DOCKERHUB_REPO: eceasy/cli-proxy-api-plus
|
||||
|
||||
jobs:
|
||||
docker:
|
||||
docker_amd64:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Login to DockerHub
|
||||
@@ -29,19 +27,113 @@ jobs:
|
||||
echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
|
||||
echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
|
||||
echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
|
||||
- name: Build and push
|
||||
- name: Build and push (amd64)
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
platforms: |
|
||||
linux/amd64
|
||||
linux/arm64
|
||||
platforms: linux/amd64
|
||||
push: true
|
||||
build-args: |
|
||||
VERSION=${{ env.VERSION }}
|
||||
COMMIT=${{ env.COMMIT }}
|
||||
BUILD_DATE=${{ env.BUILD_DATE }}
|
||||
tags: |
|
||||
${{ env.DOCKERHUB_REPO }}:latest
|
||||
${{ env.DOCKERHUB_REPO }}:${{ env.VERSION }}
|
||||
${{ env.DOCKERHUB_REPO }}:latest-amd64
|
||||
${{ env.DOCKERHUB_REPO }}:${{ env.VERSION }}-amd64
|
||||
|
||||
docker_arm64:
|
||||
runs-on: ubuntu-24.04-arm
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
- name: Generate Build Metadata
|
||||
run: |
|
||||
echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
|
||||
echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
|
||||
echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
|
||||
- name: Build and push (arm64)
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
platforms: linux/arm64
|
||||
push: true
|
||||
build-args: |
|
||||
VERSION=${{ env.VERSION }}
|
||||
COMMIT=${{ env.COMMIT }}
|
||||
BUILD_DATE=${{ env.BUILD_DATE }}
|
||||
tags: |
|
||||
${{ env.DOCKERHUB_REPO }}:latest-arm64
|
||||
${{ env.DOCKERHUB_REPO }}:${{ env.VERSION }}-arm64
|
||||
|
||||
docker_manifest:
|
||||
runs-on: ubuntu-latest
|
||||
needs:
|
||||
- docker_amd64
|
||||
- docker_arm64
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
- name: Generate Build Metadata
|
||||
run: |
|
||||
echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
|
||||
echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
|
||||
echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
|
||||
- name: Create and push multi-arch manifests
|
||||
run: |
|
||||
docker buildx imagetools create \
|
||||
--tag "${DOCKERHUB_REPO}:latest" \
|
||||
"${DOCKERHUB_REPO}:latest-amd64" \
|
||||
"${DOCKERHUB_REPO}:latest-arm64"
|
||||
docker buildx imagetools create \
|
||||
--tag "${DOCKERHUB_REPO}:${VERSION}" \
|
||||
"${DOCKERHUB_REPO}:${VERSION}-amd64" \
|
||||
"${DOCKERHUB_REPO}:${VERSION}-arm64"
|
||||
- name: Cleanup temporary tags
|
||||
continue-on-error: true
|
||||
env:
|
||||
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
namespace="${DOCKERHUB_REPO%%/*}"
|
||||
repo_name="${DOCKERHUB_REPO#*/}"
|
||||
|
||||
token="$(
|
||||
curl -fsSL \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d "{\"username\":\"${DOCKERHUB_USERNAME}\",\"password\":\"${DOCKERHUB_TOKEN}\"}" \
|
||||
'https://hub.docker.com/v2/users/login/' \
|
||||
| python3 -c 'import json,sys; print(json.load(sys.stdin)["token"])'
|
||||
)"
|
||||
|
||||
delete_tag() {
|
||||
local tag="$1"
|
||||
local url="https://hub.docker.com/v2/repositories/${namespace}/${repo_name}/tags/${tag}/"
|
||||
local http_code
|
||||
http_code="$(curl -sS -o /dev/null -w "%{http_code}" -X DELETE -H "Authorization: JWT ${token}" "${url}" || true)"
|
||||
if [ "${http_code}" = "204" ] || [ "${http_code}" = "404" ]; then
|
||||
echo "Docker Hub tag removed (or missing): ${DOCKERHUB_REPO}:${tag} (HTTP ${http_code})"
|
||||
return 0
|
||||
fi
|
||||
echo "Docker Hub tag delete failed: ${DOCKERHUB_REPO}:${tag} (HTTP ${http_code})"
|
||||
return 0
|
||||
}
|
||||
|
||||
delete_tag "latest-amd64"
|
||||
delete_tag "latest-arm64"
|
||||
delete_tag "${VERSION}-amd64"
|
||||
delete_tag "${VERSION}-arm64"
|
||||
|
||||
@@ -52,7 +52,7 @@ mkdir -p ~/cli-proxy && cd ~/cli-proxy
|
||||
cat > docker-compose.yml << 'EOF'
|
||||
services:
|
||||
cli-proxy-api:
|
||||
image: 17600006524/cli-proxy-api-plus:latest
|
||||
image: eceasy/cli-proxy-api-plus:latest
|
||||
container_name: cli-proxy-api-plus
|
||||
ports:
|
||||
- "8317:8317"
|
||||
@@ -64,7 +64,7 @@ services:
|
||||
EOF
|
||||
|
||||
# Download example config
|
||||
curl -o config.yaml https://raw.githubusercontent.com/linlang781/CLIProxyAPIPlus/main/config.example.yaml
|
||||
curl -o config.yaml https://raw.githubusercontent.com/router-for-me/CLIProxyAPIPlus/main/config.example.yaml
|
||||
|
||||
# Pull and start
|
||||
docker compose pull && docker compose up -d
|
||||
@@ -93,7 +93,7 @@ docker compose pull && docker compose up -d
|
||||
|
||||
This project only accepts pull requests that relate to third-party provider support. Any pull requests unrelated to third-party provider support will be rejected.
|
||||
|
||||
If you need to submit any non-third-party provider changes, please open them against the mainline repository.
|
||||
If you need to submit any non-third-party provider changes, please open them against the [mainline](https://github.com/router-for-me/CLIProxyAPI) repository.
|
||||
|
||||
## License
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ mkdir -p ~/cli-proxy && cd ~/cli-proxy
|
||||
cat > docker-compose.yml << 'EOF'
|
||||
services:
|
||||
cli-proxy-api:
|
||||
image: 17600006524/cli-proxy-api-plus:latest
|
||||
image: eceasy/cli-proxy-api-plus:latest
|
||||
container_name: cli-proxy-api-plus
|
||||
ports:
|
||||
- "8317:8317"
|
||||
@@ -64,7 +64,7 @@ services:
|
||||
EOF
|
||||
|
||||
# 下载示例配置
|
||||
curl -o config.yaml https://raw.githubusercontent.com/linlang781/CLIProxyAPIPlus/main/config.example.yaml
|
||||
curl -o config.yaml https://raw.githubusercontent.com/router-for-me/CLIProxyAPIPlus/main/config.example.yaml
|
||||
|
||||
# 拉取并启动
|
||||
docker compose pull && docker compose up -d
|
||||
@@ -93,7 +93,7 @@ docker compose pull && docker compose up -d
|
||||
|
||||
该项目仅接受第三方供应商支持的 Pull Request。任何非第三方供应商支持的 Pull Request 都将被拒绝。
|
||||
|
||||
如果需要提交任何非第三方供应商支持的 Pull Request,请提交到主线版本。
|
||||
如果需要提交任何非第三方供应商支持的 Pull Request,请提交到[主线](https://github.com/router-for-me/CLIProxyAPI)版本。
|
||||
|
||||
## 许可证
|
||||
|
||||
|
||||
1
go.mod
1
go.mod
@@ -13,6 +13,7 @@ require (
|
||||
github.com/joho/godotenv v1.5.1
|
||||
github.com/klauspost/compress v1.17.4
|
||||
github.com/minio/minio-go/v7 v7.0.66
|
||||
github.com/refraction-networking/utls v1.8.2
|
||||
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c
|
||||
github.com/sirupsen/logrus v1.9.3
|
||||
github.com/tidwall/gjson v1.18.0
|
||||
|
||||
2
go.sum
2
go.sum
@@ -122,6 +122,8 @@ github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmd
|
||||
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/refraction-networking/utls v1.8.2 h1:j4Q1gJj0xngdeH+Ox/qND11aEfhpgoEvV+S9iJ2IdQo=
|
||||
github.com/refraction-networking/utls v1.8.2/go.mod h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM=
|
||||
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
|
||||
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
|
||||
github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc=
|
||||
|
||||
33
internal/api/handlers/management/model_definitions.go
Normal file
33
internal/api/handlers/management/model_definitions.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package management
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
)
|
||||
|
||||
// GetStaticModelDefinitions returns static model metadata for a given channel.
|
||||
// Channel is provided via path param (:channel) or query param (?channel=...).
|
||||
func (h *Handler) GetStaticModelDefinitions(c *gin.Context) {
|
||||
channel := strings.TrimSpace(c.Param("channel"))
|
||||
if channel == "" {
|
||||
channel = strings.TrimSpace(c.Query("channel"))
|
||||
}
|
||||
if channel == "" {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "channel is required"})
|
||||
return
|
||||
}
|
||||
|
||||
models := registry.GetStaticModelDefinitionsByChannel(channel)
|
||||
if models == nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "unknown channel", "channel": channel})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"channel": strings.ToLower(strings.TrimSpace(channel)),
|
||||
"models": models,
|
||||
})
|
||||
}
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
|
||||
@@ -103,6 +104,7 @@ func captureRequestInfo(c *gin.Context) (*RequestInfo, error) {
|
||||
Headers: headers,
|
||||
Body: body,
|
||||
RequestID: logging.GetGinRequestID(c),
|
||||
Timestamp: time.Now(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"bytes"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
|
||||
@@ -20,22 +21,24 @@ type RequestInfo struct {
|
||||
Headers map[string][]string // Headers contains the request headers.
|
||||
Body []byte // Body is the raw request body.
|
||||
RequestID string // RequestID is the unique identifier for the request.
|
||||
Timestamp time.Time // Timestamp is when the request was received.
|
||||
}
|
||||
|
||||
// ResponseWriterWrapper wraps the standard gin.ResponseWriter to intercept and log response data.
|
||||
// It is designed to handle both standard and streaming responses, ensuring that logging operations do not block the client response.
|
||||
type ResponseWriterWrapper struct {
|
||||
gin.ResponseWriter
|
||||
body *bytes.Buffer // body is a buffer to store the response body for non-streaming responses.
|
||||
isStreaming bool // isStreaming indicates whether the response is a streaming type (e.g., text/event-stream).
|
||||
streamWriter logging.StreamingLogWriter // streamWriter is a writer for handling streaming log entries.
|
||||
chunkChannel chan []byte // chunkChannel is a channel for asynchronously passing response chunks to the logger.
|
||||
streamDone chan struct{} // streamDone signals when the streaming goroutine completes.
|
||||
logger logging.RequestLogger // logger is the instance of the request logger service.
|
||||
requestInfo *RequestInfo // requestInfo holds the details of the original request.
|
||||
statusCode int // statusCode stores the HTTP status code of the response.
|
||||
headers map[string][]string // headers stores the response headers.
|
||||
logOnErrorOnly bool // logOnErrorOnly enables logging only when an error response is detected.
|
||||
body *bytes.Buffer // body is a buffer to store the response body for non-streaming responses.
|
||||
isStreaming bool // isStreaming indicates whether the response is a streaming type (e.g., text/event-stream).
|
||||
streamWriter logging.StreamingLogWriter // streamWriter is a writer for handling streaming log entries.
|
||||
chunkChannel chan []byte // chunkChannel is a channel for asynchronously passing response chunks to the logger.
|
||||
streamDone chan struct{} // streamDone signals when the streaming goroutine completes.
|
||||
logger logging.RequestLogger // logger is the instance of the request logger service.
|
||||
requestInfo *RequestInfo // requestInfo holds the details of the original request.
|
||||
statusCode int // statusCode stores the HTTP status code of the response.
|
||||
headers map[string][]string // headers stores the response headers.
|
||||
logOnErrorOnly bool // logOnErrorOnly enables logging only when an error response is detected.
|
||||
firstChunkTimestamp time.Time // firstChunkTimestamp captures TTFB for streaming responses.
|
||||
}
|
||||
|
||||
// NewResponseWriterWrapper creates and initializes a new ResponseWriterWrapper.
|
||||
@@ -73,6 +76,10 @@ func (w *ResponseWriterWrapper) Write(data []byte) (int, error) {
|
||||
|
||||
// THEN: Handle logging based on response type
|
||||
if w.isStreaming && w.chunkChannel != nil {
|
||||
// Capture TTFB on first chunk (synchronous, before async channel send)
|
||||
if w.firstChunkTimestamp.IsZero() {
|
||||
w.firstChunkTimestamp = time.Now()
|
||||
}
|
||||
// For streaming responses: Send to async logging channel (non-blocking)
|
||||
select {
|
||||
case w.chunkChannel <- append([]byte(nil), data...): // Non-blocking send with copy
|
||||
@@ -117,6 +124,10 @@ func (w *ResponseWriterWrapper) WriteString(data string) (int, error) {
|
||||
|
||||
// THEN: Capture for logging
|
||||
if w.isStreaming && w.chunkChannel != nil {
|
||||
// Capture TTFB on first chunk (synchronous, before async channel send)
|
||||
if w.firstChunkTimestamp.IsZero() {
|
||||
w.firstChunkTimestamp = time.Now()
|
||||
}
|
||||
select {
|
||||
case w.chunkChannel <- []byte(data):
|
||||
default:
|
||||
@@ -280,6 +291,8 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
|
||||
w.streamDone = nil
|
||||
}
|
||||
|
||||
w.streamWriter.SetFirstChunkTimestamp(w.firstChunkTimestamp)
|
||||
|
||||
// Write API Request and Response to the streaming log before closing
|
||||
apiRequest := w.extractAPIRequest(c)
|
||||
if len(apiRequest) > 0 {
|
||||
@@ -297,7 +310,7 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
return w.logRequest(finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), slicesAPIResponseError, forceLog)
|
||||
return w.logRequest(finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), w.extractAPIResponseTimestamp(c), slicesAPIResponseError, forceLog)
|
||||
}
|
||||
|
||||
func (w *ResponseWriterWrapper) cloneHeaders() map[string][]string {
|
||||
@@ -337,7 +350,18 @@ func (w *ResponseWriterWrapper) extractAPIResponse(c *gin.Context) []byte {
|
||||
return data
|
||||
}
|
||||
|
||||
func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]string, body []byte, apiRequestBody, apiResponseBody []byte, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
|
||||
func (w *ResponseWriterWrapper) extractAPIResponseTimestamp(c *gin.Context) time.Time {
|
||||
ts, isExist := c.Get("API_RESPONSE_TIMESTAMP")
|
||||
if !isExist {
|
||||
return time.Time{}
|
||||
}
|
||||
if t, ok := ts.(time.Time); ok {
|
||||
return t
|
||||
}
|
||||
return time.Time{}
|
||||
}
|
||||
|
||||
func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]string, body []byte, apiRequestBody, apiResponseBody []byte, apiResponseTimestamp time.Time, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
|
||||
if w.requestInfo == nil {
|
||||
return nil
|
||||
}
|
||||
@@ -348,7 +372,7 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
|
||||
}
|
||||
|
||||
if loggerWithOptions, ok := w.logger.(interface {
|
||||
LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool, string) error
|
||||
LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool, string, time.Time, time.Time) error
|
||||
}); ok {
|
||||
return loggerWithOptions.LogRequestWithOptions(
|
||||
w.requestInfo.URL,
|
||||
@@ -363,6 +387,8 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
|
||||
apiResponseErrors,
|
||||
forceLog,
|
||||
w.requestInfo.RequestID,
|
||||
w.requestInfo.Timestamp,
|
||||
apiResponseTimestamp,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -378,5 +404,7 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
|
||||
apiResponseBody,
|
||||
apiResponseErrors,
|
||||
w.requestInfo.RequestID,
|
||||
w.requestInfo.Timestamp,
|
||||
apiResponseTimestamp,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
@@ -331,6 +332,7 @@ func (s *Server) setupRoutes() {
|
||||
v1.POST("/messages", claudeCodeHandlers.ClaudeMessages)
|
||||
v1.POST("/messages/count_tokens", claudeCodeHandlers.ClaudeCountTokens)
|
||||
v1.POST("/responses", openaiResponsesHandlers.Responses)
|
||||
v1.POST("/responses/compact", openaiResponsesHandlers.Compact)
|
||||
}
|
||||
|
||||
// Gemini compatible API routes
|
||||
@@ -633,6 +635,7 @@ func (s *Server) registerManagementRoutes() {
|
||||
|
||||
mgmt.GET("/auth-files", s.mgmt.ListAuthFiles)
|
||||
mgmt.GET("/auth-files/models", s.mgmt.GetAuthFileModels)
|
||||
mgmt.GET("/model-definitions/:channel", s.mgmt.GetStaticModelDefinitions)
|
||||
mgmt.GET("/auth-files/download", s.mgmt.DownloadAuthFile)
|
||||
mgmt.POST("/auth-files", s.mgmt.UploadAuthFile)
|
||||
mgmt.DELETE("/auth-files", s.mgmt.DeleteAuthFile)
|
||||
@@ -1017,14 +1020,17 @@ func (s *Server) UpdateClients(cfg *config.Config) {
|
||||
s.mgmt.SetAuthManager(s.handlers.AuthManager)
|
||||
}
|
||||
|
||||
// Notify Amp module of config changes (for model mapping hot-reload)
|
||||
if s.ampModule != nil {
|
||||
log.Debugf("triggering amp module config update")
|
||||
if err := s.ampModule.OnConfigUpdated(cfg); err != nil {
|
||||
log.Errorf("failed to update Amp module config: %v", err)
|
||||
// Notify Amp module only when Amp config has changed.
|
||||
ampConfigChanged := oldCfg == nil || !reflect.DeepEqual(oldCfg.AmpCode, cfg.AmpCode)
|
||||
if ampConfigChanged {
|
||||
if s.ampModule != nil {
|
||||
log.Debugf("triggering amp module config update")
|
||||
if err := s.ampModule.OnConfigUpdated(cfg); err != nil {
|
||||
log.Errorf("failed to update Amp module config: %v", err)
|
||||
}
|
||||
} else {
|
||||
log.Warnf("amp module is nil, skipping config update")
|
||||
}
|
||||
} else {
|
||||
log.Warnf("amp module is nil, skipping config update")
|
||||
}
|
||||
|
||||
// Count client sources from configuration and auth store.
|
||||
|
||||
@@ -14,7 +14,6 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
@@ -51,7 +50,8 @@ type ClaudeAuth struct {
|
||||
}
|
||||
|
||||
// NewClaudeAuth creates a new Anthropic authentication service.
|
||||
// It initializes the HTTP client with proxy settings from the configuration.
|
||||
// It initializes the HTTP client with a custom TLS transport that uses Firefox
|
||||
// fingerprint to bypass Cloudflare's TLS fingerprinting on Anthropic domains.
|
||||
//
|
||||
// Parameters:
|
||||
// - cfg: The application configuration containing proxy settings
|
||||
@@ -59,8 +59,10 @@ type ClaudeAuth struct {
|
||||
// Returns:
|
||||
// - *ClaudeAuth: A new Claude authentication service instance
|
||||
func NewClaudeAuth(cfg *config.Config) *ClaudeAuth {
|
||||
// Use custom HTTP client with Firefox TLS fingerprint to bypass
|
||||
// Cloudflare's bot detection on Anthropic domains
|
||||
return &ClaudeAuth{
|
||||
httpClient: util.SetProxy(&cfg.SDKConfig, &http.Client{}),
|
||||
httpClient: NewAnthropicHttpClient(&cfg.SDKConfig),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
165
internal/auth/claude/utls_transport.go
Normal file
165
internal/auth/claude/utls_transport.go
Normal file
@@ -0,0 +1,165 @@
|
||||
// Package claude provides authentication functionality for Anthropic's Claude API.
|
||||
// This file implements a custom HTTP transport using utls to bypass TLS fingerprinting.
|
||||
package claude
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
tls "github.com/refraction-networking/utls"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"golang.org/x/net/http2"
|
||||
"golang.org/x/net/proxy"
|
||||
)
|
||||
|
||||
// utlsRoundTripper implements http.RoundTripper using utls with Firefox fingerprint
|
||||
// to bypass Cloudflare's TLS fingerprinting on Anthropic domains.
|
||||
type utlsRoundTripper struct {
|
||||
// mu protects the connections map and pending map
|
||||
mu sync.Mutex
|
||||
// connections caches HTTP/2 client connections per host
|
||||
connections map[string]*http2.ClientConn
|
||||
// pending tracks hosts that are currently being connected to (prevents race condition)
|
||||
pending map[string]*sync.Cond
|
||||
// dialer is used to create network connections, supporting proxies
|
||||
dialer proxy.Dialer
|
||||
}
|
||||
|
||||
// newUtlsRoundTripper creates a new utls-based round tripper with optional proxy support
|
||||
func newUtlsRoundTripper(cfg *config.SDKConfig) *utlsRoundTripper {
|
||||
var dialer proxy.Dialer = proxy.Direct
|
||||
if cfg != nil && cfg.ProxyURL != "" {
|
||||
proxyURL, err := url.Parse(cfg.ProxyURL)
|
||||
if err != nil {
|
||||
log.Errorf("failed to parse proxy URL %q: %v", cfg.ProxyURL, err)
|
||||
} else {
|
||||
pDialer, err := proxy.FromURL(proxyURL, proxy.Direct)
|
||||
if err != nil {
|
||||
log.Errorf("failed to create proxy dialer for %q: %v", cfg.ProxyURL, err)
|
||||
} else {
|
||||
dialer = pDialer
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return &utlsRoundTripper{
|
||||
connections: make(map[string]*http2.ClientConn),
|
||||
pending: make(map[string]*sync.Cond),
|
||||
dialer: dialer,
|
||||
}
|
||||
}
|
||||
|
||||
// getOrCreateConnection gets an existing connection or creates a new one.
|
||||
// It uses a per-host locking mechanism to prevent multiple goroutines from
|
||||
// creating connections to the same host simultaneously.
|
||||
func (t *utlsRoundTripper) getOrCreateConnection(host, addr string) (*http2.ClientConn, error) {
|
||||
t.mu.Lock()
|
||||
|
||||
// Check if connection exists and is usable
|
||||
if h2Conn, ok := t.connections[host]; ok && h2Conn.CanTakeNewRequest() {
|
||||
t.mu.Unlock()
|
||||
return h2Conn, nil
|
||||
}
|
||||
|
||||
// Check if another goroutine is already creating a connection
|
||||
if cond, ok := t.pending[host]; ok {
|
||||
// Wait for the other goroutine to finish
|
||||
cond.Wait()
|
||||
// Check if connection is now available
|
||||
if h2Conn, ok := t.connections[host]; ok && h2Conn.CanTakeNewRequest() {
|
||||
t.mu.Unlock()
|
||||
return h2Conn, nil
|
||||
}
|
||||
// Connection still not available, we'll create one
|
||||
}
|
||||
|
||||
// Mark this host as pending
|
||||
cond := sync.NewCond(&t.mu)
|
||||
t.pending[host] = cond
|
||||
t.mu.Unlock()
|
||||
|
||||
// Create connection outside the lock
|
||||
h2Conn, err := t.createConnection(host, addr)
|
||||
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
|
||||
// Remove pending marker and wake up waiting goroutines
|
||||
delete(t.pending, host)
|
||||
cond.Broadcast()
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Store the new connection
|
||||
t.connections[host] = h2Conn
|
||||
return h2Conn, nil
|
||||
}
|
||||
|
||||
// createConnection creates a new HTTP/2 connection with Firefox TLS fingerprint
|
||||
func (t *utlsRoundTripper) createConnection(host, addr string) (*http2.ClientConn, error) {
|
||||
conn, err := t.dialer.Dial("tcp", addr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tlsConfig := &tls.Config{ServerName: host}
|
||||
tlsConn := tls.UClient(conn, tlsConfig, tls.HelloFirefox_Auto)
|
||||
|
||||
if err := tlsConn.Handshake(); err != nil {
|
||||
conn.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tr := &http2.Transport{}
|
||||
h2Conn, err := tr.NewClientConn(tlsConn)
|
||||
if err != nil {
|
||||
tlsConn.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return h2Conn, nil
|
||||
}
|
||||
|
||||
// RoundTrip implements http.RoundTripper
|
||||
func (t *utlsRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||
host := req.URL.Host
|
||||
addr := host
|
||||
if !strings.Contains(addr, ":") {
|
||||
addr += ":443"
|
||||
}
|
||||
|
||||
// Get hostname without port for TLS ServerName
|
||||
hostname := req.URL.Hostname()
|
||||
|
||||
h2Conn, err := t.getOrCreateConnection(hostname, addr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp, err := h2Conn.RoundTrip(req)
|
||||
if err != nil {
|
||||
// Connection failed, remove it from cache
|
||||
t.mu.Lock()
|
||||
if cached, ok := t.connections[hostname]; ok && cached == h2Conn {
|
||||
delete(t.connections, hostname)
|
||||
}
|
||||
t.mu.Unlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// NewAnthropicHttpClient creates an HTTP client that bypasses TLS fingerprinting
|
||||
// for Anthropic domains by using utls with Firefox fingerprint.
|
||||
// It accepts optional SDK configuration for proxy settings.
|
||||
func NewAnthropicHttpClient(cfg *config.SDKConfig) *http.Client {
|
||||
return &http.Client{
|
||||
Transport: newUtlsRoundTripper(cfg),
|
||||
}
|
||||
}
|
||||
@@ -32,14 +32,17 @@ type KiroTokenData struct {
|
||||
ProfileArn string `json:"profileArn"`
|
||||
// ExpiresAt is the timestamp when the token expires
|
||||
ExpiresAt string `json:"expiresAt"`
|
||||
// AuthMethod indicates the authentication method used (e.g., "builder-id", "social")
|
||||
// AuthMethod indicates the authentication method used (e.g., "builder-id", "social", "idc")
|
||||
AuthMethod string `json:"authMethod"`
|
||||
// Provider indicates the OAuth provider (e.g., "AWS", "Google")
|
||||
// Provider indicates the OAuth provider (e.g., "AWS", "Google", "Enterprise")
|
||||
Provider string `json:"provider"`
|
||||
// ClientID is the OIDC client ID (needed for token refresh)
|
||||
ClientID string `json:"clientId,omitempty"`
|
||||
// ClientSecret is the OIDC client secret (needed for token refresh)
|
||||
ClientSecret string `json:"clientSecret,omitempty"`
|
||||
// ClientIDHash is the hash of client ID used to locate device registration file
|
||||
// (Enterprise Kiro IDE stores clientId/clientSecret in ~/.aws/sso/cache/{clientIdHash}.json)
|
||||
ClientIDHash string `json:"clientIdHash,omitempty"`
|
||||
// Email is the user's email address (used for file naming)
|
||||
Email string `json:"email,omitempty"`
|
||||
// StartURL is the IDC/Identity Center start URL (only for IDC auth method)
|
||||
@@ -169,6 +172,8 @@ func LoadKiroIDETokenWithRetry(maxAttempts int, baseDelay time.Duration) (*KiroT
|
||||
}
|
||||
|
||||
// LoadKiroIDEToken loads token data from Kiro IDE's token file.
|
||||
// For Enterprise Kiro IDE (IDC auth), it also loads clientId and clientSecret
|
||||
// from the device registration file referenced by clientIdHash.
|
||||
func LoadKiroIDEToken() (*KiroTokenData, error) {
|
||||
homeDir, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
@@ -193,18 +198,69 @@ func LoadKiroIDEToken() (*KiroTokenData, error) {
|
||||
// Normalize AuthMethod to lowercase (Kiro IDE uses "IdC" but we expect "idc")
|
||||
token.AuthMethod = strings.ToLower(token.AuthMethod)
|
||||
|
||||
// For Enterprise Kiro IDE (IDC auth), load clientId and clientSecret from device registration
|
||||
// The device registration file is located at ~/.aws/sso/cache/{clientIdHash}.json
|
||||
if token.ClientIDHash != "" && token.ClientID == "" {
|
||||
if err := loadDeviceRegistration(homeDir, token.ClientIDHash, &token); err != nil {
|
||||
// Log warning but don't fail - token might still work for some operations
|
||||
fmt.Printf("warning: failed to load device registration for clientIdHash %s: %v\n", token.ClientIDHash, err)
|
||||
}
|
||||
}
|
||||
|
||||
return &token, nil
|
||||
}
|
||||
|
||||
// loadDeviceRegistration loads clientId and clientSecret from the device registration file.
|
||||
// Enterprise Kiro IDE stores these in ~/.aws/sso/cache/{clientIdHash}.json
|
||||
func loadDeviceRegistration(homeDir, clientIDHash string, token *KiroTokenData) error {
|
||||
if clientIDHash == "" {
|
||||
return fmt.Errorf("clientIdHash is empty")
|
||||
}
|
||||
|
||||
// Sanitize clientIdHash to prevent path traversal
|
||||
if strings.Contains(clientIDHash, "/") || strings.Contains(clientIDHash, "\\") || strings.Contains(clientIDHash, "..") {
|
||||
return fmt.Errorf("invalid clientIdHash: contains path separator")
|
||||
}
|
||||
|
||||
deviceRegPath := filepath.Join(homeDir, ".aws", "sso", "cache", clientIDHash+".json")
|
||||
data, err := os.ReadFile(deviceRegPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read device registration file (%s): %w", deviceRegPath, err)
|
||||
}
|
||||
|
||||
// Device registration file structure
|
||||
var deviceReg struct {
|
||||
ClientID string `json:"clientId"`
|
||||
ClientSecret string `json:"clientSecret"`
|
||||
ExpiresAt string `json:"expiresAt"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(data, &deviceReg); err != nil {
|
||||
return fmt.Errorf("failed to parse device registration: %w", err)
|
||||
}
|
||||
|
||||
if deviceReg.ClientID == "" || deviceReg.ClientSecret == "" {
|
||||
return fmt.Errorf("device registration missing clientId or clientSecret")
|
||||
}
|
||||
|
||||
token.ClientID = deviceReg.ClientID
|
||||
token.ClientSecret = deviceReg.ClientSecret
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// LoadKiroTokenFromPath loads token data from a custom path.
|
||||
// This supports multiple accounts by allowing different token files.
|
||||
// For Enterprise Kiro IDE (IDC auth), it also loads clientId and clientSecret
|
||||
// from the device registration file referenced by clientIdHash.
|
||||
func LoadKiroTokenFromPath(tokenPath string) (*KiroTokenData, error) {
|
||||
homeDir, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get home directory: %w", err)
|
||||
}
|
||||
|
||||
// Expand ~ to home directory
|
||||
if len(tokenPath) > 0 && tokenPath[0] == '~' {
|
||||
homeDir, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get home directory: %w", err)
|
||||
}
|
||||
tokenPath = filepath.Join(homeDir, tokenPath[1:])
|
||||
}
|
||||
|
||||
@@ -225,6 +281,14 @@ func LoadKiroTokenFromPath(tokenPath string) (*KiroTokenData, error) {
|
||||
// Normalize AuthMethod to lowercase (Kiro IDE uses "IdC" but we expect "idc")
|
||||
token.AuthMethod = strings.ToLower(token.AuthMethod)
|
||||
|
||||
// For Enterprise Kiro IDE (IDC auth), load clientId and clientSecret from device registration
|
||||
if token.ClientIDHash != "" && token.ClientID == "" {
|
||||
if err := loadDeviceRegistration(homeDir, token.ClientIDHash, &token); err != nil {
|
||||
// Log warning but don't fail - token might still work for some operations
|
||||
fmt.Printf("warning: failed to load device registration for clientIdHash %s: %v\n", token.ClientIDHash, err)
|
||||
}
|
||||
}
|
||||
|
||||
return &token, nil
|
||||
}
|
||||
|
||||
@@ -360,7 +424,7 @@ func SanitizeEmailForFilename(email string) string {
|
||||
}
|
||||
|
||||
result := email
|
||||
|
||||
|
||||
// First, handle URL-encoded path traversal attempts (%2F, %2E, %5C, etc.)
|
||||
// This prevents encoded characters from bypassing the sanitization.
|
||||
// Note: We replace % last to catch any remaining encodings including double-encoding (%252F)
|
||||
@@ -378,7 +442,7 @@ func SanitizeEmailForFilename(email string) string {
|
||||
for _, char := range []string{"/", "\\", ":", "*", "?", "\"", "<", ">", "|", " ", "\x00"} {
|
||||
result = strings.ReplaceAll(result, char, "_")
|
||||
}
|
||||
|
||||
|
||||
// Prevent path traversal: replace leading dots in each path component
|
||||
// This handles cases like "../../../etc/passwd" → "_.._.._.._etc_passwd"
|
||||
parts := strings.Split(result, "_")
|
||||
@@ -389,6 +453,65 @@ func SanitizeEmailForFilename(email string) string {
|
||||
parts[i] = part
|
||||
}
|
||||
result = strings.Join(parts, "_")
|
||||
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// ExtractIDCIdentifier extracts a unique identifier from IDC startUrl.
|
||||
// Examples:
|
||||
// - "https://d-1234567890.awsapps.com/start" -> "d-1234567890"
|
||||
// - "https://my-company.awsapps.com/start" -> "my-company"
|
||||
// - "https://acme-corp.awsapps.com/start" -> "acme-corp"
|
||||
func ExtractIDCIdentifier(startURL string) string {
|
||||
if startURL == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Remove protocol prefix
|
||||
url := strings.TrimPrefix(startURL, "https://")
|
||||
url = strings.TrimPrefix(url, "http://")
|
||||
|
||||
// Extract subdomain (first part before the first dot)
|
||||
// Format: {identifier}.awsapps.com/start
|
||||
parts := strings.Split(url, ".")
|
||||
if len(parts) > 0 && parts[0] != "" {
|
||||
identifier := parts[0]
|
||||
// Sanitize for filename safety
|
||||
identifier = strings.ReplaceAll(identifier, "/", "_")
|
||||
identifier = strings.ReplaceAll(identifier, "\\", "_")
|
||||
identifier = strings.ReplaceAll(identifier, ":", "_")
|
||||
return identifier
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// GenerateTokenFileName generates a unique filename for token storage.
|
||||
// Priority: email > startUrl identifier (for IDC) > authMethod only
|
||||
// Format: kiro-{authMethod}-{identifier}.json
|
||||
func GenerateTokenFileName(tokenData *KiroTokenData) string {
|
||||
authMethod := tokenData.AuthMethod
|
||||
if authMethod == "" {
|
||||
authMethod = "unknown"
|
||||
}
|
||||
|
||||
// Priority 1: Use email if available
|
||||
if tokenData.Email != "" {
|
||||
// Sanitize email for filename (replace @ and . with -)
|
||||
sanitizedEmail := tokenData.Email
|
||||
sanitizedEmail = strings.ReplaceAll(sanitizedEmail, "@", "-")
|
||||
sanitizedEmail = strings.ReplaceAll(sanitizedEmail, ".", "-")
|
||||
return fmt.Sprintf("kiro-%s-%s.json", authMethod, sanitizedEmail)
|
||||
}
|
||||
|
||||
// Priority 2: For IDC, use startUrl identifier
|
||||
if authMethod == "idc" && tokenData.StartURL != "" {
|
||||
identifier := ExtractIDCIdentifier(tokenData.StartURL)
|
||||
if identifier != "" {
|
||||
return fmt.Sprintf("kiro-%s-%s.json", authMethod, identifier)
|
||||
}
|
||||
}
|
||||
|
||||
// Priority 3: Fallback to authMethod only
|
||||
return fmt.Sprintf("kiro-%s.json", authMethod)
|
||||
}
|
||||
|
||||
@@ -151,11 +151,161 @@ func TestSanitizeEmailForFilename(t *testing.T) {
|
||||
// createTestJWT creates a test JWT token with the given claims
|
||||
func createTestJWT(claims map[string]any) string {
|
||||
header := base64.RawURLEncoding.EncodeToString([]byte(`{"alg":"RS256","typ":"JWT"}`))
|
||||
|
||||
|
||||
payloadBytes, _ := json.Marshal(claims)
|
||||
payload := base64.RawURLEncoding.EncodeToString(payloadBytes)
|
||||
|
||||
|
||||
signature := base64.RawURLEncoding.EncodeToString([]byte("fake-signature"))
|
||||
|
||||
|
||||
return header + "." + payload + "." + signature
|
||||
}
|
||||
|
||||
func TestExtractIDCIdentifier(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
startURL string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "Empty URL",
|
||||
startURL: "",
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
name: "Standard IDC URL with d- prefix",
|
||||
startURL: "https://d-1234567890.awsapps.com/start",
|
||||
expected: "d-1234567890",
|
||||
},
|
||||
{
|
||||
name: "IDC URL with company name",
|
||||
startURL: "https://my-company.awsapps.com/start",
|
||||
expected: "my-company",
|
||||
},
|
||||
{
|
||||
name: "IDC URL with simple name",
|
||||
startURL: "https://acme-corp.awsapps.com/start",
|
||||
expected: "acme-corp",
|
||||
},
|
||||
{
|
||||
name: "IDC URL without https",
|
||||
startURL: "http://d-9876543210.awsapps.com/start",
|
||||
expected: "d-9876543210",
|
||||
},
|
||||
{
|
||||
name: "IDC URL with subdomain only",
|
||||
startURL: "https://test.awsapps.com/start",
|
||||
expected: "test",
|
||||
},
|
||||
{
|
||||
name: "Builder ID URL",
|
||||
startURL: "https://view.awsapps.com/start",
|
||||
expected: "view",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := ExtractIDCIdentifier(tt.startURL)
|
||||
if result != tt.expected {
|
||||
t.Errorf("ExtractIDCIdentifier() = %q, want %q", result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateTokenFileName(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
tokenData *KiroTokenData
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "IDC with email",
|
||||
tokenData: &KiroTokenData{
|
||||
AuthMethod: "idc",
|
||||
Email: "user@example.com",
|
||||
StartURL: "https://d-1234567890.awsapps.com/start",
|
||||
},
|
||||
expected: "kiro-idc-user-example-com.json",
|
||||
},
|
||||
{
|
||||
name: "IDC without email but with startUrl",
|
||||
tokenData: &KiroTokenData{
|
||||
AuthMethod: "idc",
|
||||
Email: "",
|
||||
StartURL: "https://d-1234567890.awsapps.com/start",
|
||||
},
|
||||
expected: "kiro-idc-d-1234567890.json",
|
||||
},
|
||||
{
|
||||
name: "IDC with company name in startUrl",
|
||||
tokenData: &KiroTokenData{
|
||||
AuthMethod: "idc",
|
||||
Email: "",
|
||||
StartURL: "https://my-company.awsapps.com/start",
|
||||
},
|
||||
expected: "kiro-idc-my-company.json",
|
||||
},
|
||||
{
|
||||
name: "IDC without email and without startUrl",
|
||||
tokenData: &KiroTokenData{
|
||||
AuthMethod: "idc",
|
||||
Email: "",
|
||||
StartURL: "",
|
||||
},
|
||||
expected: "kiro-idc.json",
|
||||
},
|
||||
{
|
||||
name: "Builder ID with email",
|
||||
tokenData: &KiroTokenData{
|
||||
AuthMethod: "builder-id",
|
||||
Email: "user@gmail.com",
|
||||
StartURL: "https://view.awsapps.com/start",
|
||||
},
|
||||
expected: "kiro-builder-id-user-gmail-com.json",
|
||||
},
|
||||
{
|
||||
name: "Builder ID without email",
|
||||
tokenData: &KiroTokenData{
|
||||
AuthMethod: "builder-id",
|
||||
Email: "",
|
||||
StartURL: "https://view.awsapps.com/start",
|
||||
},
|
||||
expected: "kiro-builder-id.json",
|
||||
},
|
||||
{
|
||||
name: "Social auth with email",
|
||||
tokenData: &KiroTokenData{
|
||||
AuthMethod: "google",
|
||||
Email: "user@gmail.com",
|
||||
},
|
||||
expected: "kiro-google-user-gmail-com.json",
|
||||
},
|
||||
{
|
||||
name: "Empty auth method",
|
||||
tokenData: &KiroTokenData{
|
||||
AuthMethod: "",
|
||||
Email: "",
|
||||
},
|
||||
expected: "kiro-unknown.json",
|
||||
},
|
||||
{
|
||||
name: "Email with special characters",
|
||||
tokenData: &KiroTokenData{
|
||||
AuthMethod: "idc",
|
||||
Email: "user.name+tag@sub.example.com",
|
||||
StartURL: "https://d-1234567890.awsapps.com/start",
|
||||
},
|
||||
expected: "kiro-idc-user-name+tag-sub-example-com.json",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := GenerateTokenFileName(tt.tokenData)
|
||||
if result != tt.expected {
|
||||
t.Errorf("GenerateTokenFileName() = %q, want %q", result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package kiro
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -58,7 +59,7 @@ type BackgroundRefresher struct {
|
||||
wg sync.WaitGroup
|
||||
oauth *KiroOAuth
|
||||
ssoClient *SSOOIDCClient
|
||||
callbackMu sync.RWMutex // 保护回调函数的并发访问
|
||||
callbackMu sync.RWMutex // 保护回调函数的并发访问
|
||||
onTokenRefreshed func(tokenID string, tokenData *KiroTokenData) // 刷新成功回调
|
||||
}
|
||||
|
||||
@@ -160,37 +161,59 @@ func (r *BackgroundRefresher) refreshBatch(ctx context.Context) {
|
||||
}
|
||||
|
||||
func (r *BackgroundRefresher) refreshSingle(ctx context.Context, token *Token) {
|
||||
var newTokenData *KiroTokenData
|
||||
var err error
|
||||
// Normalize auth method to lowercase for case-insensitive matching
|
||||
authMethod := strings.ToLower(token.AuthMethod)
|
||||
|
||||
switch token.AuthMethod {
|
||||
case "idc":
|
||||
newTokenData, err = r.ssoClient.RefreshTokenWithRegion(
|
||||
ctx,
|
||||
token.ClientID,
|
||||
token.ClientSecret,
|
||||
token.RefreshToken,
|
||||
token.Region,
|
||||
token.StartURL,
|
||||
)
|
||||
case "builder-id":
|
||||
newTokenData, err = r.ssoClient.RefreshToken(
|
||||
ctx,
|
||||
token.ClientID,
|
||||
token.ClientSecret,
|
||||
token.RefreshToken,
|
||||
)
|
||||
default:
|
||||
newTokenData, err = r.oauth.RefreshToken(ctx, token.RefreshToken)
|
||||
// Create refresh function based on auth method
|
||||
refreshFunc := func(ctx context.Context) (*KiroTokenData, error) {
|
||||
switch authMethod {
|
||||
case "idc":
|
||||
return r.ssoClient.RefreshTokenWithRegion(
|
||||
ctx,
|
||||
token.ClientID,
|
||||
token.ClientSecret,
|
||||
token.RefreshToken,
|
||||
token.Region,
|
||||
token.StartURL,
|
||||
)
|
||||
case "builder-id":
|
||||
return r.ssoClient.RefreshToken(
|
||||
ctx,
|
||||
token.ClientID,
|
||||
token.ClientSecret,
|
||||
token.RefreshToken,
|
||||
)
|
||||
default:
|
||||
return r.oauth.RefreshTokenWithFingerprint(ctx, token.RefreshToken, token.ID)
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Printf("failed to refresh token %s: %v", token.ID, err)
|
||||
// Use graceful degradation for better reliability
|
||||
result := RefreshWithGracefulDegradation(
|
||||
ctx,
|
||||
refreshFunc,
|
||||
token.AccessToken,
|
||||
token.ExpiresAt,
|
||||
)
|
||||
|
||||
if result.Error != nil {
|
||||
log.Printf("failed to refresh token %s: %v", token.ID, result.Error)
|
||||
return
|
||||
}
|
||||
|
||||
newTokenData := result.TokenData
|
||||
if result.UsedFallback {
|
||||
log.Printf("token %s: using existing token as fallback (refresh failed but token still valid)", token.ID)
|
||||
// Don't update the token file if we're using fallback
|
||||
// Just update LastVerified to prevent immediate re-check
|
||||
token.LastVerified = time.Now()
|
||||
return
|
||||
}
|
||||
|
||||
token.AccessToken = newTokenData.AccessToken
|
||||
token.RefreshToken = newTokenData.RefreshToken
|
||||
if newTokenData.RefreshToken != "" {
|
||||
token.RefreshToken = newTokenData.RefreshToken
|
||||
}
|
||||
token.LastVerified = time.Now()
|
||||
|
||||
if newTokenData.ExpiresAt != "" {
|
||||
|
||||
@@ -190,7 +190,7 @@ func (o *KiroOAuth) exchangeCodeForToken(ctx context.Context, code, codeVerifier
|
||||
}
|
||||
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("User-Agent", "cli-proxy-api/1.0.0")
|
||||
req.Header.Set("User-Agent", "KiroIDE-0.7.45-cli-proxy-api")
|
||||
|
||||
resp, err := o.httpClient.Do(req)
|
||||
if err != nil {
|
||||
@@ -232,7 +232,14 @@ func (o *KiroOAuth) exchangeCodeForToken(ctx context.Context, code, codeVerifier
|
||||
}
|
||||
|
||||
// RefreshToken refreshes an expired access token.
|
||||
// Uses KiroIDE-style User-Agent to match official Kiro IDE behavior.
|
||||
func (o *KiroOAuth) RefreshToken(ctx context.Context, refreshToken string) (*KiroTokenData, error) {
|
||||
return o.RefreshTokenWithFingerprint(ctx, refreshToken, "")
|
||||
}
|
||||
|
||||
// RefreshTokenWithFingerprint refreshes an expired access token with a specific fingerprint.
|
||||
// tokenKey is used to generate a consistent fingerprint for the token.
|
||||
func (o *KiroOAuth) RefreshTokenWithFingerprint(ctx context.Context, refreshToken, tokenKey string) (*KiroTokenData, error) {
|
||||
payload := map[string]string{
|
||||
"refreshToken": refreshToken,
|
||||
}
|
||||
@@ -249,7 +256,11 @@ func (o *KiroOAuth) RefreshToken(ctx context.Context, refreshToken string) (*Kir
|
||||
}
|
||||
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("User-Agent", "cli-proxy-api/1.0.0")
|
||||
|
||||
// Use KiroIDE-style User-Agent to match official Kiro IDE behavior
|
||||
// This helps avoid 403 errors from server-side User-Agent validation
|
||||
userAgent := buildKiroUserAgent(tokenKey)
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
resp, err := o.httpClient.Do(req)
|
||||
if err != nil {
|
||||
@@ -264,7 +275,7 @@ func (o *KiroOAuth) RefreshToken(ctx context.Context, refreshToken string) (*Kir
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
log.Debugf("token refresh failed (status %d): %s", resp.StatusCode, string(respBody))
|
||||
return nil, fmt.Errorf("token refresh failed (status %d)", resp.StatusCode)
|
||||
return nil, fmt.Errorf("token refresh failed (status %d): %s", resp.StatusCode, string(respBody))
|
||||
}
|
||||
|
||||
var tokenResp KiroTokenResponse
|
||||
@@ -290,6 +301,19 @@ func (o *KiroOAuth) RefreshToken(ctx context.Context, refreshToken string) (*Kir
|
||||
}, nil
|
||||
}
|
||||
|
||||
// buildKiroUserAgent builds a KiroIDE-style User-Agent string.
|
||||
// If tokenKey is provided, uses fingerprint manager for consistent fingerprint.
|
||||
// Otherwise generates a simple KiroIDE User-Agent.
|
||||
func buildKiroUserAgent(tokenKey string) string {
|
||||
if tokenKey != "" {
|
||||
fm := NewFingerprintManager()
|
||||
fp := fm.GetFingerprint(tokenKey)
|
||||
return fmt.Sprintf("KiroIDE-%s-%s", fp.KiroVersion, fp.KiroHash[:16])
|
||||
}
|
||||
// Default KiroIDE User-Agent matching kiro-openai-gateway format
|
||||
return "KiroIDE-0.7.45-cli-proxy-api"
|
||||
}
|
||||
|
||||
// LoginWithGoogle performs OAuth login with Google using Kiro's social auth.
|
||||
// This uses a custom protocol handler (kiro://) to receive the callback.
|
||||
func (o *KiroOAuth) LoginWithGoogle(ctx context.Context) (*KiroTokenData, error) {
|
||||
|
||||
@@ -421,7 +421,7 @@ func (h *OAuthWebHandler) saveTokenToFile(tokenData *KiroTokenData) {
|
||||
log.Errorf("OAuth Web: failed to resolve auth directory: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Fall back to default location
|
||||
if authDir == "" {
|
||||
home, err := os.UserHomeDir()
|
||||
@@ -431,24 +431,16 @@ func (h *OAuthWebHandler) saveTokenToFile(tokenData *KiroTokenData) {
|
||||
}
|
||||
authDir = filepath.Join(home, ".cli-proxy-api")
|
||||
}
|
||||
|
||||
|
||||
// Create directory if not exists
|
||||
if err := os.MkdirAll(authDir, 0700); err != nil {
|
||||
log.Errorf("OAuth Web: failed to create auth directory: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Generate filename based on auth method
|
||||
// Format: kiro-{authMethod}.json or kiro-{authMethod}-{email}.json
|
||||
fileName := fmt.Sprintf("kiro-%s.json", tokenData.AuthMethod)
|
||||
if tokenData.Email != "" {
|
||||
// Sanitize email for filename (replace @ and . with -)
|
||||
sanitizedEmail := tokenData.Email
|
||||
sanitizedEmail = strings.ReplaceAll(sanitizedEmail, "@", "-")
|
||||
sanitizedEmail = strings.ReplaceAll(sanitizedEmail, ".", "-")
|
||||
fileName = fmt.Sprintf("kiro-%s-%s.json", tokenData.AuthMethod, sanitizedEmail)
|
||||
}
|
||||
|
||||
|
||||
// Generate filename using the unified function
|
||||
fileName := GenerateTokenFileName(tokenData)
|
||||
|
||||
authFilePath := filepath.Join(authDir, fileName)
|
||||
|
||||
// Convert to storage format and save
|
||||
@@ -811,13 +803,8 @@ func (h *OAuthWebHandler) handleImportToken(c *gin.Context) {
|
||||
// Save token to file
|
||||
h.saveTokenToFile(tokenData)
|
||||
|
||||
// Generate filename for response
|
||||
fileName := fmt.Sprintf("kiro-%s.json", tokenData.AuthMethod)
|
||||
if tokenData.Email != "" {
|
||||
sanitizedEmail := strings.ReplaceAll(tokenData.Email, "@", "-")
|
||||
sanitizedEmail = strings.ReplaceAll(sanitizedEmail, ".", "-")
|
||||
fileName = fmt.Sprintf("kiro-%s-%s.json", tokenData.AuthMethod, sanitizedEmail)
|
||||
}
|
||||
// Generate filename for response using the unified function
|
||||
fileName := GenerateTokenFileName(tokenData)
|
||||
|
||||
log.Infof("OAuth Web: token imported successfully")
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
|
||||
@@ -9,14 +9,14 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
DefaultMinTokenInterval = 10 * time.Second
|
||||
DefaultMaxTokenInterval = 30 * time.Second
|
||||
DefaultMinTokenInterval = 1 * time.Second
|
||||
DefaultMaxTokenInterval = 2 * time.Second
|
||||
DefaultDailyMaxRequests = 500
|
||||
DefaultJitterPercent = 0.3
|
||||
DefaultBackoffBase = 2 * time.Minute
|
||||
DefaultBackoffMax = 60 * time.Minute
|
||||
DefaultBackoffMultiplier = 2.0
|
||||
DefaultSuspendCooldown = 24 * time.Hour
|
||||
DefaultBackoffBase = 30 * time.Second
|
||||
DefaultBackoffMax = 5 * time.Minute
|
||||
DefaultBackoffMultiplier = 1.5
|
||||
DefaultSuspendCooldown = 1 * time.Hour
|
||||
)
|
||||
|
||||
// TokenState Token 状态
|
||||
|
||||
159
internal/auth/kiro/refresh_utils.go
Normal file
159
internal/auth/kiro/refresh_utils.go
Normal file
@@ -0,0 +1,159 @@
|
||||
// Package kiro provides refresh utilities for Kiro token management.
|
||||
package kiro
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// RefreshResult contains the result of a token refresh attempt.
|
||||
type RefreshResult struct {
|
||||
TokenData *KiroTokenData
|
||||
Error error
|
||||
UsedFallback bool // True if we used the existing token as fallback
|
||||
}
|
||||
|
||||
// RefreshWithGracefulDegradation attempts to refresh a token with graceful degradation.
|
||||
// If refresh fails but the existing access token is still valid, it returns the existing token.
|
||||
// This matches kiro-openai-gateway's behavior for better reliability.
|
||||
//
|
||||
// Parameters:
|
||||
// - ctx: Context for the request
|
||||
// - refreshFunc: Function to perform the actual refresh
|
||||
// - existingAccessToken: Current access token (for fallback)
|
||||
// - expiresAt: Expiration time of the existing token
|
||||
//
|
||||
// Returns:
|
||||
// - RefreshResult containing the new or existing token data
|
||||
func RefreshWithGracefulDegradation(
|
||||
ctx context.Context,
|
||||
refreshFunc func(ctx context.Context) (*KiroTokenData, error),
|
||||
existingAccessToken string,
|
||||
expiresAt time.Time,
|
||||
) RefreshResult {
|
||||
// Try to refresh the token
|
||||
newTokenData, err := refreshFunc(ctx)
|
||||
if err == nil {
|
||||
return RefreshResult{
|
||||
TokenData: newTokenData,
|
||||
Error: nil,
|
||||
UsedFallback: false,
|
||||
}
|
||||
}
|
||||
|
||||
// Refresh failed - check if we can use the existing token
|
||||
log.Warnf("kiro: token refresh failed: %v", err)
|
||||
|
||||
// Check if existing token is still valid (not expired)
|
||||
if existingAccessToken != "" && time.Now().Before(expiresAt) {
|
||||
remainingTime := time.Until(expiresAt)
|
||||
log.Warnf("kiro: using existing access token (expires in %v). Will retry refresh later.", remainingTime.Round(time.Second))
|
||||
|
||||
return RefreshResult{
|
||||
TokenData: &KiroTokenData{
|
||||
AccessToken: existingAccessToken,
|
||||
ExpiresAt: expiresAt.Format(time.RFC3339),
|
||||
},
|
||||
Error: nil,
|
||||
UsedFallback: true,
|
||||
}
|
||||
}
|
||||
|
||||
// Token is expired and refresh failed - return the error
|
||||
return RefreshResult{
|
||||
TokenData: nil,
|
||||
Error: fmt.Errorf("token refresh failed and existing token is expired: %w", err),
|
||||
UsedFallback: false,
|
||||
}
|
||||
}
|
||||
|
||||
// IsTokenExpiringSoon checks if a token is expiring within the given threshold.
|
||||
// Default threshold is 5 minutes if not specified.
|
||||
func IsTokenExpiringSoon(expiresAt time.Time, threshold time.Duration) bool {
|
||||
if threshold == 0 {
|
||||
threshold = 5 * time.Minute
|
||||
}
|
||||
return time.Now().Add(threshold).After(expiresAt)
|
||||
}
|
||||
|
||||
// IsTokenExpired checks if a token has already expired.
|
||||
func IsTokenExpired(expiresAt time.Time) bool {
|
||||
return time.Now().After(expiresAt)
|
||||
}
|
||||
|
||||
// ParseExpiresAt parses an expiration time string in RFC3339 format.
|
||||
// Returns zero time if parsing fails.
|
||||
func ParseExpiresAt(expiresAtStr string) time.Time {
|
||||
if expiresAtStr == "" {
|
||||
return time.Time{}
|
||||
}
|
||||
t, err := time.Parse(time.RFC3339, expiresAtStr)
|
||||
if err != nil {
|
||||
log.Debugf("kiro: failed to parse expiresAt '%s': %v", expiresAtStr, err)
|
||||
return time.Time{}
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// RefreshConfig contains configuration for token refresh behavior.
|
||||
type RefreshConfig struct {
|
||||
// MaxRetries is the maximum number of refresh attempts (default: 1)
|
||||
MaxRetries int
|
||||
// RetryDelay is the delay between retry attempts (default: 1 second)
|
||||
RetryDelay time.Duration
|
||||
// RefreshThreshold is how early to refresh before expiration (default: 5 minutes)
|
||||
RefreshThreshold time.Duration
|
||||
// EnableGracefulDegradation allows using existing token if refresh fails (default: true)
|
||||
EnableGracefulDegradation bool
|
||||
}
|
||||
|
||||
// DefaultRefreshConfig returns the default refresh configuration.
|
||||
func DefaultRefreshConfig() RefreshConfig {
|
||||
return RefreshConfig{
|
||||
MaxRetries: 1,
|
||||
RetryDelay: time.Second,
|
||||
RefreshThreshold: 5 * time.Minute,
|
||||
EnableGracefulDegradation: true,
|
||||
}
|
||||
}
|
||||
|
||||
// RefreshWithRetry attempts to refresh a token with retry logic.
|
||||
func RefreshWithRetry(
|
||||
ctx context.Context,
|
||||
refreshFunc func(ctx context.Context) (*KiroTokenData, error),
|
||||
config RefreshConfig,
|
||||
) (*KiroTokenData, error) {
|
||||
var lastErr error
|
||||
|
||||
maxAttempts := config.MaxRetries + 1
|
||||
if maxAttempts < 1 {
|
||||
maxAttempts = 1
|
||||
}
|
||||
|
||||
for attempt := 1; attempt <= maxAttempts; attempt++ {
|
||||
tokenData, err := refreshFunc(ctx)
|
||||
if err == nil {
|
||||
if attempt > 1 {
|
||||
log.Infof("kiro: token refresh succeeded on attempt %d", attempt)
|
||||
}
|
||||
return tokenData, nil
|
||||
}
|
||||
|
||||
lastErr = err
|
||||
log.Warnf("kiro: token refresh attempt %d/%d failed: %v", attempt, maxAttempts, err)
|
||||
|
||||
// Don't sleep after the last attempt
|
||||
if attempt < maxAttempts {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
case <-time.After(config.RetryDelay):
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("token refresh failed after %d attempts: %w", maxAttempts, lastErr)
|
||||
}
|
||||
@@ -229,7 +229,7 @@ func (c *SocialAuthClient) CreateToken(ctx context.Context, req *CreateTokenRequ
|
||||
}
|
||||
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
httpReq.Header.Set("User-Agent", "cli-proxy-api/1.0.0")
|
||||
httpReq.Header.Set("User-Agent", "KiroIDE-0.7.45-cli-proxy-api")
|
||||
|
||||
resp, err := c.httpClient.Do(httpReq)
|
||||
if err != nil {
|
||||
|
||||
@@ -684,6 +684,7 @@ func (c *SSOOIDCClient) CreateToken(ctx context.Context, clientID, clientSecret,
|
||||
}
|
||||
|
||||
// RefreshToken refreshes an access token using the refresh token.
|
||||
// Includes retry logic and improved error handling for better reliability.
|
||||
func (c *SSOOIDCClient) RefreshToken(ctx context.Context, clientID, clientSecret, refreshToken string) (*KiroTokenData, error) {
|
||||
payload := map[string]string{
|
||||
"clientId": clientID,
|
||||
@@ -701,8 +702,13 @@ func (c *SSOOIDCClient) RefreshToken(ctx context.Context, clientID, clientSecret
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Set headers matching Kiro IDE behavior for better compatibility
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("User-Agent", kiroUserAgent)
|
||||
req.Header.Set("Host", "oidc.us-east-1.amazonaws.com")
|
||||
req.Header.Set("x-amz-user-agent", idcAmzUserAgent)
|
||||
req.Header.Set("User-Agent", "node")
|
||||
req.Header.Set("Accept", "*/*")
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
@@ -716,8 +722,8 @@ func (c *SSOOIDCClient) RefreshToken(ctx context.Context, clientID, clientSecret
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
log.Debugf("token refresh failed (status %d): %s", resp.StatusCode, string(respBody))
|
||||
return nil, fmt.Errorf("token refresh failed (status %d)", resp.StatusCode)
|
||||
log.Warnf("token refresh failed (status %d): %s", resp.StatusCode, string(respBody))
|
||||
return nil, fmt.Errorf("token refresh failed (status %d): %s", resp.StatusCode, string(respBody))
|
||||
}
|
||||
|
||||
var result CreateTokenResponse
|
||||
|
||||
@@ -187,8 +187,9 @@ func (r *FileTokenRepository) readTokenFile(path string) (*Token, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// 检查 auth_method
|
||||
// 检查 auth_method (case-insensitive comparison to handle "IdC", "IDC", "idc", etc.)
|
||||
authMethod, _ := metadata["auth_method"].(string)
|
||||
authMethod = strings.ToLower(authMethod)
|
||||
if authMethod != "idc" && authMethod != "builder-id" {
|
||||
return nil, nil // 只处理 IDC 和 Builder ID token
|
||||
}
|
||||
|
||||
@@ -986,6 +986,7 @@ func SaveConfigPreserveComments(configFile string, cfg *Config) error {
|
||||
removeLegacyGenerativeLanguageKeys(original.Content[0])
|
||||
|
||||
pruneMappingToGeneratedKeys(original.Content[0], generated.Content[0], "oauth-excluded-models")
|
||||
pruneMappingToGeneratedKeys(original.Content[0], generated.Content[0], "oauth-model-alias")
|
||||
|
||||
// Merge generated into original in-place, preserving comments/order of existing nodes.
|
||||
mergeMappingPreserve(original.Content[0], generated.Content[0])
|
||||
@@ -1476,6 +1477,16 @@ func pruneMappingToGeneratedKeys(dstRoot, srcRoot *yaml.Node, key string) {
|
||||
}
|
||||
srcIdx := findMapKeyIndex(srcRoot, key)
|
||||
if srcIdx < 0 {
|
||||
// Keep an explicit empty mapping for oauth-model-alias when it was previously present.
|
||||
//
|
||||
// Rationale: LoadConfig runs MigrateOAuthModelAlias before unmarshalling. If the
|
||||
// oauth-model-alias key is missing, migration will add the default antigravity aliases.
|
||||
// When users delete the last channel from oauth-model-alias via the management API,
|
||||
// we want that deletion to persist across hot reloads and restarts.
|
||||
if key == "oauth-model-alias" {
|
||||
dstRoot.Content[dstIdx+1] = &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
|
||||
return
|
||||
}
|
||||
removeMapKey(dstRoot, key)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -44,10 +44,12 @@ type RequestLogger interface {
|
||||
// - apiRequest: The API request data
|
||||
// - apiResponse: The API response data
|
||||
// - requestID: Optional request ID for log file naming
|
||||
// - requestTimestamp: When the request was received
|
||||
// - apiResponseTimestamp: When the API response was received
|
||||
//
|
||||
// Returns:
|
||||
// - error: An error if logging fails, nil otherwise
|
||||
LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string) error
|
||||
LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error
|
||||
|
||||
// LogStreamingRequest initiates logging for a streaming request and returns a writer for chunks.
|
||||
//
|
||||
@@ -109,6 +111,12 @@ type StreamingLogWriter interface {
|
||||
// - error: An error if writing fails, nil otherwise
|
||||
WriteAPIResponse(apiResponse []byte) error
|
||||
|
||||
// SetFirstChunkTimestamp sets the TTFB timestamp captured when first chunk was received.
|
||||
//
|
||||
// Parameters:
|
||||
// - timestamp: The time when first response chunk was received
|
||||
SetFirstChunkTimestamp(timestamp time.Time)
|
||||
|
||||
// Close finalizes the log file and cleans up resources.
|
||||
//
|
||||
// Returns:
|
||||
@@ -180,20 +188,22 @@ func (l *FileRequestLogger) SetEnabled(enabled bool) {
|
||||
// - apiRequest: The API request data
|
||||
// - apiResponse: The API response data
|
||||
// - requestID: Optional request ID for log file naming
|
||||
// - requestTimestamp: When the request was received
|
||||
// - apiResponseTimestamp: When the API response was received
|
||||
//
|
||||
// Returns:
|
||||
// - error: An error if logging fails, nil otherwise
|
||||
func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string) error {
|
||||
return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, false, requestID)
|
||||
func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
|
||||
return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, false, requestID, requestTimestamp, apiResponseTimestamp)
|
||||
}
|
||||
|
||||
// LogRequestWithOptions logs a request with optional forced logging behavior.
|
||||
// The force flag allows writing error logs even when regular request logging is disabled.
|
||||
func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string) error {
|
||||
return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, force, requestID)
|
||||
func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
|
||||
return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, force, requestID, requestTimestamp, apiResponseTimestamp)
|
||||
}
|
||||
|
||||
func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string) error {
|
||||
func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
|
||||
if !l.enabled && !force {
|
||||
return nil
|
||||
}
|
||||
@@ -247,6 +257,8 @@ func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[st
|
||||
responseHeaders,
|
||||
responseToWrite,
|
||||
decompressErr,
|
||||
requestTimestamp,
|
||||
apiResponseTimestamp,
|
||||
)
|
||||
if errClose := logFile.Close(); errClose != nil {
|
||||
log.WithError(errClose).Warn("failed to close request log file")
|
||||
@@ -499,17 +511,22 @@ func (l *FileRequestLogger) writeNonStreamingLog(
|
||||
responseHeaders map[string][]string,
|
||||
response []byte,
|
||||
decompressErr error,
|
||||
requestTimestamp time.Time,
|
||||
apiResponseTimestamp time.Time,
|
||||
) error {
|
||||
if errWrite := writeRequestInfoWithBody(w, url, method, requestHeaders, requestBody, requestBodyPath, time.Now()); errWrite != nil {
|
||||
if requestTimestamp.IsZero() {
|
||||
requestTimestamp = time.Now()
|
||||
}
|
||||
if errWrite := writeRequestInfoWithBody(w, url, method, requestHeaders, requestBody, requestBodyPath, requestTimestamp); errWrite != nil {
|
||||
return errWrite
|
||||
}
|
||||
if errWrite := writeAPISection(w, "=== API REQUEST ===\n", "=== API REQUEST", apiRequest); errWrite != nil {
|
||||
if errWrite := writeAPISection(w, "=== API REQUEST ===\n", "=== API REQUEST", apiRequest, time.Time{}); errWrite != nil {
|
||||
return errWrite
|
||||
}
|
||||
if errWrite := writeAPIErrorResponses(w, apiResponseErrors); errWrite != nil {
|
||||
return errWrite
|
||||
}
|
||||
if errWrite := writeAPISection(w, "=== API RESPONSE ===\n", "=== API RESPONSE", apiResponse); errWrite != nil {
|
||||
if errWrite := writeAPISection(w, "=== API RESPONSE ===\n", "=== API RESPONSE", apiResponse, apiResponseTimestamp); errWrite != nil {
|
||||
return errWrite
|
||||
}
|
||||
return writeResponseSection(w, statusCode, true, responseHeaders, bytes.NewReader(response), decompressErr, true)
|
||||
@@ -583,7 +600,7 @@ func writeRequestInfoWithBody(
|
||||
return nil
|
||||
}
|
||||
|
||||
func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, payload []byte) error {
|
||||
func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, payload []byte, timestamp time.Time) error {
|
||||
if len(payload) == 0 {
|
||||
return nil
|
||||
}
|
||||
@@ -601,6 +618,11 @@ func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, pa
|
||||
if _, errWrite := io.WriteString(w, sectionHeader); errWrite != nil {
|
||||
return errWrite
|
||||
}
|
||||
if !timestamp.IsZero() {
|
||||
if _, errWrite := io.WriteString(w, fmt.Sprintf("Timestamp: %s\n", timestamp.Format(time.RFC3339Nano))); errWrite != nil {
|
||||
return errWrite
|
||||
}
|
||||
}
|
||||
if _, errWrite := w.Write(payload); errWrite != nil {
|
||||
return errWrite
|
||||
}
|
||||
@@ -974,6 +996,9 @@ type FileStreamingLogWriter struct {
|
||||
|
||||
// apiResponse stores the upstream API response data.
|
||||
apiResponse []byte
|
||||
|
||||
// apiResponseTimestamp captures when the API response was received.
|
||||
apiResponseTimestamp time.Time
|
||||
}
|
||||
|
||||
// WriteChunkAsync writes a response chunk asynchronously (non-blocking).
|
||||
@@ -1053,6 +1078,12 @@ func (w *FileStreamingLogWriter) WriteAPIResponse(apiResponse []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (w *FileStreamingLogWriter) SetFirstChunkTimestamp(timestamp time.Time) {
|
||||
if !timestamp.IsZero() {
|
||||
w.apiResponseTimestamp = timestamp
|
||||
}
|
||||
}
|
||||
|
||||
// Close finalizes the log file and cleans up resources.
|
||||
// It writes all buffered data to the file in the correct order:
|
||||
// API REQUEST -> API RESPONSE -> RESPONSE (status, headers, body chunks)
|
||||
@@ -1140,10 +1171,10 @@ func (w *FileStreamingLogWriter) writeFinalLog(logFile *os.File) error {
|
||||
if errWrite := writeRequestInfoWithBody(logFile, w.url, w.method, w.requestHeaders, nil, w.requestBodyPath, w.timestamp); errWrite != nil {
|
||||
return errWrite
|
||||
}
|
||||
if errWrite := writeAPISection(logFile, "=== API REQUEST ===\n", "=== API REQUEST", w.apiRequest); errWrite != nil {
|
||||
if errWrite := writeAPISection(logFile, "=== API REQUEST ===\n", "=== API REQUEST", w.apiRequest, time.Time{}); errWrite != nil {
|
||||
return errWrite
|
||||
}
|
||||
if errWrite := writeAPISection(logFile, "=== API RESPONSE ===\n", "=== API RESPONSE", w.apiResponse); errWrite != nil {
|
||||
if errWrite := writeAPISection(logFile, "=== API RESPONSE ===\n", "=== API RESPONSE", w.apiResponse, w.apiResponseTimestamp); errWrite != nil {
|
||||
return errWrite
|
||||
}
|
||||
|
||||
@@ -1220,6 +1251,8 @@ func (w *NoOpStreamingLogWriter) WriteAPIResponse(_ []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (w *NoOpStreamingLogWriter) SetFirstChunkTimestamp(_ time.Time) {}
|
||||
|
||||
// Close is a no-op implementation that does nothing and always returns nil.
|
||||
//
|
||||
// Returns:
|
||||
|
||||
@@ -1,848 +1,69 @@
|
||||
// Package registry provides model definitions for various AI service providers.
|
||||
// This file contains static model definitions that can be used by clients
|
||||
// when registering their supported models.
|
||||
// Package registry provides model definitions and lookup helpers for various AI providers.
|
||||
// Static model metadata is stored in model_definitions_static_data.go.
|
||||
package registry
|
||||
|
||||
// GetClaudeModels returns the standard Claude model definitions
|
||||
func GetClaudeModels() []*ModelInfo {
|
||||
return []*ModelInfo{
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
{
|
||||
ID: "claude-haiku-4-5-20251001",
|
||||
Object: "model",
|
||||
Created: 1759276800, // 2025-10-01
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.5 Haiku",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
// Thinking: not supported for Haiku models
|
||||
},
|
||||
{
|
||||
ID: "claude-sonnet-4-5-20250929",
|
||||
Object: "model",
|
||||
Created: 1759104000, // 2025-09-29
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.5 Sonnet",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-5-20251101",
|
||||
Object: "model",
|
||||
Created: 1761955200, // 2025-11-01
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.5 Opus",
|
||||
Description: "Premium model combining maximum intelligence with practical performance",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-1-20250805",
|
||||
Object: "model",
|
||||
Created: 1722945600, // 2025-08-05
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.1 Opus",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 32000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-20250514",
|
||||
Object: "model",
|
||||
Created: 1715644800, // 2025-05-14
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4 Opus",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 32000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-sonnet-4-20250514",
|
||||
Object: "model",
|
||||
Created: 1715644800, // 2025-05-14
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4 Sonnet",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-3-7-sonnet-20250219",
|
||||
Object: "model",
|
||||
Created: 1708300800, // 2025-02-19
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 3.7 Sonnet",
|
||||
ContextLength: 128000,
|
||||
MaxCompletionTokens: 8192,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-3-5-haiku-20241022",
|
||||
Object: "model",
|
||||
Created: 1729555200, // 2024-10-22
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 3.5 Haiku",
|
||||
ContextLength: 128000,
|
||||
MaxCompletionTokens: 8192,
|
||||
// Thinking: not supported for Haiku models
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// GetGeminiModels returns the standard Gemini model definitions
|
||||
func GetGeminiModels() []*ModelInfo {
|
||||
return []*ModelInfo{
|
||||
{
|
||||
ID: "gemini-2.5-pro",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-pro",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Pro",
|
||||
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash",
|
||||
Version: "001",
|
||||
DisplayName: "Gemini 2.5 Flash",
|
||||
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash-lite",
|
||||
Object: "model",
|
||||
Created: 1753142400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash-lite",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Flash Lite",
|
||||
Description: "Our smallest and most cost effective model, built for at scale usage.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-pro-preview",
|
||||
Object: "model",
|
||||
Created: 1737158400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-pro-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Pro Preview",
|
||||
Description: "Gemini 3 Pro Preview",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-flash-preview",
|
||||
Object: "model",
|
||||
Created: 1765929600,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-flash-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Flash Preview",
|
||||
Description: "Gemini 3 Flash Preview",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-pro-image-preview",
|
||||
Object: "model",
|
||||
Created: 1737158400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-pro-image-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Pro Image Preview",
|
||||
Description: "Gemini 3 Pro Image Preview",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func GetGeminiVertexModels() []*ModelInfo {
|
||||
return []*ModelInfo{
|
||||
{
|
||||
ID: "gemini-2.5-pro",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-pro",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Pro",
|
||||
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash",
|
||||
Version: "001",
|
||||
DisplayName: "Gemini 2.5 Flash",
|
||||
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash-lite",
|
||||
Object: "model",
|
||||
Created: 1753142400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash-lite",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Flash Lite",
|
||||
Description: "Our smallest and most cost effective model, built for at scale usage.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-pro-preview",
|
||||
Object: "model",
|
||||
Created: 1737158400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-pro-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Pro Preview",
|
||||
Description: "Gemini 3 Pro Preview",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-flash-preview",
|
||||
Object: "model",
|
||||
Created: 1765929600,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-flash-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Flash Preview",
|
||||
Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-pro-image-preview",
|
||||
Object: "model",
|
||||
Created: 1737158400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-pro-image-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Pro Image Preview",
|
||||
Description: "Gemini 3 Pro Image Preview",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||
},
|
||||
// Imagen image generation models - use :predict action
|
||||
{
|
||||
ID: "imagen-4.0-generate-001",
|
||||
Object: "model",
|
||||
Created: 1750000000,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/imagen-4.0-generate-001",
|
||||
Version: "4.0",
|
||||
DisplayName: "Imagen 4.0 Generate",
|
||||
Description: "Imagen 4.0 image generation model",
|
||||
SupportedGenerationMethods: []string{"predict"},
|
||||
},
|
||||
{
|
||||
ID: "imagen-4.0-ultra-generate-001",
|
||||
Object: "model",
|
||||
Created: 1750000000,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/imagen-4.0-ultra-generate-001",
|
||||
Version: "4.0",
|
||||
DisplayName: "Imagen 4.0 Ultra Generate",
|
||||
Description: "Imagen 4.0 Ultra high-quality image generation model",
|
||||
SupportedGenerationMethods: []string{"predict"},
|
||||
},
|
||||
{
|
||||
ID: "imagen-3.0-generate-002",
|
||||
Object: "model",
|
||||
Created: 1740000000,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/imagen-3.0-generate-002",
|
||||
Version: "3.0",
|
||||
DisplayName: "Imagen 3.0 Generate",
|
||||
Description: "Imagen 3.0 image generation model",
|
||||
SupportedGenerationMethods: []string{"predict"},
|
||||
},
|
||||
{
|
||||
ID: "imagen-3.0-fast-generate-001",
|
||||
Object: "model",
|
||||
Created: 1740000000,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/imagen-3.0-fast-generate-001",
|
||||
Version: "3.0",
|
||||
DisplayName: "Imagen 3.0 Fast Generate",
|
||||
Description: "Imagen 3.0 fast image generation model",
|
||||
SupportedGenerationMethods: []string{"predict"},
|
||||
},
|
||||
{
|
||||
ID: "imagen-4.0-fast-generate-001",
|
||||
Object: "model",
|
||||
Created: 1750000000,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/imagen-4.0-fast-generate-001",
|
||||
Version: "4.0",
|
||||
DisplayName: "Imagen 4.0 Fast Generate",
|
||||
Description: "Imagen 4.0 fast image generation model",
|
||||
SupportedGenerationMethods: []string{"predict"},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// GetGeminiCLIModels returns the standard Gemini model definitions
|
||||
func GetGeminiCLIModels() []*ModelInfo {
|
||||
return []*ModelInfo{
|
||||
{
|
||||
ID: "gemini-2.5-pro",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-pro",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Pro",
|
||||
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash",
|
||||
Version: "001",
|
||||
DisplayName: "Gemini 2.5 Flash",
|
||||
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash-lite",
|
||||
Object: "model",
|
||||
Created: 1753142400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash-lite",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Flash Lite",
|
||||
Description: "Our smallest and most cost effective model, built for at scale usage.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-pro-preview",
|
||||
Object: "model",
|
||||
Created: 1737158400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-pro-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Pro Preview",
|
||||
Description: "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-flash-preview",
|
||||
Object: "model",
|
||||
Created: 1765929600,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-flash-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Flash Preview",
|
||||
Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
|
||||
func GetAIStudioModels() []*ModelInfo {
|
||||
return []*ModelInfo{
|
||||
{
|
||||
ID: "gemini-2.5-pro",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-pro",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Pro",
|
||||
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash",
|
||||
Version: "001",
|
||||
DisplayName: "Gemini 2.5 Flash",
|
||||
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash-lite",
|
||||
Object: "model",
|
||||
Created: 1753142400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash-lite",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Flash Lite",
|
||||
Description: "Our smallest and most cost effective model, built for at scale usage.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-pro-preview",
|
||||
Object: "model",
|
||||
Created: 1737158400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-pro-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Pro Preview",
|
||||
Description: "Gemini 3 Pro Preview",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-flash-preview",
|
||||
Object: "model",
|
||||
Created: 1765929600,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-flash-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Flash Preview",
|
||||
Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-pro-latest",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-pro-latest",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini Pro Latest",
|
||||
Description: "Latest release of Gemini Pro",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-flash-latest",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-flash-latest",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini Flash Latest",
|
||||
Description: "Latest release of Gemini Flash",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-flash-lite-latest",
|
||||
Object: "model",
|
||||
Created: 1753142400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-flash-lite-latest",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini Flash-Lite Latest",
|
||||
Description: "Latest release of Gemini Flash-Lite",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash-image-preview",
|
||||
Object: "model",
|
||||
Created: 1756166400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash-image-preview",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Flash Image Preview",
|
||||
Description: "State-of-the-art image generation and editing model.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 8192,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
// image models don't support thinkingConfig; leave Thinking nil
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash-image",
|
||||
Object: "model",
|
||||
Created: 1759363200,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash-image",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Flash Image",
|
||||
Description: "State-of-the-art image generation and editing model.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 8192,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
// image models don't support thinkingConfig; leave Thinking nil
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// GetOpenAIModels returns the standard OpenAI model definitions
|
||||
func GetOpenAIModels() []*ModelInfo {
|
||||
return []*ModelInfo{
|
||||
{
|
||||
ID: "gpt-5",
|
||||
Object: "model",
|
||||
Created: 1754524800,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-08-07",
|
||||
DisplayName: "GPT 5",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex",
|
||||
Object: "model",
|
||||
Created: 1757894400,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-09-15",
|
||||
DisplayName: "GPT 5 Codex",
|
||||
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex-mini",
|
||||
Object: "model",
|
||||
Created: 1762473600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-11-07",
|
||||
DisplayName: "GPT 5 Codex Mini",
|
||||
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5.1 Codex",
|
||||
Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-mini",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5.1 Codex Mini",
|
||||
Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-max",
|
||||
Object: "model",
|
||||
Created: 1763424000,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-max",
|
||||
DisplayName: "GPT 5.1 Codex Max",
|
||||
Description: "Stable version of GPT 5.1 Codex Max",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.2",
|
||||
Object: "model",
|
||||
Created: 1765440000,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.2",
|
||||
DisplayName: "GPT 5.2",
|
||||
Description: "Stable version of GPT 5.2",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.2-codex",
|
||||
Object: "model",
|
||||
Created: 1765440000,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.2",
|
||||
DisplayName: "GPT 5.2 Codex",
|
||||
Description: "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// GetQwenModels returns the standard Qwen model definitions
|
||||
func GetQwenModels() []*ModelInfo {
|
||||
return []*ModelInfo{
|
||||
{
|
||||
ID: "qwen3-coder-plus",
|
||||
Object: "model",
|
||||
Created: 1753228800,
|
||||
OwnedBy: "qwen",
|
||||
Type: "qwen",
|
||||
Version: "3.0",
|
||||
DisplayName: "Qwen3 Coder Plus",
|
||||
Description: "Advanced code generation and understanding model",
|
||||
ContextLength: 32768,
|
||||
MaxCompletionTokens: 8192,
|
||||
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
|
||||
},
|
||||
{
|
||||
ID: "qwen3-coder-flash",
|
||||
Object: "model",
|
||||
Created: 1753228800,
|
||||
OwnedBy: "qwen",
|
||||
Type: "qwen",
|
||||
Version: "3.0",
|
||||
DisplayName: "Qwen3 Coder Flash",
|
||||
Description: "Fast code generation model",
|
||||
ContextLength: 8192,
|
||||
MaxCompletionTokens: 2048,
|
||||
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
|
||||
},
|
||||
{
|
||||
ID: "vision-model",
|
||||
Object: "model",
|
||||
Created: 1758672000,
|
||||
OwnedBy: "qwen",
|
||||
Type: "qwen",
|
||||
Version: "3.0",
|
||||
DisplayName: "Qwen3 Vision Model",
|
||||
Description: "Vision model model",
|
||||
ContextLength: 32768,
|
||||
MaxCompletionTokens: 2048,
|
||||
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// iFlowThinkingSupport is a shared ThinkingSupport configuration for iFlow models
|
||||
// that support thinking mode via chat_template_kwargs.enable_thinking (boolean toggle).
|
||||
// Uses level-based configuration so standard normalization flows apply before conversion.
|
||||
var iFlowThinkingSupport = &ThinkingSupport{
|
||||
Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"},
|
||||
}
|
||||
|
||||
// GetIFlowModels returns supported models for iFlow OAuth accounts.
|
||||
func GetIFlowModels() []*ModelInfo {
|
||||
entries := []struct {
|
||||
ID string
|
||||
DisplayName string
|
||||
Description string
|
||||
Created int64
|
||||
Thinking *ThinkingSupport
|
||||
}{
|
||||
{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
|
||||
{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
|
||||
{ID: "qwen3-max", DisplayName: "Qwen3-Max", Description: "Qwen3 flagship model", Created: 1758672000},
|
||||
{ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language", Created: 1758672000},
|
||||
{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400},
|
||||
{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
|
||||
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport},
|
||||
{ID: "glm-4.7", DisplayName: "GLM-4.7", Description: "Zhipu GLM 4.7 general model", Created: 1766448000, Thinking: iFlowThinkingSupport},
|
||||
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
|
||||
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200},
|
||||
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
|
||||
{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
|
||||
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
|
||||
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
|
||||
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
|
||||
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
|
||||
{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
|
||||
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
|
||||
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
|
||||
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
|
||||
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
|
||||
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
|
||||
{ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200},
|
||||
}
|
||||
models := make([]*ModelInfo, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
models = append(models, &ModelInfo{
|
||||
ID: entry.ID,
|
||||
Object: "model",
|
||||
Created: entry.Created,
|
||||
OwnedBy: "iflow",
|
||||
Type: "iflow",
|
||||
DisplayName: entry.DisplayName,
|
||||
Description: entry.Description,
|
||||
Thinking: entry.Thinking,
|
||||
// GetStaticModelDefinitionsByChannel returns static model definitions for a given channel/provider.
|
||||
// It returns nil when the channel is unknown.
|
||||
//
|
||||
// Supported channels:
|
||||
// - claude
|
||||
// - gemini
|
||||
// - vertex
|
||||
// - gemini-cli
|
||||
// - aistudio
|
||||
// - codex
|
||||
// - qwen
|
||||
// - iflow
|
||||
// - antigravity (returns static overrides only)
|
||||
func GetStaticModelDefinitionsByChannel(channel string) []*ModelInfo {
|
||||
key := strings.ToLower(strings.TrimSpace(channel))
|
||||
switch key {
|
||||
case "claude":
|
||||
return GetClaudeModels()
|
||||
case "gemini":
|
||||
return GetGeminiModels()
|
||||
case "vertex":
|
||||
return GetGeminiVertexModels()
|
||||
case "gemini-cli":
|
||||
return GetGeminiCLIModels()
|
||||
case "aistudio":
|
||||
return GetAIStudioModels()
|
||||
case "codex":
|
||||
return GetOpenAIModels()
|
||||
case "qwen":
|
||||
return GetQwenModels()
|
||||
case "iflow":
|
||||
return GetIFlowModels()
|
||||
case "antigravity":
|
||||
cfg := GetAntigravityModelConfig()
|
||||
if len(cfg) == 0 {
|
||||
return nil
|
||||
}
|
||||
models := make([]*ModelInfo, 0, len(cfg))
|
||||
for modelID, entry := range cfg {
|
||||
if modelID == "" || entry == nil {
|
||||
continue
|
||||
}
|
||||
models = append(models, &ModelInfo{
|
||||
ID: modelID,
|
||||
Object: "model",
|
||||
OwnedBy: "antigravity",
|
||||
Type: "antigravity",
|
||||
Thinking: entry.Thinking,
|
||||
MaxCompletionTokens: entry.MaxCompletionTokens,
|
||||
})
|
||||
}
|
||||
sort.Slice(models, func(i, j int) bool {
|
||||
return strings.ToLower(models[i].ID) < strings.ToLower(models[j].ID)
|
||||
})
|
||||
}
|
||||
return models
|
||||
}
|
||||
|
||||
// AntigravityModelConfig captures static antigravity model overrides, including
|
||||
// Thinking budget limits and provider max completion tokens.
|
||||
type AntigravityModelConfig struct {
|
||||
Thinking *ThinkingSupport
|
||||
MaxCompletionTokens int
|
||||
}
|
||||
|
||||
// GetAntigravityModelConfig returns static configuration for antigravity models.
|
||||
// Keys use upstream model names returned by the Antigravity models endpoint.
|
||||
func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
|
||||
return map[string]*AntigravityModelConfig{
|
||||
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
|
||||
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
|
||||
"rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
|
||||
"gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
||||
"gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
||||
"gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
|
||||
"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||
"claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||
"claude-sonnet-4-5": {MaxCompletionTokens: 64000},
|
||||
"gpt-oss-120b-medium": {},
|
||||
"tab_flash_lite_preview": {},
|
||||
return models
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
846
internal/registry/model_definitions_static_data.go
Normal file
846
internal/registry/model_definitions_static_data.go
Normal file
@@ -0,0 +1,846 @@
|
||||
// Package registry provides model definitions for various AI service providers.
|
||||
// This file stores the static model metadata catalog.
|
||||
package registry
|
||||
|
||||
// GetClaudeModels returns the standard Claude model definitions
|
||||
func GetClaudeModels() []*ModelInfo {
|
||||
return []*ModelInfo{
|
||||
|
||||
{
|
||||
ID: "claude-haiku-4-5-20251001",
|
||||
Object: "model",
|
||||
Created: 1759276800, // 2025-10-01
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.5 Haiku",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
// Thinking: not supported for Haiku models
|
||||
},
|
||||
{
|
||||
ID: "claude-sonnet-4-5-20250929",
|
||||
Object: "model",
|
||||
Created: 1759104000, // 2025-09-29
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.5 Sonnet",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-5-20251101",
|
||||
Object: "model",
|
||||
Created: 1761955200, // 2025-11-01
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.5 Opus",
|
||||
Description: "Premium model combining maximum intelligence with practical performance",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-1-20250805",
|
||||
Object: "model",
|
||||
Created: 1722945600, // 2025-08-05
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.1 Opus",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 32000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-20250514",
|
||||
Object: "model",
|
||||
Created: 1715644800, // 2025-05-14
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4 Opus",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 32000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-sonnet-4-20250514",
|
||||
Object: "model",
|
||||
Created: 1715644800, // 2025-05-14
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4 Sonnet",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-3-7-sonnet-20250219",
|
||||
Object: "model",
|
||||
Created: 1708300800, // 2025-02-19
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 3.7 Sonnet",
|
||||
ContextLength: 128000,
|
||||
MaxCompletionTokens: 8192,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-3-5-haiku-20241022",
|
||||
Object: "model",
|
||||
Created: 1729555200, // 2024-10-22
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 3.5 Haiku",
|
||||
ContextLength: 128000,
|
||||
MaxCompletionTokens: 8192,
|
||||
// Thinking: not supported for Haiku models
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// GetGeminiModels returns the standard Gemini model definitions
|
||||
func GetGeminiModels() []*ModelInfo {
|
||||
return []*ModelInfo{
|
||||
{
|
||||
ID: "gemini-2.5-pro",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-pro",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Pro",
|
||||
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash",
|
||||
Version: "001",
|
||||
DisplayName: "Gemini 2.5 Flash",
|
||||
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash-lite",
|
||||
Object: "model",
|
||||
Created: 1753142400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash-lite",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Flash Lite",
|
||||
Description: "Our smallest and most cost effective model, built for at scale usage.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-pro-preview",
|
||||
Object: "model",
|
||||
Created: 1737158400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-pro-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Pro Preview",
|
||||
Description: "Gemini 3 Pro Preview",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-flash-preview",
|
||||
Object: "model",
|
||||
Created: 1765929600,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-flash-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Flash Preview",
|
||||
Description: "Gemini 3 Flash Preview",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-pro-image-preview",
|
||||
Object: "model",
|
||||
Created: 1737158400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-pro-image-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Pro Image Preview",
|
||||
Description: "Gemini 3 Pro Image Preview",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func GetGeminiVertexModels() []*ModelInfo {
|
||||
return []*ModelInfo{
|
||||
{
|
||||
ID: "gemini-2.5-pro",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-pro",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Pro",
|
||||
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash",
|
||||
Version: "001",
|
||||
DisplayName: "Gemini 2.5 Flash",
|
||||
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash-lite",
|
||||
Object: "model",
|
||||
Created: 1753142400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash-lite",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Flash Lite",
|
||||
Description: "Our smallest and most cost effective model, built for at scale usage.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-pro-preview",
|
||||
Object: "model",
|
||||
Created: 1737158400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-pro-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Pro Preview",
|
||||
Description: "Gemini 3 Pro Preview",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-flash-preview",
|
||||
Object: "model",
|
||||
Created: 1765929600,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-flash-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Flash Preview",
|
||||
Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-pro-image-preview",
|
||||
Object: "model",
|
||||
Created: 1737158400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-pro-image-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Pro Image Preview",
|
||||
Description: "Gemini 3 Pro Image Preview",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||
},
|
||||
// Imagen image generation models - use :predict action
|
||||
{
|
||||
ID: "imagen-4.0-generate-001",
|
||||
Object: "model",
|
||||
Created: 1750000000,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/imagen-4.0-generate-001",
|
||||
Version: "4.0",
|
||||
DisplayName: "Imagen 4.0 Generate",
|
||||
Description: "Imagen 4.0 image generation model",
|
||||
SupportedGenerationMethods: []string{"predict"},
|
||||
},
|
||||
{
|
||||
ID: "imagen-4.0-ultra-generate-001",
|
||||
Object: "model",
|
||||
Created: 1750000000,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/imagen-4.0-ultra-generate-001",
|
||||
Version: "4.0",
|
||||
DisplayName: "Imagen 4.0 Ultra Generate",
|
||||
Description: "Imagen 4.0 Ultra high-quality image generation model",
|
||||
SupportedGenerationMethods: []string{"predict"},
|
||||
},
|
||||
{
|
||||
ID: "imagen-3.0-generate-002",
|
||||
Object: "model",
|
||||
Created: 1740000000,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/imagen-3.0-generate-002",
|
||||
Version: "3.0",
|
||||
DisplayName: "Imagen 3.0 Generate",
|
||||
Description: "Imagen 3.0 image generation model",
|
||||
SupportedGenerationMethods: []string{"predict"},
|
||||
},
|
||||
{
|
||||
ID: "imagen-3.0-fast-generate-001",
|
||||
Object: "model",
|
||||
Created: 1740000000,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/imagen-3.0-fast-generate-001",
|
||||
Version: "3.0",
|
||||
DisplayName: "Imagen 3.0 Fast Generate",
|
||||
Description: "Imagen 3.0 fast image generation model",
|
||||
SupportedGenerationMethods: []string{"predict"},
|
||||
},
|
||||
{
|
||||
ID: "imagen-4.0-fast-generate-001",
|
||||
Object: "model",
|
||||
Created: 1750000000,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/imagen-4.0-fast-generate-001",
|
||||
Version: "4.0",
|
||||
DisplayName: "Imagen 4.0 Fast Generate",
|
||||
Description: "Imagen 4.0 fast image generation model",
|
||||
SupportedGenerationMethods: []string{"predict"},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// GetGeminiCLIModels returns the standard Gemini model definitions
|
||||
func GetGeminiCLIModels() []*ModelInfo {
|
||||
return []*ModelInfo{
|
||||
{
|
||||
ID: "gemini-2.5-pro",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-pro",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Pro",
|
||||
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash",
|
||||
Version: "001",
|
||||
DisplayName: "Gemini 2.5 Flash",
|
||||
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash-lite",
|
||||
Object: "model",
|
||||
Created: 1753142400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash-lite",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Flash Lite",
|
||||
Description: "Our smallest and most cost effective model, built for at scale usage.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-pro-preview",
|
||||
Object: "model",
|
||||
Created: 1737158400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-pro-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Pro Preview",
|
||||
Description: "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-flash-preview",
|
||||
Object: "model",
|
||||
Created: 1765929600,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-flash-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Flash Preview",
|
||||
Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
|
||||
func GetAIStudioModels() []*ModelInfo {
|
||||
return []*ModelInfo{
|
||||
{
|
||||
ID: "gemini-2.5-pro",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-pro",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Pro",
|
||||
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash",
|
||||
Version: "001",
|
||||
DisplayName: "Gemini 2.5 Flash",
|
||||
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-2.5-flash-lite",
|
||||
Object: "model",
|
||||
Created: 1753142400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash-lite",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Flash Lite",
|
||||
Description: "Our smallest and most cost effective model, built for at scale usage.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-pro-preview",
|
||||
Object: "model",
|
||||
Created: 1737158400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-pro-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Pro Preview",
|
||||
Description: "Gemini 3 Pro Preview",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-flash-preview",
|
||||
Object: "model",
|
||||
Created: 1765929600,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-3-flash-preview",
|
||||
Version: "3.0",
|
||||
DisplayName: "Gemini 3 Flash Preview",
|
||||
Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-pro-latest",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-pro-latest",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini Pro Latest",
|
||||
Description: "Latest release of Gemini Pro",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-flash-latest",
|
||||
Object: "model",
|
||||
Created: 1750118400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-flash-latest",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini Flash Latest",
|
||||
Description: "Latest release of Gemini Flash",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-flash-lite-latest",
|
||||
Object: "model",
|
||||
Created: 1753142400,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-flash-lite-latest",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini Flash-Lite Latest",
|
||||
Description: "Latest release of Gemini Flash-Lite",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
||||
},
|
||||
// {
|
||||
// ID: "gemini-2.5-flash-image-preview",
|
||||
// Object: "model",
|
||||
// Created: 1756166400,
|
||||
// OwnedBy: "google",
|
||||
// Type: "gemini",
|
||||
// Name: "models/gemini-2.5-flash-image-preview",
|
||||
// Version: "2.5",
|
||||
// DisplayName: "Gemini 2.5 Flash Image Preview",
|
||||
// Description: "State-of-the-art image generation and editing model.",
|
||||
// InputTokenLimit: 1048576,
|
||||
// OutputTokenLimit: 8192,
|
||||
// SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
// // image models don't support thinkingConfig; leave Thinking nil
|
||||
// },
|
||||
{
|
||||
ID: "gemini-2.5-flash-image",
|
||||
Object: "model",
|
||||
Created: 1759363200,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/gemini-2.5-flash-image",
|
||||
Version: "2.5",
|
||||
DisplayName: "Gemini 2.5 Flash Image",
|
||||
Description: "State-of-the-art image generation and editing model.",
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 8192,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
// image models don't support thinkingConfig; leave Thinking nil
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// GetOpenAIModels returns the standard OpenAI model definitions
|
||||
func GetOpenAIModels() []*ModelInfo {
|
||||
return []*ModelInfo{
|
||||
{
|
||||
ID: "gpt-5",
|
||||
Object: "model",
|
||||
Created: 1754524800,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-08-07",
|
||||
DisplayName: "GPT 5",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex",
|
||||
Object: "model",
|
||||
Created: 1757894400,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-09-15",
|
||||
DisplayName: "GPT 5 Codex",
|
||||
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex-mini",
|
||||
Object: "model",
|
||||
Created: 1762473600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-11-07",
|
||||
DisplayName: "GPT 5 Codex Mini",
|
||||
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5.1 Codex",
|
||||
Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-mini",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5.1 Codex Mini",
|
||||
Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-max",
|
||||
Object: "model",
|
||||
Created: 1763424000,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-max",
|
||||
DisplayName: "GPT 5.1 Codex Max",
|
||||
Description: "Stable version of GPT 5.1 Codex Max",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.2",
|
||||
Object: "model",
|
||||
Created: 1765440000,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.2",
|
||||
DisplayName: "GPT 5.2",
|
||||
Description: "Stable version of GPT 5.2",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.2-codex",
|
||||
Object: "model",
|
||||
Created: 1765440000,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.2",
|
||||
DisplayName: "GPT 5.2 Codex",
|
||||
Description: "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// GetQwenModels returns the standard Qwen model definitions
|
||||
func GetQwenModels() []*ModelInfo {
|
||||
return []*ModelInfo{
|
||||
{
|
||||
ID: "qwen3-coder-plus",
|
||||
Object: "model",
|
||||
Created: 1753228800,
|
||||
OwnedBy: "qwen",
|
||||
Type: "qwen",
|
||||
Version: "3.0",
|
||||
DisplayName: "Qwen3 Coder Plus",
|
||||
Description: "Advanced code generation and understanding model",
|
||||
ContextLength: 32768,
|
||||
MaxCompletionTokens: 8192,
|
||||
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
|
||||
},
|
||||
{
|
||||
ID: "qwen3-coder-flash",
|
||||
Object: "model",
|
||||
Created: 1753228800,
|
||||
OwnedBy: "qwen",
|
||||
Type: "qwen",
|
||||
Version: "3.0",
|
||||
DisplayName: "Qwen3 Coder Flash",
|
||||
Description: "Fast code generation model",
|
||||
ContextLength: 8192,
|
||||
MaxCompletionTokens: 2048,
|
||||
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
|
||||
},
|
||||
{
|
||||
ID: "vision-model",
|
||||
Object: "model",
|
||||
Created: 1758672000,
|
||||
OwnedBy: "qwen",
|
||||
Type: "qwen",
|
||||
Version: "3.0",
|
||||
DisplayName: "Qwen3 Vision Model",
|
||||
Description: "Vision model model",
|
||||
ContextLength: 32768,
|
||||
MaxCompletionTokens: 2048,
|
||||
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// iFlowThinkingSupport is a shared ThinkingSupport configuration for iFlow models
|
||||
// that support thinking mode via chat_template_kwargs.enable_thinking (boolean toggle).
|
||||
// Uses level-based configuration so standard normalization flows apply before conversion.
|
||||
var iFlowThinkingSupport = &ThinkingSupport{
|
||||
Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"},
|
||||
}
|
||||
|
||||
// GetIFlowModels returns supported models for iFlow OAuth accounts.
|
||||
func GetIFlowModels() []*ModelInfo {
|
||||
entries := []struct {
|
||||
ID string
|
||||
DisplayName string
|
||||
Description string
|
||||
Created int64
|
||||
Thinking *ThinkingSupport
|
||||
}{
|
||||
{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
|
||||
{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
|
||||
{ID: "qwen3-max", DisplayName: "Qwen3-Max", Description: "Qwen3 flagship model", Created: 1758672000},
|
||||
{ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language", Created: 1758672000},
|
||||
{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400, Thinking: iFlowThinkingSupport},
|
||||
{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
|
||||
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport},
|
||||
{ID: "glm-4.7", DisplayName: "GLM-4.7", Description: "Zhipu GLM 4.7 general model", Created: 1766448000, Thinking: iFlowThinkingSupport},
|
||||
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
|
||||
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200},
|
||||
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
|
||||
{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
|
||||
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000, Thinking: iFlowThinkingSupport},
|
||||
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200, Thinking: iFlowThinkingSupport},
|
||||
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
|
||||
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
|
||||
{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
|
||||
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
|
||||
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
|
||||
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
|
||||
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
|
||||
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
|
||||
{ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200},
|
||||
}
|
||||
models := make([]*ModelInfo, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
models = append(models, &ModelInfo{
|
||||
ID: entry.ID,
|
||||
Object: "model",
|
||||
Created: entry.Created,
|
||||
OwnedBy: "iflow",
|
||||
Type: "iflow",
|
||||
DisplayName: entry.DisplayName,
|
||||
Description: entry.Description,
|
||||
Thinking: entry.Thinking,
|
||||
})
|
||||
}
|
||||
return models
|
||||
}
|
||||
|
||||
// AntigravityModelConfig captures static antigravity model overrides, including
|
||||
// Thinking budget limits and provider max completion tokens.
|
||||
type AntigravityModelConfig struct {
|
||||
Thinking *ThinkingSupport
|
||||
MaxCompletionTokens int
|
||||
}
|
||||
|
||||
// GetAntigravityModelConfig returns static configuration for antigravity models.
|
||||
// Keys use upstream model names returned by the Antigravity models endpoint.
|
||||
func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
|
||||
return map[string]*AntigravityModelConfig{
|
||||
// "rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
|
||||
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
|
||||
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
|
||||
"gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
||||
"gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
||||
"gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
|
||||
"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||
"claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||
"claude-sonnet-4-5": {MaxCompletionTokens: 64000},
|
||||
"gpt-oss-120b-medium": {},
|
||||
"tab_flash_lite_preview": {},
|
||||
}
|
||||
}
|
||||
@@ -111,6 +111,9 @@ func (e *AIStudioExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.A
|
||||
|
||||
// Execute performs a non-streaming request to the AI Studio API.
|
||||
func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
@@ -167,6 +170,9 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
|
||||
|
||||
// ExecuteStream performs a streaming request to the AI Studio API.
|
||||
func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
@@ -109,6 +109,9 @@ func (e *AntigravityExecutor) HttpRequest(ctx context.Context, auth *cliproxyaut
|
||||
|
||||
// Execute performs a non-streaming request to the Antigravity API.
|
||||
func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
isClaude := strings.Contains(strings.ToLower(baseModel), "claude")
|
||||
|
||||
@@ -641,6 +644,9 @@ func (e *AntigravityExecutor) convertStreamToNonStream(stream []byte) []byte {
|
||||
|
||||
// ExecuteStream performs a streaming request to the Antigravity API.
|
||||
func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
ctx = context.WithValue(ctx, "alt", "")
|
||||
|
||||
258
internal/runtime/executor/caching_verify_test.go
Normal file
258
internal/runtime/executor/caching_verify_test.go
Normal file
@@ -0,0 +1,258 @@
|
||||
package executor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
func TestEnsureCacheControl(t *testing.T) {
|
||||
// Test case 1: System prompt as string
|
||||
t.Run("String System Prompt", func(t *testing.T) {
|
||||
input := []byte(`{"model": "claude-3-5-sonnet", "system": "This is a long system prompt", "messages": []}`)
|
||||
output := ensureCacheControl(input)
|
||||
|
||||
res := gjson.GetBytes(output, "system.0.cache_control.type")
|
||||
if res.String() != "ephemeral" {
|
||||
t.Errorf("cache_control not found in system string. Output: %s", string(output))
|
||||
}
|
||||
})
|
||||
|
||||
// Test case 2: System prompt as array
|
||||
t.Run("Array System Prompt", func(t *testing.T) {
|
||||
input := []byte(`{"model": "claude-3-5-sonnet", "system": [{"type": "text", "text": "Part 1"}, {"type": "text", "text": "Part 2"}], "messages": []}`)
|
||||
output := ensureCacheControl(input)
|
||||
|
||||
// cache_control should only be on the LAST element
|
||||
res0 := gjson.GetBytes(output, "system.0.cache_control")
|
||||
res1 := gjson.GetBytes(output, "system.1.cache_control.type")
|
||||
|
||||
if res0.Exists() {
|
||||
t.Errorf("cache_control should NOT be on the first element")
|
||||
}
|
||||
if res1.String() != "ephemeral" {
|
||||
t.Errorf("cache_control not found on last system element. Output: %s", string(output))
|
||||
}
|
||||
})
|
||||
|
||||
// Test case 3: Tools are cached
|
||||
t.Run("Tools Caching", func(t *testing.T) {
|
||||
input := []byte(`{
|
||||
"model": "claude-3-5-sonnet",
|
||||
"tools": [
|
||||
{"name": "tool1", "description": "First tool", "input_schema": {"type": "object"}},
|
||||
{"name": "tool2", "description": "Second tool", "input_schema": {"type": "object"}}
|
||||
],
|
||||
"system": "System prompt",
|
||||
"messages": []
|
||||
}`)
|
||||
output := ensureCacheControl(input)
|
||||
|
||||
// cache_control should only be on the LAST tool
|
||||
tool0Cache := gjson.GetBytes(output, "tools.0.cache_control")
|
||||
tool1Cache := gjson.GetBytes(output, "tools.1.cache_control.type")
|
||||
|
||||
if tool0Cache.Exists() {
|
||||
t.Errorf("cache_control should NOT be on the first tool")
|
||||
}
|
||||
if tool1Cache.String() != "ephemeral" {
|
||||
t.Errorf("cache_control not found on last tool. Output: %s", string(output))
|
||||
}
|
||||
|
||||
// System should also have cache_control
|
||||
systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
|
||||
if systemCache.String() != "ephemeral" {
|
||||
t.Errorf("cache_control not found in system. Output: %s", string(output))
|
||||
}
|
||||
})
|
||||
|
||||
// Test case 4: Tools and system are INDEPENDENT breakpoints
|
||||
// Per Anthropic docs: Up to 4 breakpoints allowed, tools and system are cached separately
|
||||
t.Run("Independent Cache Breakpoints", func(t *testing.T) {
|
||||
input := []byte(`{
|
||||
"model": "claude-3-5-sonnet",
|
||||
"tools": [
|
||||
{"name": "tool1", "description": "First tool", "input_schema": {"type": "object"}, "cache_control": {"type": "ephemeral"}}
|
||||
],
|
||||
"system": [{"type": "text", "text": "System"}],
|
||||
"messages": []
|
||||
}`)
|
||||
output := ensureCacheControl(input)
|
||||
|
||||
// Tool already has cache_control - should not be changed
|
||||
tool0Cache := gjson.GetBytes(output, "tools.0.cache_control.type")
|
||||
if tool0Cache.String() != "ephemeral" {
|
||||
t.Errorf("existing cache_control was incorrectly removed")
|
||||
}
|
||||
|
||||
// System SHOULD get cache_control because it is an INDEPENDENT breakpoint
|
||||
// Tools and system are separate cache levels in the hierarchy
|
||||
systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
|
||||
if systemCache.String() != "ephemeral" {
|
||||
t.Errorf("system should have its own cache_control breakpoint (independent of tools)")
|
||||
}
|
||||
})
|
||||
|
||||
// Test case 5: Only tools, no system
|
||||
t.Run("Only Tools No System", func(t *testing.T) {
|
||||
input := []byte(`{
|
||||
"model": "claude-3-5-sonnet",
|
||||
"tools": [
|
||||
{"name": "tool1", "description": "Tool", "input_schema": {"type": "object"}}
|
||||
],
|
||||
"messages": [{"role": "user", "content": "Hi"}]
|
||||
}`)
|
||||
output := ensureCacheControl(input)
|
||||
|
||||
toolCache := gjson.GetBytes(output, "tools.0.cache_control.type")
|
||||
if toolCache.String() != "ephemeral" {
|
||||
t.Errorf("cache_control not found on tool. Output: %s", string(output))
|
||||
}
|
||||
})
|
||||
|
||||
// Test case 6: Many tools (Claude Code scenario)
|
||||
t.Run("Many Tools (Claude Code Scenario)", func(t *testing.T) {
|
||||
// Simulate Claude Code with many tools
|
||||
toolsJSON := `[`
|
||||
for i := 0; i < 50; i++ {
|
||||
if i > 0 {
|
||||
toolsJSON += ","
|
||||
}
|
||||
toolsJSON += fmt.Sprintf(`{"name": "tool%d", "description": "Tool %d", "input_schema": {"type": "object"}}`, i, i)
|
||||
}
|
||||
toolsJSON += `]`
|
||||
|
||||
input := []byte(fmt.Sprintf(`{
|
||||
"model": "claude-3-5-sonnet",
|
||||
"tools": %s,
|
||||
"system": [{"type": "text", "text": "You are Claude Code"}],
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
}`, toolsJSON))
|
||||
|
||||
output := ensureCacheControl(input)
|
||||
|
||||
// Only the last tool (index 49) should have cache_control
|
||||
for i := 0; i < 49; i++ {
|
||||
path := fmt.Sprintf("tools.%d.cache_control", i)
|
||||
if gjson.GetBytes(output, path).Exists() {
|
||||
t.Errorf("tool %d should NOT have cache_control", i)
|
||||
}
|
||||
}
|
||||
|
||||
lastToolCache := gjson.GetBytes(output, "tools.49.cache_control.type")
|
||||
if lastToolCache.String() != "ephemeral" {
|
||||
t.Errorf("last tool (49) should have cache_control")
|
||||
}
|
||||
|
||||
// System should also have cache_control
|
||||
systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
|
||||
if systemCache.String() != "ephemeral" {
|
||||
t.Errorf("system should have cache_control")
|
||||
}
|
||||
|
||||
t.Log("test passed: 50 tools - cache_control only on last tool")
|
||||
})
|
||||
|
||||
// Test case 7: Empty tools array
|
||||
t.Run("Empty Tools Array", func(t *testing.T) {
|
||||
input := []byte(`{"model": "claude-3-5-sonnet", "tools": [], "system": "Test", "messages": []}`)
|
||||
output := ensureCacheControl(input)
|
||||
|
||||
// System should still get cache_control
|
||||
systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
|
||||
if systemCache.String() != "ephemeral" {
|
||||
t.Errorf("system should have cache_control even with empty tools array")
|
||||
}
|
||||
})
|
||||
|
||||
// Test case 8: Messages caching for multi-turn (second-to-last user)
|
||||
t.Run("Messages Caching Second-To-Last User", func(t *testing.T) {
|
||||
input := []byte(`{
|
||||
"model": "claude-3-5-sonnet",
|
||||
"messages": [
|
||||
{"role": "user", "content": "First user"},
|
||||
{"role": "assistant", "content": "Assistant reply"},
|
||||
{"role": "user", "content": "Second user"},
|
||||
{"role": "assistant", "content": "Assistant reply 2"},
|
||||
{"role": "user", "content": "Third user"}
|
||||
]
|
||||
}`)
|
||||
output := ensureCacheControl(input)
|
||||
|
||||
cacheType := gjson.GetBytes(output, "messages.2.content.0.cache_control.type")
|
||||
if cacheType.String() != "ephemeral" {
|
||||
t.Errorf("cache_control not found on second-to-last user turn. Output: %s", string(output))
|
||||
}
|
||||
|
||||
lastUserCache := gjson.GetBytes(output, "messages.4.content.0.cache_control")
|
||||
if lastUserCache.Exists() {
|
||||
t.Errorf("last user turn should NOT have cache_control")
|
||||
}
|
||||
})
|
||||
|
||||
// Test case 9: Existing message cache_control should skip injection
|
||||
t.Run("Messages Skip When Cache Control Exists", func(t *testing.T) {
|
||||
input := []byte(`{
|
||||
"model": "claude-3-5-sonnet",
|
||||
"messages": [
|
||||
{"role": "user", "content": [{"type": "text", "text": "First user"}]},
|
||||
{"role": "assistant", "content": [{"type": "text", "text": "Assistant reply", "cache_control": {"type": "ephemeral"}}]},
|
||||
{"role": "user", "content": [{"type": "text", "text": "Second user"}]}
|
||||
]
|
||||
}`)
|
||||
output := ensureCacheControl(input)
|
||||
|
||||
userCache := gjson.GetBytes(output, "messages.0.content.0.cache_control")
|
||||
if userCache.Exists() {
|
||||
t.Errorf("cache_control should NOT be injected when a message already has cache_control")
|
||||
}
|
||||
|
||||
existingCache := gjson.GetBytes(output, "messages.1.content.0.cache_control.type")
|
||||
if existingCache.String() != "ephemeral" {
|
||||
t.Errorf("existing cache_control should be preserved. Output: %s", string(output))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestCacheControlOrder verifies the correct order: tools -> system -> messages
|
||||
func TestCacheControlOrder(t *testing.T) {
|
||||
input := []byte(`{
|
||||
"model": "claude-sonnet-4",
|
||||
"tools": [
|
||||
{"name": "Read", "description": "Read file", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}}}},
|
||||
{"name": "Write", "description": "Write file", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}}}
|
||||
],
|
||||
"system": [
|
||||
{"type": "text", "text": "You are Claude Code, Anthropic's official CLI for Claude."},
|
||||
{"type": "text", "text": "Additional instructions here..."}
|
||||
],
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello"}
|
||||
]
|
||||
}`)
|
||||
|
||||
output := ensureCacheControl(input)
|
||||
|
||||
// 1. Last tool has cache_control
|
||||
if gjson.GetBytes(output, "tools.1.cache_control.type").String() != "ephemeral" {
|
||||
t.Error("last tool should have cache_control")
|
||||
}
|
||||
|
||||
// 2. First tool has NO cache_control
|
||||
if gjson.GetBytes(output, "tools.0.cache_control").Exists() {
|
||||
t.Error("first tool should NOT have cache_control")
|
||||
}
|
||||
|
||||
// 3. Last system element has cache_control
|
||||
if gjson.GetBytes(output, "system.1.cache_control.type").String() != "ephemeral" {
|
||||
t.Error("last system element should have cache_control")
|
||||
}
|
||||
|
||||
// 4. First system element has NO cache_control
|
||||
if gjson.GetBytes(output, "system.0.cache_control").Exists() {
|
||||
t.Error("first system element should NOT have cache_control")
|
||||
}
|
||||
|
||||
t.Log("cache order correct: tools -> system")
|
||||
}
|
||||
@@ -84,6 +84,9 @@ func (e *ClaudeExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut
|
||||
}
|
||||
|
||||
func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, baseURL := claudeCreds(auth)
|
||||
@@ -120,6 +123,9 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
||||
body = disableThinkingIfToolChoiceForced(body)
|
||||
|
||||
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
|
||||
body = ensureCacheControl(body)
|
||||
|
||||
// Extract betas from body and convert to header
|
||||
var extraBetas []string
|
||||
extraBetas, body = extractAndRemoveBetas(body)
|
||||
@@ -218,6 +224,9 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
}
|
||||
|
||||
func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, baseURL := claudeCreds(auth)
|
||||
@@ -252,6 +261,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
||||
body = disableThinkingIfToolChoiceForced(body)
|
||||
|
||||
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
|
||||
body = ensureCacheControl(body)
|
||||
|
||||
// Extract betas from body and convert to header
|
||||
var extraBetas []string
|
||||
extraBetas, body = extractAndRemoveBetas(body)
|
||||
@@ -636,13 +648,17 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
|
||||
ginHeaders = ginCtx.Request.Header
|
||||
}
|
||||
|
||||
baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14"
|
||||
promptCachingBeta := "prompt-caching-2024-07-31"
|
||||
baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14," + promptCachingBeta
|
||||
if val := strings.TrimSpace(ginHeaders.Get("Anthropic-Beta")); val != "" {
|
||||
baseBetas = val
|
||||
if !strings.Contains(val, "oauth") {
|
||||
baseBetas += ",oauth-2025-04-20"
|
||||
}
|
||||
}
|
||||
if !strings.Contains(baseBetas, promptCachingBeta) {
|
||||
baseBetas += "," + promptCachingBeta
|
||||
}
|
||||
|
||||
// Merge extra betas from request body
|
||||
if len(extraBetas) > 0 {
|
||||
@@ -990,3 +1006,214 @@ func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.A
|
||||
|
||||
return payload
|
||||
}
|
||||
|
||||
// ensureCacheControl injects cache_control breakpoints into the payload for optimal prompt caching.
|
||||
// According to Anthropic's documentation, cache prefixes are created in order: tools -> system -> messages.
|
||||
// This function adds cache_control to:
|
||||
// 1. The LAST tool in the tools array (caches all tool definitions)
|
||||
// 2. The LAST element in the system array (caches system prompt)
|
||||
// 3. The SECOND-TO-LAST user turn (caches conversation history for multi-turn)
|
||||
//
|
||||
// Up to 4 cache breakpoints are allowed per request. Tools, System, and Messages are INDEPENDENT breakpoints.
|
||||
// This enables up to 90% cost reduction on cached tokens (cache read = 0.1x base price).
|
||||
// See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
|
||||
func ensureCacheControl(payload []byte) []byte {
|
||||
// 1. Inject cache_control into the LAST tool (caches all tool definitions)
|
||||
// Tools are cached first in the hierarchy, so this is the most important breakpoint.
|
||||
payload = injectToolsCacheControl(payload)
|
||||
|
||||
// 2. Inject cache_control into the LAST system prompt element
|
||||
// System is the second level in the cache hierarchy.
|
||||
payload = injectSystemCacheControl(payload)
|
||||
|
||||
// 3. Inject cache_control into messages for multi-turn conversation caching
|
||||
// This caches the conversation history up to the second-to-last user turn.
|
||||
payload = injectMessagesCacheControl(payload)
|
||||
|
||||
return payload
|
||||
}
|
||||
|
||||
// injectMessagesCacheControl adds cache_control to the second-to-last user turn for multi-turn caching.
|
||||
// Per Anthropic docs: "Place cache_control on the second-to-last User message to let the model reuse the earlier cache."
|
||||
// This enables caching of conversation history, which is especially beneficial for long multi-turn conversations.
|
||||
// Only adds cache_control if:
|
||||
// - There are at least 2 user turns in the conversation
|
||||
// - No message content already has cache_control
|
||||
func injectMessagesCacheControl(payload []byte) []byte {
|
||||
messages := gjson.GetBytes(payload, "messages")
|
||||
if !messages.Exists() || !messages.IsArray() {
|
||||
return payload
|
||||
}
|
||||
|
||||
// Check if ANY message content already has cache_control
|
||||
hasCacheControlInMessages := false
|
||||
messages.ForEach(func(_, msg gjson.Result) bool {
|
||||
content := msg.Get("content")
|
||||
if content.IsArray() {
|
||||
content.ForEach(func(_, item gjson.Result) bool {
|
||||
if item.Get("cache_control").Exists() {
|
||||
hasCacheControlInMessages = true
|
||||
return false
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
||||
return !hasCacheControlInMessages
|
||||
})
|
||||
if hasCacheControlInMessages {
|
||||
return payload
|
||||
}
|
||||
|
||||
// Find all user message indices
|
||||
var userMsgIndices []int
|
||||
messages.ForEach(func(index gjson.Result, msg gjson.Result) bool {
|
||||
if msg.Get("role").String() == "user" {
|
||||
userMsgIndices = append(userMsgIndices, int(index.Int()))
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
// Need at least 2 user turns to cache the second-to-last
|
||||
if len(userMsgIndices) < 2 {
|
||||
return payload
|
||||
}
|
||||
|
||||
// Get the second-to-last user message index
|
||||
secondToLastUserIdx := userMsgIndices[len(userMsgIndices)-2]
|
||||
|
||||
// Get the content of this message
|
||||
contentPath := fmt.Sprintf("messages.%d.content", secondToLastUserIdx)
|
||||
content := gjson.GetBytes(payload, contentPath)
|
||||
|
||||
if content.IsArray() {
|
||||
// Add cache_control to the last content block of this message
|
||||
contentCount := int(content.Get("#").Int())
|
||||
if contentCount > 0 {
|
||||
cacheControlPath := fmt.Sprintf("messages.%d.content.%d.cache_control", secondToLastUserIdx, contentCount-1)
|
||||
result, err := sjson.SetBytes(payload, cacheControlPath, map[string]string{"type": "ephemeral"})
|
||||
if err != nil {
|
||||
log.Warnf("failed to inject cache_control into messages: %v", err)
|
||||
return payload
|
||||
}
|
||||
payload = result
|
||||
}
|
||||
} else if content.Type == gjson.String {
|
||||
// Convert string content to array with cache_control
|
||||
text := content.String()
|
||||
newContent := []map[string]interface{}{
|
||||
{
|
||||
"type": "text",
|
||||
"text": text,
|
||||
"cache_control": map[string]string{
|
||||
"type": "ephemeral",
|
||||
},
|
||||
},
|
||||
}
|
||||
result, err := sjson.SetBytes(payload, contentPath, newContent)
|
||||
if err != nil {
|
||||
log.Warnf("failed to inject cache_control into message string content: %v", err)
|
||||
return payload
|
||||
}
|
||||
payload = result
|
||||
}
|
||||
|
||||
return payload
|
||||
}
|
||||
|
||||
// injectToolsCacheControl adds cache_control to the last tool in the tools array.
|
||||
// Per Anthropic docs: "The cache_control parameter on the last tool definition caches all tool definitions."
|
||||
// This only adds cache_control if NO tool in the array already has it.
|
||||
func injectToolsCacheControl(payload []byte) []byte {
|
||||
tools := gjson.GetBytes(payload, "tools")
|
||||
if !tools.Exists() || !tools.IsArray() {
|
||||
return payload
|
||||
}
|
||||
|
||||
toolCount := int(tools.Get("#").Int())
|
||||
if toolCount == 0 {
|
||||
return payload
|
||||
}
|
||||
|
||||
// Check if ANY tool already has cache_control - if so, don't modify tools
|
||||
hasCacheControlInTools := false
|
||||
tools.ForEach(func(_, tool gjson.Result) bool {
|
||||
if tool.Get("cache_control").Exists() {
|
||||
hasCacheControlInTools = true
|
||||
return false
|
||||
}
|
||||
return true
|
||||
})
|
||||
if hasCacheControlInTools {
|
||||
return payload
|
||||
}
|
||||
|
||||
// Add cache_control to the last tool
|
||||
lastToolPath := fmt.Sprintf("tools.%d.cache_control", toolCount-1)
|
||||
result, err := sjson.SetBytes(payload, lastToolPath, map[string]string{"type": "ephemeral"})
|
||||
if err != nil {
|
||||
log.Warnf("failed to inject cache_control into tools array: %v", err)
|
||||
return payload
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// injectSystemCacheControl adds cache_control to the last element in the system prompt.
|
||||
// Converts string system prompts to array format if needed.
|
||||
// This only adds cache_control if NO system element already has it.
|
||||
func injectSystemCacheControl(payload []byte) []byte {
|
||||
system := gjson.GetBytes(payload, "system")
|
||||
if !system.Exists() {
|
||||
return payload
|
||||
}
|
||||
|
||||
if system.IsArray() {
|
||||
count := int(system.Get("#").Int())
|
||||
if count == 0 {
|
||||
return payload
|
||||
}
|
||||
|
||||
// Check if ANY system element already has cache_control
|
||||
hasCacheControlInSystem := false
|
||||
system.ForEach(func(_, item gjson.Result) bool {
|
||||
if item.Get("cache_control").Exists() {
|
||||
hasCacheControlInSystem = true
|
||||
return false
|
||||
}
|
||||
return true
|
||||
})
|
||||
if hasCacheControlInSystem {
|
||||
return payload
|
||||
}
|
||||
|
||||
// Add cache_control to the last system element
|
||||
lastSystemPath := fmt.Sprintf("system.%d.cache_control", count-1)
|
||||
result, err := sjson.SetBytes(payload, lastSystemPath, map[string]string{"type": "ephemeral"})
|
||||
if err != nil {
|
||||
log.Warnf("failed to inject cache_control into system array: %v", err)
|
||||
return payload
|
||||
}
|
||||
payload = result
|
||||
} else if system.Type == gjson.String {
|
||||
// Convert string system prompt to array with cache_control
|
||||
// "system": "text" -> "system": [{"type": "text", "text": "text", "cache_control": {"type": "ephemeral"}}]
|
||||
text := system.String()
|
||||
newSystem := []map[string]interface{}{
|
||||
{
|
||||
"type": "text",
|
||||
"text": text,
|
||||
"cache_control": map[string]string{
|
||||
"type": "ephemeral",
|
||||
},
|
||||
},
|
||||
}
|
||||
result, err := sjson.SetBytes(payload, "system", newSystem)
|
||||
if err != nil {
|
||||
log.Warnf("failed to inject cache_control into system string: %v", err)
|
||||
return payload
|
||||
}
|
||||
payload = result
|
||||
}
|
||||
|
||||
return payload
|
||||
}
|
||||
|
||||
@@ -73,6 +73,9 @@ func (e *CodexExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth
|
||||
}
|
||||
|
||||
func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return e.executeCompact(ctx, auth, req, opts)
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, baseURL := codexCreds(auth)
|
||||
@@ -117,7 +120,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
applyCodexHeaders(httpReq, auth, apiKey)
|
||||
applyCodexHeaders(httpReq, auth, apiKey, true)
|
||||
var authID, authLabel, authType, authValue string
|
||||
if auth != nil {
|
||||
authID = auth.ID
|
||||
@@ -185,7 +188,96 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
return resp, err
|
||||
}
|
||||
|
||||
func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, baseURL := codexCreds(auth)
|
||||
if baseURL == "" {
|
||||
baseURL = "https://chatgpt.com/backend-api/codex"
|
||||
}
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("openai-response")
|
||||
originalPayload := bytes.Clone(req.Payload)
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
|
||||
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
|
||||
body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
|
||||
requestedModel := payloadRequestedModel(opts, req.Model)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
body, _ = sjson.DeleteBytes(body, "stream")
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/responses/compact"
|
||||
httpReq, err := e.cacheHelper(ctx, from, url, req, body)
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
applyCodexHeaders(httpReq, auth, apiKey, false)
|
||||
var authID, authLabel, authType, authValue string
|
||||
if auth != nil {
|
||||
authID = auth.ID
|
||||
authLabel = auth.Label
|
||||
authType, authValue = auth.AccountInfo()
|
||||
}
|
||||
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
|
||||
URL: url,
|
||||
Method: http.MethodPost,
|
||||
Headers: httpReq.Header.Clone(),
|
||||
Body: body,
|
||||
Provider: e.Identifier(),
|
||||
AuthID: authID,
|
||||
AuthLabel: authLabel,
|
||||
AuthType: authType,
|
||||
AuthValue: authValue,
|
||||
})
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
httpResp, err := httpClient.Do(httpReq)
|
||||
if err != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, err)
|
||||
return resp, err
|
||||
}
|
||||
defer func() {
|
||||
if errClose := httpResp.Body.Close(); errClose != nil {
|
||||
log.Errorf("codex executor: close response body error: %v", errClose)
|
||||
}
|
||||
}()
|
||||
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
b, _ := io.ReadAll(httpResp.Body)
|
||||
appendAPIResponseChunk(ctx, e.cfg, b)
|
||||
logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
||||
err = statusErr{code: httpResp.StatusCode, msg: string(b)}
|
||||
return resp, err
|
||||
}
|
||||
data, err := io.ReadAll(httpResp.Body)
|
||||
if err != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, err)
|
||||
return resp, err
|
||||
}
|
||||
appendAPIResponseChunk(ctx, e.cfg, data)
|
||||
reporter.publish(ctx, parseOpenAIUsage(data))
|
||||
reporter.ensurePublished(ctx)
|
||||
var param any
|
||||
out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(originalPayload), body, data, ¶m)
|
||||
resp = cliproxyexecutor.Response{Payload: []byte(out)}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return nil, statusErr{code: http.StatusBadRequest, msg: "streaming not supported for /responses/compact"}
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, baseURL := codexCreds(auth)
|
||||
@@ -229,7 +321,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
applyCodexHeaders(httpReq, auth, apiKey)
|
||||
applyCodexHeaders(httpReq, auth, apiKey, true)
|
||||
var authID, authLabel, authType, authValue string
|
||||
if auth != nil {
|
||||
authID = auth.ID
|
||||
@@ -530,17 +622,21 @@ func (e *CodexExecutor) cacheHelper(ctx context.Context, from sdktranslator.Form
|
||||
}
|
||||
}
|
||||
|
||||
rawJSON, _ = sjson.SetBytes(rawJSON, "prompt_cache_key", cache.ID)
|
||||
if cache.ID != "" {
|
||||
rawJSON, _ = sjson.SetBytes(rawJSON, "prompt_cache_key", cache.ID)
|
||||
}
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(rawJSON))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
httpReq.Header.Set("Conversation_id", cache.ID)
|
||||
httpReq.Header.Set("Session_id", cache.ID)
|
||||
if cache.ID != "" {
|
||||
httpReq.Header.Set("Conversation_id", cache.ID)
|
||||
httpReq.Header.Set("Session_id", cache.ID)
|
||||
}
|
||||
return httpReq, nil
|
||||
}
|
||||
|
||||
func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string) {
|
||||
func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string, stream bool) {
|
||||
r.Header.Set("Content-Type", "application/json")
|
||||
r.Header.Set("Authorization", "Bearer "+token)
|
||||
|
||||
@@ -554,7 +650,11 @@ func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string) {
|
||||
misc.EnsureHeader(r.Header, ginHeaders, "Session_id", uuid.NewString())
|
||||
misc.EnsureHeader(r.Header, ginHeaders, "User-Agent", "codex_cli_rs/0.50.0 (Mac OS 26.0.1; arm64) Apple_Terminal/464")
|
||||
|
||||
r.Header.Set("Accept", "text/event-stream")
|
||||
if stream {
|
||||
r.Header.Set("Accept", "text/event-stream")
|
||||
} else {
|
||||
r.Header.Set("Accept", "application/json")
|
||||
}
|
||||
r.Header.Set("Connection", "Keep-Alive")
|
||||
|
||||
isAPIKey := false
|
||||
|
||||
@@ -103,6 +103,9 @@ func (e *GeminiCLIExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.
|
||||
|
||||
// Execute performs a non-streaming request to the Gemini CLI API.
|
||||
func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
|
||||
@@ -253,6 +256,9 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
|
||||
|
||||
// ExecuteStream performs a streaming request to the Gemini CLI API.
|
||||
func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
|
||||
|
||||
@@ -103,6 +103,9 @@ func (e *GeminiExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut
|
||||
// - cliproxyexecutor.Response: The response from the API
|
||||
// - error: An error if the request fails
|
||||
func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, bearer := geminiCreds(auth)
|
||||
@@ -207,6 +210,9 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
|
||||
// ExecuteStream performs a streaming request to the Gemini API.
|
||||
func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, bearer := geminiCreds(auth)
|
||||
|
||||
@@ -233,6 +233,9 @@ func (e *GeminiVertexExecutor) HttpRequest(ctx context.Context, auth *cliproxyau
|
||||
|
||||
// Execute performs a non-streaming request to the Vertex AI API.
|
||||
func (e *GeminiVertexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||
}
|
||||
// Try API key authentication first
|
||||
apiKey, baseURL := vertexAPICreds(auth)
|
||||
|
||||
@@ -251,6 +254,9 @@ func (e *GeminiVertexExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
|
||||
|
||||
// ExecuteStream performs a streaming request to the Vertex AI API.
|
||||
func (e *GeminiVertexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||
}
|
||||
// Try API key authentication first
|
||||
apiKey, baseURL := vertexAPICreds(auth)
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ import (
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
@@ -134,6 +135,11 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
|
||||
}
|
||||
e.applyHeaders(httpReq, apiToken)
|
||||
|
||||
// Add Copilot-Vision-Request header if the request contains vision content
|
||||
if detectVisionContent(body) {
|
||||
httpReq.Header.Set("Copilot-Vision-Request", "true")
|
||||
}
|
||||
|
||||
var authID, authLabel, authType, authValue string
|
||||
if auth != nil {
|
||||
authID = auth.ID
|
||||
@@ -238,6 +244,11 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
|
||||
}
|
||||
e.applyHeaders(httpReq, apiToken)
|
||||
|
||||
// Add Copilot-Vision-Request header if the request contains vision content
|
||||
if detectVisionContent(body) {
|
||||
httpReq.Header.Set("Copilot-Vision-Request", "true")
|
||||
}
|
||||
|
||||
var authID, authLabel, authType, authValue string
|
||||
if auth != nil {
|
||||
authID = auth.ID
|
||||
@@ -415,6 +426,34 @@ func (e *GitHubCopilotExecutor) applyHeaders(r *http.Request, apiToken string) {
|
||||
r.Header.Set("X-Request-Id", uuid.NewString())
|
||||
}
|
||||
|
||||
// detectVisionContent checks if the request body contains vision/image content.
|
||||
// Returns true if the request includes image_url or image type content blocks.
|
||||
func detectVisionContent(body []byte) bool {
|
||||
// Parse messages array
|
||||
messagesResult := gjson.GetBytes(body, "messages")
|
||||
if !messagesResult.Exists() || !messagesResult.IsArray() {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check each message for vision content
|
||||
for _, message := range messagesResult.Array() {
|
||||
content := message.Get("content")
|
||||
|
||||
// If content is an array, check each content block
|
||||
if content.IsArray() {
|
||||
for _, block := range content.Array() {
|
||||
blockType := block.Get("type").String()
|
||||
// Check for image_url or image type
|
||||
if blockType == "image_url" || blockType == "image" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// normalizeModel is a no-op as GitHub Copilot accepts model names directly.
|
||||
// Model mapping should be done at the registry level if needed.
|
||||
func (e *GitHubCopilotExecutor) normalizeModel(_ string, body []byte) []byte {
|
||||
|
||||
@@ -68,6 +68,9 @@ func (e *IFlowExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth
|
||||
|
||||
// Execute performs a non-streaming chat completion request.
|
||||
func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, baseURL := iflowCreds(auth)
|
||||
@@ -167,6 +170,9 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
|
||||
// ExecuteStream performs a streaming chat completion request.
|
||||
func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, baseURL := iflowCreds(auth)
|
||||
|
||||
@@ -35,7 +35,7 @@ import (
|
||||
|
||||
const (
|
||||
// Kiro API common constants
|
||||
kiroContentType = "application/x-amz-json-1.0"
|
||||
kiroContentType = "application/json"
|
||||
kiroAcceptStream = "*/*"
|
||||
|
||||
// Event Stream frame size constants for boundary protection
|
||||
@@ -47,17 +47,18 @@ const (
|
||||
// Event Stream error type constants
|
||||
ErrStreamFatal = "fatal" // Connection/authentication errors, not recoverable
|
||||
ErrStreamMalformed = "malformed" // Format errors, data cannot be parsed
|
||||
// kiroUserAgent matches amq2api format for User-Agent header (Amazon Q CLI style)
|
||||
|
||||
// kiroUserAgent matches Amazon Q CLI style for User-Agent header
|
||||
kiroUserAgent = "aws-sdk-rust/1.3.9 os/macos lang/rust/1.87.0"
|
||||
// kiroFullUserAgent is the complete x-amz-user-agent header matching amq2api (Amazon Q CLI style)
|
||||
// kiroFullUserAgent is the complete x-amz-user-agent header (Amazon Q CLI style)
|
||||
kiroFullUserAgent = "aws-sdk-rust/1.3.9 ua/2.1 api/ssooidc/1.88.0 os/macos lang/rust/1.87.0 m/E app/AmazonQ-For-CLI"
|
||||
|
||||
// Kiro IDE style headers (from kiro2api - for IDC auth)
|
||||
kiroIDEUserAgent = "aws-sdk-js/1.0.18 ua/2.1 os/darwin#25.0.0 lang/js md/nodejs#20.16.0 api/codewhispererstreaming#1.0.18 m/E KiroIDE-0.2.13-66c23a8c5d15afabec89ef9954ef52a119f10d369df04d548fc6c1eac694b0d1"
|
||||
kiroIDEAmzUserAgent = "aws-sdk-js/1.0.18 KiroIDE-0.2.13-66c23a8c5d15afabec89ef9954ef52a119f10d369df04d548fc6c1eac694b0d1"
|
||||
kiroIDEAgentModeSpec = "spec"
|
||||
// Kiro IDE style headers for IDC auth
|
||||
kiroIDEUserAgent = "aws-sdk-js/1.0.27 ua/2.1 os/win32#10.0.19044 lang/js md/nodejs#22.21.1 api/codewhispererstreaming#1.0.27 m/E"
|
||||
kiroIDEAmzUserAgent = "aws-sdk-js/1.0.27"
|
||||
kiroIDEAgentModeVibe = "vibe"
|
||||
|
||||
// Socket retry configuration constants (based on kiro2Api reference implementation)
|
||||
// Socket retry configuration constants
|
||||
// Maximum number of retry attempts for socket/network errors
|
||||
kiroSocketMaxRetries = 3
|
||||
// Base delay between retry attempts (uses exponential backoff: delay * 2^attempt)
|
||||
@@ -104,13 +105,13 @@ func getGlobalFingerprintManager() *kiroauth.FingerprintManager {
|
||||
// retryConfig holds configuration for socket retry logic.
|
||||
// Based on kiro2Api Python implementation patterns.
|
||||
type retryConfig struct {
|
||||
MaxRetries int // Maximum number of retry attempts
|
||||
BaseDelay time.Duration // Base delay between retries (exponential backoff)
|
||||
MaxDelay time.Duration // Maximum delay cap
|
||||
RetryableErrors []string // List of retryable error patterns
|
||||
RetryableStatus map[int]bool // HTTP status codes to retry
|
||||
FirstTokenTmout time.Duration // Timeout for first token in streaming
|
||||
StreamReadTmout time.Duration // Timeout between stream chunks
|
||||
MaxRetries int // Maximum number of retry attempts
|
||||
BaseDelay time.Duration // Base delay between retries (exponential backoff)
|
||||
MaxDelay time.Duration // Maximum delay cap
|
||||
RetryableErrors []string // List of retryable error patterns
|
||||
RetryableStatus map[int]bool // HTTP status codes to retry
|
||||
FirstTokenTmout time.Duration // Timeout for first token in streaming
|
||||
StreamReadTmout time.Duration // Timeout between stream chunks
|
||||
}
|
||||
|
||||
// defaultRetryConfig returns the default retry configuration for Kiro socket operations.
|
||||
@@ -334,52 +335,111 @@ type kiroEndpointConfig struct {
|
||||
Name string // Endpoint name for logging
|
||||
}
|
||||
|
||||
// kiroEndpointConfigs defines the available Kiro API endpoints with their compatible configurations.
|
||||
// The order determines fallback priority: primary endpoint first, then fallbacks.
|
||||
//
|
||||
// CRITICAL: Each endpoint MUST use its compatible Origin and AmzTarget values:
|
||||
// - CodeWhisperer endpoint (codewhisperer.us-east-1.amazonaws.com): Uses AI_EDITOR origin and AmazonCodeWhispererStreamingService target
|
||||
// - Amazon Q endpoint (q.us-east-1.amazonaws.com): Uses CLI origin and AmazonQDeveloperStreamingService target
|
||||
//
|
||||
// Mismatched combinations will result in 403 Forbidden errors.
|
||||
//
|
||||
// NOTE: CodeWhisperer is set as the default endpoint because:
|
||||
// 1. Most tokens come from Kiro IDE / VSCode extensions (AWS Builder ID auth)
|
||||
// 2. These tokens use AI_EDITOR origin which is only compatible with CodeWhisperer endpoint
|
||||
// 3. Amazon Q endpoint requires CLI origin which is for Amazon Q CLI tokens
|
||||
// This matches the AIClient-2-API-main project's configuration.
|
||||
var kiroEndpointConfigs = []kiroEndpointConfig{
|
||||
{
|
||||
URL: "https://codewhisperer.us-east-1.amazonaws.com/generateAssistantResponse",
|
||||
Origin: "AI_EDITOR",
|
||||
AmzTarget: "AmazonCodeWhispererStreamingService.GenerateAssistantResponse",
|
||||
Name: "CodeWhisperer",
|
||||
},
|
||||
{
|
||||
URL: "https://q.us-east-1.amazonaws.com/",
|
||||
Origin: "CLI",
|
||||
AmzTarget: "AmazonQDeveloperStreamingService.SendMessage",
|
||||
Name: "AmazonQ",
|
||||
},
|
||||
// kiroDefaultRegion is the default AWS region for Kiro API endpoints.
|
||||
// Used when no region is specified in auth metadata.
|
||||
const kiroDefaultRegion = "us-east-1"
|
||||
|
||||
// extractRegionFromProfileARN extracts the AWS region from a ProfileARN.
|
||||
// ARN format: arn:aws:codewhisperer:REGION:ACCOUNT:profile/PROFILE_ID
|
||||
// Returns empty string if region cannot be extracted.
|
||||
func extractRegionFromProfileARN(profileArn string) string {
|
||||
if profileArn == "" {
|
||||
return ""
|
||||
}
|
||||
parts := strings.Split(profileArn, ":")
|
||||
if len(parts) >= 4 && parts[3] != "" {
|
||||
return parts[3]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// buildKiroEndpointConfigs creates endpoint configurations for the specified region.
|
||||
// This enables dynamic region support for Enterprise/IdC users in non-us-east-1 regions.
|
||||
//
|
||||
// Uses Q endpoint (q.{region}.amazonaws.com) as primary for ALL auth types:
|
||||
// - Works universally across all AWS regions (CodeWhisperer endpoint only exists in us-east-1)
|
||||
// - Uses /generateAssistantResponse path with AI_EDITOR origin
|
||||
// - Does NOT require X-Amz-Target header
|
||||
//
|
||||
// The AmzTarget field is kept for backward compatibility but should be empty
|
||||
// to indicate that the header should NOT be set.
|
||||
func buildKiroEndpointConfigs(region string) []kiroEndpointConfig {
|
||||
if region == "" {
|
||||
region = kiroDefaultRegion
|
||||
}
|
||||
return []kiroEndpointConfig{
|
||||
{
|
||||
// Primary: Q endpoint - works for all regions and auth types
|
||||
URL: fmt.Sprintf("https://q.%s.amazonaws.com/generateAssistantResponse", region),
|
||||
Origin: "AI_EDITOR",
|
||||
AmzTarget: "", // Empty = don't set X-Amz-Target header
|
||||
Name: "AmazonQ",
|
||||
},
|
||||
{
|
||||
// Fallback: CodeWhisperer endpoint (legacy, only works in us-east-1)
|
||||
URL: fmt.Sprintf("https://codewhisperer.%s.amazonaws.com/generateAssistantResponse", region),
|
||||
Origin: "AI_EDITOR",
|
||||
AmzTarget: "AmazonCodeWhispererStreamingService.GenerateAssistantResponse",
|
||||
Name: "CodeWhisperer",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// kiroEndpointConfigs is kept for backward compatibility with default us-east-1 region.
|
||||
// Prefer using buildKiroEndpointConfigs(region) for dynamic region support.
|
||||
var kiroEndpointConfigs = buildKiroEndpointConfigs(kiroDefaultRegion)
|
||||
|
||||
// getKiroEndpointConfigs returns the list of Kiro API endpoint configurations to try in order.
|
||||
// Supports dynamic region based on auth metadata "api_region", "profile_arn", or "region" field.
|
||||
// Supports reordering based on "preferred_endpoint" in auth metadata/attributes.
|
||||
// For IDC auth method, automatically uses CodeWhisperer endpoint with CLI origin.
|
||||
//
|
||||
// Region priority:
|
||||
// 1. auth.Metadata["api_region"] - explicit API region override
|
||||
// 2. ProfileARN region - extracted from arn:aws:service:REGION:account:resource
|
||||
// 3. kiroDefaultRegion (us-east-1) - fallback
|
||||
// Note: OIDC "region" is NOT used - it's for token refresh, not API calls
|
||||
func getKiroEndpointConfigs(auth *cliproxyauth.Auth) []kiroEndpointConfig {
|
||||
if auth == nil {
|
||||
return kiroEndpointConfigs
|
||||
}
|
||||
|
||||
// For IDC auth, use CodeWhisperer endpoint with AI_EDITOR origin (same as Social auth)
|
||||
// Based on kiro2api analysis: IDC tokens work with CodeWhisperer endpoint using Bearer auth
|
||||
// Determine API region with priority: api_region > profile_arn > region > default
|
||||
region := kiroDefaultRegion
|
||||
regionSource := "default"
|
||||
|
||||
if auth.Metadata != nil {
|
||||
// Priority 1: Explicit api_region override
|
||||
if r, ok := auth.Metadata["api_region"].(string); ok && r != "" {
|
||||
region = r
|
||||
regionSource = "api_region"
|
||||
} else {
|
||||
// Priority 2: Extract from ProfileARN
|
||||
if profileArn, ok := auth.Metadata["profile_arn"].(string); ok && profileArn != "" {
|
||||
if arnRegion := extractRegionFromProfileARN(profileArn); arnRegion != "" {
|
||||
region = arnRegion
|
||||
regionSource = "profile_arn"
|
||||
}
|
||||
}
|
||||
// Note: OIDC "region" field is NOT used for API endpoint
|
||||
// Kiro API only exists in us-east-1, while OIDC region can vary (e.g., ap-northeast-2)
|
||||
// Using OIDC region for API calls causes DNS failures
|
||||
}
|
||||
}
|
||||
|
||||
log.Debugf("kiro: using region %s (source: %s)", region, regionSource)
|
||||
|
||||
// Build endpoint configs for the specified region
|
||||
endpointConfigs := buildKiroEndpointConfigs(region)
|
||||
|
||||
// For IDC auth, use Q endpoint with AI_EDITOR origin
|
||||
// IDC tokens work with Q endpoint using Bearer auth
|
||||
// The difference is only in how tokens are refreshed (OIDC with clientId/clientSecret for IDC)
|
||||
// NOT in how API calls are made - both Social and IDC use the same endpoint/origin
|
||||
if auth.Metadata != nil {
|
||||
authMethod, _ := auth.Metadata["auth_method"].(string)
|
||||
if authMethod == "idc" {
|
||||
log.Debugf("kiro: IDC auth, using CodeWhisperer endpoint")
|
||||
return kiroEndpointConfigs
|
||||
if strings.ToLower(authMethod) == "idc" {
|
||||
log.Debugf("kiro: IDC auth, using Q endpoint (region: %s)", region)
|
||||
return endpointConfigs
|
||||
}
|
||||
}
|
||||
|
||||
@@ -396,7 +456,7 @@ func getKiroEndpointConfigs(auth *cliproxyauth.Auth) []kiroEndpointConfig {
|
||||
}
|
||||
|
||||
if preference == "" {
|
||||
return kiroEndpointConfigs
|
||||
return endpointConfigs
|
||||
}
|
||||
|
||||
preference = strings.ToLower(strings.TrimSpace(preference))
|
||||
@@ -405,7 +465,7 @@ func getKiroEndpointConfigs(auth *cliproxyauth.Auth) []kiroEndpointConfig {
|
||||
var sorted []kiroEndpointConfig
|
||||
var remaining []kiroEndpointConfig
|
||||
|
||||
for _, cfg := range kiroEndpointConfigs {
|
||||
for _, cfg := range endpointConfigs {
|
||||
name := strings.ToLower(cfg.Name)
|
||||
// Check for matches
|
||||
// CodeWhisperer aliases: codewhisperer, ide
|
||||
@@ -426,7 +486,7 @@ func getKiroEndpointConfigs(auth *cliproxyauth.Auth) []kiroEndpointConfig {
|
||||
|
||||
// If preference didn't match anything, return default
|
||||
if len(sorted) == 0 {
|
||||
return kiroEndpointConfigs
|
||||
return endpointConfigs
|
||||
}
|
||||
|
||||
// Combine: preferred first, then others
|
||||
@@ -445,7 +505,7 @@ func isIDCAuth(auth *cliproxyauth.Auth) bool {
|
||||
return false
|
||||
}
|
||||
authMethod, _ := auth.Metadata["auth_method"].(string)
|
||||
return authMethod == "idc"
|
||||
return strings.ToLower(authMethod) == "idc"
|
||||
}
|
||||
|
||||
// buildKiroPayloadForFormat builds the Kiro API payload based on the source format.
|
||||
@@ -482,12 +542,12 @@ func applyDynamicFingerprint(req *http.Request, auth *cliproxyauth.Auth) {
|
||||
// Get token-specific fingerprint for dynamic UA generation
|
||||
tokenKey := getTokenKey(auth)
|
||||
fp := getGlobalFingerprintManager().GetFingerprint(tokenKey)
|
||||
|
||||
|
||||
// Use fingerprint-generated dynamic User-Agent
|
||||
req.Header.Set("User-Agent", fp.BuildUserAgent())
|
||||
req.Header.Set("X-Amz-User-Agent", fp.BuildAmzUserAgent())
|
||||
req.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeSpec)
|
||||
|
||||
req.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeVibe)
|
||||
|
||||
log.Debugf("kiro: using dynamic fingerprint for token %s (SDK:%s, OS:%s/%s, Kiro:%s)",
|
||||
tokenKey[:8]+"...", fp.SDKVersion, fp.OSType, fp.OSVersion, fp.KiroVersion)
|
||||
} else {
|
||||
@@ -506,10 +566,10 @@ func (e *KiroExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth
|
||||
if strings.TrimSpace(accessToken) == "" {
|
||||
return statusErr{code: http.StatusUnauthorized, msg: "missing access token"}
|
||||
}
|
||||
|
||||
|
||||
// Apply dynamic fingerprint-based headers
|
||||
applyDynamicFingerprint(req, auth)
|
||||
|
||||
|
||||
req.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
|
||||
req.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
|
||||
req.Header.Set("Authorization", "Bearer "+accessToken)
|
||||
@@ -665,12 +725,17 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
|
||||
|
||||
httpReq.Header.Set("Content-Type", kiroContentType)
|
||||
httpReq.Header.Set("Accept", kiroAcceptStream)
|
||||
// Use endpoint-specific X-Amz-Target (critical for avoiding 403 errors)
|
||||
httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget)
|
||||
// Only set X-Amz-Target if specified (Q endpoint doesn't require it)
|
||||
if endpointConfig.AmzTarget != "" {
|
||||
httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget)
|
||||
}
|
||||
// Kiro-specific headers
|
||||
httpReq.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeVibe)
|
||||
httpReq.Header.Set("x-amzn-codewhisperer-optout", "true")
|
||||
|
||||
// Apply dynamic fingerprint-based headers
|
||||
applyDynamicFingerprint(httpReq, auth)
|
||||
|
||||
|
||||
httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
|
||||
httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
|
||||
|
||||
@@ -910,30 +975,36 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
|
||||
}
|
||||
|
||||
// Fallback for usage if missing from upstream
|
||||
if usageInfo.TotalTokens == 0 {
|
||||
|
||||
// 1. Estimate InputTokens if missing
|
||||
if usageInfo.InputTokens == 0 {
|
||||
if enc, encErr := getTokenizer(req.Model); encErr == nil {
|
||||
if inp, countErr := countOpenAIChatTokens(enc, opts.OriginalRequest); countErr == nil {
|
||||
usageInfo.InputTokens = inp
|
||||
}
|
||||
}
|
||||
if len(content) > 0 {
|
||||
// Use tiktoken for more accurate output token calculation
|
||||
if enc, encErr := getTokenizer(req.Model); encErr == nil {
|
||||
if tokenCount, countErr := enc.Count(content); countErr == nil {
|
||||
usageInfo.OutputTokens = int64(tokenCount)
|
||||
}
|
||||
}
|
||||
// Fallback to character count estimation if tiktoken fails
|
||||
if usageInfo.OutputTokens == 0 {
|
||||
usageInfo.OutputTokens = int64(len(content) / 4)
|
||||
if usageInfo.OutputTokens == 0 {
|
||||
usageInfo.OutputTokens = 1
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Estimate OutputTokens if missing and content is available
|
||||
if usageInfo.OutputTokens == 0 && len(content) > 0 {
|
||||
// Use tiktoken for more accurate output token calculation
|
||||
if enc, encErr := getTokenizer(req.Model); encErr == nil {
|
||||
if tokenCount, countErr := enc.Count(content); countErr == nil {
|
||||
usageInfo.OutputTokens = int64(tokenCount)
|
||||
}
|
||||
}
|
||||
// Fallback to character count estimation if tiktoken fails
|
||||
if usageInfo.OutputTokens == 0 {
|
||||
usageInfo.OutputTokens = int64(len(content) / 4)
|
||||
if usageInfo.OutputTokens == 0 {
|
||||
usageInfo.OutputTokens = 1
|
||||
}
|
||||
}
|
||||
usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens
|
||||
}
|
||||
|
||||
// 3. Update TotalTokens
|
||||
usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens
|
||||
|
||||
appendAPIResponseChunk(ctx, e.cfg, []byte(content))
|
||||
reporter.publish(ctx, usageInfo)
|
||||
|
||||
@@ -1074,12 +1145,17 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
|
||||
|
||||
httpReq.Header.Set("Content-Type", kiroContentType)
|
||||
httpReq.Header.Set("Accept", kiroAcceptStream)
|
||||
// Use endpoint-specific X-Amz-Target (critical for avoiding 403 errors)
|
||||
httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget)
|
||||
// Only set X-Amz-Target if specified (Q endpoint doesn't require it)
|
||||
if endpointConfig.AmzTarget != "" {
|
||||
httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget)
|
||||
}
|
||||
// Kiro-specific headers
|
||||
httpReq.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeVibe)
|
||||
httpReq.Header.Set("x-amzn-codewhisperer-optout", "true")
|
||||
|
||||
// Apply dynamic fingerprint-based headers
|
||||
applyDynamicFingerprint(httpReq, auth)
|
||||
|
||||
|
||||
httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
|
||||
httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
|
||||
|
||||
@@ -1537,11 +1613,27 @@ func determineAgenticMode(model string) (isAgentic, isChatOnly bool) {
|
||||
}
|
||||
|
||||
// getEffectiveProfileArn determines if profileArn should be included based on auth method.
|
||||
// profileArn is only needed for social auth (Google OAuth), not for builder-id (AWS SSO).
|
||||
// profileArn is only needed for social auth (Google OAuth), not for AWS SSO OIDC (Builder ID/IDC).
|
||||
//
|
||||
// Detection logic (matching kiro-openai-gateway):
|
||||
// 1. Check auth_method field: "builder-id" or "idc"
|
||||
// 2. Check auth_type field: "aws_sso_oidc" (from kiro-cli tokens)
|
||||
// 3. Check for client_id + client_secret presence (AWS SSO OIDC signature)
|
||||
func getEffectiveProfileArn(auth *cliproxyauth.Auth, profileArn string) string {
|
||||
if auth != nil && auth.Metadata != nil {
|
||||
if authMethod, ok := auth.Metadata["auth_method"].(string); ok && authMethod == "builder-id" {
|
||||
return "" // Don't include profileArn for builder-id auth
|
||||
// Check 1: auth_method field (from CLIProxyAPI tokens)
|
||||
if authMethod, ok := auth.Metadata["auth_method"].(string); ok && (authMethod == "builder-id" || authMethod == "idc") {
|
||||
return "" // AWS SSO OIDC - don't include profileArn
|
||||
}
|
||||
// Check 2: auth_type field (from kiro-cli tokens)
|
||||
if authType, ok := auth.Metadata["auth_type"].(string); ok && authType == "aws_sso_oidc" {
|
||||
return "" // AWS SSO OIDC - don't include profileArn
|
||||
}
|
||||
// Check 3: client_id + client_secret presence (AWS SSO OIDC signature)
|
||||
_, hasClientID := auth.Metadata["client_id"].(string)
|
||||
_, hasClientSecret := auth.Metadata["client_secret"].(string)
|
||||
if hasClientID && hasClientSecret {
|
||||
return "" // AWS SSO OIDC - don't include profileArn
|
||||
}
|
||||
}
|
||||
return profileArn
|
||||
@@ -1550,14 +1642,32 @@ func getEffectiveProfileArn(auth *cliproxyauth.Auth, profileArn string) string {
|
||||
// getEffectiveProfileArnWithWarning determines if profileArn should be included based on auth method,
|
||||
// and logs a warning if profileArn is missing for non-builder-id auth.
|
||||
// This consolidates the auth_method check that was previously done separately.
|
||||
//
|
||||
// AWS SSO OIDC (Builder ID/IDC) users don't need profileArn - sending it causes 403 errors.
|
||||
// Only Kiro Desktop (social auth like Google/GitHub) users need profileArn.
|
||||
//
|
||||
// Detection logic (matching kiro-openai-gateway):
|
||||
// 1. Check auth_method field: "builder-id" or "idc"
|
||||
// 2. Check auth_type field: "aws_sso_oidc" (from kiro-cli tokens)
|
||||
// 3. Check for client_id + client_secret presence (AWS SSO OIDC signature)
|
||||
func getEffectiveProfileArnWithWarning(auth *cliproxyauth.Auth, profileArn string) string {
|
||||
if auth != nil && auth.Metadata != nil {
|
||||
// Check 1: auth_method field (from CLIProxyAPI tokens)
|
||||
if authMethod, ok := auth.Metadata["auth_method"].(string); ok && (authMethod == "builder-id" || authMethod == "idc") {
|
||||
// builder-id and idc auth don't need profileArn
|
||||
return ""
|
||||
return "" // AWS SSO OIDC - don't include profileArn
|
||||
}
|
||||
// Check 2: auth_type field (from kiro-cli tokens)
|
||||
if authType, ok := auth.Metadata["auth_type"].(string); ok && authType == "aws_sso_oidc" {
|
||||
return "" // AWS SSO OIDC - don't include profileArn
|
||||
}
|
||||
// Check 3: client_id + client_secret presence (AWS SSO OIDC signature, like kiro-openai-gateway)
|
||||
_, hasClientID := auth.Metadata["client_id"].(string)
|
||||
_, hasClientSecret := auth.Metadata["client_secret"].(string)
|
||||
if hasClientID && hasClientSecret {
|
||||
return "" // AWS SSO OIDC - don't include profileArn
|
||||
}
|
||||
}
|
||||
// For non-builder-id/idc auth (social auth), profileArn is required
|
||||
// For social auth (Kiro Desktop), profileArn is required
|
||||
if profileArn == "" {
|
||||
log.Warnf("kiro: profile ARN not found in auth, API calls may fail")
|
||||
}
|
||||
@@ -2332,8 +2442,8 @@ func (e *KiroExecutor) extractEventTypeFromBytes(headers []byte) string {
|
||||
func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out chan<- cliproxyexecutor.StreamChunk, targetFormat sdktranslator.Format, model string, originalReq, claudeBody []byte, reporter *usageReporter, thinkingEnabled bool) {
|
||||
reader := bufio.NewReaderSize(body, 20*1024*1024) // 20MB buffer to match other providers
|
||||
var totalUsage usage.Detail
|
||||
var hasToolUses bool // Track if any tool uses were emitted
|
||||
var upstreamStopReason string // Track stop_reason from upstream events
|
||||
var hasToolUses bool // Track if any tool uses were emitted
|
||||
var upstreamStopReason string // Track stop_reason from upstream events
|
||||
|
||||
// Tool use state tracking for input buffering and deduplication
|
||||
processedIDs := make(map[string]bool)
|
||||
@@ -3111,12 +3221,92 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
_ = signature // Signature can be used for verification if needed
|
||||
|
||||
case "toolUseEvent":
|
||||
// Debug: log raw toolUseEvent payload for large tool inputs
|
||||
if log.IsLevelEnabled(log.DebugLevel) {
|
||||
payloadStr := string(payload)
|
||||
if len(payloadStr) > 500 {
|
||||
payloadStr = payloadStr[:500] + "...[truncated]"
|
||||
}
|
||||
log.Debugf("kiro: raw toolUseEvent payload (%d bytes): %s", len(payload), payloadStr)
|
||||
}
|
||||
// Handle dedicated tool use events with input buffering
|
||||
completedToolUses, newState := kiroclaude.ProcessToolUseEvent(event, currentToolUse, processedIDs)
|
||||
currentToolUse = newState
|
||||
|
||||
// Emit completed tool uses
|
||||
for _, tu := range completedToolUses {
|
||||
// Check for truncated write marker - emit as a Bash tool that echoes the error
|
||||
// This way Claude Code will execute it, see the error, and the agent can retry
|
||||
if tu.Name == "__truncated_write__" {
|
||||
filePath := ""
|
||||
if fp, ok := tu.Input["file_path"].(string); ok && fp != "" {
|
||||
filePath = fp
|
||||
}
|
||||
|
||||
// Create a Bash tool that echoes the error message
|
||||
// This will be executed by Claude Code and the agent will see the result
|
||||
var errorMsg string
|
||||
if filePath != "" {
|
||||
errorMsg = fmt.Sprintf("echo '[WRITE TOOL ERROR] The file content for \"%s\" is too large to be transmitted by the upstream API. You MUST retry by writing the file in smaller chunks: First use Write to create the file with the first 700 lines, then use multiple Edit operations to append the remaining content in chunks of ~700 lines each.'", filePath)
|
||||
} else {
|
||||
errorMsg = "echo '[WRITE TOOL ERROR] The file content is too large to be transmitted by the upstream API. The Write tool input was truncated. You MUST retry by writing the file in smaller chunks: First use Write to create the file with the first 700 lines, then use multiple Edit operations to append the remaining content in chunks of ~700 lines each.'"
|
||||
}
|
||||
|
||||
log.Warnf("kiro: converting truncated write to Bash echo for file: %s", filePath)
|
||||
|
||||
hasToolUses = true
|
||||
|
||||
// Close text block if open
|
||||
if isTextBlockOpen && contentBlockIndex >= 0 {
|
||||
blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(contentBlockIndex)
|
||||
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
isTextBlockOpen = false
|
||||
}
|
||||
|
||||
contentBlockIndex++
|
||||
|
||||
// Emit as Bash tool_use
|
||||
blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", tu.ToolUseID, "Bash")
|
||||
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
|
||||
// Emit the Bash command as input
|
||||
bashInput := map[string]interface{}{
|
||||
"command": errorMsg,
|
||||
}
|
||||
inputJSON, err := json.Marshal(bashInput)
|
||||
if err != nil {
|
||||
log.Errorf("kiro: failed to marshal bash input for truncated write error: %v", err)
|
||||
continue
|
||||
}
|
||||
inputDelta := kiroclaude.BuildClaudeInputJsonDeltaEvent(string(inputJSON), contentBlockIndex)
|
||||
sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, inputDelta, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
|
||||
blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(contentBlockIndex)
|
||||
sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
|
||||
continue // Skip the normal tool_use emission
|
||||
}
|
||||
|
||||
hasToolUses = true
|
||||
|
||||
// Close text block if open
|
||||
|
||||
@@ -81,9 +81,13 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
|
||||
return
|
||||
}
|
||||
|
||||
// Translate inbound request to OpenAI format
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("openai")
|
||||
endpoint := "/chat/completions"
|
||||
if opts.Alt == "responses/compact" {
|
||||
to = sdktranslator.FromString("openai-response")
|
||||
endpoint = "/responses/compact"
|
||||
}
|
||||
originalPayload := bytes.Clone(req.Payload)
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
@@ -92,13 +96,18 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
|
||||
translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream)
|
||||
requestedModel := payloadRequestedModel(opts, req.Model)
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel)
|
||||
if opts.Alt == "responses/compact" {
|
||||
if updated, errDelete := sjson.DeleteBytes(translated, "stream"); errDelete == nil {
|
||||
translated = updated
|
||||
}
|
||||
}
|
||||
|
||||
translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||
url := strings.TrimSuffix(baseURL, "/") + endpoint
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
|
||||
if err != nil {
|
||||
return resp, err
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
package executor
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
func TestOpenAICompatExecutorCompactPassthrough(t *testing.T) {
|
||||
var gotPath string
|
||||
var gotBody []byte
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
gotPath = r.URL.Path
|
||||
body, _ := io.ReadAll(r.Body)
|
||||
gotBody = body
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"id":"resp_1","object":"response.compaction","usage":{"input_tokens":1,"output_tokens":2,"total_tokens":3}}`))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
executor := NewOpenAICompatExecutor("openai-compatibility", &config.Config{})
|
||||
auth := &cliproxyauth.Auth{Attributes: map[string]string{
|
||||
"base_url": server.URL + "/v1",
|
||||
"api_key": "test",
|
||||
}}
|
||||
payload := []byte(`{"model":"gpt-5.1-codex-max","input":[{"role":"user","content":"hi"}]}`)
|
||||
resp, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{
|
||||
Model: "gpt-5.1-codex-max",
|
||||
Payload: payload,
|
||||
}, cliproxyexecutor.Options{
|
||||
SourceFormat: sdktranslator.FromString("openai-response"),
|
||||
Alt: "responses/compact",
|
||||
Stream: false,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Execute error: %v", err)
|
||||
}
|
||||
if gotPath != "/v1/responses/compact" {
|
||||
t.Fatalf("path = %q, want %q", gotPath, "/v1/responses/compact")
|
||||
}
|
||||
if !gjson.GetBytes(gotBody, "input").Exists() {
|
||||
t.Fatalf("expected input in body")
|
||||
}
|
||||
if gjson.GetBytes(gotBody, "messages").Exists() {
|
||||
t.Fatalf("unexpected messages in body")
|
||||
}
|
||||
if string(resp.Payload) != `{"id":"resp_1","object":"response.compaction","usage":{"input_tokens":1,"output_tokens":2,"total_tokens":3}}` {
|
||||
t.Fatalf("payload = %s", string(resp.Payload))
|
||||
}
|
||||
}
|
||||
@@ -66,6 +66,9 @@ func (e *QwenExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth,
|
||||
}
|
||||
|
||||
func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
token, baseURL := qwenCreds(auth)
|
||||
@@ -153,6 +156,9 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
||||
}
|
||||
|
||||
func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
if opts.Alt == "responses/compact" {
|
||||
return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
token, baseURL := qwenCreds(auth)
|
||||
|
||||
@@ -199,15 +199,31 @@ func parseOpenAIUsage(data []byte) usage.Detail {
|
||||
if !usageNode.Exists() {
|
||||
return usage.Detail{}
|
||||
}
|
||||
inputNode := usageNode.Get("prompt_tokens")
|
||||
if !inputNode.Exists() {
|
||||
inputNode = usageNode.Get("input_tokens")
|
||||
}
|
||||
outputNode := usageNode.Get("completion_tokens")
|
||||
if !outputNode.Exists() {
|
||||
outputNode = usageNode.Get("output_tokens")
|
||||
}
|
||||
detail := usage.Detail{
|
||||
InputTokens: usageNode.Get("prompt_tokens").Int(),
|
||||
OutputTokens: usageNode.Get("completion_tokens").Int(),
|
||||
InputTokens: inputNode.Int(),
|
||||
OutputTokens: outputNode.Int(),
|
||||
TotalTokens: usageNode.Get("total_tokens").Int(),
|
||||
}
|
||||
if cached := usageNode.Get("prompt_tokens_details.cached_tokens"); cached.Exists() {
|
||||
cached := usageNode.Get("prompt_tokens_details.cached_tokens")
|
||||
if !cached.Exists() {
|
||||
cached = usageNode.Get("input_tokens_details.cached_tokens")
|
||||
}
|
||||
if cached.Exists() {
|
||||
detail.CachedTokens = cached.Int()
|
||||
}
|
||||
if reasoning := usageNode.Get("completion_tokens_details.reasoning_tokens"); reasoning.Exists() {
|
||||
reasoning := usageNode.Get("completion_tokens_details.reasoning_tokens")
|
||||
if !reasoning.Exists() {
|
||||
reasoning = usageNode.Get("output_tokens_details.reasoning_tokens")
|
||||
}
|
||||
if reasoning.Exists() {
|
||||
detail.ReasoningTokens = reasoning.Int()
|
||||
}
|
||||
return detail
|
||||
|
||||
43
internal/runtime/executor/usage_helpers_test.go
Normal file
43
internal/runtime/executor/usage_helpers_test.go
Normal file
@@ -0,0 +1,43 @@
|
||||
package executor
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestParseOpenAIUsageChatCompletions(t *testing.T) {
|
||||
data := []byte(`{"usage":{"prompt_tokens":1,"completion_tokens":2,"total_tokens":3,"prompt_tokens_details":{"cached_tokens":4},"completion_tokens_details":{"reasoning_tokens":5}}}`)
|
||||
detail := parseOpenAIUsage(data)
|
||||
if detail.InputTokens != 1 {
|
||||
t.Fatalf("input tokens = %d, want %d", detail.InputTokens, 1)
|
||||
}
|
||||
if detail.OutputTokens != 2 {
|
||||
t.Fatalf("output tokens = %d, want %d", detail.OutputTokens, 2)
|
||||
}
|
||||
if detail.TotalTokens != 3 {
|
||||
t.Fatalf("total tokens = %d, want %d", detail.TotalTokens, 3)
|
||||
}
|
||||
if detail.CachedTokens != 4 {
|
||||
t.Fatalf("cached tokens = %d, want %d", detail.CachedTokens, 4)
|
||||
}
|
||||
if detail.ReasoningTokens != 5 {
|
||||
t.Fatalf("reasoning tokens = %d, want %d", detail.ReasoningTokens, 5)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseOpenAIUsageResponses(t *testing.T) {
|
||||
data := []byte(`{"usage":{"input_tokens":10,"output_tokens":20,"total_tokens":30,"input_tokens_details":{"cached_tokens":7},"output_tokens_details":{"reasoning_tokens":9}}}`)
|
||||
detail := parseOpenAIUsage(data)
|
||||
if detail.InputTokens != 10 {
|
||||
t.Fatalf("input tokens = %d, want %d", detail.InputTokens, 10)
|
||||
}
|
||||
if detail.OutputTokens != 20 {
|
||||
t.Fatalf("output tokens = %d, want %d", detail.OutputTokens, 20)
|
||||
}
|
||||
if detail.TotalTokens != 30 {
|
||||
t.Fatalf("total tokens = %d, want %d", detail.TotalTokens, 30)
|
||||
}
|
||||
if detail.CachedTokens != 7 {
|
||||
t.Fatalf("cached tokens = %d, want %d", detail.CachedTokens, 7)
|
||||
}
|
||||
if detail.ReasoningTokens != 9 {
|
||||
t.Fatalf("reasoning tokens = %d, want %d", detail.ReasoningTokens, 9)
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,7 @@
|
||||
// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax).
|
||||
// Package iflow implements thinking configuration for iFlow models.
|
||||
//
|
||||
// iFlow models use boolean toggle semantics:
|
||||
// - GLM models: chat_template_kwargs.enable_thinking (boolean)
|
||||
// - Models using chat_template_kwargs.enable_thinking (boolean toggle)
|
||||
// - MiniMax models: reasoning_split (boolean)
|
||||
//
|
||||
// Level values are converted to boolean: none=false, all others=true
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
// Applier implements thinking.ProviderApplier for iFlow models.
|
||||
//
|
||||
// iFlow-specific behavior:
|
||||
// - enable_thinking toggle models: enable_thinking boolean
|
||||
// - GLM models: enable_thinking boolean + clear_thinking=false
|
||||
// - MiniMax models: reasoning_split boolean
|
||||
// - Level to boolean: none=false, others=true
|
||||
@@ -61,8 +62,8 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if isGLMModel(modelInfo.ID) {
|
||||
return applyGLM(body, config), nil
|
||||
if isEnableThinkingModel(modelInfo.ID) {
|
||||
return applyEnableThinking(body, config, isGLMModel(modelInfo.ID)), nil
|
||||
}
|
||||
|
||||
if isMiniMaxModel(modelInfo.ID) {
|
||||
@@ -97,7 +98,8 @@ func configToBoolean(config thinking.ThinkingConfig) bool {
|
||||
}
|
||||
}
|
||||
|
||||
// applyGLM applies thinking configuration for GLM models.
|
||||
// applyEnableThinking applies thinking configuration for models that use
|
||||
// chat_template_kwargs.enable_thinking format.
|
||||
//
|
||||
// Output format when enabled:
|
||||
//
|
||||
@@ -107,9 +109,8 @@ func configToBoolean(config thinking.ThinkingConfig) bool {
|
||||
//
|
||||
// {"chat_template_kwargs": {"enable_thinking": false}}
|
||||
//
|
||||
// Note: clear_thinking is only set when thinking is enabled, to preserve
|
||||
// thinking output in the response.
|
||||
func applyGLM(body []byte, config thinking.ThinkingConfig) []byte {
|
||||
// Note: clear_thinking is only set for GLM models when thinking is enabled.
|
||||
func applyEnableThinking(body []byte, config thinking.ThinkingConfig, setClearThinking bool) []byte {
|
||||
enableThinking := configToBoolean(config)
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
@@ -118,8 +119,11 @@ func applyGLM(body []byte, config thinking.ThinkingConfig) []byte {
|
||||
|
||||
result, _ := sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
|
||||
|
||||
// clear_thinking is a GLM-only knob, strip it for other models.
|
||||
result, _ = sjson.DeleteBytes(result, "chat_template_kwargs.clear_thinking")
|
||||
|
||||
// clear_thinking only needed when thinking is enabled
|
||||
if enableThinking {
|
||||
if enableThinking && setClearThinking {
|
||||
result, _ = sjson.SetBytes(result, "chat_template_kwargs.clear_thinking", false)
|
||||
}
|
||||
|
||||
@@ -143,8 +147,21 @@ func applyMiniMax(body []byte, config thinking.ThinkingConfig) []byte {
|
||||
return result
|
||||
}
|
||||
|
||||
// isEnableThinkingModel determines if the model uses chat_template_kwargs.enable_thinking format.
|
||||
func isEnableThinkingModel(modelID string) bool {
|
||||
if isGLMModel(modelID) {
|
||||
return true
|
||||
}
|
||||
id := strings.ToLower(modelID)
|
||||
switch id {
|
||||
case "qwen3-max-preview", "deepseek-v3.2", "deepseek-v3.1":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// isGLMModel determines if the model is a GLM series model.
|
||||
// GLM models use chat_template_kwargs.enable_thinking format.
|
||||
func isGLMModel(modelID string) bool {
|
||||
return strings.HasPrefix(strings.ToLower(modelID), "glm")
|
||||
}
|
||||
|
||||
@@ -155,10 +155,13 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
clientContentJSON, _ = sjson.SetRaw(clientContentJSON, "parts.-1", partJSON)
|
||||
} else if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "text" {
|
||||
prompt := contentResult.Get("text").String()
|
||||
partJSON := `{}`
|
||||
if prompt != "" {
|
||||
partJSON, _ = sjson.Set(partJSON, "text", prompt)
|
||||
// Skip empty text parts to avoid Gemini API error:
|
||||
// "required oneof field 'data' must have one initialized field"
|
||||
if prompt == "" {
|
||||
continue
|
||||
}
|
||||
partJSON := `{}`
|
||||
partJSON, _ = sjson.Set(partJSON, "text", prompt)
|
||||
clientContentJSON, _ = sjson.SetRaw(clientContentJSON, "parts.-1", partJSON)
|
||||
} else if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "tool_use" {
|
||||
// NOTE: Do NOT inject dummy thinking blocks here.
|
||||
@@ -285,6 +288,13 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
}
|
||||
}
|
||||
|
||||
// Skip messages with empty parts array to avoid Gemini API error:
|
||||
// "required oneof field 'data' must have one initialized field"
|
||||
partsCheck := gjson.Get(clientContentJSON, "parts")
|
||||
if !partsCheck.IsArray() || len(partsCheck.Array()) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
contentsJSON, _ = sjson.SetRaw(contentsJSON, "-1", clientContentJSON)
|
||||
hasContents = true
|
||||
} else if contentsResult.Type == gjson.String {
|
||||
|
||||
@@ -41,6 +41,7 @@ func ConvertAntigravityResponseToGemini(ctx context.Context, _ string, originalR
|
||||
responseResult := gjson.GetBytes(rawJSON, "response")
|
||||
if responseResult.Exists() {
|
||||
chunk = []byte(responseResult.Raw)
|
||||
chunk = restoreUsageMetadata(chunk)
|
||||
}
|
||||
} else {
|
||||
chunkTemplate := "[]"
|
||||
@@ -76,7 +77,8 @@ func ConvertAntigravityResponseToGemini(ctx context.Context, _ string, originalR
|
||||
func ConvertAntigravityResponseToGeminiNonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
|
||||
responseResult := gjson.GetBytes(rawJSON, "response")
|
||||
if responseResult.Exists() {
|
||||
return responseResult.Raw
|
||||
chunk := restoreUsageMetadata([]byte(responseResult.Raw))
|
||||
return string(chunk)
|
||||
}
|
||||
return string(rawJSON)
|
||||
}
|
||||
@@ -84,3 +86,15 @@ func ConvertAntigravityResponseToGeminiNonStream(_ context.Context, _ string, or
|
||||
func GeminiTokenCount(ctx context.Context, count int64) string {
|
||||
return fmt.Sprintf(`{"totalTokens":%d,"promptTokensDetails":[{"modality":"TEXT","tokenCount":%d}]}`, count, count)
|
||||
}
|
||||
|
||||
// restoreUsageMetadata renames cpaUsageMetadata back to usageMetadata.
|
||||
// The executor renames usageMetadata to cpaUsageMetadata in non-terminal chunks
|
||||
// to preserve usage data while hiding it from clients that don't expect it.
|
||||
// When returning standard Gemini API format, we must restore the original name.
|
||||
func restoreUsageMetadata(chunk []byte) []byte {
|
||||
if cpaUsage := gjson.GetBytes(chunk, "cpaUsageMetadata"); cpaUsage.Exists() {
|
||||
chunk, _ = sjson.SetRawBytes(chunk, "usageMetadata", []byte(cpaUsage.Raw))
|
||||
chunk, _ = sjson.DeleteBytes(chunk, "cpaUsageMetadata")
|
||||
}
|
||||
return chunk
|
||||
}
|
||||
|
||||
@@ -0,0 +1,95 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestRestoreUsageMetadata(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input []byte
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "cpaUsageMetadata renamed to usageMetadata",
|
||||
input: []byte(`{"modelVersion":"gemini-3-pro","cpaUsageMetadata":{"promptTokenCount":100,"candidatesTokenCount":200}}`),
|
||||
expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":200}}`,
|
||||
},
|
||||
{
|
||||
name: "no cpaUsageMetadata unchanged",
|
||||
input: []byte(`{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`),
|
||||
expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
|
||||
},
|
||||
{
|
||||
name: "empty input",
|
||||
input: []byte(`{}`),
|
||||
expected: `{}`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := restoreUsageMetadata(tt.input)
|
||||
if string(result) != tt.expected {
|
||||
t.Errorf("restoreUsageMetadata() = %s, want %s", string(result), tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertAntigravityResponseToGeminiNonStream(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input []byte
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "cpaUsageMetadata restored in response",
|
||||
input: []byte(`{"response":{"modelVersion":"gemini-3-pro","cpaUsageMetadata":{"promptTokenCount":100}}}`),
|
||||
expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
|
||||
},
|
||||
{
|
||||
name: "usageMetadata preserved",
|
||||
input: []byte(`{"response":{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}}`),
|
||||
expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := ConvertAntigravityResponseToGeminiNonStream(context.Background(), "", nil, nil, tt.input, nil)
|
||||
if result != tt.expected {
|
||||
t.Errorf("ConvertAntigravityResponseToGeminiNonStream() = %s, want %s", result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertAntigravityResponseToGeminiStream(t *testing.T) {
|
||||
ctx := context.WithValue(context.Background(), "alt", "")
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
input []byte
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "cpaUsageMetadata restored in streaming response",
|
||||
input: []byte(`data: {"response":{"modelVersion":"gemini-3-pro","cpaUsageMetadata":{"promptTokenCount":100}}}`),
|
||||
expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
results := ConvertAntigravityResponseToGemini(ctx, "", nil, nil, tt.input, nil)
|
||||
if len(results) != 1 {
|
||||
t.Fatalf("expected 1 result, got %d", len(results))
|
||||
}
|
||||
if results[0] != tt.expected {
|
||||
t.Errorf("ConvertAntigravityResponseToGemini() = %s, want %s", results[0], tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -305,12 +305,14 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
}
|
||||
}
|
||||
|
||||
// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch passthrough
|
||||
// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch/codeExecution/urlContext passthrough
|
||||
tools := gjson.GetBytes(rawJSON, "tools")
|
||||
if tools.IsArray() && len(tools.Array()) > 0 {
|
||||
functionToolNode := []byte(`{}`)
|
||||
hasFunction := false
|
||||
googleSearchNodes := make([][]byte, 0)
|
||||
codeExecutionNodes := make([][]byte, 0)
|
||||
urlContextNodes := make([][]byte, 0)
|
||||
for _, t := range tools.Array() {
|
||||
if t.Get("type").String() == "function" {
|
||||
fn := t.Get("function")
|
||||
@@ -370,8 +372,28 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
}
|
||||
googleSearchNodes = append(googleSearchNodes, googleToolNode)
|
||||
}
|
||||
if ce := t.Get("code_execution"); ce.Exists() {
|
||||
codeToolNode := []byte(`{}`)
|
||||
var errSet error
|
||||
codeToolNode, errSet = sjson.SetRawBytes(codeToolNode, "codeExecution", []byte(ce.Raw))
|
||||
if errSet != nil {
|
||||
log.Warnf("Failed to set codeExecution tool: %v", errSet)
|
||||
continue
|
||||
}
|
||||
codeExecutionNodes = append(codeExecutionNodes, codeToolNode)
|
||||
}
|
||||
if uc := t.Get("url_context"); uc.Exists() {
|
||||
urlToolNode := []byte(`{}`)
|
||||
var errSet error
|
||||
urlToolNode, errSet = sjson.SetRawBytes(urlToolNode, "urlContext", []byte(uc.Raw))
|
||||
if errSet != nil {
|
||||
log.Warnf("Failed to set urlContext tool: %v", errSet)
|
||||
continue
|
||||
}
|
||||
urlContextNodes = append(urlContextNodes, urlToolNode)
|
||||
}
|
||||
}
|
||||
if hasFunction || len(googleSearchNodes) > 0 {
|
||||
if hasFunction || len(googleSearchNodes) > 0 || len(codeExecutionNodes) > 0 || len(urlContextNodes) > 0 {
|
||||
toolsNode := []byte("[]")
|
||||
if hasFunction {
|
||||
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
|
||||
@@ -379,6 +401,12 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
for _, googleNode := range googleSearchNodes {
|
||||
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
|
||||
}
|
||||
for _, codeNode := range codeExecutionNodes {
|
||||
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", codeNode)
|
||||
}
|
||||
for _, urlNode := range urlContextNodes {
|
||||
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", urlNode)
|
||||
}
|
||||
out, _ = sjson.SetRawBytes(out, "request.tools", toolsNode)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -283,12 +283,14 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
|
||||
}
|
||||
}
|
||||
|
||||
// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch passthrough
|
||||
// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch/codeExecution/urlContext passthrough
|
||||
tools := gjson.GetBytes(rawJSON, "tools")
|
||||
if tools.IsArray() && len(tools.Array()) > 0 {
|
||||
functionToolNode := []byte(`{}`)
|
||||
hasFunction := false
|
||||
googleSearchNodes := make([][]byte, 0)
|
||||
codeExecutionNodes := make([][]byte, 0)
|
||||
urlContextNodes := make([][]byte, 0)
|
||||
for _, t := range tools.Array() {
|
||||
if t.Get("type").String() == "function" {
|
||||
fn := t.Get("function")
|
||||
@@ -348,8 +350,28 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
|
||||
}
|
||||
googleSearchNodes = append(googleSearchNodes, googleToolNode)
|
||||
}
|
||||
if ce := t.Get("code_execution"); ce.Exists() {
|
||||
codeToolNode := []byte(`{}`)
|
||||
var errSet error
|
||||
codeToolNode, errSet = sjson.SetRawBytes(codeToolNode, "codeExecution", []byte(ce.Raw))
|
||||
if errSet != nil {
|
||||
log.Warnf("Failed to set codeExecution tool: %v", errSet)
|
||||
continue
|
||||
}
|
||||
codeExecutionNodes = append(codeExecutionNodes, codeToolNode)
|
||||
}
|
||||
if uc := t.Get("url_context"); uc.Exists() {
|
||||
urlToolNode := []byte(`{}`)
|
||||
var errSet error
|
||||
urlToolNode, errSet = sjson.SetRawBytes(urlToolNode, "urlContext", []byte(uc.Raw))
|
||||
if errSet != nil {
|
||||
log.Warnf("Failed to set urlContext tool: %v", errSet)
|
||||
continue
|
||||
}
|
||||
urlContextNodes = append(urlContextNodes, urlToolNode)
|
||||
}
|
||||
}
|
||||
if hasFunction || len(googleSearchNodes) > 0 {
|
||||
if hasFunction || len(googleSearchNodes) > 0 || len(codeExecutionNodes) > 0 || len(urlContextNodes) > 0 {
|
||||
toolsNode := []byte("[]")
|
||||
if hasFunction {
|
||||
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
|
||||
@@ -357,6 +379,12 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
|
||||
for _, googleNode := range googleSearchNodes {
|
||||
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
|
||||
}
|
||||
for _, codeNode := range codeExecutionNodes {
|
||||
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", codeNode)
|
||||
}
|
||||
for _, urlNode := range urlContextNodes {
|
||||
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", urlNode)
|
||||
}
|
||||
out, _ = sjson.SetRawBytes(out, "request.tools", toolsNode)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -289,12 +289,14 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
||||
}
|
||||
}
|
||||
|
||||
// tools -> tools[].functionDeclarations + tools[].googleSearch passthrough
|
||||
// tools -> tools[].functionDeclarations + tools[].googleSearch/codeExecution/urlContext passthrough
|
||||
tools := gjson.GetBytes(rawJSON, "tools")
|
||||
if tools.IsArray() && len(tools.Array()) > 0 {
|
||||
functionToolNode := []byte(`{}`)
|
||||
hasFunction := false
|
||||
googleSearchNodes := make([][]byte, 0)
|
||||
codeExecutionNodes := make([][]byte, 0)
|
||||
urlContextNodes := make([][]byte, 0)
|
||||
for _, t := range tools.Array() {
|
||||
if t.Get("type").String() == "function" {
|
||||
fn := t.Get("function")
|
||||
@@ -354,8 +356,28 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
||||
}
|
||||
googleSearchNodes = append(googleSearchNodes, googleToolNode)
|
||||
}
|
||||
if ce := t.Get("code_execution"); ce.Exists() {
|
||||
codeToolNode := []byte(`{}`)
|
||||
var errSet error
|
||||
codeToolNode, errSet = sjson.SetRawBytes(codeToolNode, "codeExecution", []byte(ce.Raw))
|
||||
if errSet != nil {
|
||||
log.Warnf("Failed to set codeExecution tool: %v", errSet)
|
||||
continue
|
||||
}
|
||||
codeExecutionNodes = append(codeExecutionNodes, codeToolNode)
|
||||
}
|
||||
if uc := t.Get("url_context"); uc.Exists() {
|
||||
urlToolNode := []byte(`{}`)
|
||||
var errSet error
|
||||
urlToolNode, errSet = sjson.SetRawBytes(urlToolNode, "urlContext", []byte(uc.Raw))
|
||||
if errSet != nil {
|
||||
log.Warnf("Failed to set urlContext tool: %v", errSet)
|
||||
continue
|
||||
}
|
||||
urlContextNodes = append(urlContextNodes, urlToolNode)
|
||||
}
|
||||
}
|
||||
if hasFunction || len(googleSearchNodes) > 0 {
|
||||
if hasFunction || len(googleSearchNodes) > 0 || len(codeExecutionNodes) > 0 || len(urlContextNodes) > 0 {
|
||||
toolsNode := []byte("[]")
|
||||
if hasFunction {
|
||||
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
|
||||
@@ -363,6 +385,12 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
||||
for _, googleNode := range googleSearchNodes {
|
||||
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
|
||||
}
|
||||
for _, codeNode := range codeExecutionNodes {
|
||||
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", codeNode)
|
||||
}
|
||||
for _, urlNode := range urlContextNodes {
|
||||
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", urlNode)
|
||||
}
|
||||
out, _ = sjson.SetRawBytes(out, "tools", toolsNode)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -395,6 +395,17 @@ func ProcessToolUseEvent(event map[string]interface{}, currentToolUse *ToolUseSt
|
||||
isStop = stop
|
||||
}
|
||||
|
||||
// Debug: log when stop event arrives
|
||||
if isStop {
|
||||
log.Debugf("kiro: toolUseEvent stop=true received for tool %s (ID: %s), currentToolUse buffer len: %d",
|
||||
toolName, toolUseID, func() int {
|
||||
if currentToolUse != nil {
|
||||
return currentToolUse.InputBuffer.Len()
|
||||
}
|
||||
return -1
|
||||
}())
|
||||
}
|
||||
|
||||
// Get input - can be string (fragment) or object (complete)
|
||||
var inputFragment string
|
||||
var inputMap map[string]interface{}
|
||||
@@ -466,12 +477,92 @@ func ProcessToolUseEvent(event map[string]interface{}, currentToolUse *ToolUseSt
|
||||
if isStop && currentToolUse != nil {
|
||||
fullInput := currentToolUse.InputBuffer.String()
|
||||
|
||||
// Check for Write tool with empty or missing input - this happens when Kiro API
|
||||
// completely skips sending input for large file writes
|
||||
if currentToolUse.Name == "Write" && len(strings.TrimSpace(fullInput)) == 0 {
|
||||
log.Warnf("kiro: Write tool received no input from upstream API. The file content may be too large to transmit.")
|
||||
// Return nil to skip this tool use - it will be handled as a truncation error
|
||||
// The caller should emit a text block explaining the error instead
|
||||
if processedIDs != nil {
|
||||
processedIDs[currentToolUse.ToolUseID] = true
|
||||
}
|
||||
log.Infof("kiro: skipping Write tool use %s due to empty input (content too large)", currentToolUse.ToolUseID)
|
||||
// Return a special marker tool use that indicates truncation
|
||||
toolUse := KiroToolUse{
|
||||
ToolUseID: currentToolUse.ToolUseID,
|
||||
Name: "__truncated_write__", // Special marker name
|
||||
Input: map[string]interface{}{
|
||||
"error": "Write tool input was not transmitted by upstream API. The file content is too large.",
|
||||
},
|
||||
}
|
||||
toolUses = append(toolUses, toolUse)
|
||||
return toolUses, nil
|
||||
}
|
||||
|
||||
// Repair and parse the accumulated JSON
|
||||
repairedJSON := RepairJSON(fullInput)
|
||||
var finalInput map[string]interface{}
|
||||
if err := json.Unmarshal([]byte(repairedJSON), &finalInput); err != nil {
|
||||
log.Warnf("kiro: failed to parse accumulated tool input: %v, raw: %s", err, fullInput)
|
||||
finalInput = make(map[string]interface{})
|
||||
|
||||
// Check if this is a Write tool with truncated input (missing content field)
|
||||
// This happens when the Kiro API truncates large tool inputs
|
||||
if currentToolUse.Name == "Write" && strings.Contains(fullInput, "file_path") && !strings.Contains(fullInput, "content") {
|
||||
log.Warnf("kiro: Write tool input was truncated by upstream API (content field missing). The file content may be too large.")
|
||||
// Extract file_path if possible for error context
|
||||
filePath := ""
|
||||
if idx := strings.Index(fullInput, "file_path"); idx >= 0 {
|
||||
// Try to extract the file path value
|
||||
rest := fullInput[idx:]
|
||||
if colonIdx := strings.Index(rest, ":"); colonIdx >= 0 {
|
||||
rest = strings.TrimSpace(rest[colonIdx+1:])
|
||||
if len(rest) > 0 && rest[0] == '"' {
|
||||
rest = rest[1:]
|
||||
if endQuote := strings.Index(rest, "\""); endQuote >= 0 {
|
||||
filePath = rest[:endQuote]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if processedIDs != nil {
|
||||
processedIDs[currentToolUse.ToolUseID] = true
|
||||
}
|
||||
// Return a special marker tool use that indicates truncation
|
||||
toolUse := KiroToolUse{
|
||||
ToolUseID: currentToolUse.ToolUseID,
|
||||
Name: "__truncated_write__", // Special marker name
|
||||
Input: map[string]interface{}{
|
||||
"error": "Write tool content was truncated by upstream API. The file content is too large.",
|
||||
"file_path": filePath,
|
||||
},
|
||||
}
|
||||
toolUses = append(toolUses, toolUse)
|
||||
return toolUses, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Additional check: Write tool parsed successfully but missing content field
|
||||
if currentToolUse.Name == "Write" {
|
||||
if _, hasContent := finalInput["content"]; !hasContent {
|
||||
if filePath, hasPath := finalInput["file_path"]; hasPath {
|
||||
log.Warnf("kiro: Write tool input missing 'content' field, likely truncated by upstream API")
|
||||
if processedIDs != nil {
|
||||
processedIDs[currentToolUse.ToolUseID] = true
|
||||
}
|
||||
// Return a special marker tool use that indicates truncation
|
||||
toolUse := KiroToolUse{
|
||||
ToolUseID: currentToolUse.ToolUseID,
|
||||
Name: "__truncated_write__", // Special marker name
|
||||
Input: map[string]interface{}{
|
||||
"error": "Write tool content field was missing. The file content is too large.",
|
||||
"file_path": filePath,
|
||||
},
|
||||
}
|
||||
toolUses = append(toolUses, toolUse)
|
||||
return toolUses, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
toolUse := KiroToolUse{
|
||||
|
||||
@@ -347,7 +347,7 @@ func convertOpenAIDoneToAnthropic(param *ConvertOpenAIResponseToAnthropicParams)
|
||||
|
||||
// If we haven't sent message_delta yet (no usage info was received), send it now
|
||||
if param.FinishReason != "" && !param.MessageDeltaSent {
|
||||
messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null}}`
|
||||
messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(param.FinishReason))
|
||||
results = append(results, "event: message_delta\ndata: "+messageDeltaJSON+"\n\n")
|
||||
param.MessageDeltaSent = true
|
||||
|
||||
@@ -4,6 +4,7 @@ package util
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
@@ -175,7 +176,7 @@ func convertConstToEnum(jsonStr string) string {
|
||||
return jsonStr
|
||||
}
|
||||
|
||||
// convertEnumValuesToStrings ensures all enum values are strings.
|
||||
// convertEnumValuesToStrings ensures all enum values are strings and the schema type is set to string.
|
||||
// Gemini API requires enum values to be of type string, not numbers or booleans.
|
||||
func convertEnumValuesToStrings(jsonStr string) string {
|
||||
for _, p := range findPaths(jsonStr, "enum") {
|
||||
@@ -185,19 +186,15 @@ func convertEnumValuesToStrings(jsonStr string) string {
|
||||
}
|
||||
|
||||
var stringVals []string
|
||||
needsConversion := false
|
||||
for _, item := range arr.Array() {
|
||||
// Check if any value is not a string
|
||||
if item.Type != gjson.String {
|
||||
needsConversion = true
|
||||
}
|
||||
stringVals = append(stringVals, item.String())
|
||||
}
|
||||
|
||||
// Only update if we found non-string values
|
||||
if needsConversion {
|
||||
jsonStr, _ = sjson.Set(jsonStr, p, stringVals)
|
||||
}
|
||||
// Always update enum values to strings and set type to "string"
|
||||
// This ensures compatibility with Antigravity Gemini which only allows enum for STRING type
|
||||
jsonStr, _ = sjson.Set(jsonStr, p, stringVals)
|
||||
parentPath := trimSuffix(p, ".enum")
|
||||
jsonStr, _ = sjson.Set(jsonStr, joinPath(parentPath, "type"), "string")
|
||||
}
|
||||
return jsonStr
|
||||
}
|
||||
@@ -435,9 +432,54 @@ func removeUnsupportedKeywords(jsonStr string) string {
|
||||
jsonStr, _ = sjson.Delete(jsonStr, p)
|
||||
}
|
||||
}
|
||||
// Remove x-* extension fields (e.g., x-google-enum-descriptions) that are not supported by Gemini API
|
||||
jsonStr = removeExtensionFields(jsonStr)
|
||||
return jsonStr
|
||||
}
|
||||
|
||||
// removeExtensionFields removes all x-* extension fields from the JSON schema.
|
||||
// These are OpenAPI/JSON Schema extension fields that Google APIs don't recognize.
|
||||
func removeExtensionFields(jsonStr string) string {
|
||||
var paths []string
|
||||
walkForExtensions(gjson.Parse(jsonStr), "", &paths)
|
||||
// walkForExtensions returns paths in a way that deeper paths are added before their ancestors
|
||||
// when they are not deleted wholesale, but since we skip children of deleted x-* nodes,
|
||||
// any collected path is safe to delete. We still use DeleteBytes for efficiency.
|
||||
|
||||
b := []byte(jsonStr)
|
||||
for _, p := range paths {
|
||||
b, _ = sjson.DeleteBytes(b, p)
|
||||
}
|
||||
return string(b)
|
||||
}
|
||||
|
||||
func walkForExtensions(value gjson.Result, path string, paths *[]string) {
|
||||
if value.IsArray() {
|
||||
arr := value.Array()
|
||||
for i := len(arr) - 1; i >= 0; i-- {
|
||||
walkForExtensions(arr[i], joinPath(path, strconv.Itoa(i)), paths)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if value.IsObject() {
|
||||
value.ForEach(func(key, val gjson.Result) bool {
|
||||
keyStr := key.String()
|
||||
safeKey := escapeGJSONPathKey(keyStr)
|
||||
childPath := joinPath(path, safeKey)
|
||||
|
||||
// If it's an extension field, we delete it and don't need to look at its children.
|
||||
if strings.HasPrefix(keyStr, "x-") && !isPropertyDefinition(path) {
|
||||
*paths = append(*paths, childPath)
|
||||
return true
|
||||
}
|
||||
|
||||
walkForExtensions(val, childPath, paths)
|
||||
return true
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func cleanupRequiredFields(jsonStr string) string {
|
||||
for _, p := range findPaths(jsonStr, "required") {
|
||||
parentPath := trimSuffix(p, ".required")
|
||||
|
||||
@@ -869,3 +869,129 @@ func TestCleanJSONSchemaForAntigravity_BooleanEnumToString(t *testing.T) {
|
||||
t.Errorf("Boolean enum values should be converted to string format, got: %s", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoveExtensionFields(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "removes x- fields at root",
|
||||
input: `{
|
||||
"type": "object",
|
||||
"x-custom-meta": "value",
|
||||
"properties": {
|
||||
"foo": { "type": "string" }
|
||||
}
|
||||
}`,
|
||||
expected: `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"foo": { "type": "string" }
|
||||
}
|
||||
}`,
|
||||
},
|
||||
{
|
||||
name: "removes x- fields in nested properties",
|
||||
input: `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"foo": {
|
||||
"type": "string",
|
||||
"x-internal-id": 123
|
||||
}
|
||||
}
|
||||
}`,
|
||||
expected: `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"foo": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}`,
|
||||
},
|
||||
{
|
||||
name: "does NOT remove properties named x-",
|
||||
input: `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"x-data": { "type": "string" },
|
||||
"normal": { "type": "number", "x-meta": "remove" }
|
||||
},
|
||||
"required": ["x-data"]
|
||||
}`,
|
||||
expected: `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"x-data": { "type": "string" },
|
||||
"normal": { "type": "number" }
|
||||
},
|
||||
"required": ["x-data"]
|
||||
}`,
|
||||
},
|
||||
{
|
||||
name: "does NOT remove $schema and other meta fields (as requested)",
|
||||
input: `{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"$id": "test",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"foo": { "type": "string" }
|
||||
}
|
||||
}`,
|
||||
expected: `{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"$id": "test",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"foo": { "type": "string" }
|
||||
}
|
||||
}`,
|
||||
},
|
||||
{
|
||||
name: "handles properties named $schema",
|
||||
input: `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"$schema": { "type": "string" }
|
||||
}
|
||||
}`,
|
||||
expected: `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"$schema": { "type": "string" }
|
||||
}
|
||||
}`,
|
||||
},
|
||||
{
|
||||
name: "handles escaping in paths",
|
||||
input: `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"foo.bar": {
|
||||
"type": "string",
|
||||
"x-meta": "remove"
|
||||
}
|
||||
},
|
||||
"x-root.meta": "remove"
|
||||
}`,
|
||||
expected: `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"foo.bar": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
actual := removeExtensionFields(tt.input)
|
||||
compareJSON(t, tt.expected, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -86,12 +86,19 @@ func (s *FileSynthesizer) Synthesize(ctx *SynthesisContext) ([]*coreauth.Auth, e
|
||||
}
|
||||
}
|
||||
|
||||
disabled, _ := metadata["disabled"].(bool)
|
||||
status := coreauth.StatusActive
|
||||
if disabled {
|
||||
status = coreauth.StatusDisabled
|
||||
}
|
||||
|
||||
a := &coreauth.Auth{
|
||||
ID: id,
|
||||
Provider: provider,
|
||||
Label: label,
|
||||
Prefix: prefix,
|
||||
Status: coreauth.StatusActive,
|
||||
Status: status,
|
||||
Disabled: disabled,
|
||||
Attributes: map[string]string{
|
||||
"source": full,
|
||||
"path": full,
|
||||
|
||||
@@ -124,32 +124,47 @@ func (m *Manager) Stream(ctx context.Context, provider string, req *HTTPRequest)
|
||||
out := make(chan StreamEvent)
|
||||
go func() {
|
||||
defer close(out)
|
||||
send := func(ev StreamEvent) bool {
|
||||
if ctx == nil {
|
||||
out <- ev
|
||||
return true
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return false
|
||||
case out <- ev:
|
||||
return true
|
||||
}
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
out <- StreamEvent{Err: ctx.Err()}
|
||||
return
|
||||
case msg, ok := <-respCh:
|
||||
if !ok {
|
||||
out <- StreamEvent{Err: errors.New("wsrelay: stream closed")}
|
||||
_ = send(StreamEvent{Err: errors.New("wsrelay: stream closed")})
|
||||
return
|
||||
}
|
||||
switch msg.Type {
|
||||
case MessageTypeStreamStart:
|
||||
resp := decodeResponse(msg.Payload)
|
||||
out <- StreamEvent{Type: MessageTypeStreamStart, Status: resp.Status, Headers: resp.Headers}
|
||||
if okSend := send(StreamEvent{Type: MessageTypeStreamStart, Status: resp.Status, Headers: resp.Headers}); !okSend {
|
||||
return
|
||||
}
|
||||
case MessageTypeStreamChunk:
|
||||
chunk := decodeChunk(msg.Payload)
|
||||
out <- StreamEvent{Type: MessageTypeStreamChunk, Payload: chunk}
|
||||
if okSend := send(StreamEvent{Type: MessageTypeStreamChunk, Payload: chunk}); !okSend {
|
||||
return
|
||||
}
|
||||
case MessageTypeStreamEnd:
|
||||
out <- StreamEvent{Type: MessageTypeStreamEnd}
|
||||
_ = send(StreamEvent{Type: MessageTypeStreamEnd})
|
||||
return
|
||||
case MessageTypeError:
|
||||
out <- StreamEvent{Type: MessageTypeError, Err: decodeError(msg.Payload)}
|
||||
_ = send(StreamEvent{Type: MessageTypeError, Err: decodeError(msg.Payload)})
|
||||
return
|
||||
case MessageTypeHTTPResp:
|
||||
resp := decodeResponse(msg.Payload)
|
||||
out <- StreamEvent{Type: MessageTypeHTTPResp, Status: resp.Status, Headers: resp.Headers, Payload: resp.Body}
|
||||
_ = send(StreamEvent{Type: MessageTypeHTTPResp, Status: resp.Status, Headers: resp.Headers, Payload: resp.Body})
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
@@ -124,6 +124,7 @@ func (h *GeminiCLIAPIHandler) CLIHandler(c *gin.Context) {
|
||||
log.Errorf("Failed to read response body: %v", err)
|
||||
return
|
||||
}
|
||||
c.Set("API_RESPONSE_TIMESTAMP", time.Now())
|
||||
_, _ = c.Writer.Write(output)
|
||||
c.Set("API_RESPONSE", output)
|
||||
}
|
||||
|
||||
@@ -362,6 +362,11 @@ func appendAPIResponse(c *gin.Context, data []byte) {
|
||||
return
|
||||
}
|
||||
|
||||
// Capture timestamp on first API response
|
||||
if _, exists := c.Get("API_RESPONSE_TIMESTAMP"); !exists {
|
||||
c.Set("API_RESPONSE_TIMESTAMP", time.Now())
|
||||
}
|
||||
|
||||
if existing, exists := c.Get("API_RESPONSE"); exists {
|
||||
if existingBytes, ok := existing.([]byte); ok && len(existingBytes) > 0 {
|
||||
combined := make([]byte, 0, len(existingBytes)+len(data)+1)
|
||||
@@ -507,6 +512,32 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
|
||||
bootstrapRetries := 0
|
||||
maxBootstrapRetries := StreamingBootstrapRetries(h.Cfg)
|
||||
|
||||
sendErr := func(msg *interfaces.ErrorMessage) bool {
|
||||
if ctx == nil {
|
||||
errChan <- msg
|
||||
return true
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return false
|
||||
case errChan <- msg:
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
sendData := func(chunk []byte) bool {
|
||||
if ctx == nil {
|
||||
dataChan <- chunk
|
||||
return true
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return false
|
||||
case dataChan <- chunk:
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
bootstrapEligible := func(err error) bool {
|
||||
status := statusFromError(err)
|
||||
if status == 0 {
|
||||
@@ -566,12 +597,14 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
|
||||
addon = hdr.Clone()
|
||||
}
|
||||
}
|
||||
errChan <- &interfaces.ErrorMessage{StatusCode: status, Error: streamErr, Addon: addon}
|
||||
_ = sendErr(&interfaces.ErrorMessage{StatusCode: status, Error: streamErr, Addon: addon})
|
||||
return
|
||||
}
|
||||
if len(chunk.Payload) > 0 {
|
||||
sentPayload = true
|
||||
dataChan <- cloneBytes(chunk.Payload)
|
||||
if okSendData := sendData(cloneBytes(chunk.Payload)); !okSendData {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,6 +70,58 @@ func (e *failOnceStreamExecutor) Calls() int {
|
||||
return e.calls
|
||||
}
|
||||
|
||||
type payloadThenErrorStreamExecutor struct {
|
||||
mu sync.Mutex
|
||||
calls int
|
||||
}
|
||||
|
||||
func (e *payloadThenErrorStreamExecutor) Identifier() string { return "codex" }
|
||||
|
||||
func (e *payloadThenErrorStreamExecutor) Execute(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) {
|
||||
return coreexecutor.Response{}, &coreauth.Error{Code: "not_implemented", Message: "Execute not implemented"}
|
||||
}
|
||||
|
||||
func (e *payloadThenErrorStreamExecutor) ExecuteStream(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (<-chan coreexecutor.StreamChunk, error) {
|
||||
e.mu.Lock()
|
||||
e.calls++
|
||||
e.mu.Unlock()
|
||||
|
||||
ch := make(chan coreexecutor.StreamChunk, 2)
|
||||
ch <- coreexecutor.StreamChunk{Payload: []byte("partial")}
|
||||
ch <- coreexecutor.StreamChunk{
|
||||
Err: &coreauth.Error{
|
||||
Code: "upstream_closed",
|
||||
Message: "upstream closed",
|
||||
Retryable: false,
|
||||
HTTPStatus: http.StatusBadGateway,
|
||||
},
|
||||
}
|
||||
close(ch)
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
func (e *payloadThenErrorStreamExecutor) Refresh(ctx context.Context, auth *coreauth.Auth) (*coreauth.Auth, error) {
|
||||
return auth, nil
|
||||
}
|
||||
|
||||
func (e *payloadThenErrorStreamExecutor) CountTokens(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) {
|
||||
return coreexecutor.Response{}, &coreauth.Error{Code: "not_implemented", Message: "CountTokens not implemented"}
|
||||
}
|
||||
|
||||
func (e *payloadThenErrorStreamExecutor) HttpRequest(ctx context.Context, auth *coreauth.Auth, req *http.Request) (*http.Response, error) {
|
||||
return nil, &coreauth.Error{
|
||||
Code: "not_implemented",
|
||||
Message: "HttpRequest not implemented",
|
||||
HTTPStatus: http.StatusNotImplemented,
|
||||
}
|
||||
}
|
||||
|
||||
func (e *payloadThenErrorStreamExecutor) Calls() int {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
return e.calls
|
||||
}
|
||||
|
||||
func TestExecuteStreamWithAuthManager_RetriesBeforeFirstByte(t *testing.T) {
|
||||
executor := &failOnceStreamExecutor{}
|
||||
manager := coreauth.NewManager(nil, nil, nil)
|
||||
@@ -130,3 +182,73 @@ func TestExecuteStreamWithAuthManager_RetriesBeforeFirstByte(t *testing.T) {
|
||||
t.Fatalf("expected 2 stream attempts, got %d", executor.Calls())
|
||||
}
|
||||
}
|
||||
|
||||
func TestExecuteStreamWithAuthManager_DoesNotRetryAfterFirstByte(t *testing.T) {
|
||||
executor := &payloadThenErrorStreamExecutor{}
|
||||
manager := coreauth.NewManager(nil, nil, nil)
|
||||
manager.RegisterExecutor(executor)
|
||||
|
||||
auth1 := &coreauth.Auth{
|
||||
ID: "auth1",
|
||||
Provider: "codex",
|
||||
Status: coreauth.StatusActive,
|
||||
Metadata: map[string]any{"email": "test1@example.com"},
|
||||
}
|
||||
if _, err := manager.Register(context.Background(), auth1); err != nil {
|
||||
t.Fatalf("manager.Register(auth1): %v", err)
|
||||
}
|
||||
|
||||
auth2 := &coreauth.Auth{
|
||||
ID: "auth2",
|
||||
Provider: "codex",
|
||||
Status: coreauth.StatusActive,
|
||||
Metadata: map[string]any{"email": "test2@example.com"},
|
||||
}
|
||||
if _, err := manager.Register(context.Background(), auth2); err != nil {
|
||||
t.Fatalf("manager.Register(auth2): %v", err)
|
||||
}
|
||||
|
||||
registry.GetGlobalRegistry().RegisterClient(auth1.ID, auth1.Provider, []*registry.ModelInfo{{ID: "test-model"}})
|
||||
registry.GetGlobalRegistry().RegisterClient(auth2.ID, auth2.Provider, []*registry.ModelInfo{{ID: "test-model"}})
|
||||
t.Cleanup(func() {
|
||||
registry.GetGlobalRegistry().UnregisterClient(auth1.ID)
|
||||
registry.GetGlobalRegistry().UnregisterClient(auth2.ID)
|
||||
})
|
||||
|
||||
handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{
|
||||
Streaming: sdkconfig.StreamingConfig{
|
||||
BootstrapRetries: 1,
|
||||
},
|
||||
}, manager)
|
||||
dataChan, errChan := handler.ExecuteStreamWithAuthManager(context.Background(), "openai", "test-model", []byte(`{"model":"test-model"}`), "")
|
||||
if dataChan == nil || errChan == nil {
|
||||
t.Fatalf("expected non-nil channels")
|
||||
}
|
||||
|
||||
var got []byte
|
||||
for chunk := range dataChan {
|
||||
got = append(got, chunk...)
|
||||
}
|
||||
|
||||
var gotErr error
|
||||
var gotStatus int
|
||||
for msg := range errChan {
|
||||
if msg != nil && msg.Error != nil {
|
||||
gotErr = msg.Error
|
||||
gotStatus = msg.StatusCode
|
||||
}
|
||||
}
|
||||
|
||||
if string(got) != "partial" {
|
||||
t.Fatalf("expected payload partial, got %q", string(got))
|
||||
}
|
||||
if gotErr == nil {
|
||||
t.Fatalf("expected terminal error, got nil")
|
||||
}
|
||||
if gotStatus != http.StatusBadGateway {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusBadGateway, gotStatus)
|
||||
}
|
||||
if executor.Calls() != 1 {
|
||||
t.Fatalf("expected 1 stream attempt, got %d", executor.Calls())
|
||||
}
|
||||
}
|
||||
|
||||
120
sdk/api/handlers/openai/openai_responses_compact_test.go
Normal file
120
sdk/api/handlers/openai/openai_responses_compact_test.go
Normal file
@@ -0,0 +1,120 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
|
||||
coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
coreexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
sdkconfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
|
||||
)
|
||||
|
||||
type compactCaptureExecutor struct {
|
||||
alt string
|
||||
sourceFormat string
|
||||
calls int
|
||||
}
|
||||
|
||||
func (e *compactCaptureExecutor) Identifier() string { return "test-provider" }
|
||||
|
||||
func (e *compactCaptureExecutor) Execute(ctx context.Context, auth *coreauth.Auth, req coreexecutor.Request, opts coreexecutor.Options) (coreexecutor.Response, error) {
|
||||
e.calls++
|
||||
e.alt = opts.Alt
|
||||
e.sourceFormat = opts.SourceFormat.String()
|
||||
return coreexecutor.Response{Payload: []byte(`{"ok":true}`)}, nil
|
||||
}
|
||||
|
||||
func (e *compactCaptureExecutor) ExecuteStream(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (<-chan coreexecutor.StreamChunk, error) {
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (e *compactCaptureExecutor) Refresh(ctx context.Context, auth *coreauth.Auth) (*coreauth.Auth, error) {
|
||||
return auth, nil
|
||||
}
|
||||
|
||||
func (e *compactCaptureExecutor) CountTokens(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) {
|
||||
return coreexecutor.Response{}, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (e *compactCaptureExecutor) HttpRequest(context.Context, *coreauth.Auth, *http.Request) (*http.Response, error) {
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func TestOpenAIResponsesCompactRejectsStream(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
executor := &compactCaptureExecutor{}
|
||||
manager := coreauth.NewManager(nil, nil, nil)
|
||||
manager.RegisterExecutor(executor)
|
||||
|
||||
auth := &coreauth.Auth{ID: "auth1", Provider: executor.Identifier(), Status: coreauth.StatusActive}
|
||||
if _, err := manager.Register(context.Background(), auth); err != nil {
|
||||
t.Fatalf("Register auth: %v", err)
|
||||
}
|
||||
registry.GetGlobalRegistry().RegisterClient(auth.ID, auth.Provider, []*registry.ModelInfo{{ID: "test-model"}})
|
||||
t.Cleanup(func() {
|
||||
registry.GetGlobalRegistry().UnregisterClient(auth.ID)
|
||||
})
|
||||
|
||||
base := handlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, manager)
|
||||
h := NewOpenAIResponsesAPIHandler(base)
|
||||
router := gin.New()
|
||||
router.POST("/v1/responses/compact", h.Compact)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/responses/compact", strings.NewReader(`{"model":"test-model","stream":true}`))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
resp := httptest.NewRecorder()
|
||||
router.ServeHTTP(resp, req)
|
||||
|
||||
if resp.Code != http.StatusBadRequest {
|
||||
t.Fatalf("status = %d, want %d", resp.Code, http.StatusBadRequest)
|
||||
}
|
||||
if executor.calls != 0 {
|
||||
t.Fatalf("executor calls = %d, want 0", executor.calls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpenAIResponsesCompactExecute(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
executor := &compactCaptureExecutor{}
|
||||
manager := coreauth.NewManager(nil, nil, nil)
|
||||
manager.RegisterExecutor(executor)
|
||||
|
||||
auth := &coreauth.Auth{ID: "auth2", Provider: executor.Identifier(), Status: coreauth.StatusActive}
|
||||
if _, err := manager.Register(context.Background(), auth); err != nil {
|
||||
t.Fatalf("Register auth: %v", err)
|
||||
}
|
||||
registry.GetGlobalRegistry().RegisterClient(auth.ID, auth.Provider, []*registry.ModelInfo{{ID: "test-model"}})
|
||||
t.Cleanup(func() {
|
||||
registry.GetGlobalRegistry().UnregisterClient(auth.ID)
|
||||
})
|
||||
|
||||
base := handlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, manager)
|
||||
h := NewOpenAIResponsesAPIHandler(base)
|
||||
router := gin.New()
|
||||
router.POST("/v1/responses/compact", h.Compact)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/responses/compact", strings.NewReader(`{"model":"test-model","input":"hello"}`))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
resp := httptest.NewRecorder()
|
||||
router.ServeHTTP(resp, req)
|
||||
|
||||
if resp.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, want %d", resp.Code, http.StatusOK)
|
||||
}
|
||||
if executor.alt != "responses/compact" {
|
||||
t.Fatalf("alt = %q, want %q", executor.alt, "responses/compact")
|
||||
}
|
||||
if executor.sourceFormat != "openai-response" {
|
||||
t.Fatalf("source format = %q, want %q", executor.sourceFormat, "openai-response")
|
||||
}
|
||||
if strings.TrimSpace(resp.Body.String()) != `{"ok":true}` {
|
||||
t.Fatalf("body = %s", resp.Body.String())
|
||||
}
|
||||
}
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
responsesconverter "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/openai/openai/responses"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// OpenAIResponsesAPIHandler contains the handlers for OpenAIResponses API endpoints.
|
||||
@@ -106,6 +107,49 @@ func (h *OpenAIResponsesAPIHandler) Responses(c *gin.Context) {
|
||||
|
||||
}
|
||||
|
||||
func (h *OpenAIResponsesAPIHandler) Compact(c *gin.Context) {
|
||||
rawJSON, err := c.GetRawData()
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
|
||||
Error: handlers.ErrorDetail{
|
||||
Message: fmt.Sprintf("Invalid request: %v", err),
|
||||
Type: "invalid_request_error",
|
||||
},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
streamResult := gjson.GetBytes(rawJSON, "stream")
|
||||
if streamResult.Type == gjson.True {
|
||||
c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
|
||||
Error: handlers.ErrorDetail{
|
||||
Message: "Streaming not supported for compact responses",
|
||||
Type: "invalid_request_error",
|
||||
},
|
||||
})
|
||||
return
|
||||
}
|
||||
if streamResult.Exists() {
|
||||
if updated, err := sjson.DeleteBytes(rawJSON, "stream"); err == nil {
|
||||
rawJSON = updated
|
||||
}
|
||||
}
|
||||
|
||||
c.Header("Content-Type", "application/json")
|
||||
modelName := gjson.GetBytes(rawJSON, "model").String()
|
||||
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
|
||||
stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
|
||||
resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "responses/compact")
|
||||
stopKeepAlive()
|
||||
if errMsg != nil {
|
||||
h.WriteErrorResponse(c, errMsg)
|
||||
cliCancel(errMsg.Error)
|
||||
return
|
||||
}
|
||||
_, _ = c.Writer.Write(resp)
|
||||
cliCancel()
|
||||
}
|
||||
|
||||
// handleNonStreamingResponse handles non-streaming chat completion responses
|
||||
// for Gemini models. It selects a client from the pool, sends the request, and
|
||||
// aggregates the response before sending it back to the client in OpenAIResponses format.
|
||||
|
||||
@@ -176,13 +176,16 @@ waitForCallback:
|
||||
}
|
||||
|
||||
if result.State != state {
|
||||
log.Errorf("State mismatch: expected %s, got %s", state, result.State)
|
||||
return nil, claude.NewAuthenticationError(claude.ErrInvalidState, fmt.Errorf("state mismatch"))
|
||||
}
|
||||
|
||||
log.Debug("Claude authorization code received; exchanging for tokens")
|
||||
log.Debugf("Code: %s, State: %s", result.Code[:min(20, len(result.Code))], state)
|
||||
|
||||
authBundle, err := authSvc.ExchangeCodeForTokens(ctx, result.Code, state, pkceCodes)
|
||||
if err != nil {
|
||||
log.Errorf("Token exchange failed: %v", err)
|
||||
return nil, claude.NewAuthenticationError(claude.ErrCodeExchangeFailed, err)
|
||||
}
|
||||
|
||||
|
||||
@@ -68,6 +68,7 @@ func (s *FileTokenStore) Save(ctx context.Context, auth *cliproxyauth.Auth) (str
|
||||
return "", err
|
||||
}
|
||||
case auth.Metadata != nil:
|
||||
auth.Metadata["disabled"] = auth.Disabled
|
||||
raw, errMarshal := json.Marshal(auth.Metadata)
|
||||
if errMarshal != nil {
|
||||
return "", fmt.Errorf("auth filestore: marshal metadata failed: %w", errMarshal)
|
||||
@@ -214,6 +215,11 @@ func (s *FileTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Auth,
|
||||
return nil, fmt.Errorf("stat file: %w", err)
|
||||
}
|
||||
id := s.idFor(path, baseDir)
|
||||
disabled, _ := metadata["disabled"].(bool)
|
||||
status := cliproxyauth.StatusActive
|
||||
if disabled {
|
||||
status = cliproxyauth.StatusDisabled
|
||||
}
|
||||
|
||||
// Calculate NextRefreshAfter from expires_at (20 minutes before expiry)
|
||||
var nextRefreshAfter time.Time
|
||||
@@ -228,7 +234,8 @@ func (s *FileTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Auth,
|
||||
Provider: provider,
|
||||
FileName: id,
|
||||
Label: s.labelFor(metadata),
|
||||
Status: cliproxyauth.StatusActive,
|
||||
Status: status,
|
||||
Disabled: disabled,
|
||||
Attributes: map[string]string{"path": path},
|
||||
Metadata: metadata,
|
||||
CreatedAt: info.ModTime(),
|
||||
|
||||
@@ -2,7 +2,10 @@ package auth
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -279,18 +282,19 @@ func (a *KiroAuthenticator) ImportFromKiroIDE(ctx context.Context, cfg *config.C
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
Metadata: map[string]any{
|
||||
"type": "kiro",
|
||||
"access_token": tokenData.AccessToken,
|
||||
"refresh_token": tokenData.RefreshToken,
|
||||
"profile_arn": tokenData.ProfileArn,
|
||||
"expires_at": tokenData.ExpiresAt,
|
||||
"auth_method": tokenData.AuthMethod,
|
||||
"provider": tokenData.Provider,
|
||||
"client_id": tokenData.ClientID,
|
||||
"client_secret": tokenData.ClientSecret,
|
||||
"email": tokenData.Email,
|
||||
"region": tokenData.Region,
|
||||
"start_url": tokenData.StartURL,
|
||||
"type": "kiro",
|
||||
"access_token": tokenData.AccessToken,
|
||||
"refresh_token": tokenData.RefreshToken,
|
||||
"profile_arn": tokenData.ProfileArn,
|
||||
"expires_at": tokenData.ExpiresAt,
|
||||
"auth_method": tokenData.AuthMethod,
|
||||
"provider": tokenData.Provider,
|
||||
"client_id": tokenData.ClientID,
|
||||
"client_secret": tokenData.ClientSecret,
|
||||
"client_id_hash": tokenData.ClientIDHash,
|
||||
"email": tokenData.Email,
|
||||
"region": tokenData.Region,
|
||||
"start_url": tokenData.StartURL,
|
||||
},
|
||||
Attributes: map[string]string{
|
||||
"profile_arn": tokenData.ProfileArn,
|
||||
@@ -325,10 +329,21 @@ func (a *KiroAuthenticator) Refresh(ctx context.Context, cfg *config.Config, aut
|
||||
|
||||
clientID, _ := auth.Metadata["client_id"].(string)
|
||||
clientSecret, _ := auth.Metadata["client_secret"].(string)
|
||||
clientIDHash, _ := auth.Metadata["client_id_hash"].(string)
|
||||
authMethod, _ := auth.Metadata["auth_method"].(string)
|
||||
startURL, _ := auth.Metadata["start_url"].(string)
|
||||
region, _ := auth.Metadata["region"].(string)
|
||||
|
||||
// For Enterprise Kiro IDE (IDC auth), try to load clientId/clientSecret from device registration
|
||||
// if they are missing from metadata. This handles the case where token was imported without
|
||||
// clientId/clientSecret but has clientIdHash.
|
||||
if (clientID == "" || clientSecret == "") && clientIDHash != "" {
|
||||
if loadedClientID, loadedClientSecret, err := loadDeviceRegistrationCredentials(clientIDHash); err == nil {
|
||||
clientID = loadedClientID
|
||||
clientSecret = loadedClientSecret
|
||||
}
|
||||
}
|
||||
|
||||
var tokenData *kiroauth.KiroTokenData
|
||||
var err error
|
||||
|
||||
@@ -339,8 +354,8 @@ func (a *KiroAuthenticator) Refresh(ctx context.Context, cfg *config.Config, aut
|
||||
case clientID != "" && clientSecret != "" && authMethod == "idc" && region != "":
|
||||
// IDC refresh with region-specific endpoint
|
||||
tokenData, err = ssoClient.RefreshTokenWithRegion(ctx, clientID, clientSecret, refreshToken, region, startURL)
|
||||
case clientID != "" && clientSecret != "" && authMethod == "builder-id":
|
||||
// Builder ID refresh with default endpoint
|
||||
case clientID != "" && clientSecret != "" && (authMethod == "builder-id" || authMethod == "idc"):
|
||||
// Builder ID or IDC refresh with default endpoint (us-east-1)
|
||||
tokenData, err = ssoClient.RefreshToken(ctx, clientID, clientSecret, refreshToken)
|
||||
default:
|
||||
// Fallback to Kiro's refresh endpoint (for social auth: Google/GitHub)
|
||||
@@ -367,8 +382,54 @@ func (a *KiroAuthenticator) Refresh(ctx context.Context, cfg *config.Config, aut
|
||||
updated.Metadata["refresh_token"] = tokenData.RefreshToken
|
||||
updated.Metadata["expires_at"] = tokenData.ExpiresAt
|
||||
updated.Metadata["last_refresh"] = now.Format(time.RFC3339) // For double-check optimization
|
||||
// Store clientId/clientSecret if they were loaded from device registration
|
||||
if clientID != "" && updated.Metadata["client_id"] == nil {
|
||||
updated.Metadata["client_id"] = clientID
|
||||
}
|
||||
if clientSecret != "" && updated.Metadata["client_secret"] == nil {
|
||||
updated.Metadata["client_secret"] = clientSecret
|
||||
}
|
||||
// NextRefreshAfter: 20 minutes before expiry
|
||||
updated.NextRefreshAfter = expiresAt.Add(-20 * time.Minute)
|
||||
|
||||
return updated, nil
|
||||
}
|
||||
|
||||
// loadDeviceRegistrationCredentials loads clientId and clientSecret from device registration file.
|
||||
// This is used when refreshing tokens that were imported without clientId/clientSecret.
|
||||
func loadDeviceRegistrationCredentials(clientIDHash string) (clientID, clientSecret string, err error) {
|
||||
if clientIDHash == "" {
|
||||
return "", "", fmt.Errorf("clientIdHash is empty")
|
||||
}
|
||||
|
||||
// Sanitize clientIdHash to prevent path traversal
|
||||
if strings.Contains(clientIDHash, "/") || strings.Contains(clientIDHash, "\\") || strings.Contains(clientIDHash, "..") {
|
||||
return "", "", fmt.Errorf("invalid clientIdHash: contains path separator")
|
||||
}
|
||||
|
||||
homeDir, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("failed to get home directory: %w", err)
|
||||
}
|
||||
|
||||
deviceRegPath := filepath.Join(homeDir, ".aws", "sso", "cache", clientIDHash+".json")
|
||||
data, err := os.ReadFile(deviceRegPath)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("failed to read device registration file: %w", err)
|
||||
}
|
||||
|
||||
var deviceReg struct {
|
||||
ClientID string `json:"clientId"`
|
||||
ClientSecret string `json:"clientSecret"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(data, &deviceReg); err != nil {
|
||||
return "", "", fmt.Errorf("failed to parse device registration: %w", err)
|
||||
}
|
||||
|
||||
if deviceReg.ClientID == "" || deviceReg.ClientSecret == "" {
|
||||
return "", "", fmt.Errorf("device registration missing clientId or clientSecret")
|
||||
}
|
||||
|
||||
return deviceReg.ClientID, deviceReg.ClientSecret, nil
|
||||
}
|
||||
|
||||
@@ -718,6 +718,7 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string
|
||||
go func(streamCtx context.Context, streamAuth *Auth, streamProvider string, streamChunks <-chan cliproxyexecutor.StreamChunk) {
|
||||
defer close(out)
|
||||
var failed bool
|
||||
forward := true
|
||||
for chunk := range streamChunks {
|
||||
if chunk.Err != nil && !failed {
|
||||
failed = true
|
||||
@@ -728,7 +729,18 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string
|
||||
}
|
||||
m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: false, Error: rerr})
|
||||
}
|
||||
out <- chunk
|
||||
if !forward {
|
||||
continue
|
||||
}
|
||||
if streamCtx == nil {
|
||||
out <- chunk
|
||||
continue
|
||||
}
|
||||
select {
|
||||
case <-streamCtx.Done():
|
||||
forward = false
|
||||
case out <- chunk:
|
||||
}
|
||||
}
|
||||
if !failed {
|
||||
m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: true})
|
||||
|
||||
@@ -329,8 +329,8 @@ func (a *Auth) AccountInfo() (string, string) {
|
||||
}
|
||||
}
|
||||
|
||||
// For GitHub provider, return username
|
||||
if strings.ToLower(a.Provider) == "github" {
|
||||
// For GitHub provider (including github-copilot), return username
|
||||
if strings.HasPrefix(strings.ToLower(a.Provider), "github") {
|
||||
if a.Metadata != nil {
|
||||
if username, ok := a.Metadata["username"].(string); ok {
|
||||
username = strings.TrimSpace(username)
|
||||
|
||||
@@ -708,6 +708,10 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
|
||||
if a == nil || a.ID == "" {
|
||||
return
|
||||
}
|
||||
if a.Disabled {
|
||||
GlobalModelRegistry().UnregisterClient(a.ID)
|
||||
return
|
||||
}
|
||||
authKind := strings.ToLower(strings.TrimSpace(a.Attributes["auth_kind"]))
|
||||
if authKind == "" {
|
||||
if kind, _ := a.AccountInfo(); strings.EqualFold(kind, "api_key") {
|
||||
@@ -1412,29 +1416,44 @@ func (s *Service) fetchKiroModels(a *coreauth.Auth) []*ModelInfo {
|
||||
}
|
||||
|
||||
// extractKiroTokenData extracts KiroTokenData from auth attributes and metadata.
|
||||
// It supports both config-based tokens (stored in Attributes) and file-based tokens (stored in Metadata).
|
||||
func (s *Service) extractKiroTokenData(a *coreauth.Auth) *kiroauth.KiroTokenData {
|
||||
if a == nil || a.Attributes == nil {
|
||||
if a == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
accessToken := strings.TrimSpace(a.Attributes["access_token"])
|
||||
var accessToken, profileArn, refreshToken string
|
||||
|
||||
// Priority 1: Try to get from Attributes (config.yaml source)
|
||||
if a.Attributes != nil {
|
||||
accessToken = strings.TrimSpace(a.Attributes["access_token"])
|
||||
profileArn = strings.TrimSpace(a.Attributes["profile_arn"])
|
||||
refreshToken = strings.TrimSpace(a.Attributes["refresh_token"])
|
||||
}
|
||||
|
||||
// Priority 2: If not found in Attributes, try Metadata (JSON file source)
|
||||
if accessToken == "" && a.Metadata != nil {
|
||||
if at, ok := a.Metadata["access_token"].(string); ok {
|
||||
accessToken = strings.TrimSpace(at)
|
||||
}
|
||||
if pa, ok := a.Metadata["profile_arn"].(string); ok {
|
||||
profileArn = strings.TrimSpace(pa)
|
||||
}
|
||||
if rt, ok := a.Metadata["refresh_token"].(string); ok {
|
||||
refreshToken = strings.TrimSpace(rt)
|
||||
}
|
||||
}
|
||||
|
||||
// access_token is required
|
||||
if accessToken == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
tokenData := &kiroauth.KiroTokenData{
|
||||
AccessToken: accessToken,
|
||||
ProfileArn: strings.TrimSpace(a.Attributes["profile_arn"]),
|
||||
return &kiroauth.KiroTokenData{
|
||||
AccessToken: accessToken,
|
||||
ProfileArn: profileArn,
|
||||
RefreshToken: refreshToken,
|
||||
}
|
||||
|
||||
// Also try to get refresh token from metadata
|
||||
if a.Metadata != nil {
|
||||
if rt, ok := a.Metadata["refresh_token"].(string); ok {
|
||||
tokenData.RefreshToken = rt
|
||||
}
|
||||
}
|
||||
|
||||
return tokenData
|
||||
}
|
||||
|
||||
// convertKiroAPIModels converts Kiro API models to ModelInfo slice.
|
||||
|
||||
@@ -2,6 +2,7 @@ package test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -2778,12 +2779,18 @@ func runThinkingTests(t *testing.T, cases []thinkingTestCase) {
|
||||
|
||||
// Verify clear_thinking for iFlow GLM models when enable_thinking=true
|
||||
if tc.to == "iflow" && tc.expectField == "chat_template_kwargs.enable_thinking" && tc.expectValue == "true" {
|
||||
baseModel := thinking.ParseSuffix(tc.model).ModelName
|
||||
isGLM := strings.HasPrefix(strings.ToLower(baseModel), "glm")
|
||||
ctVal := gjson.GetBytes(body, "chat_template_kwargs.clear_thinking")
|
||||
if !ctVal.Exists() {
|
||||
t.Fatalf("expected clear_thinking field not found for GLM model, body=%s", string(body))
|
||||
}
|
||||
if ctVal.Bool() != false {
|
||||
t.Fatalf("clear_thinking: expected false, got %v, body=%s", ctVal.Bool(), string(body))
|
||||
if isGLM {
|
||||
if !ctVal.Exists() {
|
||||
t.Fatalf("expected clear_thinking field not found for GLM model, body=%s", string(body))
|
||||
}
|
||||
if ctVal.Bool() != false {
|
||||
t.Fatalf("clear_thinking: expected false, got %v, body=%s", ctVal.Bool(), string(body))
|
||||
}
|
||||
} else if ctVal.Exists() {
|
||||
t.Fatalf("expected no clear_thinking field for non-GLM enable_thinking model, body=%s", string(body))
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user