// Mirror of https://github.com/router-for-me/CLIProxyAPIPlus.git
// (synced 2026-03-08 06:43:41 +00:00; 188 lines, 4.6 KiB, Go).

package kiro
|
|
|
|
import (
|
|
"math"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// TokenMetrics is the per-token record of request outcomes kept by
// TokenScorer. The exported fields are the values used for scoring; the
// unexported fields are running totals from which the averages are derived.
type TokenMetrics struct {
	// SuccessRate is successCount divided by TotalRequests (0.0 - 1.0).
	// A newly created record starts at 1.0.
	SuccessRate float64
	// AvgLatency is totalLatency divided by TotalRequests, in milliseconds.
	AvgLatency float64
	// QuotaRemaining is the remaining quota (0.0 - 1.0). A newly created
	// record starts at 1.0.
	QuotaRemaining float64
	// LastUsed is the time of the most recently recorded request; it is the
	// zero time until a request has been recorded.
	LastUsed time.Time
	// FailCount is the number of consecutive failures; a success resets it
	// to zero.
	FailCount int
	// TotalRequests is the number of requests recorded so far.
	TotalRequests int

	// successCount is the number of recorded requests that succeeded.
	successCount int
	// totalLatency is the cumulative latency of all recorded requests, in
	// milliseconds.
	totalLatency float64
}
|
|
|
|
// TokenScorer manages token metrics and scoring.
|
|
type TokenScorer struct {
|
|
mu sync.RWMutex
|
|
metrics map[string]*TokenMetrics
|
|
|
|
// Scoring weights
|
|
successRateWeight float64
|
|
quotaWeight float64
|
|
latencyWeight float64
|
|
lastUsedWeight float64
|
|
failPenaltyMultiplier float64
|
|
}
|
|
|
|
// NewTokenScorer creates a new TokenScorer with default weights.
|
|
func NewTokenScorer() *TokenScorer {
|
|
return &TokenScorer{
|
|
metrics: make(map[string]*TokenMetrics),
|
|
successRateWeight: 0.4,
|
|
quotaWeight: 0.25,
|
|
latencyWeight: 0.2,
|
|
lastUsedWeight: 0.15,
|
|
failPenaltyMultiplier: 0.1,
|
|
}
|
|
}
|
|
|
|
// getOrCreateMetrics returns existing metrics or creates new ones.
|
|
func (s *TokenScorer) getOrCreateMetrics(tokenKey string) *TokenMetrics {
|
|
if m, ok := s.metrics[tokenKey]; ok {
|
|
return m
|
|
}
|
|
m := &TokenMetrics{
|
|
SuccessRate: 1.0,
|
|
QuotaRemaining: 1.0,
|
|
}
|
|
s.metrics[tokenKey] = m
|
|
return m
|
|
}
|
|
|
|
// RecordRequest records the result of a request for a token.
|
|
func (s *TokenScorer) RecordRequest(tokenKey string, success bool, latency time.Duration) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
m := s.getOrCreateMetrics(tokenKey)
|
|
m.TotalRequests++
|
|
m.LastUsed = time.Now()
|
|
m.totalLatency += float64(latency.Milliseconds())
|
|
|
|
if success {
|
|
m.successCount++
|
|
m.FailCount = 0
|
|
} else {
|
|
m.FailCount++
|
|
}
|
|
|
|
// Update derived metrics
|
|
if m.TotalRequests > 0 {
|
|
m.SuccessRate = float64(m.successCount) / float64(m.TotalRequests)
|
|
m.AvgLatency = m.totalLatency / float64(m.TotalRequests)
|
|
}
|
|
}
|
|
|
|
// SetQuotaRemaining updates the remaining quota for a token.
|
|
func (s *TokenScorer) SetQuotaRemaining(tokenKey string, quota float64) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
m := s.getOrCreateMetrics(tokenKey)
|
|
m.QuotaRemaining = quota
|
|
}
|
|
|
|
// GetMetrics returns a copy of the metrics for a token.
|
|
func (s *TokenScorer) GetMetrics(tokenKey string) *TokenMetrics {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
|
|
if m, ok := s.metrics[tokenKey]; ok {
|
|
copy := *m
|
|
return ©
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// CalculateScore computes the score for a token (higher is better).
|
|
func (s *TokenScorer) CalculateScore(tokenKey string) float64 {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
|
|
m, ok := s.metrics[tokenKey]
|
|
if !ok {
|
|
return 1.0 // New tokens get a high initial score
|
|
}
|
|
|
|
// Success rate component (0-1)
|
|
successScore := m.SuccessRate
|
|
|
|
// Quota component (0-1)
|
|
quotaScore := m.QuotaRemaining
|
|
|
|
// Latency component (normalized, lower is better)
|
|
// Using exponential decay: score = e^(-latency/1000)
|
|
// 1000ms latency -> ~0.37 score, 100ms -> ~0.90 score
|
|
latencyScore := math.Exp(-m.AvgLatency / 1000.0)
|
|
if m.TotalRequests == 0 {
|
|
latencyScore = 1.0
|
|
}
|
|
|
|
// Last used component (prefer tokens not recently used)
|
|
// Score increases as time since last use increases
|
|
timeSinceUse := time.Since(m.LastUsed).Seconds()
|
|
// Normalize: 60 seconds -> ~0.63 score, 0 seconds -> 0 score
|
|
lastUsedScore := 1.0 - math.Exp(-timeSinceUse/60.0)
|
|
if m.LastUsed.IsZero() {
|
|
lastUsedScore = 1.0
|
|
}
|
|
|
|
// Calculate weighted score
|
|
score := s.successRateWeight*successScore +
|
|
s.quotaWeight*quotaScore +
|
|
s.latencyWeight*latencyScore +
|
|
s.lastUsedWeight*lastUsedScore
|
|
|
|
// Apply consecutive failure penalty
|
|
if m.FailCount > 0 {
|
|
penalty := s.failPenaltyMultiplier * float64(m.FailCount)
|
|
score = score * math.Max(0, 1.0-penalty)
|
|
}
|
|
|
|
return score
|
|
}
|
|
|
|
// SelectBestToken selects the token with the highest score.
|
|
func (s *TokenScorer) SelectBestToken(tokens []string) string {
|
|
if len(tokens) == 0 {
|
|
return ""
|
|
}
|
|
if len(tokens) == 1 {
|
|
return tokens[0]
|
|
}
|
|
|
|
bestToken := tokens[0]
|
|
bestScore := s.CalculateScore(tokens[0])
|
|
|
|
for _, token := range tokens[1:] {
|
|
score := s.CalculateScore(token)
|
|
if score > bestScore {
|
|
bestScore = score
|
|
bestToken = token
|
|
}
|
|
}
|
|
|
|
return bestToken
|
|
}
|
|
|
|
// ResetMetrics clears all metrics for a token.
|
|
func (s *TokenScorer) ResetMetrics(tokenKey string) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
delete(s.metrics, tokenKey)
|
|
}
|
|
|
|
// ResetAllMetrics clears all stored metrics.
|
|
func (s *TokenScorer) ResetAllMetrics() {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
s.metrics = make(map[string]*TokenMetrics)
|
|
}
|