nex/backend/internal/conversion/openai/encoder.go
lanyuanxiaoyao d92db73937 refactor: backend code quality cleanup - reuse shared libraries, use the standard library, type-safe error checks
## High-priority fixes
- stats_service_impl: use strings.SplitN instead of error-prone index-based splitting
- provider_handler: use errors.Is(err, gorm.ErrDuplicatedKey) instead of string matching
- client: rewrite isNetworkError with type-safe errors.As/errors.Is checks (see the sketch after this list)
- proxy_handler: parse JSON with the encoding/json standard library (extractModelName, isStreamRequest)
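The isNetworkError rewrite mentioned above replaces substring matching on err.Error() with typed checks. A minimal, self-contained sketch of that style (the concrete set of conditions in the real client may differ):

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"net"
	"syscall"
)

// isNetworkError reports whether err looks like a network-level failure,
// using typed error checks instead of matching substrings of err.Error().
func isNetworkError(err error) bool {
	if err == nil {
		return false
	}
	// Cancelled or timed-out contexts.
	if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
		return true
	}
	// net.Error covers dial/read/write timeouts reported by the net package.
	var netErr net.Error
	if errors.As(err, &netErr) && netErr.Timeout() {
		return true
	}
	// Connection-level failures such as "connection refused" or "connection reset".
	if errors.Is(err, syscall.ECONNREFUSED) || errors.Is(err, syscall.ECONNRESET) {
		return true
	}
	// Any other error originating from a network operation.
	var opErr *net.OpError
	return errors.As(err, &opErr)
}

func main() {
	cause := &net.OpError{Op: "dial", Net: "tcp", Err: syscall.ECONNREFUSED}
	fmt.Println(isNetworkError(fmt.Errorf("request failed: %w", cause))) // true
}
```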

## Medium-priority fixes
- stats_handler: add a parseDateParam helper to eliminate duplicated date parsing
- pkg/errors: add ErrRequestCreate/Send/ResponseRead error types and a WithCause method (sketched below)
- client: use structured errors instead of fmt.Errorf
- ConversionEngine: inject the logger as a dependency, replacing all zap.L() calls
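The pkg/errors additions themselves are not part of this file; the sketch below only illustrates how an error type with a WithCause method stays compatible with errors.Is/errors.As by implementing Unwrap. AppError is a hypothetical name, and ErrResponseRead is modeled on the bullet above rather than copied from the real package:

```go
package main

import (
	"errors"
	"fmt"
	"io"
)

// AppError is an illustrative structured error with a stable code and an
// optional wrapped cause.
type AppError struct {
	Code    string
	Message string
	Cause   error
}

func (e *AppError) Error() string {
	if e.Cause != nil {
		return fmt.Sprintf("%s: %s: %v", e.Code, e.Message, e.Cause)
	}
	return fmt.Sprintf("%s: %s", e.Code, e.Message)
}

// Unwrap lets errors.Is and errors.As see through to the underlying cause.
func (e *AppError) Unwrap() error { return e.Cause }

// WithCause returns a copy of the error carrying the underlying cause.
func (e *AppError) WithCause(cause error) *AppError {
	c := *e
	c.Cause = cause
	return &c
}

var ErrResponseRead = &AppError{Code: "response_read", Message: "failed to read response body"}

func main() {
	err := ErrResponseRead.WithCause(io.ErrUnexpectedEOF)
	fmt.Println(err)                                 // response_read: failed to read response body: unexpected EOF
	fmt.Println(errors.Is(err, io.ErrUnexpectedEOF)) // true
}
```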

## Low-priority fixes
- encoder: remove joinStrings, use strings.Join
- adapter: remove the modelInfoRegex regular expression, use an isModelInfoPath string function (sketched below)
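The adapter code is not shown here, so the exact route shape below is an assumption; the sketch only demonstrates replacing a compiled regexp with plain strings checks for a /v1/models/{id}-style path:

```go
package main

import (
	"fmt"
	"strings"
)

// isModelInfoPath reports whether path looks like /v1/models/{id},
// using simple string checks instead of a regular expression.
func isModelInfoPath(path string) bool {
	const prefix = "/v1/models/"
	if !strings.HasPrefix(path, prefix) {
		return false
	}
	id := strings.TrimPrefix(path, prefix)
	return id != "" && !strings.Contains(id, "/")
}

func main() {
	fmt.Println(isModelInfoPath("/v1/models/gpt-4o")) // true
	fmt.Println(isModelInfoPath("/v1/models"))        // false
	fmt.Println(isModelInfoPath("/v1/models/a/b"))    // false
}
```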

## Documentation updates
- README.md: add sections on shared-library usage and coding conventions
- specs: sync delta specs into the main specs (error-handling, structured-logging, request-validation)

## Archive
- openspec/changes/archive/2026-04-20-refactor-backend-code-quality/
2026-04-20 16:42:48 +08:00


package openai
import (
"encoding/json"
"strings"
"time"
"nex/backend/internal/conversion"
"nex/backend/internal/conversion/canonical"
)
// encodeRequest encodes a canonical request into an OpenAI request body.
func encodeRequest(req *canonical.CanonicalRequest, provider *conversion.TargetProvider) ([]byte, error) {
result := map[string]any{
"model": provider.ModelName,
"stream": req.Stream,
}
// System prompt + conversation messages
messages := encodeSystemAndMessages(req)
result["messages"] = messages
// Parameters
encodeParametersInto(req, result)
// Tools
if len(req.Tools) > 0 {
tools := make([]map[string]any, len(req.Tools))
for i, t := range req.Tools {
tools[i] = map[string]any{
"type": "function",
"function": map[string]any{
"name": t.Name,
"description": t.Description,
"parameters": t.InputSchema,
},
}
}
result["tools"] = tools
}
if req.ToolChoice != nil {
result["tool_choice"] = encodeToolChoice(req.ToolChoice)
}
// Common fields
if req.UserID != "" {
result["user"] = req.UserID
}
if req.OutputFormat != nil {
result["response_format"] = encodeOutputFormat(req.OutputFormat)
}
if req.ParallelToolUse != nil {
result["parallel_tool_calls"] = *req.ParallelToolUse
}
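// Map the canonical thinking config onto OpenAI's reasoning_effort:
// "disabled" becomes "none"; otherwise use the requested effort, defaulting to "medium".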
if req.Thinking != nil {
switch req.Thinking.Type {
case "disabled":
result["reasoning_effort"] = "none"
default:
if req.Thinking.Effort != "" {
result["reasoning_effort"] = req.Thinking.Effort
} else {
result["reasoning_effort"] = "medium"
}
}
}
body, err := json.Marshal(result)
if err != nil {
return nil, conversion.NewConversionError(conversion.ErrorCodeEncodingFailure, "failed to encode OpenAI request").WithCause(err)
}
return body, nil
}
// encodeSystemAndMessages encodes the system prompt and the message list.
func encodeSystemAndMessages(req *canonical.CanonicalRequest) []map[string]any {
var messages []map[string]any
// System prompt
switch v := req.System.(type) {
case string:
if v != "" {
messages = append(messages, map[string]any{
"role": "system",
"content": v,
})
}
case []canonical.SystemBlock:
var parts []string
for _, b := range v {
parts = append(parts, b.Text)
}
text := strings.Join(parts, "\n\n")
if text != "" {
messages = append(messages, map[string]any{
"role": "system",
"content": text,
})
}
}
// Messages
for _, msg := range req.Messages {
encoded := encodeMessage(msg)
messages = append(messages, encoded...)
}
// Merge consecutive same-role messages
return mergeConsecutiveRoles(messages)
}
// encodeMessage encodes a single canonical message.
func encodeMessage(msg canonical.CanonicalMessage) []map[string]any {
switch msg.Role {
case canonical.RoleUser:
return []map[string]any{{
"role": "user",
"content": encodeUserContent(msg.Content),
}}
case canonical.RoleAssistant:
m := map[string]any{"role": "assistant"}
var textParts []string
var toolUses []canonical.ContentBlock
for _, b := range msg.Content {
switch b.Type {
case "text":
textParts = append(textParts, b.Text)
case "tool_use":
toolUses = append(toolUses, b)
}
}
if len(toolUses) > 0 {
if len(textParts) > 0 {
m["content"] = strings.Join(textParts, "")
} else {
m["content"] = nil
}
tcs := make([]map[string]any, len(toolUses))
for i, tu := range toolUses {
tcs[i] = map[string]any{
"id": tu.ID,
"type": "function",
"function": map[string]any{
"name": tu.Name,
"arguments": string(tu.Input),
},
}
}
m["tool_calls"] = tcs
} else if len(textParts) > 0 {
m["content"] = strings.Join(textParts, "")
} else {
m["content"] = ""
}
return []map[string]any{m}
case canonical.RoleTool:
for _, b := range msg.Content {
if b.Type == "tool_result" {
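// Tool result content may arrive as a JSON-encoded string or as raw JSON;
// prefer the decoded string when it parses.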
var contentStr string
if b.Content != nil {
var s string
if json.Unmarshal(b.Content, &s) == nil {
contentStr = s
} else {
contentStr = string(b.Content)
}
}
return []map[string]any{{
"role": "tool",
"tool_call_id": b.ToolUseID,
"content": contentStr,
}}
}
}
}
return nil
}
// encodeUserContent encodes user message content.
func encodeUserContent(blocks []canonical.ContentBlock) any {
if len(blocks) == 1 && blocks[0].Type == "text" {
return blocks[0].Text
}
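// Multiple blocks become an OpenAI content-parts array; non-text parts currently carry only their type.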
parts := make([]map[string]any, 0, len(blocks))
for _, b := range blocks {
switch b.Type {
case "text":
parts = append(parts, map[string]any{"type": "text", "text": b.Text})
case "image":
parts = append(parts, map[string]any{"type": "image_url"})
case "audio":
parts = append(parts, map[string]any{"type": "input_audio"})
case "file":
parts = append(parts, map[string]any{"type": "file"})
}
}
if len(parts) == 0 {
return ""
}
return parts
}
// encodeToolChoice encodes the tool choice.
func encodeToolChoice(choice *canonical.ToolChoice) any {
switch choice.Type {
case "auto":
return "auto"
case "none":
return "none"
case "any":
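// Canonical "any" corresponds to OpenAI's "required".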
return "required"
case "tool":
return map[string]any{
"type": "function",
"function": map[string]any{
"name": choice.Name,
},
}
}
return "auto"
}
// encodeParametersInto writes sampling parameters into the result map.
func encodeParametersInto(req *canonical.CanonicalRequest, result map[string]any) {
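// MaxTokens is written as max_completion_tokens, the newer OpenAI field that supersedes max_tokens for chat completions.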
if req.Parameters.MaxTokens != nil {
result["max_completion_tokens"] = *req.Parameters.MaxTokens
}
if req.Parameters.Temperature != nil {
result["temperature"] = *req.Parameters.Temperature
}
if req.Parameters.TopP != nil {
result["top_p"] = *req.Parameters.TopP
}
if req.Parameters.FrequencyPenalty != nil {
result["frequency_penalty"] = *req.Parameters.FrequencyPenalty
}
if req.Parameters.PresencePenalty != nil {
result["presence_penalty"] = *req.Parameters.PresencePenalty
}
if len(req.Parameters.StopSequences) > 0 {
result["stop"] = req.Parameters.StopSequences
}
}
// encodeOutputFormat encodes the output format.
func encodeOutputFormat(format *canonical.OutputFormat) map[string]any {
switch format.Type {
case "json_object":
return map[string]any{"type": "json_object"}
case "json_schema":
m := map[string]any{"type": "json_schema"}
schema := map[string]any{
"name": format.Name,
}
if format.Schema != nil {
schema["schema"] = format.Schema
}
if format.Strict != nil {
schema["strict"] = *format.Strict
}
m["json_schema"] = schema
return m
}
return nil
}
// encodeResponse encodes a canonical response into an OpenAI response body.
func encodeResponse(resp *canonical.CanonicalResponse) ([]byte, error) {
var textParts []string
var thinkingParts []string
var toolUses []canonical.ContentBlock
for _, b := range resp.Content {
switch b.Type {
case "text":
textParts = append(textParts, b.Text)
case "thinking":
thinkingParts = append(thinkingParts, b.Thinking)
case "tool_use":
toolUses = append(toolUses, b)
}
}
message := map[string]any{"role": "assistant"}
if len(toolUses) > 0 {
if len(textParts) > 0 {
message["content"] = strings.Join(textParts, "")
} else {
message["content"] = nil
}
tcs := make([]map[string]any, len(toolUses))
for i, tu := range toolUses {
tcs[i] = map[string]any{
"id": tu.ID,
"type": "function",
"function": map[string]any{
"name": tu.Name,
"arguments": string(tu.Input),
},
}
}
message["tool_calls"] = tcs
} else if len(textParts) > 0 {
message["content"] = strings.Join(textParts, "")
} else {
message["content"] = ""
}
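// reasoning_content is a non-standard field used by some OpenAI-compatible reasoning APIs.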
if len(thinkingParts) > 0 {
message["reasoning_content"] = strings.Join(thinkingParts, "")
}
var finishReason *string
if resp.StopReason != nil {
fr := mapCanonicalToFinishReason(*resp.StopReason)
finishReason = &fr
}
result := map[string]any{
"id": resp.ID,
"object": "chat.completion",
"created": time.Now().Unix(),
"model": resp.Model,
"choices": []map[string]any{{
"index": 0,
"message": message,
"finish_reason": finishReason,
}},
"usage": encodeUsage(resp.Usage),
}
body, err := json.Marshal(result)
if err != nil {
return nil, conversion.NewConversionError(conversion.ErrorCodeEncodingFailure, "failed to encode OpenAI response").WithCause(err)
}
return body, nil
}
// mapCanonicalToFinishReason maps a canonical stop reason to an OpenAI finish_reason.
func mapCanonicalToFinishReason(reason canonical.StopReason) string {
switch reason {
case canonical.StopReasonEndTurn:
return "stop"
case canonical.StopReasonMaxTokens:
return "length"
case canonical.StopReasonToolUse:
return "tool_calls"
case canonical.StopReasonContentFilter:
return "content_filter"
case canonical.StopReasonStopSequence:
return "stop"
case canonical.StopReasonRefusal:
return "stop"
default:
return "stop"
}
}
// encodeUsage encodes token usage.
func encodeUsage(usage canonical.CanonicalUsage) map[string]any {
result := map[string]any{
"prompt_tokens": usage.InputTokens,
"completion_tokens": usage.OutputTokens,
"total_tokens": usage.InputTokens + usage.OutputTokens,
}
if usage.CacheReadTokens != nil && *usage.CacheReadTokens > 0 {
result["prompt_tokens_details"] = map[string]any{
"cached_tokens": *usage.CacheReadTokens,
}
}
if usage.ReasoningTokens != nil && *usage.ReasoningTokens > 0 {
result["completion_tokens_details"] = map[string]any{
"reasoning_tokens": *usage.ReasoningTokens,
}
}
return result
}
// encodeModelsResponse encodes the model list response.
func encodeModelsResponse(list *canonical.CanonicalModelList) ([]byte, error) {
data := make([]map[string]any, len(list.Models))
for i, m := range list.Models {
created := int64(0)
if m.Created != 0 {
created = m.Created
}
ownedBy := "unknown"
if m.OwnedBy != "" {
ownedBy = m.OwnedBy
}
data[i] = map[string]any{
"id": m.ID,
"object": "model",
"created": created,
"owned_by": ownedBy,
}
}
return json.Marshal(map[string]any{
"object": "list",
"data": data,
})
}
// encodeModelInfoResponse encodes the model detail response.
func encodeModelInfoResponse(info *canonical.CanonicalModelInfo) ([]byte, error) {
created := int64(0)
if info.Created != 0 {
created = info.Created
}
ownedBy := "unknown"
if info.OwnedBy != "" {
ownedBy = info.OwnedBy
}
return json.Marshal(map[string]any{
"id": info.ID,
"object": "model",
"created": created,
"owned_by": ownedBy,
})
}
// encodeEmbeddingRequest encodes an embedding request.
func encodeEmbeddingRequest(req *canonical.CanonicalEmbeddingRequest, provider *conversion.TargetProvider) ([]byte, error) {
result := map[string]any{
"model": provider.ModelName,
"input": req.Input,
}
if req.EncodingFormat != "" {
result["encoding_format"] = req.EncodingFormat
}
if req.Dimensions != nil {
result["dimensions"] = *req.Dimensions
}
return json.Marshal(result)
}
// encodeEmbeddingResponse encodes an embedding response.
func encodeEmbeddingResponse(resp *canonical.CanonicalEmbeddingResponse) ([]byte, error) {
data := make([]map[string]any, len(resp.Data))
for i, d := range resp.Data {
data[i] = map[string]any{
"index": d.Index,
"embedding": d.Embedding,
}
}
return json.Marshal(map[string]any{
"object": "list",
"data": data,
"model": resp.Model,
"usage": resp.Usage,
})
}
// encodeRerankRequest encodes a rerank request.
func encodeRerankRequest(req *canonical.CanonicalRerankRequest, provider *conversion.TargetProvider) ([]byte, error) {
result := map[string]any{
"model": provider.ModelName,
"query": req.Query,
"documents": req.Documents,
}
if req.TopN != nil {
result["top_n"] = *req.TopN
}
if req.ReturnDocuments != nil {
result["return_documents"] = *req.ReturnDocuments
}
return json.Marshal(result)
}
// encodeRerankResponse encodes a rerank response.
func encodeRerankResponse(resp *canonical.CanonicalRerankResponse) ([]byte, error) {
results := make([]map[string]any, len(resp.Results))
for i, r := range resp.Results {
m := map[string]any{
"index": r.Index,
"relevance_score": r.RelevanceScore,
}
if r.Document != nil {
m["document"] = *r.Document
}
results[i] = m
}
return json.Marshal(map[string]any{
"results": results,
"model": resp.Model,
})
}
// mergeConsecutiveRoles merges consecutive messages with the same role by
// concatenating their content. A message is only folded into the previous one
// when both contents have the same mergeable shape and neither side carries
// tool calls or a tool_call_id; otherwise it is kept as a separate entry
// (the previous version silently dropped such messages).
func mergeConsecutiveRoles(messages []map[string]any) []map[string]any {
if len(messages) <= 1 {
return messages
}
var result []map[string]any
for _, msg := range messages {
if len(result) > 0 {
last := result[len(result)-1]
_, lastHasToolCalls := last["tool_calls"]
_, msgHasToolCalls := msg["tool_calls"]
_, msgIsToolResult := msg["tool_call_id"]
if last["role"] == msg["role"] && !lastHasToolCalls && !msgHasToolCalls && !msgIsToolResult {
merged := false
switch lv := last["content"].(type) {
case string:
if cv, ok := msg["content"].(string); ok {
last["content"] = lv + cv
merged = true
}
case []any:
if cv, ok := msg["content"].([]any); ok {
last["content"] = append(lv, cv...)
merged = true
}
case []map[string]any:
if cv, ok := msg["content"].([]map[string]any); ok {
last["content"] = append(lv, cv...)
merged = true
}
}
if merged {
continue
}
}
}
result = append(result, msg)
}
return result
}