2 Commits

Author SHA1 Message Date
v-zhangjc9
f11f5e7656 feat(ai): 调整模型 2025-06-13 19:07:13 +08:00
v-zhangjc9
bc32a89fea fix(ai): 移除不支持的模型 2025-06-13 16:11:37 +08:00
6 changed files with 119 additions and 54 deletions

View File

@@ -9,7 +9,7 @@ spring:
api-key: ENC(K+Hff9QGC+fcyi510VIDd9CaeK/IN5WBJ9rlkUsHEdDgIidW+stHHJlsK0lLPUXXREha+ToQZqqDXJrqSE+GUKCXklFhelD8bRHFXBIeP/ZzT2cxhzgKUXgjw3S0Qw2R) api-key: ENC(K+Hff9QGC+fcyi510VIDd9CaeK/IN5WBJ9rlkUsHEdDgIidW+stHHJlsK0lLPUXXREha+ToQZqqDXJrqSE+GUKCXklFhelD8bRHFXBIeP/ZzT2cxhzgKUXgjw3S0Qw2R)
chat: chat:
options: options:
model: 'Qwen3-1.7-vllm' model: 'Qwen3/qwen3-1.7b'
mvc: mvc:
async: async:
request-timeout: 3600000 request-timeout: 3600000

View File

@@ -1,7 +1,9 @@
package com.lanyuanxiaoyao.service.ai.chat; package com.lanyuanxiaoyao.service.ai.chat;
import cn.hutool.core.util.StrUtil;
import java.io.IOException; import java.io.IOException;
import java.net.http.HttpClient; import java.net.http.HttpClient;
import java.time.Duration;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import org.noear.solon.ai.rag.Document; import org.noear.solon.ai.rag.Document;
@@ -25,12 +27,12 @@ import org.springframework.web.reactive.function.client.WebClient;
* @author lanyuanxiaoyao * @author lanyuanxiaoyao
* @version 20250514 * @version 20250514
*/ */
public class TestSpringAIToolChat { public class TestModel {
public static void main(String[] args) throws IOException { public static void main(String[] args) throws IOException {
testChatModel(); // testChatModel();
testVisualModel(); testVisualModel();
// testEmbeddingModel(); testEmbeddingModel();
// testRerankingModel(); testRerankingModel();
} }
private static void testChatModel() { private static void testChatModel() {
@@ -41,12 +43,14 @@ public class TestSpringAIToolChat {
"Qwen3/qwen3-4b-q4km", "Qwen3/qwen3-4b-q4km",
"Qwen3/qwen3-8b-q4km" "Qwen3/qwen3-8b-q4km"
)) { )) {
System.out.println(model);
long start = System.currentTimeMillis();
ChatClient client = chatClient(model); ChatClient client = chatClient(model);
String content = client.prompt() String content = client.prompt()
.user("你好") .user("你好,详细介绍一下是谁,能帮我做什么?")
.call() .call()
.content(); .content();
System.out.println(content); System.out.println(content.length() * 1000.0 / (System.currentTimeMillis() - start));
} }
} }
@@ -55,19 +59,17 @@ public class TestSpringAIToolChat {
"Qwen2.5/qwen2.5-vl-7b", "Qwen2.5/qwen2.5-vl-7b",
"Qwen2.5/qwen2.5-vl-7b-q4km", "Qwen2.5/qwen2.5-vl-7b-q4km",
"Qwen2.5/qwen2.5-vl-3b-instruct", "Qwen2.5/qwen2.5-vl-3b-instruct",
"Qwen2.5/qwen2.5-vl-3b-instruct-awq",
"Qwen2.5/qwen2.5-vl-7b-instruct", "Qwen2.5/qwen2.5-vl-7b-instruct",
"Qwen2.5/qwen2.5-vl-7b-instruct-awq",
"MiniCPM/minicpm-o-2.6-7.6b", "MiniCPM/minicpm-o-2.6-7.6b",
"MiniCPM/minicpm-o-2.6-7.6b-q4km" "MiniCPM/minicpm-o-2.6-7.6b-q4km"
)) { )) {
ChatClient client = chatClient(model); ChatClient client = chatClient(model);
String content = client.prompt() String content = client.prompt()
.user(spec -> spec.text("图片中有什么").media(MimeTypeUtils.IMAGE_PNG, new FileSystemResource("/Users/lanyuanxiaoyao/Pictures/deepseek.png"))) .user(spec -> spec.text("根据图片中的内容编一个童话小故事").media(MimeTypeUtils.IMAGE_PNG, new FileSystemResource("/Users/lanyuanxiaoyao/Pictures/deepseek.png")))
.call() .call()
.content(); .content();
System.out.println(content); System.out.println(StrUtil.trim(content));
} }
} }
@@ -78,9 +80,7 @@ public class TestSpringAIToolChat {
"Qwen3/qwen3-embedding-4b-q4km", "Qwen3/qwen3-embedding-4b-q4km",
"Qwen3/qwen3-embedding-8b-q4km", "Qwen3/qwen3-embedding-8b-q4km",
"BGE/bge-m3", "BGE/bge-m3",
"BGE/bge-m3-q4km", "BGE/bge-m3-q4km"
"MiniCPM/minicpm-embedding",
"MiniCPM/minicpm-embedding-light"
)) { )) {
EmbeddingModel embeddingModel = embeddingModel(model); EmbeddingModel embeddingModel = embeddingModel(model);
float[] worlds = embeddingModel.embed("Hello world"); float[] worlds = embeddingModel.embed("Hello world");
@@ -90,12 +90,10 @@ public class TestSpringAIToolChat {
private static void testRerankingModel() throws IOException { private static void testRerankingModel() throws IOException {
for (String model : List.of( for (String model : List.of(
"BGE/beg-reranker-v2",
"MiniCPM/minicpm-reranker",
"MiniCPM/minicpm-reranker-light",
"BGE/beg-reranker-v2", "BGE/beg-reranker-v2",
"BGE/beg-reranker-v2-q4km" "BGE/beg-reranker-v2-q4km"
)) { )) {
System.out.println(model);
RerankingModel rerankingModel = rerankingModel(model); RerankingModel rerankingModel = rerankingModel(model);
List<Document> list = rerankingModel.rerank( List<Document> list = rerankingModel.rerank(
"你好", "你好",
@@ -149,6 +147,7 @@ public class TestSpringAIToolChat {
return RerankingModel.of("http://132.121.206.65:10086/v1/rerank") return RerankingModel.of("http://132.121.206.65:10086/v1/rerank")
.model(model) .model(model)
.apiKey("*XMySqV%>hR&v>>g*NwCs3tpQ5FVMFEF2VHVTj<MYQd$&@$sY7CgqNyea4giJi4") .apiKey("*XMySqV%>hR&v>>g*NwCs3tpQ5FVMFEF2VHVTj<MYQd$&@$sY7CgqNyea4giJi4")
.timeout(Duration.ofMinutes(10))
.build(); .build();
} }

View File

@@ -0,0 +1,73 @@
package com.lanyuanxiaoyao.service.ai.chat;
import cn.hutool.core.util.StrUtil;
import java.net.http.HttpClient;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.openai.OpenAiChatModel;
import org.springframework.ai.openai.OpenAiChatOptions;
import org.springframework.ai.openai.api.OpenAiApi;
import org.springframework.ai.tool.annotation.Tool;
import org.springframework.ai.tool.annotation.ToolParam;
import org.springframework.http.client.JdkClientHttpRequestFactory;
import org.springframework.http.client.reactive.JdkClientHttpConnector;
import org.springframework.web.client.RestClient;
import org.springframework.web.reactive.function.client.WebClient;
import reactor.core.Disposable;
/**
 * Manual smoke test for Spring AI tool calling against an OpenAI-compatible endpoint.
 *
 * <p>Builds a {@link ChatClient} on top of {@link OpenAiChatModel}, registers
 * {@link TestTool} as a callable tool, sends one prompt and prints the streamed
 * reply chunks to standard output.
 *
 * @author lanyuanxiaoyao
 * @version 20250613
 */
public class TestSpringAiTools {
    /** Base URL of the OpenAI-compatible server the test talks to. */
    private static final String BASE_URL = "http://132.121.206.65:10086";

    // NOTE(review): credential is committed in plain text; consider loading it from
    // the environment or an encrypted property instead of keeping it in source.
    private static final String API_KEY = "*XMySqV%>hR&v>>g*NwCs3tpQ5FVMFEF2VHVTj<MYQd$&@$sY7CgqNyea4giJi4";

    /** Model identifier requested for the chat completion. */
    private static final String MODEL = "Qwen3/qwen3-1.7b";

    /**
     * Sends a single tool-calling prompt and prints every streamed content chunk.
     *
     * <p>The call blocks until the stream terminates. A stream error is rethrown
     * from {@code blockLast()}, so a failed request terminates the program with
     * a stack trace instead of ending silently.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        OpenAiApi api = OpenAiApi.builder()
            .baseUrl(BASE_URL)
            .apiKey(API_KEY)
            .restClientBuilder(restClientBuilder())
            .webClientBuilder(webClientBuilder())
            .build();
        OpenAiChatOptions options = OpenAiChatOptions.builder()
            .model(MODEL)
            .build();
        OpenAiChatModel chatModel = OpenAiChatModel.builder()
            .openAiApi(api)
            .defaultOptions(options)
            .build();
        ChatClient client = ChatClient.builder(chatModel).build();

        // Block on the stream instead of spinning on Disposable#isDisposed():
        // the empty polling loop kept one CPU core fully busy for the whole request.
        client.prompt()
            .tools(new TestTool())
            .user("调用submit工具生成一个关于「猪」的笑话")
            .stream()
            .content()
            .doOnNext(System.out::println)
            .blockLast();
    }

    /**
     * Creates a JDK HTTP client restricted to HTTP/1.1.
     *
     * @return a new client instance
     */
    private static HttpClient httpClient() {
        return HttpClient.newBuilder()
            .version(HttpClient.Version.HTTP_1_1)
            .build();
    }

    /**
     * Builder for the blocking client, backed by the JDK HTTP client.
     *
     * @return a {@link RestClient.Builder} using {@link #httpClient()}
     */
    private static RestClient.Builder restClientBuilder() {
        return RestClient.builder()
            .requestFactory(new JdkClientHttpRequestFactory(httpClient()));
    }

    /**
     * Builder for the reactive client, backed by the JDK HTTP client.
     *
     * @return a {@link WebClient.Builder} using {@link #httpClient()}
     */
    private static WebClient.Builder webClientBuilder() {
        return WebClient.builder()
            .clientConnector(new JdkClientHttpConnector(httpClient()));
    }

    /** Tool holder exposed to the model; contains a single deterministic tool. */
    public static final class TestTool {
        /**
         * Returns a canned one-line joke built from the given animal name.
         *
         * @param animalName name of the animal to put into the joke
         * @return the animal name followed by a fixed punch line
         */
        @Tool(description = "传入任意动物名称,返回一个关于这个动物的笑话")
        public String submit(@ToolParam(description = "动物名称") String animalName) {
            return StrUtil.format("{}掉沟里了", animalName);
        }
    }
}

View File

@@ -25,39 +25,32 @@ public class LlamaSwapTool extends GeneratorTool {
Map.of( Map.of(
"models", List.of( "models", List.of(
llamaCppEmbeddingCmd("BGE/bge-m3-q4km", "bge-m3-Q4_K_M.gguf", 20), llamaCppEmbeddingCmd("BGE/bge-m3-q4km", "bge-m3-Q4_K_M.gguf", 20),
vllmEmbeddingCmd("BGE/bge-m3", "bge-m3", 20, 5), vllmEmbeddingCmd("BGE/bge-m3", "bge-m3", 5),
llamaCppRerankerCmd("BGE/beg-reranker-v2-q4km", "bge-reranker-v2-m3-Q4_K_M.gguf", 20), llamaCppRerankerCmd("BGE/beg-reranker-v2-q4km", "bge-reranker-v2-m3-Q4_K_M.gguf", 20),
llamaCppRerankerCmd("BGE/beg-reranker-v2", "bge-reranker-v2-m3", 20), vllmRerankerCmd("BGE/beg-reranker-v2", "bge-reranker-v2-m3", 5),
vllmCmd("Qwen3/qwen3-0.6b", "Qwen3-0.6B", 35, 5, true), vllmCmd("Qwen3/qwen3-0.6b", "Qwen3-0.6B", 5, true),
vllmCmd("Qwen3/qwen3-1.7b", "Qwen3-1.7B", 35, 5, true), vllmCmd("Qwen3/qwen3-1.7b", "Qwen3-1.7B", 5, true),
vllmCmd("Qwen3/qwen3-4b", "Qwen3-4B", 35, 8, true), vllmCmd("Qwen3/qwen3-4b", "Qwen3-4B", 8, true),
llamaCppCmd("Qwen3/qwen3-4b-q4km", "Qwen3-4B-Q4_K_M.gguf", 35), llamaCppCmd("Qwen3/qwen3-4b-q4km", "Qwen3-4B-Q4_K_M.gguf", 35),
llamaCppCmd("Qwen3/qwen3-8b-q4km", "Qwen3-8B-Q4_K_M.gguf", 35), llamaCppCmd("Qwen3/qwen3-8b-q4km", "Qwen3-8B-Q4_K_M.gguf", 35),
vllmEmbeddingCmd("Qwen3/qwen3-embedding-0.6b", "Qwen3-Embedding-0.6B", 35, 5), vllmEmbeddingCmd("Qwen3/qwen3-embedding-0.6b", "Qwen3-Embedding-0.6B", 5),
vllmEmbeddingCmd("Qwen3/qwen3-embedding-4b", "Qwen3-Embedding-4B", 35, 8), vllmEmbeddingCmd("Qwen3/qwen3-embedding-4b", "Qwen3-Embedding-4B", 8),
llamaCppEmbeddingCmd("Qwen3/qwen3-embedding-4b-q4km", "Qwen3-Embedding-4B-Q4_K_M.gguf", 35), llamaCppEmbeddingCmd("Qwen3/qwen3-embedding-4b-q4km", "Qwen3-Embedding-4B-Q4_K_M.gguf", 35),
llamaCppEmbeddingCmd("Qwen3/qwen3-embedding-8b-q4km", "Qwen3-Embedding-8B-Q4_K_M.gguf", 35), llamaCppEmbeddingCmd("Qwen3/qwen3-embedding-8b-q4km", "Qwen3-Embedding-8B-Q4_K_M.gguf", 35),
vllmRerankerCmd("Qwen3/qwen3-reranker-0.6b", "Qwen3-Reranker-0.6B", 35, 5), // 0.9.1 vllm还未支持
vllmRerankerCmd("Qwen3/qwen3-reranker-4b", "Qwen3-Reranker-4B", 35, 8), // vllmRerankerCmd("Qwen3/qwen3-reranker-0.6b", "Qwen3-Reranker-0.6B", 5),
// vllmRerankerCmd("Qwen3/qwen3-reranker-4b", "Qwen3-Reranker-4B", 8),
llamaCppVisualCmd("Qwen2.5/qwen2.5-vl-7b", "Qwen2.5-VL-7B-Instruct-BF16.gguf", 35), llamaCppVisualCmd("Qwen2.5/qwen2.5-vl-7b", "Qwen2.5-VL-7B-Instruct-BF16.gguf", 35),
llamaCppVisualCmd("Qwen2.5/qwen2.5-vl-7b-q4km", "Qwen2.5-VL-7B-Instruct-Q4_K_M.gguf", 35), llamaCppVisualCmd("Qwen2.5/qwen2.5-vl-7b-q4km", "Qwen2.5-VL-7B-Instruct-Q4_K_M.gguf", 35),
vllmCmd("Qwen2.5/qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct", 35, 8, false), vllmCmd("Qwen2.5/qwen2.5-vl-3b-instruct", "Qwen2.5-VL-3B-Instruct", 8, false),
vllmCmd("Qwen2.5/qwen2.5-vl-7b-instruct-awq", "Qwen2.5-VL-7B-Instruct-AWQ", 35, 8, false), vllmCmd("Qwen2.5/qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct", 8, false),
vllmCmd("Qwen2.5/qwen2.5-vl-3b-instruct", "Qwen2.5-VL-3B-Instruct", 35, 8, false),
vllmCmd("Qwen2.5/qwen2.5-vl-3b-instruct-awq", "Qwen2.5-VL-3B-Instruct-AWQ", 35, 8, false),
llamaCppVisualCmd("MiniCPM/minicpm-o-2.6-7.6b-q4km", "MiniCPM-o-2_6-7.6B-Q4_K_M.gguf", 35), llamaCppVisualCmd("MiniCPM/minicpm-o-2.6-7.6b-q4km", "MiniCPM-o-2_6-7.6B-Q4_K_M.gguf", 35),
vllmCmd("MiniCPM/minicpm-o-2.6-7.6b", "MiniCPM-o-2_6", 35, 10, false), vllmCmd("MiniCPM/minicpm-o-2.6-7.6b", "MiniCPM-o-2_6", 10, false)
vllmEmbeddingCmd("MiniCPM/minicpm-embedding", "MiniCPM-Embedding", 20, 8),
vllmEmbeddingCmd("MiniCPM/minicpm-embedding-light", "MiniCPM-Embedding-Light", 20, 5),
vllmEmbeddingCmd("MiniCPM/minicpm-reranker", "MiniCPM-Reranker", 20, 8),
vllmEmbeddingCmd("MiniCPM/minicpm-reranker-light", "MiniCPM-Reranker-Light", 20, 5)
) )
), ),
"config.yaml" "config.yaml"
@@ -114,23 +107,23 @@ public class LlamaSwapTool extends GeneratorTool {
); );
} }
private DockerCmd vllmCmd(String name, String model, Integer thread, Integer cache, Boolean isReasonable) { private DockerCmd vllmCmd(String name, String model, Integer cache, Boolean isReasonable) {
return vllmCmd(name, model, thread, cache, false, false, isReasonable, false); return vllmCmd(name, model, cache, false, false, isReasonable);
} }
private DockerCmd vllmEmbeddingCmd(String name, String model, Integer thread, Integer cache) { private DockerCmd vllmEmbeddingCmd(String name, String model, Integer cache) {
return vllmCmd(name, model, thread, cache, true, false, false, false); return vllmCmd(name, model, cache, true, false, false);
} }
private DockerCmd vllmRerankerCmd(String name, String model, Integer thread, Integer cache) { private DockerCmd vllmRerankerCmd(String name, String model, Integer cache) {
return vllmCmd(name, model, thread, cache, false, true, false, false); return vllmCmd(name, model, cache, false, true, false);
} }
private DockerCmd vllmVisualCmd(String name, String model, Integer thread, Integer cache, Boolean isReasonable) { private DockerCmd vllmVisualCmd(String name, String model, Integer cache, Boolean isReasonable) {
return vllmCmd(name, model, thread, cache, false, false, isReasonable, true); return vllmCmd(name, model, cache, false, false, isReasonable);
} }
private DockerCmd vllmCmd(String name, String model, Integer thread, Integer cache, Boolean isEmbedding, Boolean isReranker, Boolean isReasonable, Boolean isVisual) { private DockerCmd vllmCmd(String name, String model, Integer cache, Boolean isEmbedding, Boolean isReranker, Boolean isReasonable) {
List<String> arguments = ListUtil.list( List<String> arguments = ListUtil.list(
false, false,
StrUtil.format("--model /models/{}", model), StrUtil.format("--model /models/{}", model),
@@ -138,11 +131,13 @@ public class LlamaSwapTool extends GeneratorTool {
"--port ${PORT}", "--port ${PORT}",
StrUtil.format("--api-key {}", API_KEY), StrUtil.format("--api-key {}", API_KEY),
"--disable-log-requests", "--disable-log-requests",
"--uvicorn-log-level error" "--uvicorn-log-level error",
"--trust-remote-code"
); );
if (isEmbedding) { if (isEmbedding) {
arguments.add("--task embedding"); arguments.add("--task embedding");
} else if (isReranker) { } else if (isReranker) {
arguments.add("--task score");
} else if (isReasonable) { } else if (isReasonable) {
arguments.add("--enable-auto-tool-choice"); arguments.add("--enable-auto-tool-choice");
arguments.add("--tool-call-parser hermes"); arguments.add("--tool-call-parser hermes");
@@ -150,7 +145,7 @@ public class LlamaSwapTool extends GeneratorTool {
arguments.add("--reasoning-parser deepseek_r1"); arguments.add("--reasoning-parser deepseek_r1");
} }
return new DockerCmd( return new DockerCmd(
"vllm-server-cpu:0.9.1", "vllm-server-cpu:0.8.5.post1",
name, name,
model, model,
StrUtil.format("http://vllm-{}:${PORT}", displayName(model)), StrUtil.format("http://vllm-{}:${PORT}", displayName(model)),
@@ -158,9 +153,7 @@ public class LlamaSwapTool extends GeneratorTool {
StrUtil.format("--name vllm-{}", displayName(model)), StrUtil.format("--name vllm-{}", displayName(model)),
"--privileged=true", "--privileged=true",
"--shm-size=4g", "--shm-size=4g",
StrUtil.format("-e VLLM_CPU_KVCACHE_SPACE={}", cache), StrUtil.format("-e VLLM_CPU_KVCACHE_SPACE={}", cache)
StrUtil.format("-e VLLM_CPU_OMP_THREADS_BIND=0-{}", thread - 1),
"-e VLLM_CPU_MOE_PREPACK=0"
), ),
arguments arguments
); );

View File

@@ -1,4 +1,4 @@
healthCheckTimeout: 120 healthCheckTimeout: 600
logLevel: warn logLevel: warn
models: models:
<#list models as model> <#list models as model>

View File

@@ -9,10 +9,10 @@ spring:
api-key: ENC(K+Hff9QGC+fcyi510VIDd9CaeK/IN5WBJ9rlkUsHEdDgIidW+stHHJlsK0lLPUXXREha+ToQZqqDXJrqSE+GUKCXklFhelD8bRHFXBIeP/ZzT2cxhzgKUXgjw3S0Qw2R) api-key: ENC(K+Hff9QGC+fcyi510VIDd9CaeK/IN5WBJ9rlkUsHEdDgIidW+stHHJlsK0lLPUXXREha+ToQZqqDXJrqSE+GUKCXklFhelD8bRHFXBIeP/ZzT2cxhzgKUXgjw3S0Qw2R)
chat: chat:
options: options:
model: 'Qwen3-1.7-vllm' model: 'Qwen3/qwen3-1.7b'
embedding: embedding:
options: options:
model: 'Bge-m3-vllm' model: 'Qwen3/qwen3-embedding-4b'
vectorstore: vectorstore:
qdrant: qdrant:
host: 132.121.206.65 host: 132.121.206.65