Compare commits
2 Commits
506e28c9f7
...
2e24bdb90b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2e24bdb90b | ||
|
|
5160c59ab0 |
@@ -77,11 +77,12 @@ export const run_package_batch = async (projects) => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const upload = async (file_path) => {
|
export const upload = async (file_path) => {
|
||||||
let start = new Date().getTime()
|
let start = new Date().getTime()
|
||||||
|
let basename = path.basename(file_path)
|
||||||
let response = await spinner(
|
let response = await spinner(
|
||||||
`Uploading project ${file_path}`,
|
`Uploading project ${file_path}`,
|
||||||
() => fetch(`${upload_url}/file/upload/${path.basename(file_path)}`, {
|
() => fetch(`${upload_url}/file/upload/${basename}`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {
|
headers: {
|
||||||
'Content-Type': 'application/octet-stream',
|
'Content-Type': 'application/octet-stream',
|
||||||
@@ -98,6 +99,7 @@ const upload = async (file_path) => {
|
|||||||
console.log(`✅ Finished upload ${file_path} (${millisecondToString((new Date().getTime()) - start)})`)
|
console.log(`✅ Finished upload ${file_path} (${millisecondToString((new Date().getTime()) - start)})`)
|
||||||
console.log(`📘 Uploaded ${fileSize(fs.statSync(file_path).size)}`)
|
console.log(`📘 Uploaded ${fileSize(fs.statSync(file_path).size)}`)
|
||||||
console.log(`📘 MD5 ${md5file.sync(file_path)}`)
|
console.log(`📘 MD5 ${md5file.sync(file_path)}`)
|
||||||
|
console.log(`📘 Download curl http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.124:36800/file/download/${basename} -o ${basename}`)
|
||||||
fs.rmSync(file_path)
|
fs.rmSync(file_path)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
<module>service-ai-core</module>
|
<module>service-ai-core</module>
|
||||||
<module>service-ai-chat</module>
|
<module>service-ai-chat</module>
|
||||||
<module>service-ai-knowledge</module>
|
<module>service-ai-knowledge</module>
|
||||||
|
<module>service-ai-cli</module>
|
||||||
</modules>
|
</modules>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
|
|||||||
@@ -20,6 +20,21 @@
|
|||||||
<groupId>org.springframework.ai</groupId>
|
<groupId>org.springframework.ai</groupId>
|
||||||
<artifactId>spring-ai-starter-model-deepseek</artifactId>
|
<artifactId>spring-ai-starter-model-deepseek</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springframework.ai</groupId>
|
||||||
|
<artifactId>spring-ai-starter-model-openai</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.noear</groupId>
|
||||||
|
<artifactId>solon-ai</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.noear</groupId>
|
||||||
|
<artifactId>solon-ai-dialect-openai</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
|||||||
@@ -1,71 +1,155 @@
|
|||||||
package com.lanyuanxiaoyao.service.ai.chat;
|
package com.lanyuanxiaoyao.service.ai.chat;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.net.http.HttpClient;
|
import java.net.http.HttpClient;
|
||||||
import java.time.LocalDateTime;
|
import java.util.Arrays;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.util.List;
|
||||||
|
import org.noear.solon.ai.rag.Document;
|
||||||
|
import org.noear.solon.ai.reranking.RerankingModel;
|
||||||
import org.springframework.ai.chat.client.ChatClient;
|
import org.springframework.ai.chat.client.ChatClient;
|
||||||
import org.springframework.ai.deepseek.DeepSeekChatModel;
|
import org.springframework.ai.document.MetadataMode;
|
||||||
import org.springframework.ai.deepseek.DeepSeekChatOptions;
|
import org.springframework.ai.embedding.EmbeddingModel;
|
||||||
import org.springframework.ai.deepseek.api.DeepSeekApi;
|
import org.springframework.ai.openai.OpenAiChatModel;
|
||||||
import org.springframework.ai.tool.ToolCallback;
|
import org.springframework.ai.openai.OpenAiChatOptions;
|
||||||
import org.springframework.ai.tool.definition.ToolDefinition;
|
import org.springframework.ai.openai.OpenAiEmbeddingModel;
|
||||||
|
import org.springframework.ai.openai.OpenAiEmbeddingOptions;
|
||||||
|
import org.springframework.ai.openai.api.OpenAiApi;
|
||||||
|
import org.springframework.core.io.FileSystemResource;
|
||||||
import org.springframework.http.client.JdkClientHttpRequestFactory;
|
import org.springframework.http.client.JdkClientHttpRequestFactory;
|
||||||
import org.springframework.http.client.reactive.JdkClientHttpConnector;
|
import org.springframework.http.client.reactive.JdkClientHttpConnector;
|
||||||
|
import org.springframework.util.MimeTypeUtils;
|
||||||
import org.springframework.web.client.RestClient;
|
import org.springframework.web.client.RestClient;
|
||||||
import org.springframework.web.reactive.function.client.WebClient;
|
import org.springframework.web.reactive.function.client.WebClient;
|
||||||
import reactor.core.Disposable;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author lanyuanxiaoyao
|
* @author lanyuanxiaoyao
|
||||||
* @version 20250514
|
* @version 20250514
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("NullableProblems")
|
|
||||||
public class TestSpringAIToolChat {
|
public class TestSpringAIToolChat {
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) throws IOException {
|
||||||
ChatClient client = ChatClient.builder(
|
testChatModel();
|
||||||
DeepSeekChatModel.builder()
|
testVisualModel();
|
||||||
.deepSeekApi(
|
// testEmbeddingModel();
|
||||||
DeepSeekApi.builder()
|
// testRerankingModel();
|
||||||
.baseUrl("http://132.121.206.65:10086/v1")
|
}
|
||||||
|
|
||||||
|
private static void testChatModel() {
|
||||||
|
for (String model : List.of(
|
||||||
|
"Qwen3/qwen3-0.6b",
|
||||||
|
"Qwen3/qwen3-1.7b",
|
||||||
|
"Qwen3/qwen3-4b",
|
||||||
|
"Qwen3/qwen3-4b-q4km",
|
||||||
|
"Qwen3/qwen3-8b-q4km"
|
||||||
|
)) {
|
||||||
|
ChatClient client = chatClient(model);
|
||||||
|
String content = client.prompt()
|
||||||
|
.user("你好")
|
||||||
|
.call()
|
||||||
|
.content();
|
||||||
|
System.out.println(content);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void testVisualModel() {
|
||||||
|
for (String model : List.of(
|
||||||
|
"Qwen2.5/qwen2.5-vl-7b",
|
||||||
|
"Qwen2.5/qwen2.5-vl-7b-q4km",
|
||||||
|
"Qwen2.5/qwen2.5-vl-3b-instruct",
|
||||||
|
"Qwen2.5/qwen2.5-vl-3b-instruct-awq",
|
||||||
|
"Qwen2.5/qwen2.5-vl-7b-instruct",
|
||||||
|
"Qwen2.5/qwen2.5-vl-7b-instruct-awq",
|
||||||
|
"MiniCPM/minicpm-o-2.6-7.6b",
|
||||||
|
"MiniCPM/minicpm-o-2.6-7.6b-q4km"
|
||||||
|
|
||||||
|
)) {
|
||||||
|
ChatClient client = chatClient(model);
|
||||||
|
String content = client.prompt()
|
||||||
|
.user(spec -> spec.text("图片中有什么").media(MimeTypeUtils.IMAGE_PNG, new FileSystemResource("/Users/lanyuanxiaoyao/Pictures/deepseek.png")))
|
||||||
|
.call()
|
||||||
|
.content();
|
||||||
|
System.out.println(content);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void testEmbeddingModel() {
|
||||||
|
for (String model : List.of(
|
||||||
|
"Qwen3/qwen3-embedding-0.6b",
|
||||||
|
"Qwen3/qwen3-embedding-4b",
|
||||||
|
"Qwen3/qwen3-embedding-4b-q4km",
|
||||||
|
"Qwen3/qwen3-embedding-8b-q4km",
|
||||||
|
"BGE/bge-m3",
|
||||||
|
"BGE/bge-m3-q4km",
|
||||||
|
"MiniCPM/minicpm-embedding",
|
||||||
|
"MiniCPM/minicpm-embedding-light"
|
||||||
|
)) {
|
||||||
|
EmbeddingModel embeddingModel = embeddingModel(model);
|
||||||
|
float[] worlds = embeddingModel.embed("Hello world");
|
||||||
|
System.out.println(Arrays.toString(worlds));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void testRerankingModel() throws IOException {
|
||||||
|
for (String model : List.of(
|
||||||
|
"BGE/beg-reranker-v2",
|
||||||
|
"MiniCPM/minicpm-reranker",
|
||||||
|
"MiniCPM/minicpm-reranker-light",
|
||||||
|
"BGE/beg-reranker-v2",
|
||||||
|
"BGE/beg-reranker-v2-q4km"
|
||||||
|
)) {
|
||||||
|
RerankingModel rerankingModel = rerankingModel(model);
|
||||||
|
List<Document> list = rerankingModel.rerank(
|
||||||
|
"你好",
|
||||||
|
List.of(
|
||||||
|
new Document("go go go,滚犊子"),
|
||||||
|
new Document("我是tom,你最近过得好吗?"),
|
||||||
|
new Document("666,你就是大聪明")
|
||||||
|
)
|
||||||
|
);
|
||||||
|
list.forEach(System.out::println);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static ChatClient chatClient(String model) {
|
||||||
|
return ChatClient.builder(
|
||||||
|
OpenAiChatModel.builder()
|
||||||
|
.openAiApi(
|
||||||
|
OpenAiApi.builder()
|
||||||
|
.baseUrl("http://132.121.206.65:10086")
|
||||||
.apiKey("*XMySqV%>hR&v>>g*NwCs3tpQ5FVMFEF2VHVTj<MYQd$&@$sY7CgqNyea4giJi4")
|
.apiKey("*XMySqV%>hR&v>>g*NwCs3tpQ5FVMFEF2VHVTj<MYQd$&@$sY7CgqNyea4giJi4")
|
||||||
.restClientBuilder(restClientBuilder())
|
.restClientBuilder(restClientBuilder())
|
||||||
.webClientBuilder(webClientBuilder())
|
.webClientBuilder(webClientBuilder())
|
||||||
.build()
|
.build()
|
||||||
)
|
)
|
||||||
.defaultOptions(
|
.defaultOptions(
|
||||||
DeepSeekChatOptions.builder()
|
OpenAiChatOptions.builder()
|
||||||
.model("Qwen3-1.7-vllm")
|
.model(model)
|
||||||
.build()
|
.build()
|
||||||
)
|
)
|
||||||
.build()
|
.build()
|
||||||
)
|
)
|
||||||
.build();
|
.build();
|
||||||
ToolCallback datetimeTool = new ToolCallback() {
|
}
|
||||||
@Override
|
|
||||||
public ToolDefinition getToolDefinition() {
|
|
||||||
return ToolDefinition.builder()
|
|
||||||
.name("getCurrentTime")
|
|
||||||
.description("获取当前日期和时间")
|
|
||||||
// language=JSON
|
|
||||||
.inputSchema("""
|
|
||||||
{"type": null}
|
|
||||||
""")
|
|
||||||
.build();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
private static EmbeddingModel embeddingModel(String model) {
|
||||||
public String call(String toolInput) {
|
return new OpenAiEmbeddingModel(
|
||||||
return LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
|
OpenAiApi.builder()
|
||||||
}
|
.baseUrl("http://132.121.206.65:10086")
|
||||||
};
|
.apiKey("*XMySqV%>hR&v>>g*NwCs3tpQ5FVMFEF2VHVTj<MYQd$&@$sY7CgqNyea4giJi4")
|
||||||
Disposable disposable = client.prompt()
|
.restClientBuilder(restClientBuilder())
|
||||||
.user("当前时间?")
|
.webClientBuilder(webClientBuilder())
|
||||||
.toolCallbacks(datetimeTool)
|
.build(),
|
||||||
.stream()
|
MetadataMode.EMBED,
|
||||||
.content()
|
OpenAiEmbeddingOptions.builder()
|
||||||
.subscribe(System.out::println);
|
.model(model)
|
||||||
while (!disposable.isDisposed()) {
|
.build()
|
||||||
}
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static RerankingModel rerankingModel(String model) {
|
||||||
|
return RerankingModel.of("http://132.121.206.65:10086/v1/rerank")
|
||||||
|
.model(model)
|
||||||
|
.apiKey("*XMySqV%>hR&v>>g*NwCs3tpQ5FVMFEF2VHVTj<MYQd$&@$sY7CgqNyea4giJi4")
|
||||||
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static HttpClient httpClient() {
|
private static HttpClient httpClient() {
|
||||||
|
|||||||
46
service-ai/service-ai-cli/pom.xml
Normal file
46
service-ai/service-ai-cli/pom.xml
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
<parent>
|
||||||
|
<groupId>com.lanyuanxiaoyao</groupId>
|
||||||
|
<artifactId>service-ai</artifactId>
|
||||||
|
<version>1.0.0-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>service-ai-cli</artifactId>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springframework.boot</groupId>
|
||||||
|
<artifactId>spring-boot-starter</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>cn.hutool</groupId>
|
||||||
|
<artifactId>hutool-all</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.freemarker</groupId>
|
||||||
|
<artifactId>freemarker</artifactId>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.springframework.boot</groupId>
|
||||||
|
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<phase>package</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>repackage</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
|
||||||
|
</project>
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
package com.lanyuanxiaoyao.service.ai.cli;
|
||||||
|
|
||||||
|
import org.springframework.boot.ApplicationArguments;
|
||||||
|
import org.springframework.boot.ApplicationRunner;
|
||||||
|
import org.springframework.boot.SpringApplication;
|
||||||
|
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author lanyuanxiaoyao
|
||||||
|
* @version 20250612
|
||||||
|
*/
|
||||||
|
@SpringBootApplication
|
||||||
|
public class Generator implements ApplicationRunner {
|
||||||
|
public static void main(String[] args) {
|
||||||
|
SpringApplication.run(Generator.class, args);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run(ApplicationArguments args) throws Exception {
|
||||||
|
new LlamaSwapTool().generate();
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
package com.lanyuanxiaoyao.service.ai.cli;
|
||||||
|
|
||||||
|
import cn.hutool.core.io.FileUtil;
|
||||||
|
import cn.hutool.extra.template.Template;
|
||||||
|
import cn.hutool.extra.template.TemplateConfig;
|
||||||
|
import cn.hutool.extra.template.TemplateEngine;
|
||||||
|
import cn.hutool.extra.template.TemplateUtil;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author lanyuanxiaoyao
|
||||||
|
* @version 20250612
|
||||||
|
*/
|
||||||
|
public abstract class GeneratorTool {
|
||||||
|
private final TemplateEngine engine = TemplateUtil.createEngine(new TemplateConfig("template", TemplateConfig.ResourceMode.CLASSPATH));
|
||||||
|
|
||||||
|
protected void generateTemplate(String templatePath, Map<?, ?> data, String targetScriptPath) {
|
||||||
|
Template template = engine.getTemplate(templatePath);
|
||||||
|
String script = template.render(data);
|
||||||
|
FileUtil.del(targetScriptPath);
|
||||||
|
FileUtil.writeString(script, targetScriptPath, StandardCharsets.UTF_8);
|
||||||
|
}
|
||||||
|
|
||||||
|
public abstract void generate() throws Exception;
|
||||||
|
}
|
||||||
@@ -0,0 +1,251 @@
|
|||||||
|
package com.lanyuanxiaoyao.service.ai.cli;
|
||||||
|
|
||||||
|
import cn.hutool.core.collection.ListUtil;
|
||||||
|
import cn.hutool.core.util.StrUtil;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author lanyuanxiaoyao
|
||||||
|
* @version 20250612
|
||||||
|
*/
|
||||||
|
public class LlamaSwapTool extends GeneratorTool {
|
||||||
|
private static final String API_KEY = "*XMySqV%>hR&v>>g*NwCs3tpQ5FVMFEF2VHVTj<MYQd$&@$sY7CgqNyea4giJi4";
|
||||||
|
|
||||||
|
public static String displayName(String name) {
|
||||||
|
return name.replaceAll("\\s+", "_")
|
||||||
|
.replaceAll("\\.", "_")
|
||||||
|
.toLowerCase();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void generate() {
|
||||||
|
generateTemplate(
|
||||||
|
"llama-swap.ftl",
|
||||||
|
Map.of(
|
||||||
|
"models", List.of(
|
||||||
|
llamaCppEmbeddingCmd("BGE/bge-m3-q4km", "bge-m3-Q4_K_M.gguf", 20),
|
||||||
|
vllmEmbeddingCmd("BGE/bge-m3", "bge-m3", 20, 5),
|
||||||
|
llamaCppRerankerCmd("BGE/beg-reranker-v2-q4km", "bge-reranker-v2-m3-Q4_K_M.gguf", 20),
|
||||||
|
llamaCppRerankerCmd("BGE/beg-reranker-v2", "bge-reranker-v2-m3", 20),
|
||||||
|
|
||||||
|
vllmCmd("Qwen3/qwen3-0.6b", "Qwen3-0.6B", 35, 5, true),
|
||||||
|
vllmCmd("Qwen3/qwen3-1.7b", "Qwen3-1.7B", 35, 5, true),
|
||||||
|
vllmCmd("Qwen3/qwen3-4b", "Qwen3-4B", 35, 8, true),
|
||||||
|
llamaCppCmd("Qwen3/qwen3-4b-q4km", "Qwen3-4B-Q4_K_M.gguf", 35),
|
||||||
|
llamaCppCmd("Qwen3/qwen3-8b-q4km", "Qwen3-8B-Q4_K_M.gguf", 35),
|
||||||
|
|
||||||
|
vllmEmbeddingCmd("Qwen3/qwen3-embedding-0.6b", "Qwen3-Embedding-0.6B", 35, 5),
|
||||||
|
vllmEmbeddingCmd("Qwen3/qwen3-embedding-4b", "Qwen3-Embedding-4B", 35, 8),
|
||||||
|
llamaCppEmbeddingCmd("Qwen3/qwen3-embedding-4b-q4km", "Qwen3-Embedding-4B-Q4_K_M.gguf", 35),
|
||||||
|
llamaCppEmbeddingCmd("Qwen3/qwen3-embedding-8b-q4km", "Qwen3-Embedding-8B-Q4_K_M.gguf", 35),
|
||||||
|
|
||||||
|
vllmRerankerCmd("Qwen3/qwen3-reranker-0.6b", "Qwen3-Reranker-0.6B", 35, 5),
|
||||||
|
vllmRerankerCmd("Qwen3/qwen3-reranker-4b", "Qwen3-Reranker-4B", 35, 8),
|
||||||
|
|
||||||
|
llamaCppVisualCmd("Qwen2.5/qwen2.5-vl-7b", "Qwen2.5-VL-7B-Instruct-BF16.gguf", 35),
|
||||||
|
llamaCppVisualCmd("Qwen2.5/qwen2.5-vl-7b-q4km", "Qwen2.5-VL-7B-Instruct-Q4_K_M.gguf", 35),
|
||||||
|
vllmCmd("Qwen2.5/qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct", 35, 8, false),
|
||||||
|
vllmCmd("Qwen2.5/qwen2.5-vl-7b-instruct-awq", "Qwen2.5-VL-7B-Instruct-AWQ", 35, 8, false),
|
||||||
|
vllmCmd("Qwen2.5/qwen2.5-vl-3b-instruct", "Qwen2.5-VL-3B-Instruct", 35, 8, false),
|
||||||
|
vllmCmd("Qwen2.5/qwen2.5-vl-3b-instruct-awq", "Qwen2.5-VL-3B-Instruct-AWQ", 35, 8, false),
|
||||||
|
|
||||||
|
llamaCppVisualCmd("MiniCPM/minicpm-o-2.6-7.6b-q4km", "MiniCPM-o-2_6-7.6B-Q4_K_M.gguf", 35),
|
||||||
|
vllmCmd("MiniCPM/minicpm-o-2.6-7.6b", "MiniCPM-o-2_6", 35, 10, false),
|
||||||
|
|
||||||
|
vllmEmbeddingCmd("MiniCPM/minicpm-embedding", "MiniCPM-Embedding", 20, 8),
|
||||||
|
vllmEmbeddingCmd("MiniCPM/minicpm-embedding-light", "MiniCPM-Embedding-Light", 20, 5),
|
||||||
|
|
||||||
|
vllmEmbeddingCmd("MiniCPM/minicpm-reranker", "MiniCPM-Reranker", 20, 8),
|
||||||
|
vllmEmbeddingCmd("MiniCPM/minicpm-reranker-light", "MiniCPM-Reranker-Light", 20, 5)
|
||||||
|
)
|
||||||
|
),
|
||||||
|
"config.yaml"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DockerCmd llamaCppCmd(String name, String model, Integer thread) {
|
||||||
|
return llamaCppCmd(name, model, thread, false, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DockerCmd llamaCppEmbeddingCmd(String name, String model, Integer thread) {
|
||||||
|
return llamaCppCmd(name, model, thread, true, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DockerCmd llamaCppRerankerCmd(String name, String model, Integer thread) {
|
||||||
|
return llamaCppCmd(name, model, thread, false, true, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DockerCmd llamaCppVisualCmd(String name, String model, Integer thread) {
|
||||||
|
return llamaCppCmd(name, model, thread, false, false, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DockerCmd llamaCppCmd(String name, String model, Integer thread, Boolean isEmbedding, Boolean isReranker, Boolean isVisual) {
|
||||||
|
List<String> arguments = ListUtil.list(
|
||||||
|
false,
|
||||||
|
StrUtil.format("-m /models/{}", model),
|
||||||
|
"--port ${PORT}",
|
||||||
|
StrUtil.format("--api-key {}", API_KEY),
|
||||||
|
"-c 0",
|
||||||
|
"-b 4096",
|
||||||
|
StrUtil.format("-t {}", thread),
|
||||||
|
"-np 5",
|
||||||
|
"--log-disable",
|
||||||
|
"--no-webui"
|
||||||
|
);
|
||||||
|
if (isEmbedding) {
|
||||||
|
arguments.add("--embedding");
|
||||||
|
arguments.add("-ub 8192");
|
||||||
|
arguments.add("--pooling mean");
|
||||||
|
} else if (isReranker) {
|
||||||
|
arguments.add("--reranking");
|
||||||
|
} else if (isVisual) {
|
||||||
|
arguments.add(StrUtil.format("--mmproj /models/{}.mmproj", model));
|
||||||
|
} else {
|
||||||
|
arguments.add("--jinja");
|
||||||
|
}
|
||||||
|
return new DockerCmd(
|
||||||
|
"ghcr.io/ggml-org/llama.cpp:server",
|
||||||
|
name,
|
||||||
|
model,
|
||||||
|
StrUtil.format("http://llamacpp-{}:${PORT}", displayName(model)),
|
||||||
|
List.of(StrUtil.format("--name llamacpp-{}", displayName(model))),
|
||||||
|
arguments
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DockerCmd vllmCmd(String name, String model, Integer thread, Integer cache, Boolean isReasonable) {
|
||||||
|
return vllmCmd(name, model, thread, cache, false, false, isReasonable, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DockerCmd vllmEmbeddingCmd(String name, String model, Integer thread, Integer cache) {
|
||||||
|
return vllmCmd(name, model, thread, cache, true, false, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DockerCmd vllmRerankerCmd(String name, String model, Integer thread, Integer cache) {
|
||||||
|
return vllmCmd(name, model, thread, cache, false, true, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DockerCmd vllmVisualCmd(String name, String model, Integer thread, Integer cache, Boolean isReasonable) {
|
||||||
|
return vllmCmd(name, model, thread, cache, false, false, isReasonable, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DockerCmd vllmCmd(String name, String model, Integer thread, Integer cache, Boolean isEmbedding, Boolean isReranker, Boolean isReasonable, Boolean isVisual) {
|
||||||
|
List<String> arguments = ListUtil.list(
|
||||||
|
false,
|
||||||
|
StrUtil.format("--model /models/{}", model),
|
||||||
|
StrUtil.format("--served-model-name {}", name),
|
||||||
|
"--port ${PORT}",
|
||||||
|
StrUtil.format("--api-key {}", API_KEY),
|
||||||
|
"--disable-log-requests",
|
||||||
|
"--uvicorn-log-level error"
|
||||||
|
);
|
||||||
|
if (isEmbedding) {
|
||||||
|
arguments.add("--task embedding");
|
||||||
|
} else if (isReranker) {
|
||||||
|
} else if (isReasonable) {
|
||||||
|
arguments.add("--enable-auto-tool-choice");
|
||||||
|
arguments.add("--tool-call-parser hermes");
|
||||||
|
arguments.add("--enable-reasoning");
|
||||||
|
arguments.add("--reasoning-parser deepseek_r1");
|
||||||
|
}
|
||||||
|
return new DockerCmd(
|
||||||
|
"vllm-server-cpu:0.9.1",
|
||||||
|
name,
|
||||||
|
model,
|
||||||
|
StrUtil.format("http://vllm-{}:${PORT}", displayName(model)),
|
||||||
|
List.of(
|
||||||
|
StrUtil.format("--name vllm-{}", displayName(model)),
|
||||||
|
"--privileged=true",
|
||||||
|
"--shm-size=4g",
|
||||||
|
StrUtil.format("-e VLLM_CPU_KVCACHE_SPACE={}", cache),
|
||||||
|
StrUtil.format("-e VLLM_CPU_OMP_THREADS_BIND=0-{}", thread - 1),
|
||||||
|
"-e VLLM_CPU_MOE_PREPACK=0"
|
||||||
|
),
|
||||||
|
arguments
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class DockerCmd {
|
||||||
|
private String image;
|
||||||
|
private String name;
|
||||||
|
private String model;
|
||||||
|
private String proxy;
|
||||||
|
private List<String> options = ListUtil.list(
|
||||||
|
false,
|
||||||
|
"--rm",
|
||||||
|
"--network llama",
|
||||||
|
"-v /data/models:/models"
|
||||||
|
);
|
||||||
|
private List<String> arguments = ListUtil.list(false);
|
||||||
|
|
||||||
|
public DockerCmd(String image, String name, String model, String proxy, List<String> options, List<String> arguments) {
|
||||||
|
this.image = image;
|
||||||
|
this.name = name;
|
||||||
|
this.model = model;
|
||||||
|
this.proxy = proxy;
|
||||||
|
this.options.addAll(options);
|
||||||
|
this.arguments.addAll(arguments);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getImage() {
|
||||||
|
return image;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setImage(String image) {
|
||||||
|
this.image = image;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setName(String name) {
|
||||||
|
this.name = name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getModel() {
|
||||||
|
return model;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setModel(String model) {
|
||||||
|
this.model = model;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getProxy() {
|
||||||
|
return proxy;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProxy(String proxy) {
|
||||||
|
this.proxy = proxy;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getOptions() {
|
||||||
|
return options;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOptions(List<String> options) {
|
||||||
|
this.options = options;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getArguments() {
|
||||||
|
return arguments;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setArguments(List<String> arguments) {
|
||||||
|
this.arguments = arguments;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "DockerCmd{" +
|
||||||
|
"image='" + image + '\'' +
|
||||||
|
", name='" + name + '\'' +
|
||||||
|
", model='" + model + '\'' +
|
||||||
|
", proxy='" + proxy + '\'' +
|
||||||
|
", options=" + options +
|
||||||
|
", arguments=" + arguments +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
healthCheckTimeout: 120
|
||||||
|
logLevel: warn
|
||||||
|
models:
|
||||||
|
<#list models as model>
|
||||||
|
"${model.name}":
|
||||||
|
proxy: ${model.proxy}
|
||||||
|
ttl: 86400
|
||||||
|
cmd: |
|
||||||
|
docker run
|
||||||
|
<#list model.options as option>
|
||||||
|
${option}
|
||||||
|
</#list>
|
||||||
|
${model.image}
|
||||||
|
<#list model.arguments as arg>
|
||||||
|
${arg}
|
||||||
|
</#list>
|
||||||
|
</#list>
|
||||||
|
groups:
|
||||||
|
"persistent":
|
||||||
|
swap: false
|
||||||
|
exclusive: false
|
||||||
|
members:
|
||||||
|
<#list models as model>
|
||||||
|
- "${model.name}"
|
||||||
|
</#list>
|
||||||
Reference in New Issue
Block a user