feat(ai): 增加llama-swap配置文件生成工具

This commit is contained in:
v-zhangjc9
2025-06-12 20:24:46 +08:00
parent 506e28c9f7
commit 5160c59ab0
7 changed files with 375 additions and 2 deletions

View File

@@ -77,11 +77,12 @@ export const run_package_batch = async (projects) => {
}
}
const upload = async (file_path) => {
export const upload = async (file_path) => {
let start = new Date().getTime()
let basename = path.basename(file_path)
let response = await spinner(
`Uploading project ${file_path}`,
() => fetch(`${upload_url}/file/upload/${path.basename(file_path)}`, {
() => fetch(`${upload_url}/file/upload/${basename}`, {
method: 'POST',
headers: {
'Content-Type': 'application/octet-stream',
@@ -98,6 +99,7 @@ const upload = async (file_path) => {
console.log(`✅ Finished upload ${file_path} (${millisecondToString((new Date().getTime()) - start)})`)
console.log(`📘 Uploaded ${fileSize(fs.statSync(file_path).size)}`)
console.log(`📘 MD5 ${md5file.sync(file_path)}`)
console.log(`📘 Download curl http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.124:36800/file/download/${basename} -o ${basename}`)
fs.rmSync(file_path)
}

View File

@@ -13,6 +13,7 @@
<module>service-ai-core</module>
<module>service-ai-chat</module>
<module>service-ai-knowledge</module>
<module>service-ai-cli</module>
</modules>
<properties>

View File

@@ -0,0 +1,46 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module descriptor for service-ai-cli, a command-line module of the service-ai parent project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.lanyuanxiaoyao</groupId>
<artifactId>service-ai</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<artifactId>service-ai-cli</artifactId>
<!-- No versions are declared below; presumably they are managed by the parent POM (verify there). -->
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
</dependency>
<dependency>
<groupId>org.freemarker</groupId>
<artifactId>freemarker</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Runs the Spring Boot "repackage" goal during the package phase. -->
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>repackage</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,22 @@
package com.lanyuanxiaoyao.service.ai.cli;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Spring Boot entry point of the command-line generator.
 *
 * <p>Once the application context has started, {@link #run(ApplicationArguments)} is invoked and
 * delegates to {@link LlamaSwapTool#generate()}.
 *
 * @author lanyuanxiaoyao
 * @version 20250612
 */
@SpringBootApplication
public class Generator implements ApplicationRunner {

    /**
     * Boots the Spring application with this class as the primary source.
     *
     * @param args raw command-line arguments, forwarded to Spring Boot unchanged
     */
    public static void main(String[] args) {
        SpringApplication.run(Generator.class, args);
    }

    /**
     * Runs the llama-swap configuration generator.
     *
     * @param args parsed application arguments; not consulted by this implementation
     * @throws Exception if the generator tool fails
     */
    @Override
    public void run(ApplicationArguments args) throws Exception {
        GeneratorTool tool = new LlamaSwapTool();
        tool.generate();
    }
}

View File

@@ -0,0 +1,26 @@
package com.lanyuanxiaoyao.service.ai.cli;
import cn.hutool.core.io.FileUtil;
import cn.hutool.extra.template.Template;
import cn.hutool.extra.template.TemplateConfig;
import cn.hutool.extra.template.TemplateEngine;
import cn.hutool.extra.template.TemplateUtil;
import java.nio.charset.StandardCharsets;
import java.util.Map;
/**
 * Base class for tools that render a template into a file.
 *
 * <p>Subclasses implement {@link #generate()} and call
 * {@link #generateTemplate(String, Map, String)} to produce their output.
 *
 * @author lanyuanxiaoyao
 * @version 20250612
 */
public abstract class GeneratorTool {

    /** Template engine configured with the {@code template} path in classpath resource mode. */
    private final TemplateEngine engine = TemplateUtil.createEngine(
        new TemplateConfig("template", TemplateConfig.ResourceMode.CLASSPATH)
    );

    /**
     * Renders a template with the given data and writes the result as UTF-8 text.
     *
     * <p>Whatever currently exists at {@code targetScriptPath} is deleted before the new
     * content is written.
     *
     * @param templatePath     template name handed to the engine
     * @param data             model exposed to the template
     * @param targetScriptPath path of the file to (re)create
     */
    protected void generateTemplate(String templatePath, Map<?, ?> data, String targetScriptPath) {
        // Render fully before touching the target, so a rendering failure leaves the old file alone.
        final Template compiled = engine.getTemplate(templatePath);
        final String rendered = compiled.render(data);

        FileUtil.del(targetScriptPath);
        FileUtil.writeString(rendered, targetScriptPath, StandardCharsets.UTF_8);
    }

    /**
     * Produces this tool's output.
     *
     * @throws Exception if generation fails
     */
    public abstract void generate() throws Exception;
}

View File

@@ -0,0 +1,251 @@
package com.lanyuanxiaoyao.service.ai.cli;
import cn.hutool.core.collection.ListUtil;
import cn.hutool.core.util.StrUtil;
import java.util.List;
import java.util.Locale;
import java.util.Map;
/**
 * Generates a {@code config.yaml} from the {@code llama-swap.ftl} template.
 *
 * <p>Every model entry is described by a {@link DockerCmd}: a {@code docker run} invocation of
 * either the llama.cpp server image or a vLLM CPU image, plus the proxy URL under which the
 * started container is reached.
 *
 * @author lanyuanxiaoyao
 * @version 20250612
 */
public class LlamaSwapTool extends GeneratorTool {
    // NOTE(review): this secret is committed in source and is written verbatim (unquoted) into the
    // generated command lines. Consider loading it from the environment or external configuration.
    private static final String API_KEY = "*XMySqV%>hR&v>>g*NwCs3tpQ5FVMFEF2VHVTj<MYQd$&@$sY7CgqNyea4giJi4";

    /**
     * Derives an identifier from a model file name, used for container names and proxy hosts.
     *
     * <p>Each run of whitespace and each {@code '.'} becomes {@code '_'}; the result is lower-cased.
     * Lower-casing uses {@link Locale#ROOT} so the output does not depend on the JVM default locale
     * (under a Turkish locale {@code "Instruct"} would otherwise become {@code "ınstruct"}).
     *
     * @param name model file or directory name, must not be {@code null}
     * @return the normalized identifier
     */
    public static String displayName(String name) {
        return name.replaceAll("\\s+", "_")
            .replaceAll("\\.", "_")
            .toLowerCase(Locale.ROOT);
    }

    /**
     * Renders {@code llama-swap.ftl} with the fixed model list below into {@code config.yaml}
     * (a relative path).
     */
    @Override
    public void generate() {
        generateTemplate(
            "llama-swap.ftl",
            Map.of(
                "models", List.of(
                    llamaCppEmbeddingCmd("BGE/bge-m3-q4km", "bge-m3-Q4_K_M.gguf", 20),
                    vllmEmbeddingCmd("BGE/bge-m3", "bge-m3", 20, 5),
                    // NOTE(review): "beg-reranker" looks like a typo for "bge-reranker". It is left
                    // unchanged because the name is the published model id; confirm before renaming.
                    llamaCppRerankerCmd("BGE/beg-reranker-v2-q4km", "bge-reranker-v2-m3-Q4_K_M.gguf", 20),
                    llamaCppRerankerCmd("BGE/beg-reranker-v2", "bge-reranker-v2-m3", 20),
                    vllmCmd("Qwen3/qwen3-0.6b", "Qwen3-0.6B", 35, 5, true),
                    vllmCmd("Qwen3/qwen3-1.7b", "Qwen3-1.7B", 35, 5, true),
                    vllmCmd("Qwen3/qwen3-4b", "Qwen3-4B", 35, 8, true),
                    llamaCppCmd("Qwen3/qwen3-4b-q4km", "Qwen3-4B-Q4_K_M.gguf", 35),
                    llamaCppCmd("Qwen3/qwen3-8b-q4km", "Qwen3-8B-Q4_K_M.gguf", 35),
                    vllmEmbeddingCmd("Qwen3/qwen3-embedding-0.6b", "Qwen3-Embedding-0.6B", 35, 5),
                    vllmEmbeddingCmd("Qwen3/qwen3-embedding-4b", "Qwen3-Embedding-4B", 35, 8),
                    llamaCppEmbeddingCmd("Qwen3/qwen3-embedding-4b-q4km", "Qwen3-Embedding-4B-Q4_K_M.gguf", 35),
                    llamaCppEmbeddingCmd("Qwen3/qwen3-embedding-8b-q4km", "Qwen3-Embedding-8B-Q4_K_M.gguf", 35),
                    vllmRerankerCmd("Qwen3/qwen3-reranker-0.6b", "Qwen3-Reranker-0.6B", 35, 5),
                    vllmRerankerCmd("Qwen3/qwen3-reranker-4b", "Qwen3-Reranker-4B", 35, 8),
                    llamaCppVisualCmd("Qwen2.5/qwen2.5-vl-7b", "Qwen2.5-VL-7B-Instruct-BF16.gguf", 35),
                    llamaCppVisualCmd("Qwen2.5/qwen2.5-vl-7b-q4km", "Qwen2.5-VL-7B-Instruct-Q4_K_M.gguf", 35),
                    vllmCmd("Qwen2.5/qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct", 35, 8, false),
                    vllmCmd("Qwen2.5/qwen2.5-vl-7b-instruct-awq", "Qwen2.5-VL-7B-Instruct-AWQ", 35, 8, false),
                    vllmCmd("Qwen2.5/qwen2.5-vl-3b-instruct", "Qwen2.5-VL-3B-Instruct", 35, 8, false),
                    vllmCmd("Qwen2.5/qwen2.5-vl-3b-instruct-awq", "Qwen2.5-VL-3B-Instruct-AWQ", 35, 8, false),
                    llamaCppVisualCmd("MiniCPM/minicpm-o-2.6-7.6b-q4km", "MiniCPM-o-2_6-7.6B-Q4_K_M.gguf", 35),
                    vllmCmd("MiniCPM/minicpm-o-2.6-7.6b", "MiniCPM-o-2_6", 35, 10, false),
                    vllmEmbeddingCmd("MiniCPM/minicpm-embedding", "MiniCPM-Embedding", 20, 8),
                    vllmEmbeddingCmd("MiniCPM/minicpm-embedding-light", "MiniCPM-Embedding-Light", 20, 5),
                    // NOTE(review): the two reranker models below are registered through the
                    // embedding helper (adds "--task embedding"), unlike the Qwen3 rerankers above.
                    // Possibly a copy-paste slip; kept as-is so the generated config is unchanged.
                    vllmEmbeddingCmd("MiniCPM/minicpm-reranker", "MiniCPM-Reranker", 20, 8),
                    vllmEmbeddingCmd("MiniCPM/minicpm-reranker-light", "MiniCPM-Reranker-Light", 20, 5)
                )
            ),
            "config.yaml"
        );
    }

    /** llama.cpp server command for a text-generation model (adds {@code --jinja}). */
    private DockerCmd llamaCppCmd(String name, String model, int thread) {
        return llamaCppCmd(name, model, thread, false, false, false);
    }

    /** llama.cpp server command for an embedding model. */
    private DockerCmd llamaCppEmbeddingCmd(String name, String model, int thread) {
        return llamaCppCmd(name, model, thread, true, false, false);
    }

    /** llama.cpp server command for a reranking model. */
    private DockerCmd llamaCppRerankerCmd(String name, String model, int thread) {
        return llamaCppCmd(name, model, thread, false, true, false);
    }

    /** llama.cpp server command for a vision model (adds an {@code --mmproj} file argument). */
    private DockerCmd llamaCppVisualCmd(String name, String model, int thread) {
        return llamaCppCmd(name, model, thread, false, false, true);
    }

    /**
     * Builds the {@code docker run} description for a llama.cpp server container.
     *
     * <p>The mode flags are evaluated in priority order embedding, reranker, visual; when none is
     * set the model is treated as a plain text-generation model.
     *
     * @param name        model id used as the key in the generated configuration
     * @param model       model file name below {@code /models} inside the container
     * @param thread      value passed to {@code -t}
     * @param isEmbedding add the embedding flags
     * @param isReranker  add {@code --reranking}
     * @param isVisual    add {@code --mmproj /models/<model>.mmproj}
     * @return the command description
     */
    private DockerCmd llamaCppCmd(String name, String model, int thread, boolean isEmbedding, boolean isReranker, boolean isVisual) {
        List<String> arguments = ListUtil.list(
            false,
            StrUtil.format("-m /models/{}", model),
            "--port ${PORT}",
            StrUtil.format("--api-key {}", API_KEY),
            "-c 0",
            "-b 4096",
            StrUtil.format("-t {}", thread),
            "-np 5",
            "--log-disable",
            "--no-webui"
        );
        if (isEmbedding) {
            arguments.add("--embedding");
            arguments.add("-ub 8192");
            arguments.add("--pooling mean");
        } else if (isReranker) {
            arguments.add("--reranking");
        } else if (isVisual) {
            // The projector path is the full model file name (extension included) plus ".mmproj".
            arguments.add(StrUtil.format("--mmproj /models/{}.mmproj", model));
        } else {
            arguments.add("--jinja");
        }
        return new DockerCmd(
            "ghcr.io/ggml-org/llama.cpp:server",
            name,
            model,
            StrUtil.format("http://llamacpp-{}:${PORT}", displayName(model)),
            List.of(StrUtil.format("--name llamacpp-{}", displayName(model))),
            arguments
        );
    }

    /** vLLM command for a chat model; {@code isReasonable} enables tool-call and reasoning parsers. */
    private DockerCmd vllmCmd(String name, String model, int thread, int cache, boolean isReasonable) {
        return vllmCmd(name, model, thread, cache, false, false, isReasonable, false);
    }

    /** vLLM command for an embedding model (adds {@code --task embedding}). */
    private DockerCmd vllmEmbeddingCmd(String name, String model, int thread, int cache) {
        return vllmCmd(name, model, thread, cache, true, false, false, false);
    }

    /** vLLM command for a reranking model (currently adds no extra arguments). */
    private DockerCmd vllmRerankerCmd(String name, String model, int thread, int cache) {
        return vllmCmd(name, model, thread, cache, false, true, false, false);
    }

    /** vLLM command for a vision model. Not referenced by {@link #generate()} at the moment. */
    private DockerCmd vllmVisualCmd(String name, String model, int thread, int cache, boolean isReasonable) {
        return vllmCmd(name, model, thread, cache, false, false, isReasonable, true);
    }

    /**
     * Builds the {@code docker run} description for a vLLM CPU server container.
     *
     * @param name         model id; also passed as {@code --served-model-name}
     * @param model        model directory name below {@code /models} inside the container
     * @param thread       thread count; rendered as {@code VLLM_CPU_OMP_THREADS_BIND=0-(thread-1)}
     * @param cache        value for {@code VLLM_CPU_KVCACHE_SPACE}
     * @param isEmbedding  add {@code --task embedding}
     * @param isReranker   suppresses the reasoning flags; adds nothing itself
     * @param isReasonable add tool-choice and reasoning-parser flags
     * @param isVisual     currently unused
     * @return the command description
     */
    private DockerCmd vllmCmd(String name, String model, int thread, int cache, boolean isEmbedding, boolean isReranker, boolean isReasonable, boolean isVisual) {
        List<String> arguments = ListUtil.list(
            false,
            StrUtil.format("--model /models/{}", model),
            StrUtil.format("--served-model-name {}", name),
            "--port ${PORT}",
            StrUtil.format("--api-key {}", API_KEY),
            "--disable-log-requests",
            "--uvicorn-log-level error"
        );
        if (isEmbedding) {
            arguments.add("--task embedding");
        } else if (isReranker) {
            // Intentionally empty: rerankers get no extra arguments.
            // NOTE(review): confirm whether an explicit task flag is needed for rerankers.
        } else if (isReasonable) {
            arguments.add("--enable-auto-tool-choice");
            arguments.add("--tool-call-parser hermes");
            arguments.add("--enable-reasoning");
            arguments.add("--reasoning-parser deepseek_r1");
        }
        return new DockerCmd(
            "vllm-server-cpu:0.9.1",
            name,
            model,
            StrUtil.format("http://vllm-{}:${PORT}", displayName(model)),
            List.of(
                StrUtil.format("--name vllm-{}", displayName(model)),
                "--privileged=true",
                "--shm-size=4g",
                StrUtil.format("-e VLLM_CPU_KVCACHE_SPACE={}", cache),
                StrUtil.format("-e VLLM_CPU_OMP_THREADS_BIND=0-{}", thread - 1),
                "-e VLLM_CPU_MOE_PREPACK=0"
            ),
            arguments
        );
    }

    /**
     * Template model of one {@code docker run} invocation.
     *
     * <p>{@code options} are placed before the image and always start with the shared defaults
     * ({@code --rm}, the {@code llama} network, the models volume); {@code arguments} follow the
     * image. The template reads the values through the getters.
     */
    public static class DockerCmd {
        private String image;
        private String name;
        private String model;
        private String proxy;
        private List<String> options = ListUtil.list(
            false,
            "--rm",
            "--network llama",
            "-v /data/models:/models"
        );
        private List<String> arguments = ListUtil.list(false);

        /**
         * @param image     docker image reference
         * @param name      model id
         * @param model     model file or directory name
         * @param proxy     URL of the started container
         * @param options   extra docker options, appended after the shared defaults
         * @param arguments arguments passed to the container entry point
         */
        public DockerCmd(String image, String name, String model, String proxy, List<String> options, List<String> arguments) {
            this.image = image;
            this.name = name;
            this.model = model;
            this.proxy = proxy;
            this.options.addAll(options);
            this.arguments.addAll(arguments);
        }

        public String getImage() {
            return image;
        }

        public void setImage(String image) {
            this.image = image;
        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public String getModel() {
            return model;
        }

        public void setModel(String model) {
            this.model = model;
        }

        public String getProxy() {
            return proxy;
        }

        public void setProxy(String proxy) {
            this.proxy = proxy;
        }

        public List<String> getOptions() {
            return options;
        }

        public void setOptions(List<String> options) {
            this.options = options;
        }

        public List<String> getArguments() {
            return arguments;
        }

        public void setArguments(List<String> arguments) {
            this.arguments = arguments;
        }

        @Override
        public String toString() {
            return "DockerCmd{" +
                "image='" + image + '\'' +
                ", name='" + name + '\'' +
                ", model='" + model + '\'' +
                ", proxy='" + proxy + '\'' +
                ", options=" + options +
                ", arguments=" + arguments +
                '}';
        }
    }
}

View File

@@ -0,0 +1,25 @@
<#-- Configuration template. Expects `models`: a sequence of objects exposing name, proxy, image, options and arguments. -->
healthCheckTimeout: 120
logLevel: warn
models:
<#-- One entry per model: the start command is `docker run`, then the options, the image, then the arguments. -->
<#list models as model>
"${model.name}":
proxy: ${model.proxy}
ttl: 86400
cmd: |
docker run
<#list model.options as option>
${option}
</#list>
${model.image}
<#list model.arguments as arg>
${arg}
</#list>
</#list>
groups:
"persistent":
swap: false
exclusive: false
members:
<#-- Every model is listed as a member of the "persistent" group. -->
<#list models as model>
- "${model.name}"
</#list>