From 5160c59ab014d8b48ad7a6bbd7dba31fb5e90234 Mon Sep 17 00:00:00 2001
From: v-zhangjc9
Date: Thu, 12 Jun 2025 20:24:46 +0800
Subject: [PATCH] feat(ai): add llama-swap config file generation tool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 bin/library.js                                |   6 +-
 service-ai/pom.xml                            |   1 +
 service-ai/service-ai-cli/pom.xml             |  46 ++++
 .../service/ai/cli/Generator.java             |  22 ++
 .../service/ai/cli/GeneratorTool.java         |  26 ++
 .../service/ai/cli/LlamaSwapTool.java         | 251 ++++++++++++++++++
 .../main/resources/template/llama-swap.ftl    |  25 ++
 7 files changed, 375 insertions(+), 2 deletions(-)
 create mode 100644 service-ai/service-ai-cli/pom.xml
 create mode 100644 service-ai/service-ai-cli/src/main/java/com/lanyuanxiaoyao/service/ai/cli/Generator.java
 create mode 100644 service-ai/service-ai-cli/src/main/java/com/lanyuanxiaoyao/service/ai/cli/GeneratorTool.java
 create mode 100644 service-ai/service-ai-cli/src/main/java/com/lanyuanxiaoyao/service/ai/cli/LlamaSwapTool.java
 create mode 100644 service-ai/service-ai-cli/src/main/resources/template/llama-swap.ftl

diff --git a/bin/library.js b/bin/library.js
index 28cc4be..36b3ad9 100644
--- a/bin/library.js
+++ b/bin/library.js
@@ -77,11 +77,12 @@ export const run_package_batch = async (projects) => {
     }
 }
 
-const upload = async (file_path) => {
+export const upload = async (file_path) => {
     let start = new Date().getTime()
+    let basename = path.basename(file_path)
     let response = await spinner(
         `Uploading project ${file_path}`,
-        () => fetch(`${upload_url}/file/upload/${path.basename(file_path)}`, {
+        () => fetch(`${upload_url}/file/upload/${basename}`, {
             method: 'POST',
             headers: {
                 'Content-Type': 'application/octet-stream',
@@ -98,6 +99,7 @@ const upload = async (file_path) => {
     console.log(`✅ Finished upload ${file_path} (${millisecondToString((new Date().getTime()) - start)})`)
     console.log(`📘 Uploaded ${fileSize(fs.statSync(file_path).size)}`)
     console.log(`📘 MD5 ${md5file.sync(file_path)}`)
+    console.log(`📘 Download curl http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.124:36800/file/download/${basename} -o ${basename}`)
 
     fs.rmSync(file_path)
 }
diff --git a/service-ai/pom.xml b/service-ai/pom.xml
index f54116c..af12c61 100644
--- a/service-ai/pom.xml
+++ b/service-ai/pom.xml
@@ -13,6 +13,7 @@
         <module>service-ai-core</module>
         <module>service-ai-chat</module>
         <module>service-ai-knowledge</module>
+        <module>service-ai-cli</module>
     </modules>
diff --git a/service-ai/service-ai-cli/pom.xml b/service-ai/service-ai-cli/pom.xml
new file mode 100644
index 0000000..61083c3
--- /dev/null
+++ b/service-ai/service-ai-cli/pom.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>com.lanyuanxiaoyao</groupId>
+        <artifactId>service-ai</artifactId>
+        <version>1.0.0-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>service-ai-cli</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>cn.hutool</groupId>
+            <artifactId>hutool-all</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.freemarker</groupId>
+            <artifactId>freemarker</artifactId>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.springframework.boot</groupId>
+                <artifactId>spring-boot-maven-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>repackage</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/service-ai/service-ai-cli/src/main/java/com/lanyuanxiaoyao/service/ai/cli/Generator.java b/service-ai/service-ai-cli/src/main/java/com/lanyuanxiaoyao/service/ai/cli/Generator.java
new file mode 100644
index 0000000..5d2c252
--- /dev/null
+++ b/service-ai/service-ai-cli/src/main/java/com/lanyuanxiaoyao/service/ai/cli/Generator.java
@@ -0,0 +1,22 @@
+package com.lanyuanxiaoyao.service.ai.cli;
+
+import org.springframework.boot.ApplicationArguments;
+import org.springframework.boot.ApplicationRunner;
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+
+/**
+ * @author lanyuanxiaoyao
+ * @version 20250612
+ */
+@SpringBootApplication
+public class Generator implements ApplicationRunner {
+    public static void main(String[] args) {
+        SpringApplication.run(Generator.class, args);
+    }
+
+    @Override
+    public void run(ApplicationArguments args) throws Exception {
+        new LlamaSwapTool().generate();
+    }
+}
diff --git a/service-ai/service-ai-cli/src/main/java/com/lanyuanxiaoyao/service/ai/cli/GeneratorTool.java b/service-ai/service-ai-cli/src/main/java/com/lanyuanxiaoyao/service/ai/cli/GeneratorTool.java
new file mode 100644
index 0000000..ec2986a
--- /dev/null
+++ b/service-ai/service-ai-cli/src/main/java/com/lanyuanxiaoyao/service/ai/cli/GeneratorTool.java
@@ -0,0 +1,26 @@
+package com.lanyuanxiaoyao.service.ai.cli;
+
+import cn.hutool.core.io.FileUtil;
+import cn.hutool.extra.template.Template;
+import cn.hutool.extra.template.TemplateConfig;
+import cn.hutool.extra.template.TemplateEngine;
+import cn.hutool.extra.template.TemplateUtil;
+import java.nio.charset.StandardCharsets;
+import java.util.Map;
+
+/**
+ * @author lanyuanxiaoyao
+ * @version 20250612
+ */
+public abstract class GeneratorTool {
+    private final TemplateEngine engine = TemplateUtil.createEngine(new TemplateConfig("template", TemplateConfig.ResourceMode.CLASSPATH));
+
+    protected void generateTemplate(String templatePath, Map<String, Object> data, String targetScriptPath) {
+        Template template = engine.getTemplate(templatePath);
+        String script = template.render(data);
+        FileUtil.del(targetScriptPath);
+        FileUtil.writeString(script, targetScriptPath, StandardCharsets.UTF_8);
+    }
+
+    public abstract void generate() throws Exception;
+}
diff --git a/service-ai/service-ai-cli/src/main/java/com/lanyuanxiaoyao/service/ai/cli/LlamaSwapTool.java b/service-ai/service-ai-cli/src/main/java/com/lanyuanxiaoyao/service/ai/cli/LlamaSwapTool.java
new file mode 100644
index 0000000..4587932
--- /dev/null
+++ b/service-ai/service-ai-cli/src/main/java/com/lanyuanxiaoyao/service/ai/cli/LlamaSwapTool.java
@@ -0,0 +1,251 @@
+package com.lanyuanxiaoyao.service.ai.cli;
+
+import cn.hutool.core.collection.ListUtil;
+import cn.hutool.core.util.StrUtil;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * @author lanyuanxiaoyao
+ * @version 20250612
+ */
+public class LlamaSwapTool extends GeneratorTool {
+    private static final String API_KEY = "*XMySqV%>hR&v>>g*NwCs3tpQ5FVMFEF2VHVTj
+        List<String> arguments = ListUtil.list(
+            false,
+            StrUtil.format("-m /models/{}", model),
+            "--port ${PORT}",
+            StrUtil.format("--api-key {}", API_KEY),
+            "-c 0",
+            "-b 4096",
+            StrUtil.format("-t {}", thread),
+            "-np 5",
+            "--log-disable",
+            "--no-webui"
+        );
+        if (isEmbedding) {
+            arguments.add("--embedding");
+            arguments.add("-ub 8192");
+            arguments.add("--pooling mean");
+        } else if (isReranker) {
+            arguments.add("--reranking");
+        } else if (isVisual) {
+            arguments.add(StrUtil.format("--mmproj /models/{}.mmproj", model));
+        } else {
+            arguments.add("--jinja");
+        }
+        return new DockerCmd(
+            "ghcr.io/ggml-org/llama.cpp:server",
+            name,
+            model,
+            StrUtil.format("http://llamacpp-{}:${PORT}", displayName(model)),
+            List.of(StrUtil.format("--name llamacpp-{}", displayName(model))),
+            arguments
+        );
+    }
+
+    private DockerCmd vllmCmd(String name, String model, Integer thread, Integer cache, Boolean isReasonable) {
+        return vllmCmd(name, model, thread, cache, false, false, isReasonable, false);
+    }
+
+    private DockerCmd vllmEmbeddingCmd(String name, String model, Integer thread, Integer cache) {
+        return vllmCmd(name, model, thread, cache, true, false, false, false);
+    }
+
+    private DockerCmd vllmRerankerCmd(String name, String model, Integer thread, Integer cache) {
+        return vllmCmd(name, model, thread, cache, false, true, false, false);
+    }
+
+    private DockerCmd vllmVisualCmd(String name, String model, Integer thread, Integer cache, Boolean isReasonable) {
+        return vllmCmd(name, model, thread, cache, false, false, isReasonable, true);
+    }
+
+    private DockerCmd vllmCmd(String name, String model, Integer thread, Integer cache, Boolean isEmbedding, Boolean isReranker, Boolean isReasonable, Boolean isVisual) {
+        List<String> arguments = ListUtil.list(
+            false,
+            StrUtil.format("--model /models/{}", model),
+            StrUtil.format("--served-model-name {}", name),
+            "--port ${PORT}",
+            StrUtil.format("--api-key {}", API_KEY),
+            "--disable-log-requests",
+            "--uvicorn-log-level error"
+        );
+        if (isEmbedding) {
+            arguments.add("--task embedding");
+        } else if (isReranker) {
+        } else if (isReasonable) {
+            arguments.add("--enable-auto-tool-choice");
+            arguments.add("--tool-call-parser hermes");
+            arguments.add("--enable-reasoning");
+            arguments.add("--reasoning-parser deepseek_r1");
+        }
+        return new DockerCmd(
+            "vllm-server-cpu:0.9.1",
+            name,
+            model,
+            StrUtil.format("http://vllm-{}:${PORT}", displayName(model)),
+            List.of(
+                StrUtil.format("--name vllm-{}", displayName(model)),
+                "--privileged=true",
+                "--shm-size=4g",
+                StrUtil.format("-e VLLM_CPU_KVCACHE_SPACE={}", cache),
+                StrUtil.format("-e VLLM_CPU_OMP_THREADS_BIND=0-{}", thread - 1),
+                "-e VLLM_CPU_MOE_PREPACK=0"
+            ),
+            arguments
+        );
+    }
+
+    public static class DockerCmd {
+        private String image;
+        private String name;
+        private String model;
+        private String proxy;
+        private List<String> options = ListUtil.list(
+            false,
+            "--rm",
+            "--network llama",
+            "-v /data/models:/models"
+        );
+        private List<String> arguments = ListUtil.list(false);
+
+        public DockerCmd(String image, String name, String model, String proxy, List<String> options, List<String> arguments) {
+            this.image = image;
+            this.name = name;
+            this.model = model;
+            this.proxy = proxy;
+            this.options.addAll(options);
+            this.arguments.addAll(arguments);
+        }
+
+        public String getImage() {
+            return image;
+        }
+
+        public void setImage(String image) {
+            this.image = image;
+        }
+
+        public String getName() {
+            return name;
+        }
+
+        public void setName(String name) {
+            this.name = name;
+        }
+
+        public String getModel() {
+            return model;
+        }
+
+        public void setModel(String model) {
+            this.model = model;
+        }
+
+        public String getProxy() {
+            return proxy;
+        }
+
+        public void setProxy(String proxy) {
+            this.proxy = proxy;
+        }
+
+        public List<String> getOptions() {
+            return options;
+        }
+
+        public void setOptions(List<String> options) {
+            this.options = options;
+        }
+
+        public List<String> getArguments() {
+            return arguments;
+        }
+
+        public void setArguments(List<String> arguments) {
+            this.arguments = arguments;
+        }
+
+        @Override
+        public String toString() {
+            return "DockerCmd{" +
+                "image='" + image + '\'' +
+                ", name='" + name + '\'' +
+                ", model='" + model + '\'' +
+                ", proxy='" + proxy + '\'' +
+                ", options=" + options +
+                ", arguments=" + arguments +
+                '}';
+        }
+    }
+}
diff --git a/service-ai/service-ai-cli/src/main/resources/template/llama-swap.ftl b/service-ai/service-ai-cli/src/main/resources/template/llama-swap.ftl
new file mode 100644
index 0000000..6a0bd27
--- /dev/null
+++ b/service-ai/service-ai-cli/src/main/resources/template/llama-swap.ftl
@@ -0,0 +1,25 @@
+healthCheckTimeout: 120
+logLevel: warn
+models:
+<#list models as model>
+  "${model.name}":
+    proxy: ${model.proxy}
+    ttl: 86400
+    cmd: |
+      docker run
+      <#list model.options as option>
+      ${option}
+      </#list>
+      ${model.image}
+      <#list model.arguments as arg>
+      ${arg}
+      </#list>
+</#list>
+groups:
+  "persistent":
+    swap: false
+    exclusive: false
+    members:
+    <#list models as model>
+      - "${model.name}"
+    </#list>
\ No newline at end of file
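
For reference, a config rendered from this template for a single llama.cpp model entry would look roughly like the sketch below. The model name "qwen3-8b", the mounted file name, and the trimmed argument list are illustrative placeholders rather than values taken from this patch; ${PORT} is left literal because llama-swap substitutes it with the port it assigns to the upstream container.

    healthCheckTimeout: 120
    logLevel: warn
    models:
      "qwen3-8b":
        proxy: http://llamacpp-qwen3-8b:${PORT}
        ttl: 86400
        cmd: |
          docker run
          --rm
          --network llama
          -v /data/models:/models
          --name llamacpp-qwen3-8b
          ghcr.io/ggml-org/llama.cpp:server
          -m /models/qwen3-8b.gguf
          --port ${PORT}
    groups:
      "persistent":
        swap: false
        exclusive: false
        members:
          - "qwen3-8b"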