fix(ai): 移除不支持的模型
This commit is contained in:
@@ -25,39 +25,32 @@ public class LlamaSwapTool extends GeneratorTool {
|
|||||||
Map.of(
|
Map.of(
|
||||||
"models", List.of(
|
"models", List.of(
|
||||||
llamaCppEmbeddingCmd("BGE/bge-m3-q4km", "bge-m3-Q4_K_M.gguf", 20),
|
llamaCppEmbeddingCmd("BGE/bge-m3-q4km", "bge-m3-Q4_K_M.gguf", 20),
|
||||||
vllmEmbeddingCmd("BGE/bge-m3", "bge-m3", 20, 5),
|
vllmEmbeddingCmd("BGE/bge-m3", "bge-m3", 5),
|
||||||
llamaCppRerankerCmd("BGE/beg-reranker-v2-q4km", "bge-reranker-v2-m3-Q4_K_M.gguf", 20),
|
llamaCppRerankerCmd("BGE/beg-reranker-v2-q4km", "bge-reranker-v2-m3-Q4_K_M.gguf", 20),
|
||||||
llamaCppRerankerCmd("BGE/beg-reranker-v2", "bge-reranker-v2-m3", 20),
|
vllmRerankerCmd("BGE/beg-reranker-v2", "bge-reranker-v2-m3", 5),
|
||||||
|
|
||||||
vllmCmd("Qwen3/qwen3-0.6b", "Qwen3-0.6B", 35, 5, true),
|
vllmCmd("Qwen3/qwen3-0.6b", "Qwen3-0.6B", 5, true),
|
||||||
vllmCmd("Qwen3/qwen3-1.7b", "Qwen3-1.7B", 35, 5, true),
|
vllmCmd("Qwen3/qwen3-1.7b", "Qwen3-1.7B", 5, true),
|
||||||
vllmCmd("Qwen3/qwen3-4b", "Qwen3-4B", 35, 8, true),
|
vllmCmd("Qwen3/qwen3-4b", "Qwen3-4B", 8, true),
|
||||||
llamaCppCmd("Qwen3/qwen3-4b-q4km", "Qwen3-4B-Q4_K_M.gguf", 35),
|
llamaCppCmd("Qwen3/qwen3-4b-q4km", "Qwen3-4B-Q4_K_M.gguf", 35),
|
||||||
llamaCppCmd("Qwen3/qwen3-8b-q4km", "Qwen3-8B-Q4_K_M.gguf", 35),
|
llamaCppCmd("Qwen3/qwen3-8b-q4km", "Qwen3-8B-Q4_K_M.gguf", 35),
|
||||||
|
|
||||||
vllmEmbeddingCmd("Qwen3/qwen3-embedding-0.6b", "Qwen3-Embedding-0.6B", 35, 5),
|
vllmEmbeddingCmd("Qwen3/qwen3-embedding-0.6b", "Qwen3-Embedding-0.6B", 5),
|
||||||
vllmEmbeddingCmd("Qwen3/qwen3-embedding-4b", "Qwen3-Embedding-4B", 35, 8),
|
vllmEmbeddingCmd("Qwen3/qwen3-embedding-4b", "Qwen3-Embedding-4B", 8),
|
||||||
llamaCppEmbeddingCmd("Qwen3/qwen3-embedding-4b-q4km", "Qwen3-Embedding-4B-Q4_K_M.gguf", 35),
|
llamaCppEmbeddingCmd("Qwen3/qwen3-embedding-4b-q4km", "Qwen3-Embedding-4B-Q4_K_M.gguf", 35),
|
||||||
llamaCppEmbeddingCmd("Qwen3/qwen3-embedding-8b-q4km", "Qwen3-Embedding-8B-Q4_K_M.gguf", 35),
|
llamaCppEmbeddingCmd("Qwen3/qwen3-embedding-8b-q4km", "Qwen3-Embedding-8B-Q4_K_M.gguf", 35),
|
||||||
|
|
||||||
vllmRerankerCmd("Qwen3/qwen3-reranker-0.6b", "Qwen3-Reranker-0.6B", 35, 5),
|
// 0.9.1 vllm还未支持
|
||||||
vllmRerankerCmd("Qwen3/qwen3-reranker-4b", "Qwen3-Reranker-4B", 35, 8),
|
// vllmRerankerCmd("Qwen3/qwen3-reranker-0.6b", "Qwen3-Reranker-0.6B", 5),
|
||||||
|
// vllmRerankerCmd("Qwen3/qwen3-reranker-4b", "Qwen3-Reranker-4B", 8),
|
||||||
|
|
||||||
llamaCppVisualCmd("Qwen2.5/qwen2.5-vl-7b", "Qwen2.5-VL-7B-Instruct-BF16.gguf", 35),
|
llamaCppVisualCmd("Qwen2.5/qwen2.5-vl-7b", "Qwen2.5-VL-7B-Instruct-BF16.gguf", 35),
|
||||||
llamaCppVisualCmd("Qwen2.5/qwen2.5-vl-7b-q4km", "Qwen2.5-VL-7B-Instruct-Q4_K_M.gguf", 35),
|
llamaCppVisualCmd("Qwen2.5/qwen2.5-vl-7b-q4km", "Qwen2.5-VL-7B-Instruct-Q4_K_M.gguf", 35),
|
||||||
vllmCmd("Qwen2.5/qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct", 35, 8, false),
|
vllmCmd("Qwen2.5/qwen2.5-vl-3b-instruct", "Qwen2.5-VL-3B-Instruct", 8, false),
|
||||||
vllmCmd("Qwen2.5/qwen2.5-vl-7b-instruct-awq", "Qwen2.5-VL-7B-Instruct-AWQ", 35, 8, false),
|
vllmCmd("Qwen2.5/qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct", 8, false),
|
||||||
vllmCmd("Qwen2.5/qwen2.5-vl-3b-instruct", "Qwen2.5-VL-3B-Instruct", 35, 8, false),
|
|
||||||
vllmCmd("Qwen2.5/qwen2.5-vl-3b-instruct-awq", "Qwen2.5-VL-3B-Instruct-AWQ", 35, 8, false),
|
|
||||||
|
|
||||||
llamaCppVisualCmd("MiniCPM/minicpm-o-2.6-7.6b-q4km", "MiniCPM-o-2_6-7.6B-Q4_K_M.gguf", 35),
|
llamaCppVisualCmd("MiniCPM/minicpm-o-2.6-7.6b-q4km", "MiniCPM-o-2_6-7.6B-Q4_K_M.gguf", 35),
|
||||||
vllmCmd("MiniCPM/minicpm-o-2.6-7.6b", "MiniCPM-o-2_6", 35, 10, false),
|
vllmCmd("MiniCPM/minicpm-o-2.6-7.6b", "MiniCPM-o-2_6", 10, false)
|
||||||
|
|
||||||
vllmEmbeddingCmd("MiniCPM/minicpm-embedding", "MiniCPM-Embedding", 20, 8),
|
|
||||||
vllmEmbeddingCmd("MiniCPM/minicpm-embedding-light", "MiniCPM-Embedding-Light", 20, 5),
|
|
||||||
|
|
||||||
vllmEmbeddingCmd("MiniCPM/minicpm-reranker", "MiniCPM-Reranker", 20, 8),
|
|
||||||
vllmEmbeddingCmd("MiniCPM/minicpm-reranker-light", "MiniCPM-Reranker-Light", 20, 5)
|
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
"config.yaml"
|
"config.yaml"
|
||||||
@@ -114,23 +107,23 @@ public class LlamaSwapTool extends GeneratorTool {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
private DockerCmd vllmCmd(String name, String model, Integer thread, Integer cache, Boolean isReasonable) {
|
private DockerCmd vllmCmd(String name, String model, Integer cache, Boolean isReasonable) {
|
||||||
return vllmCmd(name, model, thread, cache, false, false, isReasonable, false);
|
return vllmCmd(name, model, cache, false, false, isReasonable);
|
||||||
}
|
}
|
||||||
|
|
||||||
private DockerCmd vllmEmbeddingCmd(String name, String model, Integer thread, Integer cache) {
|
private DockerCmd vllmEmbeddingCmd(String name, String model, Integer cache) {
|
||||||
return vllmCmd(name, model, thread, cache, true, false, false, false);
|
return vllmCmd(name, model, cache, true, false, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
private DockerCmd vllmRerankerCmd(String name, String model, Integer thread, Integer cache) {
|
private DockerCmd vllmRerankerCmd(String name, String model, Integer cache) {
|
||||||
return vllmCmd(name, model, thread, cache, false, true, false, false);
|
return vllmCmd(name, model, cache, false, true, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
private DockerCmd vllmVisualCmd(String name, String model, Integer thread, Integer cache, Boolean isReasonable) {
|
private DockerCmd vllmVisualCmd(String name, String model, Integer cache, Boolean isReasonable) {
|
||||||
return vllmCmd(name, model, thread, cache, false, false, isReasonable, true);
|
return vllmCmd(name, model, cache, false, false, isReasonable);
|
||||||
}
|
}
|
||||||
|
|
||||||
private DockerCmd vllmCmd(String name, String model, Integer thread, Integer cache, Boolean isEmbedding, Boolean isReranker, Boolean isReasonable, Boolean isVisual) {
|
private DockerCmd vllmCmd(String name, String model, Integer cache, Boolean isEmbedding, Boolean isReranker, Boolean isReasonable) {
|
||||||
List<String> arguments = ListUtil.list(
|
List<String> arguments = ListUtil.list(
|
||||||
false,
|
false,
|
||||||
StrUtil.format("--model /models/{}", model),
|
StrUtil.format("--model /models/{}", model),
|
||||||
@@ -138,11 +131,13 @@ public class LlamaSwapTool extends GeneratorTool {
|
|||||||
"--port ${PORT}",
|
"--port ${PORT}",
|
||||||
StrUtil.format("--api-key {}", API_KEY),
|
StrUtil.format("--api-key {}", API_KEY),
|
||||||
"--disable-log-requests",
|
"--disable-log-requests",
|
||||||
"--uvicorn-log-level error"
|
"--uvicorn-log-level error",
|
||||||
|
"--trust-remote-code"
|
||||||
);
|
);
|
||||||
if (isEmbedding) {
|
if (isEmbedding) {
|
||||||
arguments.add("--task embedding");
|
arguments.add("--task embedding");
|
||||||
} else if (isReranker) {
|
} else if (isReranker) {
|
||||||
|
arguments.add("--task score");
|
||||||
} else if (isReasonable) {
|
} else if (isReasonable) {
|
||||||
arguments.add("--enable-auto-tool-choice");
|
arguments.add("--enable-auto-tool-choice");
|
||||||
arguments.add("--tool-call-parser hermes");
|
arguments.add("--tool-call-parser hermes");
|
||||||
@@ -158,9 +153,7 @@ public class LlamaSwapTool extends GeneratorTool {
|
|||||||
StrUtil.format("--name vllm-{}", displayName(model)),
|
StrUtil.format("--name vllm-{}", displayName(model)),
|
||||||
"--privileged=true",
|
"--privileged=true",
|
||||||
"--shm-size=4g",
|
"--shm-size=4g",
|
||||||
StrUtil.format("-e VLLM_CPU_KVCACHE_SPACE={}", cache),
|
StrUtil.format("-e VLLM_CPU_KVCACHE_SPACE={}", cache)
|
||||||
StrUtil.format("-e VLLM_CPU_OMP_THREADS_BIND=0-{}", thread - 1),
|
|
||||||
"-e VLLM_CPU_MOE_PREPACK=0"
|
|
||||||
),
|
),
|
||||||
arguments
|
arguments
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
healthCheckTimeout: 120
|
healthCheckTimeout: 600
|
||||||
logLevel: warn
|
logLevel: warn
|
||||||
models:
|
models:
|
||||||
<#list models as model>
|
<#list models as model>
|
||||||
|
|||||||
Reference in New Issue
Block a user