feat(knowledge): 初步完成知识库分片预览
This commit is contained in:
@@ -46,6 +46,14 @@
|
|||||||
<groupId>com.yomahub</groupId>
|
<groupId>com.yomahub</groupId>
|
||||||
<artifactId>liteflow-spring-boot-starter</artifactId>
|
<artifactId>liteflow-spring-boot-starter</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springframework.ai</groupId>
|
||||||
|
<artifactId>spring-ai-tika-document-reader</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springframework.ai</groupId>
|
||||||
|
<artifactId>spring-ai-pdf-document-reader</artifactId>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ package com.lanyuanxiaoyao.service.ai.knowledge.controller;
|
|||||||
|
|
||||||
import com.lanyuanxiaoyao.service.ai.knowledge.entity.vo.KnowledgeVO;
|
import com.lanyuanxiaoyao.service.ai.knowledge.entity.vo.KnowledgeVO;
|
||||||
import com.lanyuanxiaoyao.service.ai.knowledge.entity.vo.PointVO;
|
import com.lanyuanxiaoyao.service.ai.knowledge.entity.vo.PointVO;
|
||||||
import com.lanyuanxiaoyao.service.ai.knowledge.reader.TextLineReader;
|
import com.lanyuanxiaoyao.service.ai.knowledge.service.EmbeddingService;
|
||||||
import com.lanyuanxiaoyao.service.ai.knowledge.service.KnowledgeService;
|
import com.lanyuanxiaoyao.service.ai.knowledge.service.KnowledgeService;
|
||||||
import io.qdrant.client.QdrantClient;
|
import io.qdrant.client.QdrantClient;
|
||||||
import io.qdrant.client.grpc.Points;
|
import io.qdrant.client.grpc.Points;
|
||||||
@@ -14,7 +14,6 @@ import org.eclipse.collections.api.list.ImmutableList;
|
|||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.ai.embedding.EmbeddingModel;
|
import org.springframework.ai.embedding.EmbeddingModel;
|
||||||
import org.springframework.ai.reader.TextReader;
|
|
||||||
import org.springframework.ai.reader.markdown.MarkdownDocumentReader;
|
import org.springframework.ai.reader.markdown.MarkdownDocumentReader;
|
||||||
import org.springframework.ai.reader.markdown.config.MarkdownDocumentReaderConfig;
|
import org.springframework.ai.reader.markdown.config.MarkdownDocumentReaderConfig;
|
||||||
import org.springframework.ai.vectorstore.VectorStore;
|
import org.springframework.ai.vectorstore.VectorStore;
|
||||||
@@ -37,11 +36,13 @@ public class KnowledgeController {
|
|||||||
private static final Logger logger = LoggerFactory.getLogger(KnowledgeController.class);
|
private static final Logger logger = LoggerFactory.getLogger(KnowledgeController.class);
|
||||||
|
|
||||||
private final KnowledgeService knowledgeService;
|
private final KnowledgeService knowledgeService;
|
||||||
|
private final EmbeddingService embeddingService;
|
||||||
private final QdrantClient client;
|
private final QdrantClient client;
|
||||||
private final EmbeddingModel embeddingModel;
|
private final EmbeddingModel embeddingModel;
|
||||||
|
|
||||||
public KnowledgeController(KnowledgeService knowledgeService, VectorStore vectorStore, EmbeddingModel embeddingModel) {
|
public KnowledgeController(KnowledgeService knowledgeService, EmbeddingService embeddingService, VectorStore vectorStore, EmbeddingModel embeddingModel) {
|
||||||
this.knowledgeService = knowledgeService;
|
this.knowledgeService = knowledgeService;
|
||||||
|
this.embeddingService = embeddingService;
|
||||||
client = (QdrantClient) vectorStore.getNativeClient().orElseThrow();
|
client = (QdrantClient) vectorStore.getNativeClient().orElseThrow();
|
||||||
this.embeddingModel = embeddingModel;
|
this.embeddingModel = embeddingModel;
|
||||||
}
|
}
|
||||||
@@ -89,15 +90,11 @@ public class KnowledgeController {
|
|||||||
|
|
||||||
@PostMapping("preview_text")
|
@PostMapping("preview_text")
|
||||||
public ImmutableList<PointVO> previewText(
|
public ImmutableList<PointVO> previewText(
|
||||||
@RequestParam("name") String name,
|
@RequestParam(value = "mode", defaultValue = "NORMAL") String mode,
|
||||||
@RequestParam(value = "mode", defaultValue = "normal") String mode,
|
|
||||||
@RequestParam(value = "type", defaultValue = "text") String type,
|
@RequestParam(value = "type", defaultValue = "text") String type,
|
||||||
@RequestParam("content") String content
|
@RequestParam("content") String content
|
||||||
) {
|
) {
|
||||||
TextReader reader = new TextLineReader(new ByteArrayResource(content.getBytes(StandardCharsets.UTF_8)));
|
return embeddingService.split(mode, content)
|
||||||
return reader.get()
|
|
||||||
.stream()
|
|
||||||
.collect(Collectors.toCollection(Lists.mutable::empty))
|
|
||||||
.collect(doc -> {
|
.collect(doc -> {
|
||||||
PointVO vo = new PointVO();
|
PointVO vo = new PointVO();
|
||||||
vo.setId(doc.getId());
|
vo.setId(doc.getId());
|
||||||
|
|||||||
@@ -0,0 +1,130 @@
|
|||||||
|
package com.lanyuanxiaoyao.service.ai.knowledge.entity;
|
||||||
|
|
||||||
|
import cn.hutool.core.util.StrUtil;
|
||||||
|
import java.io.File;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import org.eclipse.collections.api.factory.Lists;
|
||||||
|
import org.eclipse.collections.api.factory.Maps;
|
||||||
|
import org.springframework.ai.document.Document;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author lanyuanxiaoyao
|
||||||
|
* @version 20250523
|
||||||
|
*/
|
||||||
|
public class EmbeddingContext {
|
||||||
|
private Config config;
|
||||||
|
private String content;
|
||||||
|
private String file;
|
||||||
|
private List<Document> documents = Lists.mutable.empty();
|
||||||
|
private Map<String, Object> metadata = Maps.mutable.empty();
|
||||||
|
|
||||||
|
public EmbeddingContext(String content) {
|
||||||
|
this(content, new Config());
|
||||||
|
}
|
||||||
|
|
||||||
|
public EmbeddingContext(String content, Config config) {
|
||||||
|
this.content = StrUtil.trim(content);
|
||||||
|
this.config = config;
|
||||||
|
}
|
||||||
|
|
||||||
|
public EmbeddingContext(File file) {
|
||||||
|
this(file, new Config());
|
||||||
|
}
|
||||||
|
|
||||||
|
public EmbeddingContext(File file, Config config) {
|
||||||
|
this.file = file.getAbsolutePath();
|
||||||
|
this.config = config;
|
||||||
|
}
|
||||||
|
|
||||||
|
public EmbeddingContext(Path path) {
|
||||||
|
this(path.toFile());
|
||||||
|
}
|
||||||
|
|
||||||
|
public EmbeddingContext(Path path, Config config) {
|
||||||
|
this(path.toFile(), config);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Config getConfig() {
|
||||||
|
return config;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setConfig(Config config) {
|
||||||
|
this.config = config;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getContent() {
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setContent(String content) {
|
||||||
|
this.content = content;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFile() {
|
||||||
|
return file;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFile(String file) {
|
||||||
|
this.file = file;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Document> getDocuments() {
|
||||||
|
return documents;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDocuments(List<Document> documents) {
|
||||||
|
this.documents = documents;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, Object> getMetadata() {
|
||||||
|
return metadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMetadata(Map<String, Object> metadata) {
|
||||||
|
this.metadata = metadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "EmbeddingContext{" +
|
||||||
|
"config=" + config +
|
||||||
|
", content='" + content + '\'' +
|
||||||
|
", file='" + file + '\'' +
|
||||||
|
", documents=" + documents +
|
||||||
|
", metadata=" + metadata +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final class Config {
|
||||||
|
private SplitStrategy splitStrategy = SplitStrategy.NORMAL;
|
||||||
|
|
||||||
|
public Config() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public Config(SplitStrategy splitStrategy) {
|
||||||
|
this.splitStrategy = splitStrategy;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SplitStrategy getSplitStrategy() {
|
||||||
|
return splitStrategy;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSplitStrategy(SplitStrategy splitStrategy) {
|
||||||
|
this.splitStrategy = splitStrategy;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "Config{" +
|
||||||
|
"splitStrategy=" + splitStrategy +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
|
||||||
|
public enum SplitStrategy {
|
||||||
|
NORMAL, LLM, QA
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,34 +0,0 @@
|
|||||||
package com.lanyuanxiaoyao.service.ai.knowledge.reader;
|
|
||||||
|
|
||||||
import cn.hutool.core.util.StrUtil;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.stream.Stream;
|
|
||||||
import org.springframework.ai.document.Document;
|
|
||||||
import org.springframework.ai.reader.TextReader;
|
|
||||||
import org.springframework.core.io.Resource;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @author lanyuanxiaoyao
|
|
||||||
* @version 20250522
|
|
||||||
*/
|
|
||||||
public class TextLineReader extends TextReader {
|
|
||||||
public TextLineReader(Resource resource) {
|
|
||||||
super(resource);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public List<Document> get() {
|
|
||||||
return super.get()
|
|
||||||
.stream()
|
|
||||||
.flatMap(doc -> {
|
|
||||||
String text = doc.getText();
|
|
||||||
if (StrUtil.isBlank(text)) {
|
|
||||||
return Stream.of(doc);
|
|
||||||
}
|
|
||||||
return Stream.of(text.split("\n\n"))
|
|
||||||
.filter(StrUtil::isNotBlank)
|
|
||||||
.map(line -> new Document(line, doc.getMetadata()));
|
|
||||||
})
|
|
||||||
.toList();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,7 +1,13 @@
|
|||||||
package com.lanyuanxiaoyao.service.ai.knowledge.service;
|
package com.lanyuanxiaoyao.service.ai.knowledge.service;
|
||||||
|
|
||||||
|
import com.lanyuanxiaoyao.service.ai.knowledge.entity.EmbeddingContext;
|
||||||
|
import com.yomahub.liteflow.core.FlowExecutor;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import org.eclipse.collections.api.factory.Lists;
|
||||||
|
import org.eclipse.collections.api.list.ImmutableList;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.ai.document.Document;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -12,5 +18,19 @@ import org.springframework.stereotype.Service;
|
|||||||
public class EmbeddingService {
|
public class EmbeddingService {
|
||||||
private static final Logger logger = LoggerFactory.getLogger(EmbeddingService.class);
|
private static final Logger logger = LoggerFactory.getLogger(EmbeddingService.class);
|
||||||
|
|
||||||
|
private final FlowExecutor executor;
|
||||||
|
|
||||||
|
/**
 * Creates the service around the LiteFlow executor it keeps for running chains.
 *
 * @param executor flow executor stored in this service
 */
@SuppressWarnings("SpringJavaInjectionPointsAutowiringInspection")
|
||||||
|
public EmbeddingService(FlowExecutor executor) {
|
||||||
|
this.executor = executor;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ImmutableList<Document> split(String mode, String content) {
|
||||||
|
EmbeddingContext context = new EmbeddingContext(
|
||||||
|
content,
|
||||||
|
new EmbeddingContext.Config(EmbeddingContext.Config.SplitStrategy.valueOf(mode))
|
||||||
|
);
|
||||||
|
executor.execute2Resp("embedding", null, context);
|
||||||
|
return Lists.immutable.ofAll(context.getDocuments());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,201 @@
|
|||||||
|
package com.lanyuanxiaoyao.service.ai.knowledge.service.node;
|
||||||
|
|
||||||
|
import cn.hutool.core.io.FileUtil;
|
||||||
|
import cn.hutool.core.lang.Assert;
|
||||||
|
import cn.hutool.core.util.StrUtil;
|
||||||
|
import com.lanyuanxiaoyao.service.ai.knowledge.entity.EmbeddingContext;
|
||||||
|
import com.yomahub.liteflow.annotation.LiteflowComponent;
|
||||||
|
import com.yomahub.liteflow.annotation.LiteflowMethod;
|
||||||
|
import com.yomahub.liteflow.core.NodeComponent;
|
||||||
|
import com.yomahub.liteflow.enums.LiteFlowMethodEnum;
|
||||||
|
import com.yomahub.liteflow.enums.NodeTypeEnum;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.ai.chat.client.ChatClient;
|
||||||
|
import org.springframework.ai.document.Document;
|
||||||
|
import org.springframework.ai.document.DocumentReader;
|
||||||
|
import org.springframework.ai.reader.ExtractedTextFormatter;
|
||||||
|
import org.springframework.ai.reader.pdf.PagePdfDocumentReader;
|
||||||
|
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
|
||||||
|
import org.springframework.ai.reader.tika.TikaDocumentReader;
|
||||||
|
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
|
||||||
|
import org.springframework.core.io.PathResource;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author lanyuanxiaoyao
|
||||||
|
* @version 20250523
|
||||||
|
*/
|
||||||
|
@LiteflowComponent
|
||||||
|
public class EmbeddingNodes {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(EmbeddingNodes.class);
|
||||||
|
|
||||||
|
private final ChatClient chatClient;
|
||||||
|
|
||||||
|
/**
 * Builds the chat client used by the LLM-based split nodes.
 *
 * @param builder chat client builder; {@code build()} is called once here
 */
public EmbeddingNodes(ChatClient.Builder builder) {
|
||||||
|
this.chatClient = builder.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
@LiteflowMethod(value = LiteFlowMethodEnum.PROCESS_BOOLEAN, nodeId = "embedding_check_if_file_needed", nodeName = "判断是否需要读取文件", nodeType = NodeTypeEnum.BOOLEAN)
|
||||||
|
public boolean checkIfFileReadNeeded(NodeComponent node) {
|
||||||
|
EmbeddingContext context = node.getContextBean(EmbeddingContext.class);
|
||||||
|
Assert.notNull(context, "EmbeddingContext is null");
|
||||||
|
if (StrUtil.isNotBlank(context.getFile())) {
|
||||||
|
Assert.isTrue(FileUtil.exist(context.getFile()), "File [{}] not exist", context.getFile());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
Assert.notBlank(context.getContent(), "Contents is empty");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@LiteflowMethod(value = LiteFlowMethodEnum.PROCESS, nodeId = "test_print", nodeType = NodeTypeEnum.COMMON)
|
||||||
|
public void testPrint(NodeComponent node) {
|
||||||
|
EmbeddingContext context = node.getContextBean(EmbeddingContext.class);
|
||||||
|
logger.info(context.getContent());
|
||||||
|
}
|
||||||
|
|
||||||
|
@LiteflowMethod(value = LiteFlowMethodEnum.PROCESS_SWITCH, nodeId = "file_reader_switch", nodeName = "判断文件格式", nodeType = NodeTypeEnum.SWITCH)
|
||||||
|
public String fileReaderSwitch(NodeComponent node) {
|
||||||
|
EmbeddingContext context = node.getContextBean(EmbeddingContext.class);
|
||||||
|
String extName = FileUtil.extName(context.getFile());
|
||||||
|
return switch (extName.toLowerCase()) {
|
||||||
|
case "txt", "md", "markdown" -> "txt_file_reader";
|
||||||
|
case "pdf" -> "pdf_file_reader";
|
||||||
|
case "doc", "docx", "xls", "xlsx", "ppt", "pptx", "html", "xml", "wps", "et", "dpt" -> "any_file_reader";
|
||||||
|
default -> throw new IllegalStateException("Unsupported ext: " + extName);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@LiteflowMethod(value = LiteFlowMethodEnum.PROCESS, nodeId = "txt_file_reader", nodeName = "读取文本文件", nodeType = NodeTypeEnum.COMMON)
|
||||||
|
public void txtFileReader(NodeComponent node) {
|
||||||
|
EmbeddingContext context = node.getContextBean(EmbeddingContext.class);
|
||||||
|
context.setContent(FileUtil.readString(context.getFile(), Charset.defaultCharset()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@LiteflowMethod(value = LiteFlowMethodEnum.PROCESS, nodeId = "pdf_file_reader", nodeName = "读取pdf文件", nodeType = NodeTypeEnum.COMMON)
|
||||||
|
public void pdfFileReader(NodeComponent node) {
|
||||||
|
EmbeddingContext context = node.getContextBean(EmbeddingContext.class);
|
||||||
|
PagePdfDocumentReader reader = new PagePdfDocumentReader(
|
||||||
|
new PathResource(context.getFile()),
|
||||||
|
PdfDocumentReaderConfig.builder()
|
||||||
|
.withPageTopMargin(0)
|
||||||
|
.withPageExtractedTextFormatter(ExtractedTextFormatter.builder()
|
||||||
|
.withNumberOfTopTextLinesToDelete(0)
|
||||||
|
.build())
|
||||||
|
.build());
|
||||||
|
context.setContent(readBySpringAiReader(reader));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <a href="https://tika.apache.org/3.1.0/formats.html#Database_formats">Tika支持的格式</a>
|
||||||
|
*/
|
||||||
|
@LiteflowMethod(value = LiteFlowMethodEnum.PROCESS, nodeId = "any_file_reader", nodeName = "使用Tika尝试读取文件", nodeType = NodeTypeEnum.COMMON)
|
||||||
|
public void anyFileReader(NodeComponent node) {
|
||||||
|
EmbeddingContext context = node.getContextBean(EmbeddingContext.class);
|
||||||
|
context.setContent(readBySpringAiReader(new TikaDocumentReader(new PathResource(context.getFile()))));
|
||||||
|
}
|
||||||
|
|
||||||
|
private String readBySpringAiReader(DocumentReader reader) {
|
||||||
|
return reader.get()
|
||||||
|
.stream()
|
||||||
|
.map(Document::getText)
|
||||||
|
.collect(Collectors.joining("\n"))
|
||||||
|
.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
@LiteflowMethod(value = LiteFlowMethodEnum.PROCESS_SWITCH, nodeId = "split_switch", nodeName = "判断使用什么分段方法", nodeType = NodeTypeEnum.SWITCH)
|
||||||
|
public String splitSwitch(NodeComponent node) {
|
||||||
|
EmbeddingContext context = node.getContextBean(EmbeddingContext.class);
|
||||||
|
return switch (context.getConfig().getSplitStrategy()) {
|
||||||
|
case NORMAL -> "normal_split";
|
||||||
|
case LLM -> "llm_split";
|
||||||
|
case QA -> "qa_split";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@LiteflowMethod(value = LiteFlowMethodEnum.PROCESS, nodeId = "normal_split", nodeName = "使用普通分段", nodeType = NodeTypeEnum.COMMON)
|
||||||
|
public void normalSplit(NodeComponent node) {
|
||||||
|
EmbeddingContext context = node.getContextBean(EmbeddingContext.class);
|
||||||
|
TokenTextSplitter splitter = new TokenTextSplitter(200, 100, 5, 200, true);
|
||||||
|
Document document = Document.builder()
|
||||||
|
.text(context.getContent())
|
||||||
|
.build();
|
||||||
|
context.setDocuments(splitter.split(document));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Node that asks the chat model to segment the context's content and appends the resulting
 * documents to the context's document list.
 * <p>
 * The system prompt (kept in Chinese because it is sent to the model verbatim) tells the model to
 * split by meaning, separate segments with blank lines, avoid Markdown and keep the original wording.
 *
 * @param node current node carrying the {@link EmbeddingContext} bean
 */
@LiteflowMethod(value = LiteFlowMethodEnum.PROCESS, nodeId = "llm_split", nodeName = "使用大模型分段", nodeType = NodeTypeEnum.COMMON)
|
||||||
|
public void llmSplit(NodeComponent node) {
|
||||||
|
EmbeddingContext context = node.getContextBean(EmbeddingContext.class);
|
||||||
|
// Appends to the existing list rather than replacing it.
context.getDocuments().addAll(llmSplit(
|
||||||
|
"""
|
||||||
|
对用户输入的文本,生成高质量的分段。请遵循以下指南:
|
||||||
|
1. 分段原则:
|
||||||
|
分段按文本内容的语义进行分割,每个分段都尽可能保持完整连续的内容表达。
|
||||||
|
避免从词句的中间进行分割。
|
||||||
|
2. 格式:
|
||||||
|
分段之间用两个空行分隔,以提高可读性。
|
||||||
|
避免使用任何Markdown格式
|
||||||
|
3. 内容要求:
|
||||||
|
确保每个分段的内容文字完全依照原文。
|
||||||
|
避免添加任何原文中不存在的文字。
|
||||||
|
""",
|
||||||
|
context.getContent(),
|
||||||
|
context.getMetadata()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Node that asks the chat model to turn the context's content into question/answer pairs and
 * appends the resulting documents to the context's document list.
 * <p>
 * The system prompt (kept in Chinese because it is sent to the model verbatim) asks for several
 * phrasings of each question marked with "Q:", an answer marked with "A:", blank lines between
 * pairs, no Markdown, and no information beyond the given text.
 *
 * @param node current node carrying the {@link EmbeddingContext} bean
 */
@LiteflowMethod(value = LiteFlowMethodEnum.PROCESS, nodeId = "qa_split", nodeName = "使用Q/A格式分段", nodeType = NodeTypeEnum.COMMON)
|
||||||
|
public void qaSplit(NodeComponent node) {
|
||||||
|
EmbeddingContext context = node.getContextBean(EmbeddingContext.class);
|
||||||
|
// Appends to the existing list rather than replacing it.
context.getDocuments().addAll(llmSplit(
|
||||||
|
"""
|
||||||
|
对用户输入的文本,生成一组高质量的问答对。请遵循以下指南:
|
||||||
|
1. 问题部分:
|
||||||
|
为同一个主题创建尽可能多的不同表述的问题,确保问题的多样性。
|
||||||
|
每个问题应考虑用户可能的多种问法,例如:
|
||||||
|
直接询问(如“什么是...?”)
|
||||||
|
请求确认(如“是否可以说...?”)
|
||||||
|
寻求解释(如“请解释一下...的含义。”)
|
||||||
|
假设性问题(如“如果...会怎样?”)
|
||||||
|
例子请求(如“能否举个例子说明...?”)
|
||||||
|
问题应涵盖文本中的关键信息、主要概念和细节,确保不遗漏重要内容。
|
||||||
|
2. 答案部分:
|
||||||
|
提供一个全面、信息丰富的答案,涵盖问题的所有可能角度,确保逻辑连贯。
|
||||||
|
答案应直接基于给定文本,确保准确性和一致性。
|
||||||
|
包含相关的细节,如日期、名称、职位等具体信息,必要时提供背景信息以增强理解。
|
||||||
|
3. 格式:
|
||||||
|
使用"Q:"标记问题集合的开始,所有问题应在一个段落内,问题之间用空格分隔。
|
||||||
|
使用"A:"标记答案的开始,答案应清晰分段,便于阅读。
|
||||||
|
问答对之间用两个空行分隔,以提高可读性。
|
||||||
|
避免使用任何Markdown格式
|
||||||
|
4. 内容要求:
|
||||||
|
确保问答对紧密围绕文本主题,避免偏离主题。
|
||||||
|
避免添加文本中未提及的信息,确保信息的真实性。
|
||||||
|
如果文本信息不足以回答某个方面,可以在答案中说明 "根据给定信息无法确定",并尽量提供相关的上下文。
|
||||||
|
""",
|
||||||
|
context.getContent(),
|
||||||
|
context.getMetadata()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<Document> llmSplit(String prompt, String content, Map<String, Object> metadata) {
|
||||||
|
String response = chatClient.prompt()
|
||||||
|
.system(prompt)
|
||||||
|
.user(content)
|
||||||
|
.call()
|
||||||
|
.content();
|
||||||
|
Assert.notBlank(response, "LLM response is empty");
|
||||||
|
// noinspection DataFlowIssue
|
||||||
|
return Arrays.stream(StrUtil.trim(response).split("(s?)\\s*\\n\\n"))
|
||||||
|
.map(StrUtil::trim)
|
||||||
|
.map(text -> Document.builder()
|
||||||
|
.text(text)
|
||||||
|
.metadata(metadata)
|
||||||
|
.build())
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -33,7 +33,7 @@ spring:
|
|||||||
api-key: ENC(K+Hff9QGC+fcyi510VIDd9CaeK/IN5WBJ9rlkUsHEdDgIidW+stHHJlsK0lLPUXXREha+ToQZqqDXJrqSE+GUKCXklFhelD8bRHFXBIeP/ZzT2cxhzgKUXgjw3S0Qw2R)
|
api-key: ENC(K+Hff9QGC+fcyi510VIDd9CaeK/IN5WBJ9rlkUsHEdDgIidW+stHHJlsK0lLPUXXREha+ToQZqqDXJrqSE+GUKCXklFhelD8bRHFXBIeP/ZzT2cxhzgKUXgjw3S0Qw2R)
|
||||||
chat:
|
chat:
|
||||||
options:
|
options:
|
||||||
model: 'Qwen3-1.7'
|
model: 'Qwen3-1.7-vllm'
|
||||||
embedding:
|
embedding:
|
||||||
options:
|
options:
|
||||||
model: 'Bge-m3'
|
model: 'Bge-m3'
|
||||||
|
|||||||
@@ -1,14 +1,20 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE flow PUBLIC "liteflow" "https://liteflow.cc/liteflow.dtd">
|
||||||
<flow>
|
<flow>
|
||||||
<chain name="embedding">
|
<chain id="embedding">
|
||||||
SER(
|
SER(
|
||||||
embedding_start,
|
IF(
|
||||||
SWITCH(embedding_mode_switch).TO(
|
embedding_check_if_file_needed,
|
||||||
normal_embedding,
|
SWITCH(file_reader_switch).TO(
|
||||||
llm_embedding,
|
txt_file_reader,
|
||||||
qa_embedding
|
pdf_file_reader
|
||||||
|
).DEFAULT(any_file_reader)
|
||||||
),
|
),
|
||||||
embedding_finish
|
SWITCH(split_switch).TO(
|
||||||
);
|
normal_split,
|
||||||
|
llm_split,
|
||||||
|
qa_split
|
||||||
|
)
|
||||||
|
)
|
||||||
</chain>
|
</chain>
|
||||||
</flow>
|
</flow>
|
||||||
@@ -33,21 +33,19 @@ const DataImport: React.FC = () => {
|
|||||||
name: 'mode',
|
name: 'mode',
|
||||||
type: 'radios',
|
type: 'radios',
|
||||||
label: '解析模式',
|
label: '解析模式',
|
||||||
value: 'normal',
|
value: 'NORMAL',
|
||||||
options: [
|
options: [
|
||||||
{
|
{
|
||||||
value: 'normal',
|
value: 'NORMAL',
|
||||||
label: '常规模式',
|
label: '常规模式',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
value: 'llm',
|
value: 'LLM',
|
||||||
label: '智能模式',
|
label: '智能模式',
|
||||||
disabled: true,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
value: 'qa',
|
value: 'QA',
|
||||||
label: 'Q/A模式',
|
label: 'Q/A模式',
|
||||||
disabled: true,
|
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
@@ -105,7 +103,6 @@ const DataImport: React.FC = () => {
|
|||||||
},
|
},
|
||||||
dataType: 'form',
|
dataType: 'form',
|
||||||
data: {
|
data: {
|
||||||
name: name,
|
|
||||||
mode: '${mode}',
|
mode: '${mode}',
|
||||||
type: '${type}',
|
type: '${type}',
|
||||||
content: '${content}',
|
content: '${content}',
|
||||||
|
|||||||
Reference in New Issue
Block a user