From 3ee6303cf5823ac009c16d51129bdeb7f3f19db2 Mon Sep 17 00:00:00 2001 From: v-zhangjc9 Date: Wed, 28 May 2025 15:06:30 +0800 Subject: [PATCH] =?UTF-8?q?feat(knowledge):=20=E5=AE=8C=E6=88=90=E7=9F=A5?= =?UTF-8?q?=E8=AF=86=E5=BA=93=E5=9F=BA=E6=9C=AC=E5=8A=9F=E8=83=BD=E5=BC=80?= =?UTF-8?q?=E5=8F=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- service-ai/pom.xml | 2 +- .../ai/knowledge/KnowledgeApplication.java | 2 + .../configuration/KnowledgeConfiguration.java | 39 ++ .../controller/DataFileController.java | 382 ++++++++++++++++++ .../knowledge/controller/GroupController.java | 38 ++ .../controller/KnowledgeController.java | 120 +++--- .../controller/SegmentController.java | 38 ++ .../ai/knowledge/entity/EmbeddingContext.java | 118 ++++-- .../service/ai/knowledge/entity/Group.java | 64 +++ .../ai/knowledge/entity/Knowledge.java | 20 + .../ai/knowledge/entity/vo/DataFileVO.java | 74 ++++ .../ai/knowledge/entity/vo/KnowledgeVO.java | 34 +- .../vo/{PointVO.java => SegmentVO.java} | 2 +- .../ai/knowledge/service/DataFileService.java | 90 +++++ .../knowledge/service/EmbeddingService.java | 79 +++- .../ai/knowledge/service/GroupService.java | 135 +++++++ .../service/KnowledgeGroupService.java | 58 --- .../knowledge/service/KnowledgeService.java | 67 +-- .../ai/knowledge/service/SegmentService.java | 71 ++++ .../service/node/EmbeddingNodes.java | 76 ++-- .../src/main/resources/application.yml | 5 +- .../src/main/resources/config/flow.xml | 5 +- .../service/ai/knowledge/TestEmbedding.java | 59 +++ .../service/ai/knowledge/TestLlm.java | 116 ++++++ .../src/pages/ai/knowledge/DataDetail.tsx | 113 ++++-- .../src/pages/ai/knowledge/DataImport.tsx | 93 ++++- .../src/pages/ai/knowledge/DataSegment.tsx | 133 ++++++ .../src/pages/ai/knowledge/Knowledge.tsx | 17 +- service-web/client/src/route.tsx | 9 +- service-web/client/src/util/amis.tsx | 1 + 30 files changed, 1787 insertions(+), 273 deletions(-) create mode 100644 
service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/configuration/KnowledgeConfiguration.java create mode 100644 service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/controller/DataFileController.java create mode 100644 service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/controller/GroupController.java create mode 100644 service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/controller/SegmentController.java create mode 100644 service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/Group.java create mode 100644 service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/vo/DataFileVO.java rename service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/vo/{PointVO.java => SegmentVO.java} (95%) create mode 100644 service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/DataFileService.java create mode 100644 service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/GroupService.java delete mode 100644 service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/KnowledgeGroupService.java create mode 100644 service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/SegmentService.java create mode 100644 service-ai/service-ai-knowledge/src/test/java/com/lanyuanxiaoyao/service/ai/knowledge/TestEmbedding.java create mode 100644 service-ai/service-ai-knowledge/src/test/java/com/lanyuanxiaoyao/service/ai/knowledge/TestLlm.java create mode 100644 service-web/client/src/pages/ai/knowledge/DataSegment.tsx diff --git a/service-ai/pom.xml b/service-ai/pom.xml index b689311..6ccfe6a 100644 --- a/service-ai/pom.xml +++ b/service-ai/pom.xml @@ -24,7 +24,7 @@ 3.4.3 2024.0.1 - 1.0.0-RC1 + 1.0.0 11.1.0 5.1.0 
5.8.27 diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/KnowledgeApplication.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/KnowledgeApplication.java index 06cda45..cee0e37 100644 --- a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/KnowledgeApplication.java +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/KnowledgeApplication.java @@ -8,6 +8,7 @@ import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.cloud.client.discovery.EnableDiscoveryClient; import org.springframework.retry.annotation.EnableRetry; +import org.springframework.scheduling.annotation.EnableScheduling; /** * @author lanyuanxiaoyao @@ -18,6 +19,7 @@ import org.springframework.retry.annotation.EnableRetry; @EnableConfigurationProperties @EnableEncryptableProperties @EnableRetry +@EnableScheduling public class KnowledgeApplication implements ApplicationRunner { public static void main(String[] args) { SpringApplication.run(KnowledgeApplication.class, args); diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/configuration/KnowledgeConfiguration.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/configuration/KnowledgeConfiguration.java new file mode 100644 index 0000000..42adb63 --- /dev/null +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/configuration/KnowledgeConfiguration.java @@ -0,0 +1,39 @@ +package com.lanyuanxiaoyao.service.ai.knowledge.configuration; + +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.context.annotation.Configuration; + +/** + * @author lanyuanxiaoyao + * @version 20250527 + */ +@Configuration 
+@ConfigurationProperties(prefix = "knowledge") +public class KnowledgeConfiguration { + private String downloadPrefix; + private String uploadPath; + + public String getDownloadPrefix() { + return downloadPrefix; + } + + public void setDownloadPrefix(String downloadPrefix) { + this.downloadPrefix = downloadPrefix; + } + + public String getUploadPath() { + return uploadPath; + } + + public void setUploadPath(String uploadPath) { + this.uploadPath = uploadPath; + } + + @Override + public String toString() { + return "KnowledgeConfiguration{" + + "downloadPrefix='" + downloadPrefix + '\'' + + ", uploadPath='" + uploadPath + '\'' + + '}'; + } +} diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/controller/DataFileController.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/controller/DataFileController.java new file mode 100644 index 0000000..3d8b743 --- /dev/null +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/controller/DataFileController.java @@ -0,0 +1,382 @@ +package com.lanyuanxiaoyao.service.ai.knowledge.controller; + +import cn.hutool.core.io.FileUtil; +import cn.hutool.core.io.IoUtil; +import cn.hutool.core.util.StrUtil; +import cn.hutool.core.util.URLUtil; +import cn.hutool.crypto.SecureUtil; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.lanyuanxiaoyao.service.ai.core.entity.amis.AmisResponse; +import com.lanyuanxiaoyao.service.ai.knowledge.configuration.KnowledgeConfiguration; +import com.lanyuanxiaoyao.service.ai.knowledge.entity.vo.DataFileVO; +import com.lanyuanxiaoyao.service.ai.knowledge.service.DataFileService; +import jakarta.servlet.http.HttpServletResponse; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.channels.FileChannel; +import 
org.eclipse.collections.api.list.ImmutableList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +/** + * 文件上传接口 + * + * @author lanyuanxiaoyao + * @date 2024-11-21 + */ +@RestController +@RequestMapping("/upload") +public class DataFileController { + private static final Logger log = LoggerFactory.getLogger(DataFileController.class); + + private final KnowledgeConfiguration knowledgeConfiguration; + private final DataFileService dataFileService; + private final String uploadFolderPath; + private final String cacheFolderPath; + private final String sliceFolderPath; + + public DataFileController(KnowledgeConfiguration knowledgeConfiguration, DataFileService dataFileService) { + this.knowledgeConfiguration = knowledgeConfiguration; + this.dataFileService = dataFileService; + + this.uploadFolderPath = knowledgeConfiguration.getUploadPath(); + this.cacheFolderPath = StrUtil.format("{}/cache", uploadFolderPath); + this.sliceFolderPath = StrUtil.format("{}/slice", uploadFolderPath); + } + + @PostMapping("") + public AmisResponse upload(@RequestParam("file") MultipartFile file) throws IOException { + String filename = file.getOriginalFilename(); + Long id = dataFileService.initialDataFile(filename); + String url = StrUtil.format("{}/upload/download/{}", knowledgeConfiguration.getDownloadPrefix(), id); + byte[] bytes = file.getBytes(); + String originMd5 = SecureUtil.md5(new ByteArrayInputStream(bytes)); + File targetFile = new File(StrUtil.format("{}/{}", uploadFolderPath, 
originMd5)); + if (targetFile.exists()) { + dataFileService.updateDataFile(id, FileUtil.getAbsolutePath(targetFile), FileUtil.size(targetFile), originMd5, file.getContentType()); + return AmisResponse.responseSuccess(new FinishResponse(id, filename, url, url)); + } + File cacheFile = new File(StrUtil.format("{}/{}", cacheFolderPath, id)); + cacheFile = FileUtil.writeBytes(bytes, cacheFile); + String targetMd5 = SecureUtil.md5(cacheFile); + if (!StrUtil.equals(originMd5, targetMd5)) { + throw new RuntimeException("文件上传失败,校验不匹配"); + } + FileUtil.move(cacheFile, targetFile, true); + dataFileService.updateDataFile(id, FileUtil.getAbsolutePath(targetFile), FileUtil.size(targetFile), targetMd5, file.getContentType()); + return AmisResponse.responseSuccess(new FinishResponse(id, filename, url, url)); + } + + @GetMapping("/download/{id}") + public void download(@PathVariable Long id, HttpServletResponse response) throws IOException { + DataFileVO dataFile = dataFileService.downloadFile(id); + File targetFile = new File(dataFile.getPath()); + response.setHeader("Content-Type", dataFile.getType()); + response.setHeader("Access-Control-Expose-Headers", "Content-Disposition"); + response.setHeader("Content-Disposition", StrUtil.format("attachment; filename={}", URLUtil.encodeAll(dataFile.getFilename()))); + IoUtil.copy(new FileInputStream(targetFile), response.getOutputStream()); + } + + @PostMapping("/start") + public AmisResponse start(@RequestBody StartRequest request) { + log.info("Request: {}", request); + Long id = dataFileService.initialDataFile(request.filename); + return AmisResponse.responseSuccess(new StartResponse(id.toString())); + } + + @PostMapping("/slice") + public AmisResponse slice( + @RequestParam("uploadId") + Long uploadId, + @RequestParam("partNumber") + Integer sequence, + @RequestParam("partSize") + Long size, + @RequestParam("file") + MultipartFile file + ) throws IOException { + byte[] bytes = file.getBytes(); + String md5 = SecureUtil.md5(new 
ByteArrayInputStream(bytes)); + String targetFilename = StrUtil.format("{}-{}", sequence, md5); + String targetFilePath = sliceFilePath(uploadId, targetFilename); + FileUtil.mkParentDirs(targetFilePath); + FileUtil.writeBytes(bytes, targetFilePath); + return AmisResponse.responseSuccess(new SliceResponse(targetFilename)); + } + + private String sliceFilePath(Long uploadId, String sliceFilename) { + return StrUtil.format("{}/{}/{}", sliceFolderPath, uploadId, sliceFilename); + } + + @PostMapping("finish") + public AmisResponse finish(@RequestBody FinishRequest request) { + if (request.partList.anySatisfy(part -> !FileUtil.exist(sliceFilePath(request.uploadId, part.eTag)))) { + throw new RuntimeException("文件校验失败,请重新上传"); + } + try { + File cacheFile = new File(StrUtil.format("{}/{}", cacheFolderPath, request.uploadId)); + FileUtil.mkParentDirs(cacheFile); + if (cacheFile.createNewFile()) { + try (FileOutputStream fos = new FileOutputStream(cacheFile)) { + try (FileChannel fosChannel = fos.getChannel()) { + for (FinishRequest.Part part : request.partList) { + File sliceFile = new File(sliceFilePath(request.uploadId, part.eTag)); + try (FileInputStream fis = new FileInputStream(sliceFile)) { + try (FileChannel fisChannel = fis.getChannel()) { + fisChannel.transferTo(0, fisChannel.size(), fosChannel); + } + } + } + } + } + String md5 = SecureUtil.md5(cacheFile); + File targetFile = new File(StrUtil.format("{}/{}", uploadFolderPath, md5)); + if (!targetFile.exists()) { + FileUtil.move(cacheFile, targetFile, true); + } + String absolutePath = FileUtil.getAbsolutePath(targetFile); + dataFileService.updateDataFile( + request.uploadId, + absolutePath, + FileUtil.size(targetFile), + SecureUtil.md5(targetFile), + FileUtil.getMimeType(absolutePath) + ); + return AmisResponse.responseSuccess(new FinishResponse( + request.uploadId, + request.filename, + request.uploadId.toString(), + StrUtil.format("{}/upload/download/{}", knowledgeConfiguration.getDownloadPrefix(), 
request.uploadId) + )); + } else { + throw new RuntimeException("合并文件失败"); + } + } catch (Throwable throwable) { + throw new RuntimeException(throwable); + } finally { + FileUtil.del(StrUtil.format("{}/{}", cacheFolderPath, request.uploadId)); + FileUtil.del(StrUtil.format("{}/{}", sliceFolderPath, request.uploadId)); + } + } + + public static final class StartRequest { + private String name; + private String filename; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getFilename() { + return filename; + } + + public void setFilename(String filename) { + this.filename = filename; + } + + @Override + public String toString() { + return "StartRequest{" + + "name='" + name + '\'' + + ", filename='" + filename + '\'' + + '}'; + } + } + + public static final class StartResponse { + private String uploadId; + + public StartResponse() { + } + + public StartResponse(String uploadId) { + this.uploadId = uploadId; + } + + public String getUploadId() { + return uploadId; + } + + public void setUploadId(String uploadId) { + this.uploadId = uploadId; + } + + @Override + public String toString() { + return "StartResponse{" + + "uploadId='" + uploadId + '\'' + + '}'; + } + } + + public static final class SliceResponse { + @JsonProperty("eTag") + private String eTag; + + public SliceResponse() { + } + + public SliceResponse(String eTag) { + this.eTag = eTag; + } + + public String geteTag() { + return eTag; + } + + public void seteTag(String eTag) { + this.eTag = eTag; + } + + @Override + public String toString() { + return "SliceResponse{" + + "eTag='" + eTag + '\'' + + '}'; + } + } + + public static final class FinishRequest { + private String filename; + private Long uploadId; + private ImmutableList partList; + + public String getFilename() { + return filename; + } + + public void setFilename(String filename) { + this.filename = filename; + } + + public Long getUploadId() { + return uploadId; + } + 
+ public void setUploadId(Long uploadId) { + this.uploadId = uploadId; + } + + public ImmutableList getPartList() { + return partList; + } + + public void setPartList(ImmutableList partList) { + this.partList = partList; + } + + @Override + public String toString() { + return "FinishRequest{" + + "filename='" + filename + '\'' + + ", uploadId=" + uploadId + + ", partList=" + partList + + '}'; + } + + public static final class Part { + private Integer partNumber; + @JsonProperty("eTag") + private String eTag; + + public Integer getPartNumber() { + return partNumber; + } + + public void setPartNumber(Integer partNumber) { + this.partNumber = partNumber; + } + + public String geteTag() { + return eTag; + } + + public void seteTag(String eTag) { + this.eTag = eTag; + } + + @Override + public String toString() { + return "Part{" + + "partNumber=" + partNumber + + ", eTag='" + eTag + '\'' + + '}'; + } + } + } + + public static final class FinishResponse { + private Long id; + private String filename; + private String value; + private String url; + + public FinishResponse() { + } + + public FinishResponse(Long id, String filename, String value, String url) { + this.id = id; + this.filename = filename; + this.value = value; + this.url = url; + } + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + public String getFilename() { + return filename; + } + + public void setFilename(String filename) { + this.filename = filename; + } + + public String getValue() { + return value; + } + + public void setValue(String value) { + this.value = value; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + @Override + public String toString() { + return "FinishResponse{" + + "id=" + id + + ", filename='" + filename + '\'' + + ", value='" + value + '\'' + + ", url='" + url + '\'' + + '}'; + } + } +} diff --git 
a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/controller/GroupController.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/controller/GroupController.java new file mode 100644 index 0000000..8205864 --- /dev/null +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/controller/GroupController.java @@ -0,0 +1,38 @@ +package com.lanyuanxiaoyao.service.ai.knowledge.controller; + +import com.lanyuanxiaoyao.service.ai.core.entity.amis.AmisResponse; +import com.lanyuanxiaoyao.service.ai.knowledge.service.GroupService; +import java.util.concurrent.ExecutionException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +/** + * @author lanyuanxiaoyao + * @version 20250528 + */ +@RestController +@RequestMapping("group") +public class GroupController { + private static final Logger logger = LoggerFactory.getLogger(GroupController.class); + + private final GroupService groupService; + + public GroupController(GroupService groupService) { + this.groupService = groupService; + } + + @GetMapping("list") + public AmisResponse list(@RequestParam("knowledge_id") Long knowledgeId) { + return AmisResponse.responseCrudData(groupService.list(knowledgeId)); + } + + @GetMapping("delete") + public AmisResponse delete(@RequestParam("id") Long id) throws ExecutionException, InterruptedException { + groupService.remove(id); + return AmisResponse.responseSuccess(); + } +} diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/controller/KnowledgeController.java 
b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/controller/KnowledgeController.java index d764510..0b6ad77 100644 --- a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/controller/KnowledgeController.java +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/controller/KnowledgeController.java @@ -1,28 +1,17 @@ package com.lanyuanxiaoyao.service.ai.knowledge.controller; +import cn.hutool.core.util.StrUtil; import com.lanyuanxiaoyao.service.ai.core.entity.amis.AmisMapResponse; import com.lanyuanxiaoyao.service.ai.core.entity.amis.AmisResponse; -import com.lanyuanxiaoyao.service.ai.knowledge.entity.vo.PointVO; +import com.lanyuanxiaoyao.service.ai.knowledge.entity.vo.SegmentVO; import com.lanyuanxiaoyao.service.ai.knowledge.service.EmbeddingService; import com.lanyuanxiaoyao.service.ai.knowledge.service.KnowledgeService; -import io.qdrant.client.QdrantClient; -import io.qdrant.client.grpc.Points; -import java.nio.charset.StandardCharsets; import java.util.concurrent.ExecutionException; -import java.util.stream.Collectors; import org.eclipse.collections.api.factory.Lists; -import org.eclipse.collections.api.list.ImmutableList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.ai.embedding.EmbeddingModel; -import org.springframework.ai.reader.markdown.MarkdownDocumentReader; -import org.springframework.ai.reader.markdown.config.MarkdownDocumentReaderConfig; -import org.springframework.ai.vectorstore.VectorStore; -import org.springframework.ai.vectorstore.qdrant.QdrantVectorStore; -import org.springframework.core.io.ByteArrayResource; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RequestMapping; import 
org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; @@ -38,14 +27,10 @@ public class KnowledgeController { private final KnowledgeService knowledgeService; private final EmbeddingService embeddingService; - private final QdrantClient client; - private final EmbeddingModel embeddingModel; - public KnowledgeController(KnowledgeService knowledgeService, EmbeddingService embeddingService, VectorStore vectorStore, EmbeddingModel embeddingModel) { + public KnowledgeController(KnowledgeService knowledgeService, EmbeddingService embeddingService) { this.knowledgeService = knowledgeService; this.embeddingService = embeddingService; - client = (QdrantClient) vectorStore.getNativeClient().orElseThrow(); - this.embeddingModel = embeddingModel; } @PostMapping("add") @@ -56,73 +41,68 @@ public class KnowledgeController { knowledgeService.add(name, strategy); } + @GetMapping("name") + public AmisMapResponse name(@RequestParam("id") Long id) { + return AmisResponse.responseMapData() + .setData("name", knowledgeService.getName(id)); + } + @GetMapping("list") public AmisResponse list() { return AmisResponse.responseCrudData(knowledgeService.list()); } - @GetMapping("list_points") - public ImmutableList listPoints(@RequestParam("name") String name) throws ExecutionException, InterruptedException { - Points.ScrollResponse response = client.scrollAsync( - Points.ScrollPoints.newBuilder() - .setCollectionName(name) - // .setLimit(2) - .setWithPayload(Points.WithPayloadSelector.newBuilder().setEnable(true).build()) - .setWithVectors(Points.WithVectorsSelector.newBuilder().setEnable(false).build()) - .build() - ) - .get(); - return response.getResultList() - .stream() - .collect(Collectors.toCollection(Lists.mutable::empty)) - .collect(point -> { - PointVO vo = new PointVO(); - vo.setId(point.getId().getUuid()); - vo.setText(point.getPayloadMap().get("doc_content").getStringValue()); - return vo; - }) - .toImmutable(); - } - 
@GetMapping("delete") - public void delete(@RequestParam("name") String name) throws ExecutionException, InterruptedException { - knowledgeService.remove(name); + public void delete(@RequestParam("id") Long id) throws ExecutionException, InterruptedException { + knowledgeService.remove(id); } @PostMapping("preview_text") public AmisResponse previewText( @RequestParam(value = "mode", defaultValue = "NORMAL") String mode, @RequestParam(value = "type", defaultValue = "text") String type, - @RequestParam("content") String content + @RequestParam(value = "content", required = false) String content, + @RequestParam(value = "files", required = false) String files ) { - return AmisResponse.responseCrudData( - embeddingService.split(mode, content) - .collect(doc -> { - PointVO vo = new PointVO(); - vo.setId(doc.getId()); - vo.setText(doc.getText()); - return vo; - }) - ); + if (StrUtil.equals("text", type)) { + return AmisResponse.responseCrudData( + embeddingService.preview(mode, content) + .collect(doc -> { + SegmentVO vo = new SegmentVO(); + vo.setId(doc.getId()); + vo.setText(doc.getText()); + return vo; + }) + ); + } else if (StrUtil.equals("file", type)) { + return AmisResponse.responseCrudData( + embeddingService.preview(mode, Lists.immutable.of(files.split(","))) + .collect(doc -> { + SegmentVO vo = new SegmentVO(); + vo.setId(doc.getId()); + vo.setText(doc.getText()); + return vo; + }) + ); + } else { + throw new IllegalArgumentException("Unsupported type: " + type); + } } - @PostMapping(value = "process_text", consumes = "text/plain;charset=utf-8") - public void processText( - @RequestParam("name") String name, - @RequestBody String text + @PostMapping("submit_text") + public void submitText( + @RequestParam(value = "id") Long id, + @RequestParam(value = "mode", defaultValue = "NORMAL") String mode, + @RequestParam(value = "type", defaultValue = "text") String type, + @RequestParam(value = "content", required = false) String content, + @RequestParam(value = 
"files", required = false) String files ) { - VectorStore source = QdrantVectorStore.builder(client, embeddingModel) - .collectionName(name) - .initializeSchema(true) - .build(); - MarkdownDocumentReader reader = new MarkdownDocumentReader( - new ByteArrayResource(text.getBytes(StandardCharsets.UTF_8)), - MarkdownDocumentReaderConfig.builder() - .withHorizontalRuleCreateDocument(true) - .withIncludeCodeBlock(false) - .withIncludeBlockquote(false) - .build() - ); - source.add(reader.get()); + if (StrUtil.equals("text", type)) { + embeddingService.submit(id, mode, content); + } else if (StrUtil.equals("file", type)) { + embeddingService.submit(id, mode, Lists.immutable.of(files.split(","))); + } else { + throw new IllegalArgumentException("Unsupported type: " + type); + } } } diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/controller/SegmentController.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/controller/SegmentController.java new file mode 100644 index 0000000..8eb17a7 --- /dev/null +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/controller/SegmentController.java @@ -0,0 +1,38 @@ +package com.lanyuanxiaoyao.service.ai.knowledge.controller; + +import com.lanyuanxiaoyao.service.ai.core.entity.amis.AmisResponse; +import com.lanyuanxiaoyao.service.ai.knowledge.service.SegmentService; +import java.util.concurrent.ExecutionException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +/** + * @author lanyuanxiaoyao + * @version 20250528 + */ +@RestController +@RequestMapping("segment") +public class SegmentController { + private static final Logger logger = 
LoggerFactory.getLogger(SegmentController.class); + + private final SegmentService segmentService; + + public SegmentController(SegmentService segmentService) { + this.segmentService = segmentService; + } + + @GetMapping("list") + public AmisResponse list(@RequestParam("knowledge_id") Long knowledgeId, @RequestParam("group_id") Long groupId) throws ExecutionException, InterruptedException { + return AmisResponse.responseCrudData(segmentService.list(knowledgeId, groupId)); + } + + @GetMapping("delete") + public AmisResponse delete(@RequestParam("knowledge_id") Long knowledgeId, @RequestParam("segment_id") Long segmentId) throws ExecutionException, InterruptedException { + segmentService.remove(knowledgeId, segmentId); + return AmisResponse.responseSuccess(); + } +} diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/EmbeddingContext.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/EmbeddingContext.java index 5ecd72f..1f38514 100644 --- a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/EmbeddingContext.java +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/EmbeddingContext.java @@ -1,9 +1,5 @@ package com.lanyuanxiaoyao.service.ai.knowledge.entity; -import cn.hutool.core.util.StrUtil; -import java.io.File; -import java.nio.file.Path; -import java.util.HashMap; import java.util.List; import java.util.Map; import org.eclipse.collections.api.factory.Lists; @@ -15,36 +11,42 @@ import org.springframework.ai.document.Document; * @version 20250523 */ public class EmbeddingContext { + private Long vectorSourceId; + private Long groupId; private Config config; private String content; private String file; + private String fileFormat; private List documents = Lists.mutable.empty(); private Map metadata = Maps.mutable.empty(); - public EmbeddingContext(String content) { - 
this(content, new Config()); + private EmbeddingContext(Builder builder) { + setVectorSourceId(builder.vectorSourceId); + setGroupId(builder.groupId); + setConfig(builder.config); + setContent(builder.content); + setFile(builder.file); + setFileFormat(builder.fileFormat); } - public EmbeddingContext(String content, Config config) { - this.content = StrUtil.trim(content); - this.config = config; + public static Builder builder() { + return new Builder(); } - public EmbeddingContext(File file) { - this(file, new Config()); + public Long getGroupId() { + return groupId; } - public EmbeddingContext(File file, Config config) { - this.file = file.getAbsolutePath(); - this.config = config; + public void setGroupId(Long groupId) { + this.groupId = groupId; } - public EmbeddingContext(Path path) { - this(path.toFile()); + public Long getVectorSourceId() { + return vectorSourceId; } - public EmbeddingContext(Path path, Config config) { - this(path.toFile(), config); + public void setVectorSourceId(Long vectorSourceId) { + this.vectorSourceId = vectorSourceId; } public Config getConfig() { @@ -71,6 +73,14 @@ public class EmbeddingContext { this.file = file; } + public String getFileFormat() { + return fileFormat; + } + + public void setFileFormat(String fileFormat) { + this.fileFormat = fileFormat; + } + public List getDocuments() { return documents; } @@ -90,9 +100,12 @@ public class EmbeddingContext { @Override public String toString() { return "EmbeddingContext{" + - "config=" + config + + "vectorSourceId=" + vectorSourceId + + ", groupId=" + groupId + + ", config=" + config + ", content='" + content + '\'' + ", file='" + file + '\'' + + ", fileFormat='" + fileFormat + '\'' + ", documents=" + documents + ", metadata=" + metadata + '}'; @@ -101,11 +114,10 @@ public class EmbeddingContext { public static final class Config { private SplitStrategy splitStrategy = SplitStrategy.NORMAL; - public Config() { - } + private Config(Builder builder) 
{setSplitStrategy(builder.splitStrategy);} - public Config(SplitStrategy splitStrategy) { - this.splitStrategy = splitStrategy; + public static Builder builder() { + return new Builder(); } public SplitStrategy getSplitStrategy() { @@ -126,5 +138,65 @@ public class EmbeddingContext { public enum SplitStrategy { NORMAL, LLM, QA } + + public static final class Builder { + private SplitStrategy splitStrategy; + + private Builder() {} + + public Builder splitStrategy(SplitStrategy val) { + splitStrategy = val; + return this; + } + + public Config build() { + return new Config(this); + } + } + } + + public static final class Builder { + private Long vectorSourceId; + private Long groupId; + private Config config; + private String content; + private String file; + private String fileFormat; + + private Builder() {} + + public Builder vectorSourceId(Long val) { + vectorSourceId = val; + return this; + } + + public Builder groupId(Long val) { + groupId = val; + return this; + } + + public Builder config(Config val) { + config = val; + return this; + } + + public Builder content(String val) { + content = val; + return this; + } + + public Builder file(String val) { + file = val; + return this; + } + + public Builder fileFormat(String val) { + fileFormat = val; + return this; + } + + public EmbeddingContext build() { + return new EmbeddingContext(this); + } } } diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/Group.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/Group.java new file mode 100644 index 0000000..7465553 --- /dev/null +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/Group.java @@ -0,0 +1,64 @@ +package com.lanyuanxiaoyao.service.ai.knowledge.entity; + +/** + * @author lanyuanxiaoyao + * @version 20250527 + */ +public class Group { + private String id; + private String name; + private String status; + 
private Long createdTime; + private Long modifiedTime; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public Long getCreatedTime() { + return createdTime; + } + + public void setCreatedTime(Long createdTime) { + this.createdTime = createdTime; + } + + public Long getModifiedTime() { + return modifiedTime; + } + + public void setModifiedTime(Long modifiedTime) { + this.modifiedTime = modifiedTime; + } + + @Override + public String toString() { + return "GroupVO{" + + "id='" + id + '\'' + + ", name='" + name + '\'' + + ", status='" + status + '\'' + + ", createdTime=" + createdTime + + ", modifiedTime=" + modifiedTime + + '}'; + } +} diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/Knowledge.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/Knowledge.java index 016a92c..e63c077 100644 --- a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/Knowledge.java +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/Knowledge.java @@ -9,6 +9,8 @@ public class Knowledge { private Long vectorSourceId; private String name; private String strategy; + private Long createdTime; + private Long modifiedTime; public Long getId() { return id; @@ -42,6 +44,22 @@ public class Knowledge { this.strategy = strategy; } + public Long getCreatedTime() { + return createdTime; + } + + public void setCreatedTime(Long createdTime) { + this.createdTime = createdTime; + } + + public Long getModifiedTime() { + return modifiedTime; + } + + public void setModifiedTime(Long modifiedTime) { + this.modifiedTime = modifiedTime; 
+ } + @Override public String toString() { return "Knowledge{" + @@ -49,6 +67,8 @@ public class Knowledge { ", vectorSourceId=" + vectorSourceId + ", name='" + name + '\'' + ", strategy='" + strategy + '\'' + + ", createdTime=" + createdTime + + ", modifiedTime=" + modifiedTime + '}'; } } diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/vo/DataFileVO.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/vo/DataFileVO.java new file mode 100644 index 0000000..f34c9c2 --- /dev/null +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/vo/DataFileVO.java @@ -0,0 +1,74 @@ +package com.lanyuanxiaoyao.service.ai.knowledge.entity.vo; + +/** + * @author lanyuanxiaoyao + * @version 20250527 + */ +public class DataFileVO { + private String id; + private String filename; + private Long size; + private String md5; + private String path; + private String type; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getFilename() { + return filename; + } + + public void setFilename(String filename) { + this.filename = filename; + } + + public Long getSize() { + return size; + } + + public void setSize(Long size) { + this.size = size; + } + + public String getMd5() { + return md5; + } + + public void setMd5(String md5) { + this.md5 = md5; + } + + public String getPath() { + return path; + } + + public void setPath(String path) { + this.path = path; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + @Override + public String toString() { + return "DataFile{" + + "id='" + id + '\'' + + ", filename='" + filename + '\'' + + ", size=" + size + + ", md5='" + md5 + '\'' + + ", path='" + path + '\'' + + ", type='" + type + '\'' + + '}'; + } +} diff --git 
a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/vo/KnowledgeVO.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/vo/KnowledgeVO.java index 1e26302..6a2a0ec 100644 --- a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/vo/KnowledgeVO.java +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/vo/KnowledgeVO.java @@ -5,12 +5,23 @@ package com.lanyuanxiaoyao.service.ai.knowledge.entity.vo; * @version 20250516 */ public class KnowledgeVO { + private String id; private String name; private String strategy; private Long size; private Long points; private Long segments; private String status; + private Long createdTime; + private Long modifiedTime; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } public String getName() { return name; @@ -60,15 +71,34 @@ public class KnowledgeVO { this.status = status; } + public Long getCreatedTime() { + return createdTime; + } + + public void setCreatedTime(Long createdTime) { + this.createdTime = createdTime; + } + + public Long getModifiedTime() { + return modifiedTime; + } + + public void setModifiedTime(Long modifiedTime) { + this.modifiedTime = modifiedTime; + } + @Override public String toString() { - return "CollectionVO{" + - "name='" + name + '\'' + + return "KnowledgeVO{" + + "id='" + id + '\'' + + ", name='" + name + '\'' + ", strategy='" + strategy + '\'' + ", size=" + size + ", points=" + points + ", segments=" + segments + ", status='" + status + '\'' + + ", createdTime=" + createdTime + + ", modifiedTime=" + modifiedTime + '}'; } } diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/vo/PointVO.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/vo/SegmentVO.java similarity index 95% rename 
from service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/vo/PointVO.java rename to service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/vo/SegmentVO.java index 0cf40a2..85d08c6 100644 --- a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/vo/PointVO.java +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/entity/vo/SegmentVO.java @@ -4,7 +4,7 @@ package com.lanyuanxiaoyao.service.ai.knowledge.entity.vo; * @author lanyuanxiaoyao * @version 20250516 */ -public class PointVO { +public class SegmentVO { private String id; private String text; diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/DataFileService.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/DataFileService.java new file mode 100644 index 0000000..d2563ea --- /dev/null +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/DataFileService.java @@ -0,0 +1,90 @@ +package com.lanyuanxiaoyao.service.ai.knowledge.service; + +import club.kingon.sql.builder.SqlBuilder; +import cn.hutool.core.util.IdUtil; +import com.lanyuanxiaoyao.service.ai.knowledge.entity.vo.DataFileVO; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +/** + * @author lanyuanxiaoyao + * @version 20250527 + */ +@Service +public class DataFileService { + private static final Logger log = LoggerFactory.getLogger(DataFileService.class); + private static final String DATA_FILE_TABLE_NAME = "service_ai_file"; + + private final JdbcTemplate template; + + public DataFileService(JdbcTemplate template) { + this.template = template; + } + + public DataFileVO 
downloadFile(Long id) { + return template.queryForObject( + SqlBuilder.select("id", "filename", "size", "md5", "path", "type") + .from(DATA_FILE_TABLE_NAME) + .whereEq("id", "?") + .precompileSql(), + (rs, row) -> { + DataFileVO vo = new DataFileVO(); + vo.setId(String.valueOf(rs.getLong(1))); + vo.setFilename(rs.getString(2)); + vo.setSize(rs.getLong(3)); + vo.setMd5(rs.getString(4)); + vo.setPath(rs.getString(5)); + vo.setType(rs.getString(6)); + return vo; + }, + id + ); + } + + @Transactional(rollbackFor = Exception.class) + public Long initialDataFile(String filename) { + long id = IdUtil.getSnowflakeNextId(); + template.update( + SqlBuilder.insertInto(DATA_FILE_TABLE_NAME, "id", "filename") + .values() + .addValue("?", "?") + .precompileSql(), + id, + filename + ); + return id; + } + + @Transactional(rollbackFor = Exception.class) + public void updateDataFile(Long id, String path, Long size, String md5, String type) { + template.update( + SqlBuilder.update(DATA_FILE_TABLE_NAME) + .set("size", "?") + .addSet("md5", "?") + .addSet("path", "?") + .addSet("type", "?") + .whereEq("id", "?") + .precompileSql(), + size, + md5, + path, + type, + id + ); + } + + public static final class DataFileNotFoundException extends RuntimeException { + public DataFileNotFoundException() { + super("文件未找到,请重新上传"); + } + } + + public static final class UpdateDataFileFailedException extends RuntimeException { + public UpdateDataFileFailedException() { + super("更新文件信息失败,请重新上传"); + } + } +} diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/EmbeddingService.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/EmbeddingService.java index bb3f901..b610c24 100644 --- a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/EmbeddingService.java +++ 
b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/EmbeddingService.java @@ -1,8 +1,15 @@ package com.lanyuanxiaoyao.service.ai.knowledge.service; +import cn.hutool.core.io.FileUtil; +import cn.hutool.core.util.IdUtil; +import cn.hutool.core.util.StrUtil; import com.lanyuanxiaoyao.service.ai.knowledge.entity.EmbeddingContext; +import com.lanyuanxiaoyao.service.ai.knowledge.entity.Knowledge; +import com.lanyuanxiaoyao.service.ai.knowledge.entity.vo.DataFileVO; import com.yomahub.liteflow.core.FlowExecutor; -import java.nio.file.Path; +import java.nio.charset.StandardCharsets; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import org.eclipse.collections.api.factory.Lists; import org.eclipse.collections.api.list.ImmutableList; import org.slf4j.Logger; @@ -18,19 +25,75 @@ import org.springframework.stereotype.Service; public class EmbeddingService { private static final Logger logger = LoggerFactory.getLogger(EmbeddingService.class); + private final DataFileService dataFileService; private final FlowExecutor executor; + private final KnowledgeService knowledgeService; + private final GroupService groupService; + private final ExecutorService executors = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); @SuppressWarnings("SpringJavaInjectionPointsAutowiringInspection") - public EmbeddingService(FlowExecutor executor) { + public EmbeddingService(DataFileService dataFileService, FlowExecutor executor, KnowledgeService knowledgeService, GroupService groupService) { + this.dataFileService = dataFileService; this.executor = executor; + this.knowledgeService = knowledgeService; + this.groupService = groupService; } - public ImmutableList split(String mode, String content) { - EmbeddingContext context = new EmbeddingContext( - content, - new EmbeddingContext.Config(EmbeddingContext.Config.SplitStrategy.valueOf(mode)) - ); - executor.execute2Resp("embedding", null, 
context); + public ImmutableList preview(String mode, String content) { + if (content.length() > 2000) { + content = content.substring(0, 2000); + } + EmbeddingContext context = EmbeddingContext.builder() + .content(content) + .config(EmbeddingContext.Config.builder() + .splitStrategy(EmbeddingContext.Config.SplitStrategy.valueOf(mode)) + .build()) + .build(); + executor.execute2Resp("embedding_preview", null, context); return Lists.immutable.ofAll(context.getDocuments()); } + + public ImmutableList preview(String mode, ImmutableList ids) { + DataFileVO vo = dataFileService.downloadFile(Long.parseLong(ids.get(0))); + String content = FileUtil.readString(vo.getPath(), StandardCharsets.UTF_8); + return preview(mode, content); + } + + public void submit(Long id, String mode, String content) { + executors.submit(() -> { + Knowledge knowledge = knowledgeService.get(id); + Long groupId = groupService.add(knowledge.getId(), StrUtil.format("文本-{}", IdUtil.nanoId(10))); + EmbeddingContext context = EmbeddingContext.builder() + .vectorSourceId(knowledge.getVectorSourceId()) + .groupId(groupId) + .content(content) + .config(EmbeddingContext.Config.builder() + .splitStrategy(EmbeddingContext.Config.SplitStrategy.valueOf(mode)) + .build()) + .build(); + executor.execute2Resp("embedding_submit", null, context); + groupService.finish(groupId); + }); + } + + public void submit(Long id, String mode, ImmutableList ids) { + executors.submit(() -> { + Knowledge knowledge = knowledgeService.get(id); + for (String fileId : ids) { + DataFileVO vo = dataFileService.downloadFile(Long.parseLong(fileId)); + Long groupId = groupService.add(id, vo.getFilename()); + EmbeddingContext context = EmbeddingContext.builder() + .vectorSourceId(knowledge.getVectorSourceId()) + .groupId(groupId) + .file(vo.getPath()) + .fileFormat(vo.getFilename()) + .config(EmbeddingContext.Config.builder() + .splitStrategy(EmbeddingContext.Config.SplitStrategy.valueOf(mode)) + .build()) + .build(); + 
executor.execute2Resp("embedding_submit", null, context); + groupService.finish(groupId); + } + }); + } } diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/GroupService.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/GroupService.java new file mode 100644 index 0000000..6daee1e --- /dev/null +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/GroupService.java @@ -0,0 +1,135 @@ +package com.lanyuanxiaoyao.service.ai.knowledge.service; + +import club.kingon.sql.builder.SqlBuilder; +import club.kingon.sql.builder.entry.Alias; +import club.kingon.sql.builder.entry.Column; +import cn.hutool.core.util.IdUtil; +import com.lanyuanxiaoyao.service.ai.knowledge.entity.Group; +import io.qdrant.client.ConditionFactory; +import io.qdrant.client.QdrantClient; +import io.qdrant.client.grpc.Points; +import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; +import org.eclipse.collections.api.factory.Lists; +import org.eclipse.collections.api.list.ImmutableList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.vectorstore.VectorStore; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.jdbc.core.RowMapper; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +/** + * @author lanyuanxiaoyao + * @version 20250522 + */ +@Service +public class GroupService { + public static final String GROUP_TABLE_NAME = "service_ai_group"; + private static final Logger logger = LoggerFactory.getLogger(GroupService.class); + private static final RowMapper groupMapper = (rs, row) -> { + Group vo = new Group(); + vo.setId(String.valueOf(rs.getLong(1))); + vo.setName(rs.getString(2)); + vo.setStatus(rs.getString(3)); + vo.setCreatedTime(rs.getTimestamp(4).getTime()); + 
vo.setModifiedTime(rs.getTimestamp(5).getTime()); + return vo; + }; + + private final JdbcTemplate template; + private final QdrantClient client; + + public GroupService(JdbcTemplate template, VectorStore vectorStore) { + this.template = template; + this.client = (QdrantClient) vectorStore.getNativeClient().orElseThrow(); + } + + public Group get(Long id) { + return template.queryForObject( + SqlBuilder.select("id", "name", "status", "created_time", "modified_time") + .from(GROUP_TABLE_NAME) + .whereEq("id", id) + .orderByDesc("created_time") + .build(), + groupMapper + ); + } + + @Transactional(rollbackFor = Exception.class) + public Long add(Long knowledgeId, String name) { + long id = IdUtil.getSnowflakeNextId(); + template.update( + SqlBuilder.insertInto(GROUP_TABLE_NAME, "id", "knowledge_id", "name", "status") + .values() + .addValue("?", "?", "?", "?") + .precompileSql(), + id, + knowledgeId, + name, + "RUNNING" + ); + return id; + } + + public ImmutableList list(Long knowledgeId) { + return template.query( + SqlBuilder.select("id", "name", "status", "created_time", "modified_time") + .from(GROUP_TABLE_NAME) + .whereEq("knowledge_id", knowledgeId) + .orderByDesc("created_time") + .build(), + groupMapper + ) + .stream() + .collect(Collectors.toCollection(Lists.mutable::empty)) + .toImmutable(); + } + + @Transactional(rollbackFor = Exception.class) + public void finish(Long groupId) { + template.update( + SqlBuilder.update(GROUP_TABLE_NAME) + .set("status", "FINISHED") + .whereEq("id", groupId) + .build() + ); + } + + @Transactional(rollbackFor = Exception.class) + public void remove(Long groupId) throws ExecutionException, InterruptedException { + Long vectorSourceId = template.queryForObject( + SqlBuilder.select("k.vector_source_id") + .from(Alias.of(GROUP_TABLE_NAME, "g"), Alias.of(KnowledgeService.KNOWLEDGE_TABLE_NAME, "k")) + .whereEq("g.knowledge_id", Column.as("k.id")) + .andEq("g.id", groupId) + .precompileSql(), + Long.class, + groupId + ); + 
logger.info("Delete {} {}", vectorSourceId, groupId); + client.deleteAsync( + String.valueOf(vectorSourceId), + Points.Filter.newBuilder() + .addMust(ConditionFactory.matchKeyword("vector_source_id", String.valueOf(vectorSourceId))) + .addMust(ConditionFactory.matchKeyword("group_id", String.valueOf(groupId))) + .build() + ).get(); + template.update( + SqlBuilder.delete(GROUP_TABLE_NAME) + .whereEq("id", groupId) + .build() + ); + } + + @Transactional(rollbackFor = Exception.class) + public void removeByKnowledgeId(Long knowledgeId) { + template.update( + SqlBuilder.delete(GROUP_TABLE_NAME) + .whereEq("knowledge_id", "?") + .precompileSql(), + knowledgeId + ); + } +} diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/KnowledgeGroupService.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/KnowledgeGroupService.java deleted file mode 100644 index b9c72f7..0000000 --- a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/KnowledgeGroupService.java +++ /dev/null @@ -1,58 +0,0 @@ -package com.lanyuanxiaoyao.service.ai.knowledge.service; - -import club.kingon.sql.builder.SqlBuilder; -import cn.hutool.core.util.IdUtil; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.jdbc.core.JdbcTemplate; -import org.springframework.stereotype.Service; -import org.springframework.transaction.annotation.Transactional; - -/** - * @author lanyuanxiaoyao - * @version 20250522 - */ -@Service -public class KnowledgeGroupService { - private static final Logger logger = LoggerFactory.getLogger(KnowledgeGroupService.class); - private static final String GROUP_TABLE_NAME = "service_ai_group"; - - private final JdbcTemplate template; - - public KnowledgeGroupService(JdbcTemplate template) { - this.template = template; - } - - @Transactional(rollbackFor = Exception.class) - public void add(Long knowledgeId, String 
name) { - template.update( - SqlBuilder.insertInto(GROUP_TABLE_NAME, "id", "knowledge_id", "name") - .values() - .addValue("?", "?", "?") - .precompileSql(), - IdUtil.getSnowflakeNextId(), - knowledgeId, - name - ); - } - - @Transactional(rollbackFor = Exception.class) - public void remove(Long groupId) { - template.update( - SqlBuilder.delete(GROUP_TABLE_NAME) - .whereEq("id", "?") - .precompileSql(), - groupId - ); - } - - @Transactional(rollbackFor = Exception.class) - public void removeByKnowledgeId(Long knowledgeId) { - template.update( - SqlBuilder.delete(GROUP_TABLE_NAME) - .whereEq("knowledge_id", "?") - .precompileSql(), - knowledgeId - ); - } -} diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/KnowledgeService.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/KnowledgeService.java index f4709b2..6430c5f 100644 --- a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/KnowledgeService.java +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/KnowledgeService.java @@ -17,6 +17,7 @@ import org.slf4j.LoggerFactory; import org.springframework.ai.embedding.EmbeddingModel; import org.springframework.ai.vectorstore.VectorStore; import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.jdbc.core.RowMapper; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -26,43 +27,41 @@ import org.springframework.transaction.annotation.Transactional; */ @Service public class KnowledgeService { + public static final String KNOWLEDGE_TABLE_NAME = "service_ai_knowledge"; private static final Logger logger = LoggerFactory.getLogger(KnowledgeService.class); - private static final String KNOWLEDGE_TABLE_NAME = "service_ai_knowledge"; - + private static final RowMapper knowledgeMapper = (rs, row) -> { + 
Knowledge knowledge = new Knowledge(); + knowledge.setId(rs.getLong(1)); + knowledge.setVectorSourceId(rs.getLong(2)); + knowledge.setName(rs.getString(3)); + knowledge.setStrategy(rs.getString(4)); + knowledge.setCreatedTime(rs.getTimestamp(5).getTime()); + knowledge.setModifiedTime(rs.getTimestamp(6).getTime()); + return knowledge; + }; private final JdbcTemplate template; private final EmbeddingModel embeddingModel; private final QdrantClient client; - private final KnowledgeGroupService knowledgeGroupService; + private final GroupService groupService; - public KnowledgeService(JdbcTemplate template, EmbeddingModel embeddingModel, VectorStore vectorStore, KnowledgeGroupService knowledgeGroupService) { + public KnowledgeService(JdbcTemplate template, EmbeddingModel embeddingModel, VectorStore vectorStore, GroupService groupService) { this.template = template; this.embeddingModel = embeddingModel; this.client = (QdrantClient) vectorStore.getNativeClient().orElseThrow(); - this.knowledgeGroupService = knowledgeGroupService; + this.groupService = groupService; } public Knowledge get(Long id) { return template.queryForObject( - SqlBuilder.select("id", "vector_source_id", "name", "strategy") + SqlBuilder.select("id", "vector_source_id", "name", "strategy", "created_time", "modified_time") .from(KNOWLEDGE_TABLE_NAME) .whereEq("id", "?") .precompileSql(), - Knowledge.class, + knowledgeMapper, id ); } - public Knowledge get(String name) { - return template.queryForObject( - SqlBuilder.select("id", "vector_source_id", "name", "strategy") - .from(KNOWLEDGE_TABLE_NAME) - .whereEq("name", "?") - .precompileSql(), - Knowledge.class, - name - ); - } - @Transactional(rollbackFor = Exception.class) public void add(String name, String strategy) throws ExecutionException, InterruptedException { Integer count = template.queryForObject( @@ -98,25 +97,31 @@ public class KnowledgeService { ).get(); } + public String getName(Long id) { + return template.queryForObject( + 
SqlBuilder.select("name") + .from(KNOWLEDGE_TABLE_NAME) + .whereEq("id", id) + .orderByDesc("created_time") + .build(), + String.class + ); + } + public ImmutableList list() { return template.query( - SqlBuilder.select("id", "vector_source_id", "name", "strategy") + SqlBuilder.select("id", "vector_source_id", "name", "strategy", "created_time", "modified_time") .from(KNOWLEDGE_TABLE_NAME) + .orderByDesc("created_time") .build(), - (rs, index) -> { - Knowledge knowledge = new Knowledge(); - knowledge.setId(rs.getLong(1)); - knowledge.setVectorSourceId(rs.getLong(2)); - knowledge.setName(rs.getString(3)); - knowledge.setStrategy(rs.getString(4)); - return knowledge; - } + knowledgeMapper ) .stream() .map(knowledge -> { try { Collections.CollectionInfo info = client.getCollectionInfoAsync(String.valueOf(knowledge.getVectorSourceId())).get(); KnowledgeVO vo = new KnowledgeVO(); + vo.setId(String.valueOf(knowledge.getId())); vo.setName(knowledge.getName()); vo.setPoints(info.getPointsCount()); vo.setSegments(info.getSegmentsCount()); @@ -124,6 +129,8 @@ public class KnowledgeService { Collections.VectorParams vectorParams = info.getConfig().getParams().getVectorsConfig().getParams(); vo.setStrategy(vectorParams.getDistance().name()); vo.setSize(vectorParams.getSize()); + vo.setCreatedTime(knowledge.getCreatedTime()); /* timestamps come from the database row; the freshly created VO has none of its own */ + vo.setModifiedTime(knowledge.getModifiedTime()); return vo; } catch (InterruptedException | ExecutionException e) { throw new RuntimeException(e); @@ -134,8 +141,8 @@ public class KnowledgeService { } @Transactional(rollbackFor = Exception.class) - public void remove(String name) throws ExecutionException, InterruptedException { - Knowledge knowledge = get(name); + public void remove(Long id) throws ExecutionException, InterruptedException { + Knowledge knowledge = get(id); if (ObjectUtil.isNull(knowledge)) { throw new RuntimeException(StrUtil.format("{} 不存在")); } @@ -145,7 +152,7 @@ public class KnowledgeService { .precompileSql(), knowledge.getId() ); -
knowledgeGroupService.removeByKnowledgeId(knowledge.getId()); + groupService.removeByKnowledgeId(knowledge.getId()); client.deleteCollectionAsync(String.valueOf(knowledge.getVectorSourceId())).get(); } } \ No newline at end of file diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/SegmentService.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/SegmentService.java new file mode 100644 index 0000000..54fbd69 --- /dev/null +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/SegmentService.java @@ -0,0 +1,71 @@ +package com.lanyuanxiaoyao.service.ai.knowledge.service; + +import com.lanyuanxiaoyao.service.ai.knowledge.entity.Knowledge; +import com.lanyuanxiaoyao.service.ai.knowledge.entity.vo.SegmentVO; +import io.qdrant.client.ConditionFactory; +import io.qdrant.client.QdrantClient; +import io.qdrant.client.grpc.Points; +import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; +import org.eclipse.collections.api.factory.Lists; +import org.eclipse.collections.api.list.ImmutableList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.vectorstore.VectorStore; +import org.springframework.stereotype.Service; + +/** + * @author lanyuanxiaoyao + * @version 20250528 + */ +@Service +public class SegmentService { + private static final Logger logger = LoggerFactory.getLogger(SegmentService.class); + + private final KnowledgeService knowledgeService; + private final QdrantClient client; + + public SegmentService(KnowledgeService knowledgeService, VectorStore vectorStore) { + this.knowledgeService = knowledgeService; + this.client = (QdrantClient) vectorStore.getNativeClient().orElseThrow(); + } + + public ImmutableList list(Long id, Long groupId) throws ExecutionException, InterruptedException { + Knowledge knowledge = 
knowledgeService.get(id); + Points.ScrollResponse response = client.scrollAsync( + Points.ScrollPoints.newBuilder() + .setCollectionName(String.valueOf(knowledge.getVectorSourceId())) + .setWithPayload(Points.WithPayloadSelector.newBuilder().setEnable(true).build()) + .setWithVectors(Points.WithVectorsSelector.newBuilder().setEnable(false).build()) + .setFilter( + Points.Filter.newBuilder() + .addMust(ConditionFactory.matchKeyword("group_id", String.valueOf(groupId))) + .build() + ) + .build() + ) + .get(); + return response.getResultList() + .stream() + .collect(Collectors.toCollection(Lists.mutable::empty)) + .collect(point -> { + SegmentVO vo = new SegmentVO(); + vo.setId(point.getId().getUuid()); + vo.setText(point.getPayloadMap().get("doc_content").getStringValue()); + return vo; + }) + .toImmutable(); + } + + public void remove(Long knowledgeId, Long segmentId) throws ExecutionException, InterruptedException { + Knowledge knowledge = knowledgeService.get(knowledgeId); + client.deletePayloadAsync( /* NOTE(review): deletePayloadAsync appears to remove payload keys rather than points - confirm whether deleteAsync with point ids is intended */ + String.valueOf(knowledge.getVectorSourceId()), /* collections are named by vector source id, matching list() in this class */ + List.of(String.valueOf(segmentId)), + null, + null, + null + ).get(); + } +} diff --git a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/node/EmbeddingNodes.java b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/node/EmbeddingNodes.java index 2f57167..f6dc08a 100644 --- a/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/node/EmbeddingNodes.java +++ b/service-ai/service-ai-knowledge/src/main/java/com/lanyuanxiaoyao/service/ai/knowledge/service/node/EmbeddingNodes.java @@ -2,6 +2,7 @@ package com.lanyuanxiaoyao.service.ai.knowledge.service.node; import cn.hutool.core.io.FileUtil; import cn.hutool.core.lang.Assert; +import cn.hutool.core.util.ObjectUtil; import cn.hutool.core.util.StrUtil; import com.lanyuanxiaoyao.service.ai.knowledge.entity.EmbeddingContext; import
com.yomahub.liteflow.annotation.LiteflowComponent; @@ -9,21 +10,27 @@ import com.yomahub.liteflow.annotation.LiteflowMethod; import com.yomahub.liteflow.core.NodeComponent; import com.yomahub.liteflow.enums.LiteFlowMethodEnum; import com.yomahub.liteflow.enums.NodeTypeEnum; +import io.qdrant.client.QdrantClient; import java.nio.charset.Charset; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.stream.Collectors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.document.Document; import org.springframework.ai.document.DocumentReader; +import org.springframework.ai.embedding.EmbeddingModel; import org.springframework.ai.reader.ExtractedTextFormatter; import org.springframework.ai.reader.pdf.PagePdfDocumentReader; import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig; import org.springframework.ai.reader.tika.TikaDocumentReader; import org.springframework.ai.transformer.splitter.TokenTextSplitter; +import org.springframework.ai.vectorstore.VectorStore; +import org.springframework.ai.vectorstore.qdrant.QdrantVectorStore; import org.springframework.core.io.PathResource; /** @@ -35,9 +42,13 @@ public class EmbeddingNodes { private static final Logger logger = LoggerFactory.getLogger(EmbeddingNodes.class); private final ChatClient chatClient; + private final QdrantClient qdrantClient; + private final EmbeddingModel embeddingModel; - public EmbeddingNodes(ChatClient.Builder builder) { + public EmbeddingNodes(ChatClient.Builder builder, VectorStore vectorStore, EmbeddingModel embeddingModel) { this.chatClient = builder.build(); + this.qdrantClient = (QdrantClient) vectorStore.getNativeClient().orElseThrow(); + this.embeddingModel = embeddingModel; } @LiteflowMethod(value = LiteFlowMethodEnum.PROCESS_BOOLEAN, nodeId = "embedding_check_if_file_needed", nodeName = "判断是否需要读取文件", 
nodeType = NodeTypeEnum.BOOLEAN) @@ -52,16 +63,10 @@ public class EmbeddingNodes { return false; } - @LiteflowMethod(value = LiteFlowMethodEnum.PROCESS, nodeId = "test_print", nodeType = NodeTypeEnum.COMMON) - public void testPrint(NodeComponent node) { - EmbeddingContext context = node.getContextBean(EmbeddingContext.class); - logger.info(context.getContent()); - } - @LiteflowMethod(value = LiteFlowMethodEnum.PROCESS_SWITCH, nodeId = "file_reader_switch", nodeName = "判断文件格式", nodeType = NodeTypeEnum.SWITCH) public String fileReaderSwitch(NodeComponent node) { EmbeddingContext context = node.getContextBean(EmbeddingContext.class); - String extName = FileUtil.extName(context.getFile()); + String extName = FileUtil.extName(context.getFileFormat()); return switch (extName.toLowerCase()) { case "txt", "md", "markdown" -> "txt_file_reader"; case "pdf" -> "pdf_file_reader"; @@ -132,16 +137,11 @@ public class EmbeddingNodes { EmbeddingContext context = node.getContextBean(EmbeddingContext.class); context.getDocuments().addAll(llmSplit( """ - 对用户输入的文本,生成高质量的分段。请遵循以下指南: - 1. 分段原则: - 分段按文本内容的语义进行分割,每个分段都尽可能保持完整连续的内容表达。 - 避免从词句的中间进行分割。 - 2. 格式: - 分段之间用两个空行分隔,以提高可读性。 - 避免使用任何Markdown格式 - 3. 内容要求: - 确保每个分段的内容文字完全依照原文。 - 避免添加任何原文中不存在的文字。 + 请你将用户输入的文本进行语义切分,生成用于知识库检索的文本段。 + 每个文本段要尽可能多地覆盖用户输入文本的各方面知识和细节,包括但不限于主题、概念、关键信息等。对于关键的数字、理论、细节等,要严格遵循原文,不能进行任何虚构和捏造不存在的知识,确保输出内容准确、真实且全面。 + 输出格式为纯文本段,分段之间使用“---”作为分割,方便后续使用代码进行切分。 + 输出文本避免添加markdown格式,保持文本格式紧凑。 + 切分过程中,要注重保持文本的完整性和逻辑性,确保每个文本段都能独立地表达出清晰、准确的信息,以便更好地进行知识库检索。 """, context.getContent(), context.getMetadata() @@ -153,7 +153,7 @@ public class EmbeddingNodes { EmbeddingContext context = node.getContextBean(EmbeddingContext.class); context.getDocuments().addAll(llmSplit( """ - 对用户输入的文本,生成一组高质量的问答对。请遵循以下指南: + 对用户输入的文本,生成多组高质量的问答对。请遵循以下指南: 1. 问题部分: 为同一个主题创建尽可能多的不同表述的问题,确保问题的多样性。 每个问题应考虑用户可能的多种问法,例如: @@ -168,14 +168,20 @@ public class EmbeddingNodes { 答案应直接基于给定文本,确保准确性和一致性。 包含相关的细节,如日期、名称、职位等具体信息,必要时提供背景信息以增强理解。 3. 
格式: - 使用"Q:"标记问题集合的开始,所有问题应在一个段落内,问题之间用空格分隔。 - 使用"A:"标记答案的开始,答案应清晰分段,便于阅读。 - 问答对之间用两个空行分隔,以提高可读性。 - 避免使用任何Markdown格式 + 使用"问:"标记问题集合的开始,所有问题应在一个段落内,问题之间用空格分隔。 + 使用"答:"标记答案的开始,答案应清晰分段,便于阅读。 + 问答对之间用“---”分隔,以提高可读性。 4. 内容要求: 确保问答对紧密围绕文本主题,避免偏离主题。 避免添加文本中未提及的信息,确保信息的真实性。 + 一个问题搭配一个答案,避免一组问答对中同时涉及多个问题。 如果文本信息不足以回答某个方面,可以在答案中说明 "根据给定信息无法确定",并尽量提供相关的上下文。 + 格式样例: + 问:苹果通常是什么颜色的? + 答:红色。 + --- + 问:苹果长在树上还是地上? + 答:苹果长在树上。 """, context.getContent(), context.getMetadata() @@ -189,13 +195,33 @@ public class EmbeddingNodes { .call() .content(); Assert.notBlank(response, "LLM response is empty"); + logger.info("{}", response); // noinspection DataFlowIssue - return Arrays.stream(StrUtil.trim(response).split("(s?)\\s*\\n\\n")) + return Arrays.stream(StrUtil.trim(response).split("---")) + .map(text -> text.replaceAll("(?!^.+) +$", "")) .map(StrUtil::trim) .map(text -> Document.builder() .text(text) - .metadata(metadata) + .metadata(Optional.ofNullable(metadata).orElse(new HashMap<>())) .build()) .toList(); } + + @LiteflowMethod(value = LiteFlowMethodEnum.PROCESS, nodeId = "import_vector_source", nodeName = "导入向量库", nodeType = NodeTypeEnum.COMMON) + public void importVectorSource(NodeComponent node) { + EmbeddingContext context = node.getContextBean(EmbeddingContext.class); + if (ObjectUtil.isNotEmpty(context.getDocuments())) { + VectorStore vs = QdrantVectorStore.builder(qdrantClient, embeddingModel) + .collectionName(String.valueOf(context.getVectorSourceId())) + .build(); + for (Document document : context.getDocuments()) { + Map metadata = document.getMetadata(); + metadata.put("filename", context.getFileFormat()); + metadata.put("filepath", context.getFile()); + metadata.put("group_id", String.valueOf(context.getGroupId())); + metadata.put("vector_source_id", String.valueOf(context.getVectorSourceId())); + } + vs.add(context.getDocuments()); + } + } } diff --git a/service-ai/service-ai-knowledge/src/main/resources/application.yml 
b/service-ai/service-ai-knowledge/src/main/resources/application.yml index 30e248d..21ede65 100644 --- a/service-ai/service-ai-knowledge/src/main/resources/application.yml +++ b/service-ai/service-ai-knowledge/src/main/resources/application.yml @@ -36,7 +36,7 @@ spring: model: 'Qwen3-1.7-vllm' embedding: options: - model: 'Bge-m3' + model: 'Bge-m3-vllm' vectorstore: qdrant: api-key: lanyuanxiaoyao @@ -49,3 +49,6 @@ liteflow: rule-source: config/flow.xml print-banner: false check-node-exists: false +knowledge: + download-prefix: "http://localhost:8080" + upload-path: /Users/lanyuanxiaoyao/Project/IdeaProjects/hudi-service/service-ai/temp \ No newline at end of file diff --git a/service-ai/service-ai-knowledge/src/main/resources/config/flow.xml b/service-ai/service-ai-knowledge/src/main/resources/config/flow.xml index 08f7b43..cecb4e9 100644 --- a/service-ai/service-ai-knowledge/src/main/resources/config/flow.xml +++ b/service-ai/service-ai-knowledge/src/main/resources/config/flow.xml @@ -1,7 +1,7 @@ - + SER( IF( embedding_check_if_file_needed, @@ -17,4 +17,7 @@ ) ) + + SER(embedding_preview, import_vector_source) + \ No newline at end of file diff --git a/service-ai/service-ai-knowledge/src/test/java/com/lanyuanxiaoyao/service/ai/knowledge/TestEmbedding.java b/service-ai/service-ai-knowledge/src/test/java/com/lanyuanxiaoyao/service/ai/knowledge/TestEmbedding.java new file mode 100644 index 0000000..e9249cf --- /dev/null +++ b/service-ai/service-ai-knowledge/src/test/java/com/lanyuanxiaoyao/service/ai/knowledge/TestEmbedding.java @@ -0,0 +1,59 @@ +package com.lanyuanxiaoyao.service.ai.knowledge; + +import io.qdrant.client.QdrantClient; +import io.qdrant.client.QdrantGrpcClient; +import io.qdrant.client.grpc.Collections; +import java.net.http.HttpClient; +import java.util.List; +import java.util.concurrent.ExecutionException; +import org.springframework.ai.document.Document; +import org.springframework.ai.document.MetadataMode; +import 
org.springframework.ai.embedding.EmbeddingModel; +import org.springframework.ai.openai.OpenAiEmbeddingModel; +import org.springframework.ai.openai.OpenAiEmbeddingOptions; +import org.springframework.ai.openai.api.OpenAiApi; +import org.springframework.ai.vectorstore.VectorStore; +import org.springframework.ai.vectorstore.qdrant.QdrantVectorStore; +import org.springframework.http.client.JdkClientHttpRequestFactory; +import org.springframework.http.client.reactive.JdkClientHttpConnector; +import org.springframework.web.client.RestClient; +import org.springframework.web.reactive.function.client.WebClient; + +/** + * @author lanyuanxiaoyao + * @version 20250527 + */ +public class TestEmbedding { + public static void main(String[] args) throws ExecutionException, InterruptedException { + HttpClient httpClient = HttpClient.newBuilder() + .version(HttpClient.Version.HTTP_1_1) + .build(); + EmbeddingModel model = new OpenAiEmbeddingModel( + OpenAiApi.builder() + .baseUrl("http://132.121.206.65:10086") + .apiKey("*XMySqV%>hR&v>>g*NwCs3tpQ5FVMFEF2VHVTjhR&v>>g*NwCs3tpQ5FVMFEF2VHVTj { - const {name} = useParams() + const {knowledge_id} = useParams() + const navigate = useNavigate() return (
{amisRender( { className: 'h-full', type: 'page', - title: `数据详情 (知识库:${name})`, + title: { + type: 'wrapper', + size: 'none', + body: [ + '数据详情 (知识库:', + { + type: 'service', + className: 'inline', + api: { + method: 'get', + url: 'http://127.0.0.1:8080/knowledge/name', + headers: { + 'Authorization': 'Basic QXhoRWJzY3dzSkRiWU1IMjpjWXhnM2I0UHRXb1ZENVNqRmF5V3h0blNWc2p6UnNnNA==', + }, + data: { + id: knowledge_id, + }, + }, + body: { + type: 'tpl', + tpl: '${name}', + }, + }, + ')', + ], + }, size: 'lg', actions: [], body: [ { type: 'crud', api: { - url: 'http://127.0.0.1:8080/knowledge/list_points?name=${name}', + method: 'get', + url: 'http://127.0.0.1:8080/group/list', headers: { 'Authorization': 'Basic QXhoRWJzY3dzSkRiWU1IMjpjWXhnM2I0UHRXb1ZENVNqRmF5V3h0blNWc2p6UnNnNA==', }, + data: { + knowledge_id, + }, }, ...crudCommonOptions(), headerToolbar: [ 'reload', + { + type: 'action', + icon: 'fa fa-plus', + label: '', + tooltip: '新增', + tooltipPlacement: 'top', + onEvent: { + click: { + actions: [ + { + actionType: 'custom', + // @ts-ignore + script: (context, action, event) => { + navigate(`/ai/knowledge/import/${knowledge_id}`) + }, + }, + ], + }, + }, + }, ], columns: [ { @@ -32,8 +87,15 @@ const DataDetail: React.FC = () => { hidden: true, }, { - name: 'text', - label: '内容', + name: 'name', + label: '文件名', + }, + { + name: 'status', + label: '状态', + width: 50, + align: 'center', + ...mappingField('status', statusMapping), }, { type: 'operation', @@ -42,30 +104,17 @@ const DataDetail: React.FC = () => { buttons: [ { type: 'action', - label: '编辑', + label: '查看', level: 'link', - size: 'lg', - actionType: 'dialog', - dialog: { - title: '编辑文段', - size: 'md', - body: { - type: 'form', - body: [ + size: 'sm', + onEvent: { + click: { + actions: [ { - type: 'input-text', - name: 'id', - disabled: true, - label: '文段ID', - }, - { - type: 'editor', - label: '内容', - name: 'text', - language: 'plaintext', - options: { - lineNumbers: 'off', - wordWrap: 'bounded', + 
actionType: 'custom', + // @ts-ignore + script: (context, action, event) => { + navigate(`/ai/knowledge/detail/${knowledge_id}/segment/${context.props.data['id']}`) }, }, ], @@ -77,13 +126,17 @@ const DataDetail: React.FC = () => { label: '删除', className: 'text-danger hover:text-red-600', level: 'link', - size: 'xs', + size: 'sm', actionType: 'ajax', api: { method: 'get', + url: 'http://127.0.0.1:8080/group/delete', headers: { 'Authorization': 'Basic QXhoRWJzY3dzSkRiWU1IMjpjWXhnM2I0UHRXb1ZENVNqRmF5V3h0blNWc2p6UnNnNA==', }, + data: { + id: '${id}', + }, }, confirmText: '确认删除', confirmTitle: '删除', diff --git a/service-web/client/src/pages/ai/knowledge/DataImport.tsx b/service-web/client/src/pages/ai/knowledge/DataImport.tsx index 3d32816..bf764be 100644 --- a/service-web/client/src/pages/ai/knowledge/DataImport.tsx +++ b/service-web/client/src/pages/ai/knowledge/DataImport.tsx @@ -10,12 +10,37 @@ const ImportDataDiv = styled.div` ` const DataImport: React.FC = () => { - const {name} = useParams() + const {knowledge_id} = useParams() return ( {amisRender({ type: 'page', - title: `数据导入 (知识库:${name})`, + title: { + type: 'wrapper', + size: 'none', + body: [ + '数据导入 (知识库:', + { + type: 'service', + className: 'inline', + api: { + method: 'get', + url: 'http://127.0.0.1:8080/knowledge/name', + headers: { + 'Authorization': 'Basic QXhoRWJzY3dzSkRiWU1IMjpjWXhnM2I0UHRXb1ZENVNqRmF5V3h0blNWc2p6UnNnNA==', + }, + data: { + id: knowledge_id, + }, + }, + body: { + type: 'tpl', + tpl: '${name}', + }, + }, + ')', + ], + }, body: [ [ { @@ -53,7 +78,7 @@ const DataImport: React.FC = () => { name: 'type', type: 'radios', label: '数据形式', - value: 'text', + value: 'file', options: [ { value: 'text', @@ -62,7 +87,6 @@ const DataImport: React.FC = () => { { value: 'file', label: '文件', - disabled: true, }, ], }, @@ -82,10 +106,38 @@ const DataImport: React.FC = () => { type: 'input-file', name: 'files', label: '数据文件', - accept: '.txt,.csv', autoUpload: false, drag: true, multiple: true, + 
useChunk: true, + accept: '*', + // 5MB 5242880 + // 100MB 104857600 + // 500MB 524288000 + // 1GB 1073741824 + maxSize: '', + maxLength: 0, + startChunkApi: { + method: 'post', + url: 'http://127.0.0.1:8080/upload/start', + headers: { + 'Authorization': 'Basic QXhoRWJzY3dzSkRiWU1IMjpjWXhnM2I0UHRXb1ZENVNqRmF5V3h0blNWc2p6UnNnNA==', + }, + }, + chunkApi: { + method: 'post', + url: 'http://127.0.0.1:8080/upload/slice', + headers: { + 'Authorization': 'Basic QXhoRWJzY3dzSkRiWU1IMjpjWXhnM2I0UHRXb1ZENVNqRmF5V3h0blNWc2p6UnNnNA==', + }, + }, + finishChunkApi: { + method: 'post', + url: 'http://127.0.0.1:8080/upload/finish', + headers: { + 'Authorization': 'Basic QXhoRWJzY3dzSkRiWU1IMjpjWXhnM2I0UHRXb1ZENVNqRmF5V3h0blNWc2p6UnNnNA==', + }, + }, }, { className: 'text-right', @@ -103,16 +155,10 @@ const DataImport: React.FC = () => { }, dataType: 'form', data: { - mode: '${mode}', - type: '${type}', - content: '${content}', - }, - // @ts-ignore - adaptor: (payload, response, api, context) => { - console.log(payload) - return { - items: payload, - } + mode: '${mode|default:undefined}', + type: '${type|default:undefined}', + content: '${content|default:undefined}', + files: '${files|default:undefined}', }, }, reload: 'preview_list?rows=${items}', @@ -121,6 +167,22 @@ const DataImport: React.FC = () => { type: 'submit', label: '提交', level: 'primary', + actionType: 'ajax', + api: { + method: 'post', + url: 'http://127.0.0.1:8080/knowledge/submit_text', + headers: { + 'Authorization': 'Basic QXhoRWJzY3dzSkRiWU1IMjpjWXhnM2I0UHRXb1ZENVNqRmF5V3h0blNWc2p6UnNnNA==', + }, + dataType: 'form', + data: { + id: knowledge_id, + mode: '${mode|default:undefined}', + type: '${type|default:undefined}', + content: '${content|default:undefined}', + files: '${files|default:undefined}', + }, + }, }, ], }, @@ -155,6 +217,7 @@ const DataImport: React.FC = () => { ], listItem: { body: { + className: 'white-space-pre-line', type: 'tpl', tpl: '${text}', }, diff --git 
a/service-web/client/src/pages/ai/knowledge/DataSegment.tsx b/service-web/client/src/pages/ai/knowledge/DataSegment.tsx new file mode 100644 index 0000000..c17421e --- /dev/null +++ b/service-web/client/src/pages/ai/knowledge/DataSegment.tsx @@ -0,0 +1,133 @@ +import React from 'react' +import {useParams} from 'react-router' +import {amisRender, crudCommonOptions} from '../../../util/amis.tsx' + +const DataDetail: React.FC = () => { + const {knowledge_id, group_id} = useParams() + return ( +
+ {amisRender( + { + className: 'h-full', + type: 'page', + title: { + type: 'wrapper', + size: 'none', + body: [ + '数据详情 (知识库:', + { + type: 'service', + className: 'inline', + api: { + method: 'get', + url: 'http://127.0.0.1:8080/knowledge/name', + headers: { + 'Authorization': 'Basic QXhoRWJzY3dzSkRiWU1IMjpjWXhnM2I0UHRXb1ZENVNqRmF5V3h0blNWc2p6UnNnNA==', + }, + data: { + id: knowledge_id, + }, + }, + body: { + type: 'tpl', + tpl: '${name}', + }, + }, + ')', + ], + }, + size: 'lg', + actions: [], + body: [ + { + type: 'crud', + api: { + method: 'get', + url: 'http://127.0.0.1:8080/segment/list', + headers: { + 'Authorization': 'Basic QXhoRWJzY3dzSkRiWU1IMjpjWXhnM2I0UHRXb1ZENVNqRmF5V3h0blNWc2p6UnNnNA==', + }, + data: { + knowledge_id, + group_id, + }, + }, + ...crudCommonOptions(), + headerToolbar: [ + 'reload', + ], + columns: [ + { + name: 'id', + hidden: true, + }, + { + name: 'text', + label: '内容', + className: 'white-space-pre-line', + }, + { + type: 'operation', + label: '操作', + width: 50, + buttons: [ + /*{ + type: 'action', + label: '编辑', + level: 'link', + size: 'lg', + actionType: 'dialog', + dialog: { + title: '编辑文段', + size: 'md', + body: { + type: 'form', + body: [ + { + type: 'input-text', + name: 'id', + disabled: true, + label: '文段ID', + }, + { + type: 'editor', + label: '内容', + name: 'text', + language: 'plaintext', + options: { + lineNumbers: 'off', + wordWrap: 'bounded', + }, + }, + ], + }, + }, + },*/ + { + type: 'action', + label: '删除', + className: 'text-danger hover:text-red-600', + level: 'link', + size: 'sm', + actionType: 'ajax', + api: { + method: 'get', + headers: { + 'Authorization': 'Basic QXhoRWJzY3dzSkRiWU1IMjpjWXhnM2I0UHRXb1ZENVNqRmF5V3h0blNWc2p6UnNnNA==', + }, + }, + confirmText: '删除后无法恢复,确认删除该记录?', + confirmTitle: '删除', + }, + ], + }, + ], + }, + ], + }, + )} +
+ ) +} + +export default DataDetail \ No newline at end of file diff --git a/service-web/client/src/pages/ai/knowledge/Knowledge.tsx b/service-web/client/src/pages/ai/knowledge/Knowledge.tsx index 9643f81..969e5f1 100644 --- a/service-web/client/src/pages/ai/knowledge/Knowledge.tsx +++ b/service-web/client/src/pages/ai/knowledge/Knowledge.tsx @@ -38,6 +38,8 @@ const Knowledge: React.FC = () => { type: 'action', label: '', icon: 'fa fa-plus', + tooltip: '新增', + tooltipPlacement: 'top', actionType: 'dialog', dialog: { title: '新增知识库', @@ -111,7 +113,7 @@ const Knowledge: React.FC = () => { type: 'action', label: '详情', level: 'link', - size: 'xs', + size: 'sm', onEvent: { click: { actions: [ @@ -119,7 +121,7 @@ const Knowledge: React.FC = () => { actionType: 'custom', // @ts-ignore script: (context, action, event) => { - navigate(`/ai/knowledge/detail/${context.props.data['name']}`) + navigate(`/ai/knowledge/detail/${context.props.data['id']}`) }, }, ], @@ -130,7 +132,7 @@ const Knowledge: React.FC = () => { type: 'action', label: '导入', level: 'link', - size: 'xs', + size: 'sm', onEvent: { click: { actions: [ @@ -138,7 +140,7 @@ const Knowledge: React.FC = () => { actionType: 'custom', // @ts-ignore script: (context, action, event) => { - navigate(`/ai/knowledge/import/${context.props.data['name']}`) + navigate(`/ai/knowledge/import/${context.props.data['id']}`) }, }, ], @@ -150,14 +152,17 @@ const Knowledge: React.FC = () => { label: '删除', className: 'text-danger hover:text-red-600', level: 'link', - size: 'xs', + size: 'sm', actionType: 'ajax', api: { method: 'get', - url: 'http://127.0.0.1:8080/knowledge/delete?name=${name}', + url: 'http://127.0.0.1:8080/knowledge/delete', headers: { 'Authorization': 'Basic QXhoRWJzY3dzSkRiWU1IMjpjWXhnM2I0UHRXb1ZENVNqRmF5V3h0blNWc2p6UnNnNA==', }, + data: { + id: '${id}', + }, }, confirmText: '确认删除', confirmTitle: '删除', diff --git a/service-web/client/src/route.tsx b/service-web/client/src/route.tsx index 288632b..0af9d83 100644 --- 
a/service-web/client/src/route.tsx +++ b/service-web/client/src/route.tsx @@ -17,6 +17,7 @@ import Conversation from './pages/ai/Conversation.tsx' import Inspection from './pages/ai/Inspection.tsx' import DataDetail from './pages/ai/knowledge/DataDetail.tsx' import DataImport from './pages/ai/knowledge/DataImport.tsx' +import DataSegment from './pages/ai/knowledge/DataSegment.tsx' import Knowledge from './pages/ai/knowledge/Knowledge.tsx' import App from './pages/App.tsx' import Cloud from './pages/overview/Cloud.tsx' @@ -95,13 +96,17 @@ export const routes: RouteObject[] = [ Component: Knowledge, }, { - path: 'knowledge/import/:name', + path: 'knowledge/import/:knowledge_id', Component: DataImport, }, { - path: 'knowledge/detail/:name', + path: 'knowledge/detail/:knowledge_id', Component: DataDetail, }, + { + path: 'knowledge/detail/:knowledge_id/segment/:group_id', + Component: DataSegment, + }, ], }, ], diff --git a/service-web/client/src/util/amis.tsx b/service-web/client/src/util/amis.tsx index a41397b..f2e9139 100644 --- a/service-web/client/src/util/amis.tsx +++ b/service-web/client/src/util/amis.tsx @@ -77,6 +77,7 @@ export const amisRender = (schema: Schema, data: Record = {}) => { theme: theme, }, { + enableAMISDebug: true, fetcher: async (api: any) => { let {url, method, data, responseType, config, headers} = api config = config || {}