From 77cbf36524136267937ed24f81f86b677ec9c264 Mon Sep 17 00:00:00 2001 From: lanyuanxiaoyao Date: Sat, 1 Nov 2025 10:11:36 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=A2=9E=E5=8A=A0=E7=88=AC=E8=99=AB?= =?UTF-8?q?=E6=A1=88=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pom.xml | 20 ++- .../lanyuanxiaoyao/bookstore/entity/Book.java | 1 + .../bookstore/entity/Chapter.java | 1 - .../bookstore/service/CrawlerService.java | 13 ++ .../bookstore/BookCrawlerTest.java | 99 +++++++++++ .../bookstore/UpdateIntoDatabase.java | 159 ++++++++++++++++++ 6 files changed, 291 insertions(+), 2 deletions(-) create mode 100644 src/main/java/com/lanyuanxiaoyao/bookstore/service/CrawlerService.java create mode 100644 src/test/java/com/lanyuanxiaoyao/bookstore/BookCrawlerTest.java create mode 100644 src/test/java/com/lanyuanxiaoyao/bookstore/UpdateIntoDatabase.java diff --git a/pom.xml b/pom.xml index fa17fad..b6352f0 100644 --- a/pom.xml +++ b/pom.xml @@ -21,6 +21,7 @@ 5.8.39 2.15.0 + 4.38.1 @@ -44,15 +45,26 @@ cn.hutool - hutool-core + hutool-all ${hutool.version} + + cn.hutool + hutool-ai + ${hutool.version} + + com.yomahub liteflow-spring-boot-starter ${liteflow.version} + + org.seleniumhq.selenium + selenium-java + + com.h2database h2 @@ -82,6 +94,12 @@ pom import + + org.seleniumhq.selenium + selenium-dependencies-bom + ${selenium.version} + pom + diff --git a/src/main/java/com/lanyuanxiaoyao/bookstore/entity/Book.java b/src/main/java/com/lanyuanxiaoyao/bookstore/entity/Book.java index d515b49..d80f852 100644 --- a/src/main/java/com/lanyuanxiaoyao/bookstore/entity/Book.java +++ b/src/main/java/com/lanyuanxiaoyao/bookstore/entity/Book.java @@ -41,6 +41,7 @@ public class Book extends SimpleEntity { @Column(nullable = false) private String name; private String author; + @Column(columnDefinition = "text") private String description; private String source; @ElementCollection(fetch = FetchType.EAGER) diff --git a/src/main/java/com/lanyuanxiaoyao/bookstore/entity/Chapter.java b/src/main/java/com/lanyuanxiaoyao/bookstore/entity/Chapter.java index 524e1a8..bc911e4 100644 --- a/src/main/java/com/lanyuanxiaoyao/bookstore/entity/Chapter.java +++ b/src/main/java/com/lanyuanxiaoyao/bookstore/entity/Chapter.java @@ -40,7 +40,6 @@ import org.springframework.data.jpa.domain.support.AuditingEntityListener; public class Chapter extends SimpleEntity { @Column(nullable = false) private Double sequence; - @Column(nullable = false) private String name; private String description; diff --git a/src/main/java/com/lanyuanxiaoyao/bookstore/service/CrawlerService.java b/src/main/java/com/lanyuanxiaoyao/bookstore/service/CrawlerService.java new file mode 100644 index 0000000..be3d59b --- /dev/null +++ b/src/main/java/com/lanyuanxiaoyao/bookstore/service/CrawlerService.java @@ -0,0 +1,13 @@ +package com.lanyuanxiaoyao.bookstore.service; + +import org.springframework.stereotype.Service; + +/** + * 书籍下载 + * + * @author lanyuanxiaoyao + * @version 20251031 + */ +@Service +public class CrawlerService { +} diff --git a/src/test/java/com/lanyuanxiaoyao/bookstore/BookCrawlerTest.java b/src/test/java/com/lanyuanxiaoyao/bookstore/BookCrawlerTest.java new file mode 100644 index 0000000..4c6399b --- /dev/null +++ b/src/test/java/com/lanyuanxiaoyao/bookstore/BookCrawlerTest.java @@ -0,0 +1,99 @@ +package com.lanyuanxiaoyao.bookstore; + +import cn.hutool.core.thread.ThreadUtil; +import cn.hutool.core.util.StrUtil; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import lombok.extern.slf4j.Slf4j; +import org.openqa.selenium.By; +import org.openqa.selenium.chrome.ChromeDriver; +import org.openqa.selenium.chrome.ChromeDriverService; +import org.openqa.selenium.chrome.ChromeOptions; + +/** + * @author lanyuanxiaoyao + * @version 20251031 + */ +@Slf4j +public class BookCrawlerTest { + public static void main(String[] args) throws IOException { + ChromeDriver driver = null; + try { + driver = new ChromeDriver( + new ChromeDriverService.Builder() + .usingDriverExecutable(new File("/Users/lanyuanxiaoyao/Downloads/chromium/134/macOS-1345775/chromedriver")) + .build(), + new ChromeOptions() + .setBinary(new File("/Users/lanyuanxiaoyao/Downloads/chromium/134/macOS-1345775/Chromium.app/Contents/MacOS/Chromium")) + .addArguments( + // 允许不安全的域名 + "--allow-insecure-localhost", + // 禁用GPU渲染,headLess 模式用不上 + "--disable-gpu", + // 禁用音频输出 + "--disable-audio-output", + // 禁用错误页自动重新刷新 + "--disable-auto-reload", + // 禁用默认应用加载 + "--disable-default-apps", + // 禁用浏览器扩展 + "--disable-extensions", + // 禁用日志 + "--disable-logging", + // 禁用通知 + "--disable-notifications", + // 禁用远程字体 + "--disable-remote-fonts", + // 禁用弹出窗口 + "--disable-popup-blocking", + // 禁用同步 + "--disable-sync", + // 禁用沙盒 + "--no-sandbox", + // 禁用声音 + "--mute-audio", + // 禁止图片显示 + "blink-settings=imagesEnabled=false" + ) + ); + var articleUrl = "https://www.alicesw.com/novel/33927.html"; + driver.get(articleUrl); + var contextUrl = driver.findElement(By.xpath("//div[@class='book_newchap']//a[contains(text(),'查看所有章节')]")).getDomProperty("href"); + if (StrUtil.isBlank(contextUrl)) { + throw new RuntimeException("获取目录页链接失败"); + } + driver.get(contextUrl); + var chapterItems = driver.findElements(By.cssSelector("ul.mulu_list > li > a")) + .stream() + .map(element -> element.getDomProperty("href")) + .toList(); + for (var index = 0; index < chapterItems.size(); index++) { + var chapterUrl = chapterItems.get(index); + if (StrUtil.isBlank(chapterUrl)) { + throw new RuntimeException("获取章节链接失败: " + chapterUrl); + } + driver.get(chapterUrl); + var text = driver.findElement(By.cssSelector(".read-content")).getText(); + log.info(text); + + var title = driver.getTitle(); + if (StrUtil.isBlank(title)) { + title = String.valueOf(index); + } + var filename = StrUtil.format("{}.txt", title.replaceAll("\\s", "_")); + var targetFile = Path.of("out", filename); + Files.deleteIfExists(targetFile); + Files.createFile(targetFile); + Files.writeString(targetFile, text); + + ThreadUtil.safeSleep(5000); + } + } finally { + if (driver != null) { + driver.close(); + } + } + } +} diff --git a/src/test/java/com/lanyuanxiaoyao/bookstore/UpdateIntoDatabase.java b/src/test/java/com/lanyuanxiaoyao/bookstore/UpdateIntoDatabase.java new file mode 100644 index 0000000..94dc38e --- /dev/null +++ b/src/test/java/com/lanyuanxiaoyao/bookstore/UpdateIntoDatabase.java @@ -0,0 +1,159 @@ +package com.lanyuanxiaoyao.bookstore; + +import cn.hutool.ai.AIUtil; +import cn.hutool.ai.ModelName; +import cn.hutool.ai.core.AIConfigBuilder; +import java.io.IOException; +import lombok.extern.slf4j.Slf4j; + +/** + * 更新章节 + * + * @author lanyuanxiaoyao + * @version 20251031 + */ +@Slf4j +public class UpdateIntoDatabase { + public static void main(String[] args) throws IOException { + /*var chapters = new ArrayList(); + Files.list(Path.of("out")) + .sorted(Comparator.comparing(path -> FileUtil.lastModifiedTime(path.toFile()))) + .forEach(path -> { + try { + chapters.add(new Chapter( + path.getFileName().toString(), + Files.readString(path) + )); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + for (var chapter : chapters) { + log.info("name: {}", chapter.title()); + }*/ + var response = AIUtil.chat( + new AIConfigBuilder(ModelName.OPENAI.getValue()) + .setApiUrl("http://127.0.0.1:30000") + .setApiKey("*XMySqV%>hR&v>>g*NwCs3tpQ5FVMFEF2VHVTj