diff --git a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Application.kt b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Application.kt index 58af23b..d7f9075 100644 --- a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Application.kt +++ b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Application.kt @@ -56,9 +56,18 @@ class Application : ApplicationRunner, ApplicationListener { @Resource private lateinit var pushRunner: PushRunner + @Resource + private lateinit var articleRepository: ArticleRepository + override fun run(args: ApplicationArguments?) { // pushRunner.run() // newsRunner.run() + sites + .filterNot { articleRepository.existsByCode(it.code) } + .forEach { + logger.info("发现新站点:{}", it.name) + newsRunner.run(it.code) + } } override fun onApplicationEvent(event: ContextClosedEvent) { diff --git a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Entity.kt b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Entity.kt index 43874c8..d5d635d 100644 --- a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Entity.kt +++ b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Entity.kt @@ -34,6 +34,8 @@ interface ArticleRepository : JpaRepository, JpaSpecificationEx fun findAllByDescriptionIsNullAndTextIsNotNull(): List
fun findAllByHtmlIsNotNull(): List
+ fun existsByCode(code: String): Boolean + @Modifying @Transactional @Query("update Article article set article.pushed = :pushed where article.id = :id") diff --git a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/runner/NewsRunner.kt b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/runner/NewsRunner.kt index 2c2eb50..4fb9872 100644 --- a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/runner/NewsRunner.kt +++ b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/runner/NewsRunner.kt @@ -45,42 +45,48 @@ class NewsRunner : Runner { @Scheduled(cron = "0 0 6-18 * * ?") override fun run() { - sites.forEach { site -> - logger.info("站点: {}", site.name) - keywords.forEach { keyword -> - val url = site.search.replace("{query}", keyword) - logger.info("类目: {}, 地址: {}", keyword, url) - val hashList = articleRepository.findAllId() - val links = parseArticleLink(site.code, url, false) - val total = links.size - val current = AtomicLong(0) - links - .filter { - if (hashList.contains(it.hash)) { - current.addAndGet(1) - false - } else true - } - .forEach { link -> - Thread.sleep(500) - logger.info("进度:{} 采集文章:{} {}", NumberUtil.formatPercent(current.addAndGet(1) * 1.0 / total, 2), link.title, link.url) - parseArticle(site.code, link.url, link.title, link.datetime, link.hash)?.let { article -> - if (!article.text.isNullOrBlank()) { - val triple = descriptionService.parseDescription(article.text) - logger.info("小标题:{}", triple?.first) - logger.info("描述:{}", triple?.second) - logger.info("相关度:{}", triple?.third) - article.subtitle = triple?.first - article.description = triple?.second - article.score = triple?.third - } + sites.forEach { run(it.code) } + } - article.category = keyword - articleRepository.save(article) + fun run(code: String) { + sites + .filter { it.code == code } + .forEach { site -> + logger.info("站点: {}", site.name) + keywords.forEach { keyword -> + val url = site.search.replace("{query}", keyword) + logger.info("类目: {}, 地址: {}", keyword, url) + val hashList = articleRepository.findAllId() + val links = parseArticleLink(site.code, url, false) + val total = links.size + val current = AtomicLong(0) + links + .filter { + if (hashList.contains(it.hash)) { + current.addAndGet(1) + false + } else true } - } + .forEach { link -> + Thread.sleep(500) + logger.info("进度:{} 采集文章:{} {}", NumberUtil.formatPercent(current.addAndGet(1) * 1.0 / total, 2), link.title, link.url) + parseArticle(site.code, link.url, link.title, link.datetime, link.hash)?.let { article -> + if (!article.text.isNullOrBlank()) { + val triple = descriptionService.parseDescription(article.text) + logger.info("小标题:{}", triple?.first) + logger.info("描述:{}", triple?.second) + logger.info("相关度:{}", triple?.third) + article.subtitle = triple?.first + article.description = triple?.second + article.score = triple?.third + } + + article.category = keyword + articleRepository.save(article) + } + } + } } - } logger.info("本轮采集完成") }