feat: 增加新站点采集
This commit is contained in:
@@ -56,9 +56,18 @@ class Application : ApplicationRunner, ApplicationListener<ContextClosedEvent> {
|
|||||||
@Resource
|
@Resource
|
||||||
private lateinit var pushRunner: PushRunner
|
private lateinit var pushRunner: PushRunner
|
||||||
|
|
||||||
|
@Resource
|
||||||
|
private lateinit var articleRepository: ArticleRepository
|
||||||
|
|
||||||
override fun run(args: ApplicationArguments?) {
|
override fun run(args: ApplicationArguments?) {
|
||||||
// pushRunner.run()
|
// pushRunner.run()
|
||||||
// newsRunner.run()
|
// newsRunner.run()
|
||||||
|
sites
|
||||||
|
.filterNot { articleRepository.existsByCode(it.code) }
|
||||||
|
.forEach {
|
||||||
|
logger.info("发现新站点:{}", it.name)
|
||||||
|
newsRunner.run(it.code)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
override fun onApplicationEvent(event: ContextClosedEvent) {
|
override fun onApplicationEvent(event: ContextClosedEvent) {
|
||||||
|
|||||||
@@ -34,6 +34,8 @@ interface ArticleRepository : JpaRepository<Article, String>, JpaSpecificationEx
|
|||||||
fun findAllByDescriptionIsNullAndTextIsNotNull(): List<Article>
|
fun findAllByDescriptionIsNullAndTextIsNotNull(): List<Article>
|
||||||
fun findAllByHtmlIsNotNull(): List<Article>
|
fun findAllByHtmlIsNotNull(): List<Article>
|
||||||
|
|
||||||
|
/**
 * Existence check keyed by [code]; returns a plain [Boolean] rather than loading entities.
 *
 * The startup runner passes each site's `code` here and only triggers a collection run
 * for sites where this returns `false`.
 *
 * NOTE(review): no `@Query` is attached, so this is presumably resolved by Spring Data
 * from the method name — confirm that `Article` exposes a `code` property.
 *
 * @param code the code to look up (a site code at the visible call site).
 * @return presumably `true` when at least one stored article matches [code] — TODO confirm.
 */
fun existsByCode(code: String): Boolean
|
||||||
|
|
||||||
@Modifying
|
@Modifying
|
||||||
@Transactional
|
@Transactional
|
||||||
@Query("update Article article set article.pushed = :pushed where article.id = :id")
|
@Query("update Article article set article.pushed = :pushed where article.id = :id")
|
||||||
|
|||||||
@@ -45,42 +45,48 @@ class NewsRunner : Runner {
|
|||||||
|
|
||||||
@Scheduled(cron = "0 0 6-18 * * ?")
|
@Scheduled(cron = "0 0 6-18 * * ?")
|
||||||
override fun run() {
|
override fun run() {
|
||||||
sites.forEach { site ->
|
sites.forEach { run(it.code) }
|
||||||
logger.info("站点: {}", site.name)
|
}
|
||||||
keywords.forEach { keyword ->
|
|
||||||
val url = site.search.replace("{query}", keyword)
|
|
||||||
logger.info("类目: {}, 地址: {}", keyword, url)
|
|
||||||
val hashList = articleRepository.findAllId()
|
|
||||||
val links = parseArticleLink(site.code, url, false)
|
|
||||||
val total = links.size
|
|
||||||
val current = AtomicLong(0)
|
|
||||||
links
|
|
||||||
.filter {
|
|
||||||
if (hashList.contains(it.hash)) {
|
|
||||||
current.addAndGet(1)
|
|
||||||
false
|
|
||||||
} else true
|
|
||||||
}
|
|
||||||
.forEach { link ->
|
|
||||||
Thread.sleep(500)
|
|
||||||
logger.info("进度:{} 采集文章:{} {}", NumberUtil.formatPercent(current.addAndGet(1) * 1.0 / total, 2), link.title, link.url)
|
|
||||||
parseArticle(site.code, link.url, link.title, link.datetime, link.hash)?.let { article ->
|
|
||||||
if (!article.text.isNullOrBlank()) {
|
|
||||||
val triple = descriptionService.parseDescription(article.text)
|
|
||||||
logger.info("小标题:{}", triple?.first)
|
|
||||||
logger.info("描述:{}", triple?.second)
|
|
||||||
logger.info("相关度:{}", triple?.third)
|
|
||||||
article.subtitle = triple?.first
|
|
||||||
article.description = triple?.second
|
|
||||||
article.score = triple?.third
|
|
||||||
}
|
|
||||||
|
|
||||||
article.category = keyword
|
fun run(code: String) {
|
||||||
articleRepository.save(article)
|
sites
|
||||||
|
.filter { it.code == code }
|
||||||
|
.forEach { site ->
|
||||||
|
logger.info("站点: {}", site.name)
|
||||||
|
keywords.forEach { keyword ->
|
||||||
|
val url = site.search.replace("{query}", keyword)
|
||||||
|
logger.info("类目: {}, 地址: {}", keyword, url)
|
||||||
|
val hashList = articleRepository.findAllId()
|
||||||
|
val links = parseArticleLink(site.code, url, false)
|
||||||
|
val total = links.size
|
||||||
|
val current = AtomicLong(0)
|
||||||
|
links
|
||||||
|
.filter {
|
||||||
|
if (hashList.contains(it.hash)) {
|
||||||
|
current.addAndGet(1)
|
||||||
|
false
|
||||||
|
} else true
|
||||||
}
|
}
|
||||||
}
|
.forEach { link ->
|
||||||
|
Thread.sleep(500)
|
||||||
|
logger.info("进度:{} 采集文章:{} {}", NumberUtil.formatPercent(current.addAndGet(1) * 1.0 / total, 2), link.title, link.url)
|
||||||
|
parseArticle(site.code, link.url, link.title, link.datetime, link.hash)?.let { article ->
|
||||||
|
if (!article.text.isNullOrBlank()) {
|
||||||
|
val triple = descriptionService.parseDescription(article.text)
|
||||||
|
logger.info("小标题:{}", triple?.first)
|
||||||
|
logger.info("描述:{}", triple?.second)
|
||||||
|
logger.info("相关度:{}", triple?.third)
|
||||||
|
article.subtitle = triple?.first
|
||||||
|
article.description = triple?.second
|
||||||
|
article.score = triple?.third
|
||||||
|
}
|
||||||
|
|
||||||
|
article.category = keyword
|
||||||
|
articleRepository.save(article)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
logger.info("本轮采集完成")
|
logger.info("本轮采集完成")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user