fix: 修复正文解析失败后重复采集的问题,加入错误次数统计
This commit is contained in:
@@ -1,11 +1,11 @@
|
||||
package com.lanyuanxiaoyao.digtal.market
|
||||
|
||||
import com.blinkfox.fenix.jpa.FenixJpaRepository
|
||||
import com.blinkfox.fenix.specification.FenixJpaSpecificationExecutor
|
||||
import jakarta.persistence.Entity
|
||||
import jakarta.persistence.Id
|
||||
import jakarta.persistence.Lob
|
||||
import java.util.Date
|
||||
import org.springframework.data.jpa.repository.JpaRepository
|
||||
import org.springframework.data.jpa.repository.Modifying
|
||||
import org.springframework.data.jpa.repository.Query
|
||||
import org.springframework.data.repository.query.Param
|
||||
@@ -27,10 +27,15 @@ class Article(
|
||||
var tags: String?,
|
||||
var createTime: Date?,
|
||||
var pushed: Boolean?,
|
||||
)
|
||||
var error: Int?,
|
||||
) {
|
||||
/**
 * Renders this article as a single-line `Article(...)` string.
 *
 * `id`, `code` and `url` are wrapped in single quotes; the remaining properties are
 * interpolated as-is, so nullable ones show up as `null`. Note that `text` and `html`
 * are included in full, without truncation.
 */
override fun toString(): String =
    "Article(id='$id', code='$code', url='$url', " +
        "title=$title, author=$author, category=$category, " +
        "text=$text, html=$html, " +
        "subtitle=$subtitle, description=$description, tags=$tags, " +
        "createTime=$createTime, pushed=$pushed, error=$error)"
|
||||
}
|
||||
|
||||
@Repository
|
||||
interface ArticleRepository : JpaRepository<Article, String>, FenixJpaSpecificationExecutor<Article> {
|
||||
interface ArticleRepository : FenixJpaRepository<Article, String>, FenixJpaSpecificationExecutor<Article> {
|
||||
/**
 * Returns the articles that have a `text` but no `description`.
 *
 * No `@Query` is attached to this declaration, so the filter is presumably derived by
 * Spring Data from the method name (`description IS NULL AND text IS NOT NULL`).
 */
fun findAllByDescriptionIsNullAndTextIsNotNull(): List<Article>
|
||||
/**
 * Returns the articles whose `html` is set.
 *
 * No `@Query` is attached to this declaration, so the filter is presumably derived by
 * Spring Data from the method name (`html IS NOT NULL`).
 */
fun findAllByHtmlIsNotNull(): List<Article>
|
||||
|
||||
@@ -47,11 +52,14 @@ interface ArticleRepository : JpaRepository<Article, String>, FenixJpaSpecificat
|
||||
@Query("update Article article set article.pushed = :pushed where article.id = :id")
|
||||
fun updatePushedById(@Param("id") id: String, @Param("pushed") pushed: Boolean)
|
||||
|
||||
@Query("select article.id from Article article where article.description is not null and article.text is not null and article.text <> ''")
|
||||
/**
 * Returns the ids of articles matched by the query below — not the ids of all articles,
 * despite the method name.
 *
 * An article is matched when either:
 * - its `error` counter is greater than 3, or
 * - it has a non-null `description` and a non-null, non-empty `text`.
 *
 * NOTE(review): presumably callers use this list to skip articles that are already complete
 * or have failed too often — the caller is not visible here, confirm.
 */
@Query("select article.id from Article article where (article.error is not null and article.error > 3) or (article.description is not null and article.text is not null and article.text <> '')")
|
||||
fun findAllId(): List<String>
|
||||
|
||||
/**
 * Counts articles per `code`.
 *
 * The query groups by `article.code` and uses a JPQL constructor expression to build one
 * [CountGroupByString] per group from `(article.code, count(article.code))`.
 */
@Query("select new com.lanyuanxiaoyao.digtal.market.CountGroupByString(article.code, count(article.code)) from Article article group by article.code")
|
||||
fun countGroupByCode(): List<CountGroupByString>
|
||||
|
||||
/**
 * Reads only the `error` value of the article with the given [id].
 *
 * @param id value matched against `article.id`.
 * @return the stored `error` value, or `null` when it is unset. Presumably also `null`
 *   when no article has this id — TODO confirm against Spring Data behaviour.
 */
@Query("select article.error from Article article where article.id = :id")
|
||||
fun findErrorById(@Param("id") id: String): Int?
|
||||
}
|
||||
|
||||
data class CountGroupByString(
|
||||
|
||||
@@ -148,6 +148,7 @@ fun parseArticle(management: Management, code: String, url: String, hash: String
|
||||
tags = null,
|
||||
createTime = createTime,
|
||||
pushed = false,
|
||||
error = null,
|
||||
)
|
||||
} ?: return null
|
||||
}
|
||||
@@ -87,6 +87,11 @@ class NewsRunner : Runner {
|
||||
article.subtitle = triple?.subtitle
|
||||
article.description = triple?.description
|
||||
article.tags = triple?.tags
|
||||
|
||||
if (triple == null) {
|
||||
val error = articleRepository.findErrorById(link.hash)
|
||||
article.error = error?.plus(1) ?: 1
|
||||
}
|
||||
}
|
||||
|
||||
article.category = keyword
|
||||
|
||||
Reference in New Issue
Block a user