fix: 修复正文解析失败后重复采集的问题,加入错误次数统计
This commit is contained in:
@@ -1,11 +1,11 @@
|
|||||||
package com.lanyuanxiaoyao.digtal.market
|
package com.lanyuanxiaoyao.digtal.market
|
||||||
|
|
||||||
|
import com.blinkfox.fenix.jpa.FenixJpaRepository
|
||||||
import com.blinkfox.fenix.specification.FenixJpaSpecificationExecutor
|
import com.blinkfox.fenix.specification.FenixJpaSpecificationExecutor
|
||||||
import jakarta.persistence.Entity
|
import jakarta.persistence.Entity
|
||||||
import jakarta.persistence.Id
|
import jakarta.persistence.Id
|
||||||
import jakarta.persistence.Lob
|
import jakarta.persistence.Lob
|
||||||
import java.util.Date
|
import java.util.Date
|
||||||
import org.springframework.data.jpa.repository.JpaRepository
|
|
||||||
import org.springframework.data.jpa.repository.Modifying
|
import org.springframework.data.jpa.repository.Modifying
|
||||||
import org.springframework.data.jpa.repository.Query
|
import org.springframework.data.jpa.repository.Query
|
||||||
import org.springframework.data.repository.query.Param
|
import org.springframework.data.repository.query.Param
|
||||||
@@ -27,10 +27,15 @@ class Article(
|
|||||||
var tags: String?,
|
var tags: String?,
|
||||||
var createTime: Date?,
|
var createTime: Date?,
|
||||||
var pushed: Boolean?,
|
var pushed: Boolean?,
|
||||||
)
|
var error: Int?,
|
||||||
|
) {
|
||||||
|
override fun toString(): String {
|
||||||
|
return "Article(id='$id', code='$code', url='$url', title=$title, author=$author, category=$category, text=$text, html=$html, subtitle=$subtitle, description=$description, tags=$tags, createTime=$createTime, pushed=$pushed, error=$error)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Repository
|
@Repository
|
||||||
interface ArticleRepository : JpaRepository<Article, String>, FenixJpaSpecificationExecutor<Article> {
|
interface ArticleRepository : FenixJpaRepository<Article, String>, FenixJpaSpecificationExecutor<Article> {
|
||||||
fun findAllByDescriptionIsNullAndTextIsNotNull(): List<Article>
|
fun findAllByDescriptionIsNullAndTextIsNotNull(): List<Article>
|
||||||
fun findAllByHtmlIsNotNull(): List<Article>
|
fun findAllByHtmlIsNotNull(): List<Article>
|
||||||
|
|
||||||
@@ -47,11 +52,14 @@ interface ArticleRepository : JpaRepository<Article, String>, FenixJpaSpecificat
|
|||||||
@Query("update Article article set article.pushed = :pushed where article.id = :id")
|
@Query("update Article article set article.pushed = :pushed where article.id = :id")
|
||||||
fun updatePushedById(@Param("id") id: String, @Param("pushed") pushed: Boolean)
|
fun updatePushedById(@Param("id") id: String, @Param("pushed") pushed: Boolean)
|
||||||
|
|
||||||
@Query("select article.id from Article article where article.description is not null and article.text is not null and article.text <> ''")
|
@Query("select article.id from Article article where (article.error is not null and article.error > 3) or (article.description is not null and article.text is not null and article.text <> '')")
|
||||||
fun findAllId(): List<String>
|
fun findAllId(): List<String>
|
||||||
|
|
||||||
@Query("select new com.lanyuanxiaoyao.digtal.market.CountGroupByString(article.code, count(article.code)) from Article article group by article.code")
|
@Query("select new com.lanyuanxiaoyao.digtal.market.CountGroupByString(article.code, count(article.code)) from Article article group by article.code")
|
||||||
fun countGroupByCode(): List<CountGroupByString>
|
fun countGroupByCode(): List<CountGroupByString>
|
||||||
|
|
||||||
|
@Query("select article.error from Article article where article.id = :id")
|
||||||
|
fun findErrorById(@Param("id") id: String): Int?
|
||||||
}
|
}
|
||||||
|
|
||||||
data class CountGroupByString(
|
data class CountGroupByString(
|
||||||
|
|||||||
@@ -148,6 +148,7 @@ fun parseArticle(management: Management, code: String, url: String, hash: String
|
|||||||
tags = null,
|
tags = null,
|
||||||
createTime = createTime,
|
createTime = createTime,
|
||||||
pushed = false,
|
pushed = false,
|
||||||
|
error = null,
|
||||||
)
|
)
|
||||||
} ?: return null
|
} ?: return null
|
||||||
}
|
}
|
||||||
@@ -87,6 +87,11 @@ class NewsRunner : Runner {
|
|||||||
article.subtitle = triple?.subtitle
|
article.subtitle = triple?.subtitle
|
||||||
article.description = triple?.description
|
article.description = triple?.description
|
||||||
article.tags = triple?.tags
|
article.tags = triple?.tags
|
||||||
|
|
||||||
|
if (triple == null) {
|
||||||
|
val error = articleRepository.findErrorById(link.hash)
|
||||||
|
article.error = error?.plus(1) ?: 1
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
article.category = keyword
|
article.category = keyword
|
||||||
|
|||||||
Reference in New Issue
Block a user