1
0

fix: 修复正文解析失败后重复采集的问题,加入错误次数统计

This commit is contained in:
2025-02-08 10:38:47 +08:00
parent e7a77004c5
commit 01e6f2437f
3 changed files with 18 additions and 4 deletions

View File

@@ -1,11 +1,11 @@
package com.lanyuanxiaoyao.digtal.market package com.lanyuanxiaoyao.digtal.market
import com.blinkfox.fenix.jpa.FenixJpaRepository
import com.blinkfox.fenix.specification.FenixJpaSpecificationExecutor import com.blinkfox.fenix.specification.FenixJpaSpecificationExecutor
import jakarta.persistence.Entity import jakarta.persistence.Entity
import jakarta.persistence.Id import jakarta.persistence.Id
import jakarta.persistence.Lob import jakarta.persistence.Lob
import java.util.Date import java.util.Date
import org.springframework.data.jpa.repository.JpaRepository
import org.springframework.data.jpa.repository.Modifying import org.springframework.data.jpa.repository.Modifying
import org.springframework.data.jpa.repository.Query import org.springframework.data.jpa.repository.Query
import org.springframework.data.repository.query.Param import org.springframework.data.repository.query.Param
@@ -27,10 +27,15 @@ class Article(
var tags: String?, var tags: String?,
var createTime: Date?, var createTime: Date?,
var pushed: Boolean?, var pushed: Boolean?,
) var error: Int?,
) {
/**
 * Builds a one-line, human-readable dump of this article in the form
 * `Article(field=value, ...)`, listing every property from `id` through `error`.
 * `id`, `code` and `url` are wrapped in single quotes; all other values are printed bare.
 */
override fun toString(): String =
    "Article(id='$id', code='$code', url='$url', title=$title, author=$author, category=$category, text=$text, html=$html, subtitle=$subtitle, description=$description, tags=$tags, createTime=$createTime, pushed=$pushed, error=$error)"
}
@Repository @Repository
interface ArticleRepository : JpaRepository<Article, String>, FenixJpaSpecificationExecutor<Article> { interface ArticleRepository : FenixJpaRepository<Article, String>, FenixJpaSpecificationExecutor<Article> {
fun findAllByDescriptionIsNullAndTextIsNotNull(): List<Article> fun findAllByDescriptionIsNullAndTextIsNotNull(): List<Article>
fun findAllByHtmlIsNotNull(): List<Article> fun findAllByHtmlIsNotNull(): List<Article>
@@ -47,11 +52,14 @@ interface ArticleRepository : JpaRepository<Article, String>, FenixJpaSpecificat
@Query("update Article article set article.pushed = :pushed where article.id = :id") @Query("update Article article set article.pushed = :pushed where article.id = :id")
fun updatePushedById(@Param("id") id: String, @Param("pushed") pushed: Boolean) fun updatePushedById(@Param("id") id: String, @Param("pushed") pushed: Boolean)
@Query("select article.id from Article article where article.description is not null and article.text is not null and article.text <> ''") @Query("select article.id from Article article where (article.error is not null and article.error > 3) or (article.description is not null and article.text is not null and article.text <> '')")
fun findAllId(): List<String> fun findAllId(): List<String>
@Query("select new com.lanyuanxiaoyao.digtal.market.CountGroupByString(article.code, count(article.code)) from Article article group by article.code") @Query("select new com.lanyuanxiaoyao.digtal.market.CountGroupByString(article.code, count(article.code)) from Article article group by article.code")
fun countGroupByCode(): List<CountGroupByString> fun countGroupByCode(): List<CountGroupByString>
/**
 * Reads only the `error` value of the article whose id equals [id],
 * without loading the rest of the entity.
 *
 * @param id identifier of the article to look up.
 * @return the stored `error` value, or `null`. Presumably `null` covers both
 *   "no article with this id" and "error was never set" — TODO confirm against
 *   the JPA provider's single-result behaviour.
 */
@Query("select article.error from Article article where article.id = :id")
fun findErrorById(@Param("id") id: String): Int?
} }
data class CountGroupByString( data class CountGroupByString(

View File

@@ -148,6 +148,7 @@ fun parseArticle(management: Management, code: String, url: String, hash: String
tags = null, tags = null,
createTime = createTime, createTime = createTime,
pushed = false, pushed = false,
error = null,
) )
} ?: return null } ?: return null
} }

View File

@@ -87,6 +87,11 @@ class NewsRunner : Runner {
article.subtitle = triple?.subtitle article.subtitle = triple?.subtitle
article.description = triple?.description article.description = triple?.description
article.tags = triple?.tags article.tags = triple?.tags
if (triple == null) {
val error = articleRepository.findErrorById(link.hash)
article.error = error?.plus(1) ?: 1
}
} }
article.category = keyword article.category = keyword