fix: 修复zhengceku部分网站无法解析
This commit is contained in:
@@ -237,6 +237,18 @@ private val 中华人民共和国中央人民政府 = Site(
|
|||||||
)
|
)
|
||||||
),
|
),
|
||||||
// language=regexp
|
// language=regexp
|
||||||
|
"https*://www\\.gov\\.cn/.+/(zhengceku)/.+/content_\\d+\\.html*" to Rule(
|
||||||
|
downloader = Downloader.Type.BROWSER,
|
||||||
|
text = Content(
|
||||||
|
expression = ".policyLibraryOverview_content",
|
||||||
|
author = Selector("td:contains(源:) + td"),
|
||||||
|
content = Selector(".pages_content"),
|
||||||
|
extra = mapOf(
|
||||||
|
"source" to Selector(".pages_content", properties = mapOf("html" to "true"))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
),
|
||||||
|
// language=regexp
|
||||||
"https*://www\\.gov\\.cn/.+/content_\\d+\\.html*" to Rule(
|
"https*://www\\.gov\\.cn/.+/content_\\d+\\.html*" to Rule(
|
||||||
downloader = Downloader.Type.BROWSER,
|
downloader = Downloader.Type.BROWSER,
|
||||||
text = Content(
|
text = Content(
|
||||||
|
|||||||
@@ -96,31 +96,30 @@ class NewsRunner : Runner {
|
|||||||
Failsafe
|
Failsafe
|
||||||
.with(retryPolicy)
|
.with(retryPolicy)
|
||||||
.get(CheckedSupplier {
|
.get(CheckedSupplier {
|
||||||
runBlocking {
|
runBlocking { management.parse(code, url) }
|
||||||
management.parse(code, url, properties = mutableMapOf<String, String>().apply {
|
|
||||||
title?.let { put("title", title) }
|
|
||||||
datetime?.let { put("datetime", datetime) }
|
|
||||||
})
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
} catch (e: FailsafeException) {
|
} catch (e: FailsafeException) {
|
||||||
logger.error("Parse failure", e)
|
logger.error("Parse failure", e)
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
(page["text"] as? Map<*, *>)?.let { text ->
|
(page["text"] as? Map<*, *>)?.let { text ->
|
||||||
|
var datetimeText = text["datetime"] as? String
|
||||||
val createTime = try {
|
val createTime = try {
|
||||||
var datetimeText = text["datetime"] as? String
|
|
||||||
if (datetimeText.isNullOrBlank()) {
|
if (datetimeText.isNullOrBlank()) {
|
||||||
datetimeText = datetime
|
datetimeText = datetime
|
||||||
}
|
}
|
||||||
if (datetimeText?.matches(Regex("\\d{10}")) == true) {
|
if (datetimeText?.matches(Regex("\\d{10}")) == true) {
|
||||||
DateUtil.date(datetimeText.toLong()).toTimestamp()
|
DateUtil
|
||||||
|
.date(datetimeText.toLong())
|
||||||
|
.toTimestamp()
|
||||||
} else if (datetimeText?.matches(Regex("\\d{7}")) == true) {
|
} else if (datetimeText?.matches(Regex("\\d{7}")) == true) {
|
||||||
DateUtil.date(datetimeText.toLong() * 1000).toTimestamp()
|
DateUtil
|
||||||
|
.date(datetimeText.toLong() * 1000)
|
||||||
|
.toTimestamp()
|
||||||
} else {
|
} else {
|
||||||
DateUtil
|
DateUtil
|
||||||
.parse(
|
.parse(
|
||||||
(text["datetime"] as String?)?.trim(),
|
datetimeText,
|
||||||
"yyyy-MM-dd HH:mm:ss",
|
"yyyy-MM-dd HH:mm:ss",
|
||||||
"yyyy-MM-dd",
|
"yyyy-MM-dd",
|
||||||
"yyyy年MM月dd日 HH:mm",
|
"yyyy年MM月dd日 HH:mm",
|
||||||
@@ -129,7 +128,7 @@ class NewsRunner : Runner {
|
|||||||
.toTimestamp()
|
.toTimestamp()
|
||||||
}
|
}
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
logger.error("Parse ${text["datetime"]} error", e)
|
logger.error("Parse $datetimeText error", e)
|
||||||
null
|
null
|
||||||
}
|
}
|
||||||
val source = text["source"]?.let {
|
val source = text["source"]?.let {
|
||||||
|
|||||||
@@ -68,10 +68,10 @@ class TestManagement {
|
|||||||
fun testParseArticle() {
|
fun testParseArticle() {
|
||||||
newsRunner
|
newsRunner
|
||||||
.parseArticle(
|
.parseArticle(
|
||||||
"74ee6b33-c1a3-41f9-b947-acd0bebd0e6e",
|
"00bfe1f5-7662-4ea5-ada3-6e9dfc19f0be",
|
||||||
"https://zwfwj.beijing.gov.cn/zwgk/2024zcwj/202409/t20240927_3908531.html",
|
"https://www.gov.cn/zhengce/zhengceku/202411/content_6984322.htm",
|
||||||
"no title",
|
"工业和信息化部关于印发《工业和信息化领域数据安全事件应急预案(试行)》的通知",
|
||||||
"no datetime",
|
"2024-11-01 12:48:26",
|
||||||
"",
|
"",
|
||||||
)
|
)
|
||||||
?.let { logger.info("{}", JSONUtil.toJsonPrettyStr(it)) }
|
?.let { logger.info("{}", JSONUtil.toJsonPrettyStr(it)) }
|
||||||
|
|||||||
Reference in New Issue
Block a user