fix: 修复zhengceku部分网站无法解析
This commit is contained in:
@@ -237,6 +237,18 @@ private val 中华人民共和国中央人民政府 = Site(
|
||||
)
|
||||
),
|
||||
// language=regexp
|
||||
"https*://www\\.gov\\.cn/.+/(zhengceku)/.+/content_\\d+\\.html*" to Rule(
|
||||
downloader = Downloader.Type.BROWSER,
|
||||
text = Content(
|
||||
expression = ".policyLibraryOverview_content",
|
||||
author = Selector("td:contains(源:) + td"),
|
||||
content = Selector(".pages_content"),
|
||||
extra = mapOf(
|
||||
"source" to Selector(".pages_content", properties = mapOf("html" to "true"))
|
||||
)
|
||||
)
|
||||
),
|
||||
// language=regexp
|
||||
"https*://www\\.gov\\.cn/.+/content_\\d+\\.html*" to Rule(
|
||||
downloader = Downloader.Type.BROWSER,
|
||||
text = Content(
|
||||
|
||||
@@ -96,31 +96,30 @@ class NewsRunner : Runner {
|
||||
Failsafe
|
||||
.with(retryPolicy)
|
||||
.get(CheckedSupplier {
|
||||
runBlocking {
|
||||
management.parse(code, url, properties = mutableMapOf<String, String>().apply {
|
||||
title?.let { put("title", title) }
|
||||
datetime?.let { put("datetime", datetime) }
|
||||
})
|
||||
}
|
||||
runBlocking { management.parse(code, url) }
|
||||
})
|
||||
} catch (e: FailsafeException) {
|
||||
logger.error("Parse failure", e)
|
||||
return null
|
||||
}
|
||||
(page["text"] as? Map<*, *>)?.let { text ->
|
||||
var datetimeText = text["datetime"] as? String
|
||||
val createTime = try {
|
||||
var datetimeText = text["datetime"] as? String
|
||||
if (datetimeText.isNullOrBlank()) {
|
||||
datetimeText = datetime
|
||||
datetimeText = datetime
|
||||
}
|
||||
if (datetimeText?.matches(Regex("\\d{10}")) == true) {
|
||||
DateUtil.date(datetimeText.toLong()).toTimestamp()
|
||||
DateUtil
|
||||
.date(datetimeText.toLong())
|
||||
.toTimestamp()
|
||||
} else if (datetimeText?.matches(Regex("\\d{7}")) == true) {
|
||||
DateUtil.date(datetimeText.toLong() * 1000).toTimestamp()
|
||||
DateUtil
|
||||
.date(datetimeText.toLong() * 1000)
|
||||
.toTimestamp()
|
||||
} else {
|
||||
DateUtil
|
||||
.parse(
|
||||
(text["datetime"] as String?)?.trim(),
|
||||
datetimeText,
|
||||
"yyyy-MM-dd HH:mm:ss",
|
||||
"yyyy-MM-dd",
|
||||
"yyyy年MM月dd日 HH:mm",
|
||||
@@ -129,7 +128,7 @@ class NewsRunner : Runner {
|
||||
.toTimestamp()
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
logger.error("Parse ${text["datetime"]} error", e)
|
||||
logger.error("Parse $datetimeText error", e)
|
||||
null
|
||||
}
|
||||
val source = text["source"]?.let {
|
||||
|
||||
@@ -68,10 +68,10 @@ class TestManagement {
|
||||
fun testParseArticle() {
|
||||
newsRunner
|
||||
.parseArticle(
|
||||
"74ee6b33-c1a3-41f9-b947-acd0bebd0e6e",
|
||||
"https://zwfwj.beijing.gov.cn/zwgk/2024zcwj/202409/t20240927_3908531.html",
|
||||
"no title",
|
||||
"no datetime",
|
||||
"00bfe1f5-7662-4ea5-ada3-6e9dfc19f0be",
|
||||
"https://www.gov.cn/zhengce/zhengceku/202411/content_6984322.htm",
|
||||
"工业和信息化部关于印发《工业和信息化领域数据安全事件应急预案(试行)》的通知",
|
||||
"2024-11-01 12:48:26",
|
||||
"",
|
||||
)
|
||||
?.let { logger.info("{}", JSONUtil.toJsonPrettyStr(it)) }
|
||||
|
||||
Reference in New Issue
Block a user