diff --git a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Sites.kt b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Sites.kt index 15d431e..05e8d71 100644 --- a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Sites.kt +++ b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Sites.kt @@ -237,6 +237,18 @@ private val 中华人民共和国中央人民政府 = Site( ) ), // language=regexp + "https*://www\\.gov\\.cn/.+/(zhengceku)/.+/content_\\d+\\.html*" to Rule( + downloader = Downloader.Type.BROWSER, + text = Content( + expression = ".policyLibraryOverview_content", + author = Selector("td:contains(源:) + td"), + content = Selector(".pages_content"), + extra = mapOf( + "source" to Selector(".pages_content", properties = mapOf("html" to "true")) + ) + ) + ), + // language=regexp "https*://www\\.gov\\.cn/.+/content_\\d+\\.html*" to Rule( downloader = Downloader.Type.BROWSER, text = Content( diff --git a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/runner/NewsRunner.kt b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/runner/NewsRunner.kt index 7e606d0..3781e43 100644 --- a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/runner/NewsRunner.kt +++ b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/runner/NewsRunner.kt @@ -96,31 +96,30 @@ class NewsRunner : Runner { Failsafe .with(retryPolicy) .get(CheckedSupplier { - runBlocking { - management.parse(code, url, properties = mutableMapOf().apply { - title?.let { put("title", title) } - datetime?.let { put("datetime", datetime) } - }) - } + runBlocking { management.parse(code, url) } }) } catch (e: FailsafeException) { logger.error("Parse failure", e) return null } (page["text"] as? Map<*, *>)?.let { text -> + var datetimeText = text["datetime"] as? String val createTime = try { - var datetimeText = text["datetime"] as? String if (datetimeText.isNullOrBlank()) { - datetimeText = datetime + datetimeText = datetime } if (datetimeText?.matches(Regex("\\d{10}")) == true) { - DateUtil.date(datetimeText.toLong()).toTimestamp() + DateUtil + .date(datetimeText.toLong()) + .toTimestamp() } else if (datetimeText?.matches(Regex("\\d{7}")) == true) { - DateUtil.date(datetimeText.toLong() * 1000).toTimestamp() + DateUtil + .date(datetimeText.toLong() * 1000) + .toTimestamp() } else { DateUtil .parse( - (text["datetime"] as String?)?.trim(), + datetimeText, "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd", "yyyy年MM月dd日 HH:mm", @@ -129,7 +128,7 @@ class NewsRunner : Runner { .toTimestamp() } } catch (e: Exception) { - logger.error("Parse ${text["datetime"]} error", e) + logger.error("Parse $datetimeText error", e) null } val source = text["source"]?.let { diff --git a/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestManagement.kt b/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestManagement.kt index 1322e7b..978d6b3 100644 --- a/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestManagement.kt +++ b/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestManagement.kt @@ -68,10 +68,10 @@ class TestManagement { fun testParseArticle() { newsRunner .parseArticle( - "74ee6b33-c1a3-41f9-b947-acd0bebd0e6e", - "https://zwfwj.beijing.gov.cn/zwgk/2024zcwj/202409/t20240927_3908531.html", - "no title", - "no datetime", + "00bfe1f5-7662-4ea5-ada3-6e9dfc19f0be", + "https://www.gov.cn/zhengce/zhengceku/202411/content_6984322.htm", + "工业和信息化部关于印发《工业和信息化领域数据安全事件应急预案(试行)》的通知", + "2024-11-01 12:48:26", "", ) ?.let { logger.info("{}", JSONUtil.toJsonPrettyStr(it)) }