From 697460ed9860f44962bea1eab92616028e924ac9 Mon Sep 17 00:00:00 2001 From: lanyuanxiaoyao Date: Tue, 5 Nov 2024 17:49:51 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8Dzhengceku=E9=83=A8?= =?UTF-8?q?=E4=BB=BD=E7=BD=91=E7=AB=99=E6=97=A0=E6=B3=95=E8=A7=A3=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/lanyuanxiaoyao/digtal/market/Sites.kt | 12 ++++++++++ .../digtal/market/runner/NewsRunner.kt | 23 +++++++++---------- .../digtal/market/TestManagement.kt | 8 +++---- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Sites.kt b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Sites.kt index 15d431e..05e8d71 100644 --- a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Sites.kt +++ b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Sites.kt @@ -237,6 +237,18 @@ private val 中华人民共和国中央人民政府 = Site( ) ), // language=regexp + "https*://www\\.gov\\.cn/.+/(zhengceku)/.+/content_\\d+\\.html*" to Rule( + downloader = Downloader.Type.BROWSER, + text = Content( + expression = ".policyLibraryOverview_content", + author = Selector("td:contains(源:) + td"), + content = Selector(".pages_content"), + extra = mapOf( + "source" to Selector(".pages_content", properties = mapOf("html" to "true")) + ) + ) + ), + // language=regexp "https*://www\\.gov\\.cn/.+/content_\\d+\\.html*" to Rule( downloader = Downloader.Type.BROWSER, text = Content( diff --git a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/runner/NewsRunner.kt b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/runner/NewsRunner.kt index 7e606d0..3781e43 100644 --- a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/runner/NewsRunner.kt +++ b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/runner/NewsRunner.kt @@ -96,31 +96,30 @@ class NewsRunner : Runner { Failsafe .with(retryPolicy) .get(CheckedSupplier { - runBlocking { - management.parse(code, url, properties = mutableMapOf().apply { - title?.let { put("title", title) } - datetime?.let { put("datetime", datetime) } - }) - } + runBlocking { management.parse(code, url) } }) } catch (e: FailsafeException) { logger.error("Parse failure", e) return null } (page["text"] as? Map<*, *>)?.let { text -> + var datetimeText = text["datetime"] as? String val createTime = try { - var datetimeText = text["datetime"] as? String if (datetimeText.isNullOrBlank()) { - datetimeText = datetime + datetimeText = datetime } if (datetimeText?.matches(Regex("\\d{10}")) == true) { - DateUtil.date(datetimeText.toLong()).toTimestamp() + DateUtil + .date(datetimeText.toLong()) + .toTimestamp() } else if (datetimeText?.matches(Regex("\\d{7}")) == true) { - DateUtil.date(datetimeText.toLong() * 1000).toTimestamp() + DateUtil + .date(datetimeText.toLong() * 1000) + .toTimestamp() } else { DateUtil .parse( - (text["datetime"] as String?)?.trim(), + datetimeText, "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd", "yyyy年MM月dd日 HH:mm", @@ -129,7 +128,7 @@ class NewsRunner : Runner { .toTimestamp() } } catch (e: Exception) { - logger.error("Parse ${text["datetime"]} error", e) + logger.error("Parse $datetimeText error", e) null } val source = text["source"]?.let { diff --git a/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestManagement.kt b/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestManagement.kt index 1322e7b..978d6b3 100644 --- a/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestManagement.kt +++ b/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestManagement.kt @@ -68,10 +68,10 @@ class TestManagement { fun testParseArticle() { newsRunner .parseArticle( - "74ee6b33-c1a3-41f9-b947-acd0bebd0e6e", - "https://zwfwj.beijing.gov.cn/zwgk/2024zcwj/202409/t20240927_3908531.html", - "no title", - "no datetime", + "00bfe1f5-7662-4ea5-ada3-6e9dfc19f0be", + "https://www.gov.cn/zhengce/zhengceku/202411/content_6984322.htm", + "工业和信息化部关于印发《工业和信息化领域数据安全事件应急预案(试行)》的通知", + "2024-11-01 12:48:26", "", ) ?.let { logger.info("{}", JSONUtil.toJsonPrettyStr(it)) }