feat: 增加spring环境的测试

2024-11-04 09:23:38 +08:00
parent a89f4ab8b1
commit 5829b6e145
5 changed files with 329 additions and 233 deletions
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -34,7 +34,9 @@ repositories {
 }

 dependencies {
-    implementation("com.lanyuanxiaoyao:squirrel-core-jvm:1.0.0-SNAPSHOT")
+    implementation("com.lanyuanxiaoyao:squirrel-core-jvm:1.0.0-SNAPSHOT") {
+        exclude("commons-logging", "commons-logging")
+    }
    implementation("dev.failsafe:failsafe:3.3.2")
    implementation("cn.bigmodel.openapi:oapi-java-sdk:release-V4-2.3.0")
    implementation("com.baidubce:qianfan:0.1.1")
@@ -61,6 +63,7 @@ dependencies {
    implementation("org.jetbrains.kotlin:kotlin-reflect")
    // implementation("org.jetbrains.kotlin:kotlin-scripting-jsr223")

+    testImplementation("org.springframework.boot:spring-boot-starter-test")
    testImplementation("org.jetbrains.kotlin:kotlin-test")
 }

--- a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Application.kt
+++ b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Application.kt
@@ -3,8 +3,6 @@ package com.lanyuanxiaoyao.digtal.market
 import com.lanyuanxiaoyao.digtal.market.runner.NewsRunner
 import com.lanyuanxiaoyao.digtal.market.runner.PushRunner
 import com.lanyuanxiaoyao.squirrel.core.common.Management
-import com.lanyuanxiaoyao.squirrel.core.jvm.BINARY_PATH
-import com.lanyuanxiaoyao.squirrel.core.jvm.DRIVER_PATH
 import com.lanyuanxiaoyao.squirrel.core.jvm.JvmManagement
 import com.lanyuanxiaoyao.squirrel.core.jvm.LocalFileDatabase
 import jakarta.annotation.Resource
@@ -20,7 +18,6 @@ import org.springframework.context.ApplicationListener
 import org.springframework.context.annotation.Bean
 import org.springframework.context.annotation.Configuration
 import org.springframework.context.event.ContextClosedEvent
-import org.springframework.scheduling.annotation.EnableScheduling
 import org.springframework.web.servlet.config.annotation.CorsRegistry
 import org.springframework.web.servlet.config.annotation.WebMvcConfigurer

@@ -41,7 +38,7 @@ data class MailProperties @ConstructorBinding constructor(
    val targets: List<String>,
 )

-@EnableScheduling
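+// @EnableScheduling // NOTE(review): scheduling is switched off here by commenting out the annotation — confirm this is intended for the main application and not only for the new Spring tests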
@OptIn(ExperimentalStdlibApi::class)
@ConfigurationPropertiesScan
@SpringBootApplication
@@ -73,7 +70,7 @@ class Configuration {
    @Bean
    fun jvmManagement(
        driverProperties: DriverProperties,
-        databaseProperties: DatabaseProperties
+        databaseProperties: DatabaseProperties,
    ): Management = JvmManagement(
        database = LocalFileDatabase(databaseProperties.jsonPath)
    ).also { management ->
@@ -84,13 +81,7 @@ class Configuration {
                    management.changeDownloader("basicCacheDownloader")
                }
                if (information.browserDownloaderName.isBlank()) {
-                    management.changeDownloader(
-                        "chromeCacheDownloader",
-                        mapOf(
-                            BINARY_PATH to driverProperties.binaryPath,
-                            DRIVER_PATH to driverProperties.driverPath,
-                        )
-                    )
+                    management.changeDownloader("htmlUnitCacheDownloader")
                }
            }
        management.importSites(sites)
--- a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Sites.kt
+++ b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Sites.kt
@@ -1,3 +1,5 @@
+@file:Suppress("NonAsciiCharacters", "ObjectPropertyName")
+
 package com.lanyuanxiaoyao.digtal.market

 import com.lanyuanxiaoyao.squirrel.core.common.Content
@@ -25,236 +27,288 @@ private val titleScript = Script(
    script = "if (text && text !== '') {\n    return text\n} else if(params['title']) {\n    return params['title']\n} else {\n    return text\n}",
 )

-val sites = setOf(
-    Site(
-        code = "74ee6b33-c1a3-41f9-b947-acd0bebd0e6e",
-        name = "广东政务服务和数据管理局",
-        home = "https://zfsg.gd.gov.cn",
-        parser = Parser.Type.CSS,
-        author = "lanyuanxiaoyao",
-        target = Site.Target.TEXT,
-        downloader = Downloader.Type.BROWSER,
-        tags = mapOf(
-            "数据要闻" to "https://zfsg.gd.gov.cn/xxfb/ywsd/index.html",
-            "省局要闻" to "https://zfsg.gd.gov.cn/xxfb/sjyw/index.html",
-            "动态新闻" to "https://zfsg.gd.gov.cn/xxfb/dtxw/index.html",
-            "媒体报道" to "https://zfsg.gd.gov.cn/xxfb/mtbd/index.html",
-            "政务文件" to "https://zfsg.gd.gov.cn/zwgk/wjk/index.html",
-            "政策解读" to "https://zfsg.gd.gov.cn/zwgk/zcjd2/index.html",
-        ),
-        rules = mapOf(
-            // language=regexp
-            "https*://zfsg\\.gd\\.gov\\.cn/(xxfb|zwgk)/.+/index(_\\d*)*\\.html" to Rule(
-                list = Content(
-                    expression = "ul.newList > li",
-                    title = Selector(".til > a"),
-                    dateTime = Selector(".time"),
-                    link = Selector(".til > a", "href"),
-                ),
-                next = Selector(".page > a.next", "href")
-            ),
-            // language=regexp
-            "https*://zfsg\\.gd\\.gov\\.cn/(xxfb|zwgk)/.+/content/post_\\d+\\.html" to Rule(
-                text = Content(
-                    expression = ".Con",
-                    title = Selector("h3.zw-title"),
-                    author = Selector(".zw-info > .ly", process = Process(remove = listOf("来源\\s*:\\s*"))),
-                    dateTime = Selector(".zw-info > .time", process = Process(remove = listOf("时间\\s*:\\s*"))),
-                    content = Selector(".zw"),
-                    extra = mapOf(
-                        "source" to Selector(".zw", properties = mapOf(html))
-                    )
-                )
-            ),
-            // language=regexp
-            "https://mp\\.weixin\\.qq\\.com/s/.+" to Rule(
-                text = Content(
-                    expression = "#page-content",
-                    title = Selector("#activity-name"),
-                    author = Selector("#js_name"),
-                    dateTime = Selector("#publish_time"),
-                    content = Selector("#js_content"),
-                    extra = mapOf(
-                        "source" to Selector("#js_content", properties = mapOf(html))
-                    )
-                )
-            ),
-            // language=regexp
-            "https*://zfsg\\.gd\\.gov\\.cn/hdjlpt/yjzj/answer/.+" to Rule(
-                text = Content(
-                    expression = "#article-container",
-                    title = Selector("#article-title"),
-                    author = Selector("#article-source", process = Process(remove = listOf("发布机构："))),
-                    dateTime = Selector("#article-published_at", process = Process(remove = listOf("发布时间："))),
-                    content = Selector("#article-content"),
-                    extra = mapOf(
-                        "source" to Selector("#article-content", properties = mapOf(html))
-                    )
-                )
-            ),
-            // language=regexp
-            "https*://zfsg\\.gd\\.gov\\.cn/hdjlpt/live/index.php\\?pid=\\d+" to Rule(
-                text = Content(
-                    expression = "body",
-                    title = Selector(".title-page .txt > span"),
-                    author = Selector(process = Process(default = "广东政务服务和数据管理局")),
-                    dateTime = Selector(process = Process(script = listOf(timeScript))),
-                    content = Selector(".content"),
-                    extra = mapOf(
-                        "source" to Selector(".content", properties = mapOf(html))
-                    )
-                )
-            ),
-        )
+private val 广东政务服务和数据管理局 = Site(
+    code = "74ee6b33-c1a3-41f9-b947-acd0bebd0e6e",
+    name = "广东政务服务和数据管理局",
+    home = "https://zfsg.gd.gov.cn",
+    parser = Parser.Type.CSS,
+    author = "lanyuanxiaoyao",
+    target = Site.Target.TEXT,
+    downloader = Downloader.Type.BROWSER,
+    tags = mapOf(
+        "数据要闻" to "https://zfsg.gd.gov.cn/xxfb/ywsd/index.html",
+        "省局要闻" to "https://zfsg.gd.gov.cn/xxfb/sjyw/index.html",
+        "动态新闻" to "https://zfsg.gd.gov.cn/xxfb/dtxw/index.html",
+        "媒体报道" to "https://zfsg.gd.gov.cn/xxfb/mtbd/index.html",
+        "政务文件" to "https://zfsg.gd.gov.cn/zwgk/wjk/index.html",
+        "政策解读" to "https://zfsg.gd.gov.cn/zwgk/zcjd2/index.html",
    ),
-    Site(
-        code = "189a0e12-9319-4a4b-bf3b-34a6282e6f68",
-        name = "深圳市政务服务和数据管理局",
-        home = "https://www.sz.gov.cn/szzsj/gkmlpt/index",
-        icon = "https://www.sz.gov.cn/favicon.ico",
-        parser = Parser.Type.CSS,
-        author = "lanyuanxiaoyao",
-        target = Site.Target.TEXT,
-        downloader = Downloader.Type.HTTP,
-        properties = mapOf(iframe),
-        tags = mapOf(
-            "工作动态" to "http://www.sz.gov.cn/szzsj/gkmlpt/api/all/19236?page=1",
+    rules = mapOf(
+        // language=regexp
+        "https*://zfsg\\.gd\\.gov\\.cn/(xxfb|zwgk)/.+/index(_\\d*)*\\.html" to Rule(
+            list = Content(
+                expression = "ul.newList > li",
+                title = Selector(".til > a"),
+                dateTime = Selector(".time"),
+                link = Selector(".til > a", "href"),
+            ),
+            next = Selector(".page > a.next", "href")
        ),
-        rules = mapOf(
-            // language=regexp
-            "https*://www\\.sz\\.gov\\.cn/szzsj/gkmlpt/api/all/19236\\?page=\\d+" to Rule(
-                parser = Parser.Type.JSON,
-                list = Content(
-                    expression = "$.articles",
-                    title = Selector("$.title"),
-                    dateTime = Selector(
-                        "$.first_publish_time",
-                        process = Process(
-                            script = listOf(
-                                Script(
-                                    Script.Type.Javascript,
-                                    // language=javascript
-                                    script = "let datetime = new Date(text * 1000)\nreturn `\${datetime.getFullYear()}-\${datetime.getMonth() + 1}-\${datetime.getDate()} \${datetime.getHours()}:\${datetime.getMinutes()}:\${datetime.getSeconds()}`"
-                                )
-                            )
-                        )
-                    ),
-                    link = Selector("$.url", process = Process(replace = listOf(Replace("https", "http")))),
-                ),
-                next = Selector(
-                    "$.total",
+        // language=regexp
+        "https*://zfsg\\.gd\\.gov\\.cn/(xxfb|zwgk)/.+/content/post_\\d+\\.html" to Rule(
+            text = Content(
+                expression = ".Con",
+                title = Selector("h3.zw-title"),
+                author = Selector(".zw-info > .ly", process = Process(remove = listOf("来源\\s*:\\s*"))),
+                dateTime = Selector(".zw-info > .time", process = Process(remove = listOf("时间\\s*:\\s*"))),
+                content = Selector(".zw"),
+                extra = mapOf(
+                    "source" to Selector(".zw", properties = mapOf(html))
+                )
+            )
+        ),
+        // language=regexp
+        "https://mp\\.weixin\\.qq\\.com/s/.+" to Rule(
+            text = Content(
+                expression = "#page-content",
+                title = Selector("#activity-name"),
+                author = Selector("#js_name"),
+                dateTime = Selector("#publish_time"),
+                content = Selector("#js_content"),
+                extra = mapOf(
+                    "source" to Selector("#js_content", properties = mapOf(html))
+                )
+            )
+        ),
+        // language=regexp
+        "https*://zfsg\\.gd\\.gov\\.cn/hdjlpt/yjzj/answer/.+" to Rule(
+            text = Content(
+                expression = "#article-container",
+                title = Selector("#article-title"),
+                author = Selector("#article-source", process = Process(remove = listOf("发布机构："))),
+                dateTime = Selector("#article-published_at", process = Process(remove = listOf("发布时间："))),
+                content = Selector("#article-content"),
+                extra = mapOf(
+                    "source" to Selector("#article-content", properties = mapOf(html))
+                )
+            )
+        ),
+        // language=regexp
+        "https*://zfsg\\.gd\\.gov\\.cn/hdjlpt/live/index.php\\?pid=\\d+" to Rule(
+            text = Content(
+                expression = "body",
+                title = Selector(".title-page .txt > span"),
+                author = Selector(process = Process(default = "广东政务服务和数据管理局")),
+                dateTime = Selector(process = Process(script = listOf(timeScript))),
+                content = Selector(".content"),
+                extra = mapOf(
+                    "source" to Selector(".content", properties = mapOf(html))
+                )
+            )
+        ),
+    )
+)
+
+private val 深圳市政务服务和数据管理局 = Site(
+    code = "189a0e12-9319-4a4b-bf3b-34a6282e6f68",
+    name = "深圳市政务服务和数据管理局",
+    home = "https://www.sz.gov.cn/szzsj/gkmlpt/index",
+    icon = "https://www.sz.gov.cn/favicon.ico",
+    parser = Parser.Type.CSS,
+    author = "lanyuanxiaoyao",
+    target = Site.Target.TEXT,
+    downloader = Downloader.Type.HTTP,
+    properties = mapOf(iframe),
+    tags = mapOf(
+        "工作动态" to "http://www.sz.gov.cn/szzsj/gkmlpt/api/all/19236?page=1",
+    ),
+    rules = mapOf(
+        // language=regexp
+        "https*://www\\.sz\\.gov\\.cn/szzsj/gkmlpt/api/all/19236\\?page=\\d+" to Rule(
+            parser = Parser.Type.JSON,
+            list = Content(
+                expression = "$.articles",
+                title = Selector("$.title"),
+                dateTime = Selector(
+                    "$.first_publish_time",
                    process = Process(
                        script = listOf(
                            Script(
                                Script.Type.Javascript,
                                // language=javascript
-                                script = "let total = parseInt(text)\nlet base = 'http://www.sz.gov.cn/szzsj/gkmlpt/api/all/19236?page='\nlet count = parseInt(params['page'] ?? '1')\nif (count * 100 >= total) {\n    return ''\n}\nreturn `\${base}\${count + 1}`\n"
+                                script = "let datetime = new Date(text * 1000)\nreturn `\${datetime.getFullYear()}-\${datetime.getMonth() + 1}-\${datetime.getDate()} \${datetime.getHours()}:\${datetime.getMinutes()}:\${datetime.getSeconds()}`"
                            )
                        )
                    )
-                )
+                ),
+                link = Selector("$.url", process = Process(replace = listOf(Replace("https", "http")))),
            ),
-            // language=regexp
-            "https*://www\\.sz\\.gov\\.cn/szzsj/gkmlpt/content/\\d+/\\d+/post_\\d+\\.html" to Rule(
-                downloader = Downloader.Type.BROWSER,
-                text = Content(
-                    expression = ".content-container",
-                    title = Selector(".content-box .content h1.title"),
-                    author = Selector("td.first:contains(发布机构) + td > span"),
-                    dateTime = Selector("td.second:contains(成文日期) + td > span"),
-                    content = Selector(".content .article-content"),
-                    extra = mapOf(
-                        "source" to Selector(".content .article-content", properties = mapOf(html))
-                    ),
+            next = Selector(
+                "$.total",
+                process = Process(
+                    script = listOf(
+                        Script(
+                            Script.Type.Javascript,
+                            // language=javascript
+                            script = "let total = parseInt(text)\nlet base = 'http://www.sz.gov.cn/szzsj/gkmlpt/api/all/19236?page='\nlet count = parseInt(params['page'] ?? '1')\nif (count * 100 >= total) {\n    return ''\n}\nreturn `\${base}\${count + 1}`\n"
+                        )
+                    )
+                )
+            )
+        ),
+        // language=regexp
+        "https*://www\\.sz\\.gov\\.cn/szzsj/gkmlpt/content/\\d+/\\d+/post_\\d+\\.html" to Rule(
+            downloader = Downloader.Type.BROWSER,
+            text = Content(
+                expression = ".content-container",
+                title = Selector(".content-box .content h1.title"),
+                author = Selector("td.first:contains(发布机构) + td > span"),
+                dateTime = Selector("td.second:contains(成文日期) + td > span"),
+                content = Selector(".content .article-content"),
+                extra = mapOf(
+                    "source" to Selector(".content .article-content", properties = mapOf(html))
+                ),
+            )
+        )
+    )
+)
+
+private val 中华人民共和国中央人民政府 = Site(
+    code = "00bfe1f5-7662-4ea5-ada3-6e9dfc19f0be",
+    name = "中华人民共和国中央人民政府",
+    home = "https://www.gov.cn",
+    icon = "https://www.gov.cn/favicon.ico",
+    parser = Parser.Type.CSS,
+    author = "lanyuanxiaoyao",
+    target = Site.Target.TEXT,
+    downloader = Downloader.Type.HTTP,
+    properties = mapOf(iframe),
+    tags = mapOf(
+        "搜索-数据要素" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E6%95%B0%E6%8D%AE%E8%A6%81%E7%B4%A0%22%7D",
+        "搜索-国家数据局" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E5%9B%BD%E5%AE%B6%E6%95%B0%E6%8D%AE%E5%B1%80%22%7D",
+        "搜索-可信数据空间" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E5%8F%AF%E4%BF%A1%E6%95%B0%E6%8D%AE%E7%A9%BA%E9%97%B4%22%7D",
+        "搜索-数据基础设施" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E6%95%B0%E6%8D%AE%E5%9F%BA%E7%A1%80%E8%AE%BE%E6%96%BD%22%7D",
+        "搜索-数据跨境" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E6%95%B0%E6%8D%AE%E8%B7%A8%E5%A2%83%22%7D",
+        "搜索-数据安全" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E6%95%B0%E6%8D%AE%E5%AE%89%E5%85%A8%22%7D",
+    ),
+    rules = mapOf(
+        // language=regexp
+        "https://sousuoht\\.www\\.gov\\.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE.+" to Rule(
+            parser = Parser.Type.JSON,
+            headers = mapOf(
+                "Content-Type" to "application/json",
+                "Athenaappkey" to "be5qL61qyHKQ87BVlqy%2BwloIkpcL1tCqC4yOM%2F%2BsqaSn0NxuA4GPOWocQu96h0PUaYBe9hz3DVvjGTECMNNou6cjAlUc3UoJlviQAipc4Ha8qxmIeS1sPqdGQ3Unm49j%2BsYN5T%2BzjqKxdA2yz5qLcG5wjZR7rqYVgT98NasByMA%3D",
+                "Athenaappname" to "%E5%9B%BD%E7%BD%91%E6%90%9C%E7%B4%A2",
+            ),
+            properties = mapOf("post" to "true"),
+            list = Content(
+                expression = "$.result.data.middle.list",
+                title = Selector("$.title_no_tag"),
+                dateTime = Selector("$.time"),
+                link = Selector("$.url"),
+            ),
+            next = Selector( // builds the next-page search URL from the pager object selected out of the JSON response
+                expression = "$.result.data.pager",
+                process = Process(
+                    script = listOf(
+                        Script(
+                            Script.Type.Javascript, // NOTE(review): the script below hard-codes searchWord '数据要素', while this rule's URL pattern matches every tag's search URL — follow-up pages for the other tags would presumably query 数据要素 instead of their own keyword; confirm
+                            // language=javascript
+                            script = "let query = {\n    code: '17da70961a7',\n    historySearchWords: [],\n    dataTypeId: '107',\n    orderBy: 'time',\n    searchBy: 'title',\n    appendixType: '',\n    granularity: 'ALL',\n    trackTotalHits: true,\n    beginDateTime: '',\n    endDateTime: '',\n    isSearchForced: 0,\n    filters: [],\n    pageNo: 1,\n    pageSize: 10,\n    customFilter: {\n        operator: 'and',\n        properties: []\n    },\n    searchWord: '数据要素'\n}\nif (text && text !== '') {\n    let pager = JSON.parse(text)\n    let current = pager['pageNo']\n    let count = pager['pageCount']\n    if (current < count) {\n        query.pageNo = current + 1\n    } else {\n        return ''\n    }\n}\nreturn `https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=\${encodeURIComponent(JSON.stringify(query))}`\n"
+                        ) // returns '' once pageNo has reached pageCount, otherwise the same query with pageNo + 1
+                    )
+                )
+            )
+        ),
+        // language=regexp
+        "https*://www\\.gov\\.cn/.+/content_\\d+\\.html*" to Rule(
+            downloader = Downloader.Type.BROWSER,
+            text = Content(
+                expression = ".content .article",
+                title = Selector("h1#ti", process = Process(script = listOf(titleScript))),
+                author = Selector(
+                    ".pages-date > .font",
+                    process = Process(
+                        default = "中华人民共和国中央人民政府",
+                        remove = listOf("来源：")
+                    )
+                ),
+                dateTime = Selector(
+                    ".pages-date",
+                    process = Process(script = listOf(timeScript)),
+                    properties = mapOf("precision" to "true")
+                ),
+                content = Selector(".pages_content"),
+                extra = mapOf(
+                    "source" to Selector(".pages_content", properties = mapOf("html" to "true"))
+                )
+            )
+        ),
+        // language=regexp
+        "https://xcx\\.www\\.gov\\.cn/robot/gwypolicy/#/qaReadDetails.+" to Rule(
+            downloader = Downloader.Type.BROWSER,
+            text = Content(
+                expression = ".main-content",
+                title = Selector(".qa_content_box", process = Process(script = listOf(titleScript))),
+                author = Selector(".qa-subtext-item:contains(来源)", process = Process(remove = listOf("来\\s*源："))),
+                dateTime = Selector(process = Process(script = listOf(timeScript))),
+                content = Selector(".qa_content_text"),
+                extra = mapOf(
+                    "source" to Selector(".main-content", properties = mapOf("html" to "true"))
+                )
+            )
+        ),
+    )
+)
+
+private val 中华人民共和国国家互联网信息办公室 = Site(
+    code = "1df28c35-1e9e-4d58-9595-f08029b160b4",
+    name = "中华人民共和国国家互联网信息办公室",
+    home = "https://www.cac.gov.cn",
+    icon = "https://www.cac.gov.cn/favicon.ico",
+    parser = Parser.Type.CSS,
+    author = "lanyuanxiaoyao",
+    target = Site.Target.TEXT,
+    downloader = Downloader.Type.HTTP,
+    tags = mapOf( // each search URL's huopro parameter carries the keyword named in its tag label
+        "搜索-数据要素" to "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据要素&mustpro=&notpro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
+        "搜索-数据跨境" to "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据跨境&mustpro=&notpro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
+        "搜索-数据基础设施" to "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据基础设施&mustpro=&notpro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
+        "搜索-数据安全" to "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据安全&mustpro=&notpro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
+        "搜索-数据交易" to "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据交易&mustpro=&notpro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
+    ),
+    rules = mapOf(
+        "https://search\\.cac\\.gov\\.cn/cms/cmsadmin/infopub/gjjs\\.jsp\\?templetid=1563339473064626&pubtype=S&pubpath=portal&page=\\d*&webappcode=A09&huopro=.+&mustpro=&notpro=&inpro=&startDate=\\\$*&endDate=\\\$*&sort=1&searchfield=\\\$*&searchdir=A09" to Rule(
+            list = Content(
+                expression = ".xpage-container .list-item",
+                title = Selector("a", process = Process(remove = listOf("\\s*»\\s*"))),
+                dateTime = Selector(".search_time"),
+                link = Selector("a", "href", process = Process(prefix = "https:"))
+            ),
+            next = Selector(
+                ".xpage-pagination .xpage-pagination-next a:contains(下一页)",
+                "href",
+                Process(prefix = "https://search.cac.gov.cn/cms/cmsadmin/infopub/")
+            )
+        ),
+        "https://www\\.cac\\.gov\\.cn/.+/c_\\d+\\.htm" to Rule(
+            text = Content(
+                expression = ".main",
+                title = Selector("h1.title"),
+                author = Selector("#source", process = Process(remove = listOf("来源："))),
+                dateTime = Selector("#pubtime"),
+                content = Selector(".main-content"),
+                extra = mapOf(
+                    "source" to Selector(".main", properties = mapOf("html" to "true"))
                )
            )
        )
-    ),
-    Site(
-        code = "00bfe1f5-7662-4ea5-ada3-6e9dfc19f0be",
-        name = "中华人民共和国中央人民政府",
-        home = "https://www.gov.cn",
-        icon = "https://www.gov.cn/favicon.ico",
-        parser = Parser.Type.CSS,
-        author = "lanyuanxiaoyao",
-        target = Site.Target.TEXT,
-        downloader = Downloader.Type.HTTP,
-        properties = mapOf(iframe),
-        tags = mapOf(
-            "搜索-数据要素" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E6%95%B0%E6%8D%AE%E8%A6%81%E7%B4%A0%22%7D",
-            "搜索-国家数据局" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E5%9B%BD%E5%AE%B6%E6%95%B0%E6%8D%AE%E5%B1%80%22%7D",
-            "搜索-可信数据空间" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E5%8F%AF%E4%BF%A1%E6%95%B0%E6%8D%AE%E7%A9%BA%E9%97%B4%22%7D",
-            "搜索-数据基础设施" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E6%95%B0%E6%8D%AE%E5%9F%BA%E7%A1%80%E8%AE%BE%E6%96%BD%22%7D",
-            "搜索-数据跨境" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E6%95%B0%E6%8D%AE%E8%B7%A8%E5%A2%83%22%7D",
-            "搜索-数据安全" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E6%95%B0%E6%8D%AE%E5%AE%89%E5%85%A8%22%7D",
-        ),
-        rules = mapOf(
-            // language=regexp
-            "https://sousuoht\\.www\\.gov\\.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE.+" to Rule(
-                parser = Parser.Type.JSON,
-                headers = mapOf(
-                    "Content-Type" to "application/json",
-                    "Athenaappkey" to "be5qL61qyHKQ87BVlqy%2BwloIkpcL1tCqC4yOM%2F%2BsqaSn0NxuA4GPOWocQu96h0PUaYBe9hz3DVvjGTECMNNou6cjAlUc3UoJlviQAipc4Ha8qxmIeS1sPqdGQ3Unm49j%2BsYN5T%2BzjqKxdA2yz5qLcG5wjZR7rqYVgT98NasByMA%3D",
-                    "Athenaappname" to "%E5%9B%BD%E7%BD%91%E6%90%9C%E7%B4%A2",
-                ),
-                properties = mapOf("post" to "true"),
-                list = Content(
-                    expression = "$.result.data.middle.list",
-                    title = Selector("$.title_no_tag"),
-                    dateTime = Selector("$.time"),
-                    link = Selector("$.url"),
-                ),
-                next = Selector(
-                    expression = "$.result.data.pager",
-                    process = Process(
-                        script = listOf(
-                            Script(
-                                Script.Type.Javascript,
-                                // language=javascript
-                                script = "let query = {\n    code: '17da70961a7',\n    historySearchWords: [],\n    dataTypeId: '107',\n    orderBy: 'time',\n    searchBy: 'title',\n    appendixType: '',\n    granularity: 'ALL',\n    trackTotalHits: true,\n    beginDateTime: '',\n    endDateTime: '',\n    isSearchForced: 0,\n    filters: [],\n    pageNo: 1,\n    pageSize: 10,\n    customFilter: {\n        operator: 'and',\n        properties: []\n    },\n    searchWord: '数据要素'\n}\nif (text && text !== '') {\n    let pager = JSON.parse(text)\n    let current = pager['pageNo']\n    let count = pager['pageCount']\n    if (current < count) {\n        query.pageNo = current + 1\n    } else {\n        return ''\n    }\n}\nreturn `https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=\${encodeURIComponent(JSON.stringify(query))}`\n"
-                            )
-                        )
-                    )
-                )
-            ),
-            // language=regexp
-            "https*://www\\.gov\\.cn/.+/content_\\d+\\.html*" to Rule(
-                downloader = Downloader.Type.BROWSER,
-                text = Content(
-                    expression = ".content .article",
-                    title = Selector("h1#ti", process = Process(script = listOf(titleScript))),
-                    author = Selector(
-                        ".pages-date > .font",
-                        process = Process(
-                            default = "中华人民共和国中央人民政府",
-                            remove = listOf("来源：")
-                        )
-                    ),
-                    dateTime = Selector(
-                        ".pages-date",
-                        process = Process(script = listOf(timeScript)),
-                        properties = mapOf("precision" to "true")
-                    ),
-                    content = Selector(".pages_content"),
-                    extra = mapOf(
-                        "source" to Selector(".pages_content", properties = mapOf("html" to "true"))
-                    )
-                )
-            ),
-            // language=regexp
-            "https://xcx\\.www\\.gov\\.cn/robot/gwypolicy/#/qaReadDetails.+" to Rule(
-                downloader = Downloader.Type.BROWSER,
-                text = Content(
-                    expression = ".main-content",
-                    title = Selector(".qa_content_box", process = Process(script = listOf(titleScript))),
-                    author = Selector(".qa-subtext-item:contains(来源)", process = Process(remove = listOf("来\\s*源："))),
-                    dateTime = Selector(process = Process(script = listOf(timeScript))),
-                    content = Selector(".qa_content_text"),
-                    extra = mapOf(
-                        "source" to Selector(".main-content", properties = mapOf("html" to "true"))
-                    )
-                )
-            ),
-        )
-    ),
+    )
+)
+
+val sites = setOf( // Gathers the four definitions below into one read-only Set (setOf iterates in the listed order). NOTE(review): consumers are not visible in this hunk — presumably the runners; confirm.
+    广东政务服务和数据管理局,
+    深圳市政务服务和数据管理局,
+    中华人民共和国中央人民政府,
+    中华人民共和国国家互联网信息办公室,
 )
--- a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/runner/NewsRunner.kt
+++ b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/runner/NewsRunner.kt
@@ -125,7 +125,7 @@ class NewsRunner : Runner {
                description = null,
                score = null,
                createTime = createTime,
-                pushed = null,
+                pushed = false,
            )
        } ?: return null
    }
@@ -159,7 +159,7 @@ class NewsRunner : Runner {
                }
            }
            next = page["next"] as String?
-        } while (recursive and !next.isNullOrBlank())
+        } while (recursive && !next.isNullOrBlank())
        return links
    }
 }
--- a/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestManagement.kt
+++ b/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestManagement.kt
@@ -0,0 +1,48 @@
+package com.lanyuanxiaoyao.digtal.market
+
+import cn.hutool.json.JSONUtil
+import com.lanyuanxiaoyao.digtal.market.runner.NewsRunner
+import com.lanyuanxiaoyao.squirrel.core.common.Management
+import jakarta.annotation.Resource
+import org.junit.jupiter.api.Test
+import org.slf4j.LoggerFactory
+import org.springframework.boot.test.context.SpringBootTest
+
+// Spring-context checks for NewsRunner; the URLs target live cac.gov.cn pages, so these presumably need network access.
+@SpringBootTest(classes = [Application::class], webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
+@ExperimentalStdlibApi
+class TestManagement {
+    private val logger = LoggerFactory.getLogger(javaClass)
+
+    // Not used by the tests below; injection fails the test class if no Management bean is available.
+    @Resource
+    private lateinit var management: Management
+
+    @Resource
+    private lateinit var newsRunner: NewsRunner
+
+    /** Logs each link parsed from the search URL. NOTE(review): the `true` flag is presumably `recursive` — confirm. */
+    @Test
+    fun testParseList() {
+        val searchUrl =
+            "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据要素&mustpro=&notpro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09"
+        newsRunner
+            .parseArticleLink("1df28c35-1e9e-4d58-9595-f08029b160b4", searchUrl, true)
+            .forEach { logger.info("{} {} {}", it.datetime, it.url, it.title) }
+    }
+
+    /**
+     * Parses one known article and logs it as pretty JSON; a null result now fails the test instead of passing silently.
+     */
+    @Test
+    fun testParseArticle() {
+        val article = newsRunner.parseArticle(
+            "1df28c35-1e9e-4d58-9595-f08029b160b4",
+            "https://www.cac.gov.cn/2024-10/14/c_1730595202555062.htm",
+            "no title",
+            "no datetime",
+            "",
+        )
+        logger.info("{}", JSONUtil.toJsonPrettyStr(checkNotNull(article) { "parseArticle returned null" }))
+    }
+}