diff --git a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Sites.kt b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Sites.kt index 2211987..c7cc790 100644 --- a/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Sites.kt +++ b/src/main/kotlin/com/lanyuanxiaoyao/digtal/market/Sites.kt @@ -328,11 +328,78 @@ private val 中华人民共和国国家互联网信息办公室 = Site( ) ) +val 广州数据交易所 = Site( + code = "9a7f1d8f-4f39-4120-adeb-7435339b97bb", + name = "广州数据交易所", + author = "lanyuanxiaoyao", + home = "https://www.cantonde.com", + icon = "https://www.cantonde.com/favicon.ico", + description = "广州数据交易所是广东省深入贯彻落实党中央、国务院关于加快培育数据要素市场,助力数字经济高质量发展工作部署,高标准建设的新型数据交易场所。旨在为市场主体提供合规安全、集约高效的数据流通交易综合性服务。广州数据交易所作为广东省数据要素市场体系的核心枢纽,是畅通数据要素大循环的关键举措,也是推进数据要素市场化配置改革的重要载体。", + target = Site.Target.SEARCH, + parser = Parser.Type.JSON, + search = "https://www.cantonde.com/si/common/searchInfo?json=%7B%22NAME%22%3A%22{query}%22%2C%22IN_CATEGORY%22%3A%22%22%2C%22NOT_IN_CATEGORY%22%3A%22%22%2C%22CATEGORY%22%3A%22%22%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%7D", + rules = mapOf( + // language=regexp + "https://www\\.cantonde\\.com/si/common/searchInfo\\?json=.+" to Rule( + list = Content( + expression = "$.data", + title = Selector("$.TITLE"), + dateTime = Selector("$.PUBTIME_ORDER"), + link = Selector( + "$.ID", process = Process( + script = listOf( + Script( + Script.Type.Javascript, + // language=javascript + "return `https://www.cantonde.com/si/info/detail?json=%7B%22id%22%3A%22\${text}%22%7D`", + ) + ) + ) + ) + ), + properties = mapOf(post), + next = Selector( + "$.extra.total", process = Process( + script = listOf( + Script( + Script.Type.Javascript, + // language=javascript + "if (text && text !== '') {\n let total = parseInt(text)\n let postData = JSON.parse(params['json'])\n let current = postData['pageNo']\n let size = postData['pageSize']\n if (current * size >= total) {\n return ''\n }\n postData['pageNo'] = current + 1\n return `https://www.cantonde.com/si/common/searchInfo?json=` + encodeURIComponent(JSON.stringify(postData))\n}\nreturn ''" + ) + ) + ) + ), + ), + // language=regexp + "https://www\\.cantonde\\.com/si/info/detail\\?json=.+" to Rule( + text = Content( + expression = "$.data", + title = Selector("$.TITLE"), + content = Selector("$.CONTENT", process = Process(remove = commonRemove)), + dateTime = Selector( + process = Process( + script = listOf( + Script( + Script.Type.Javascript, + // language=javascript + "let datetimeText = params['datetime']\nif (datetimeText && datetimeText !== '') {\n let datetime = new Date(parseInt(params['datetime']))\n return `\${datetime.getFullYear()}-\${datetime.getMonth() + 1}-\${datetime.getDate()} \${datetime.getHours()}:\${datetime.getMinutes()}:\${datetime.getSeconds()}`\n}\nreturn ''", + ) + ) + ) + ), + author = Selector(process = Process(default = "广州数据交易所")) + ), + properties = mapOf(post), + ), + ), +) + val sites = setOf( 广东政务服务和数据管理局, 深圳市政务服务和数据管理局, 中华人民共和国中央人民政府, 中华人民共和国国家互联网信息办公室, + 广州数据交易所, ) val keywords = setOf( diff --git a/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestManagement.kt b/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestManagement.kt index 2318e6e..0a0be2b 100644 --- a/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestManagement.kt +++ b/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestManagement.kt @@ -27,12 +27,12 @@ class TestManagement { fun testParse() { newsRunner .parseArticleLink( - "74ee6b33-c1a3-41f9-b947-acd0bebd0e6e", - "https://search.gd.gov.cn/api/search/all?json=%7B%22page%22%3A1%2C%22keywords%22%3A%22数据要素%22%2C%22advance%22%3A%22true%22%2C%22sort%22%3A%22time%22%2C%22position%22%3A%22all%22%2C%22time_to%22%3A2524579200%2C%22time_from%22%3A189273600%2C%22site_id%22%3A%22246%22%2C%22range%22%3A%22site%22%2C%22recommand%22%3A1%7D", + "9a7f1d8f-4f39-4120-adeb-7435339b97bb", + "https://www.cantonde.com/si/common/searchInfo?json=%7B%22NAME%22%3A%22数据要素%22%2C%22IN_CATEGORY%22%3A%22%22%2C%22NOT_IN_CATEGORY%22%3A%22%22%2C%22CATEGORY%22%3A%22%22%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%7D", ) .forEach { link -> val article = newsRunner.parseArticle( - "74ee6b33-c1a3-41f9-b947-acd0bebd0e6e", + "9a7f1d8f-4f39-4120-adeb-7435339b97bb", link.url, link.title, link.datetime, diff --git a/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestRule.kt b/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestRule.kt new file mode 100644 index 0000000..e940ac8 --- /dev/null +++ b/src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestRule.kt @@ -0,0 +1,108 @@ +package com.lanyuanxiaoyao.digtal.market + +import cn.hutool.json.JSONUtil +import com.lanyuanxiaoyao.squirrel.core.common.* +import com.lanyuanxiaoyao.squirrel.core.jvm.BINARY_PATH +import com.lanyuanxiaoyao.squirrel.core.jvm.DRIVER_PATH +import com.lanyuanxiaoyao.squirrel.core.jvm.JvmManagement +import kotlinx.coroutines.runBlocking +import kotlin.test.BeforeTest +import kotlin.test.Test + +@ExperimentalStdlibApi +class TestRule { + private lateinit var management: Management + + private val link = + "https://www.cantonde.com/si/info/detail?json=%7B%22id%22%3A%223010%22%7D" + private val site = Site( + code = "9a7f1d8f-4f39-4120-adeb-7435339b97bb", + name = "广州数据交易所", + author = "lanyuanxiaoyao", + home = "https://www.cantonde.com", + icon = "https://www.cantonde.com/favicon.ico", + description = "广州数据交易所是广东省深入贯彻落实党中央、国务院关于加快培育数据要素市场,助力数字经济高质量发展工作部署,高标准建设的新型数据交易场所。旨在为市场主体提供合规安全、集约高效的数据流通交易综合性服务。广州数据交易所作为广东省数据要素市场体系的核心枢纽,是畅通数据要素大循环的关键举措,也是推进数据要素市场化配置改革的重要载体。", + target = Site.Target.SEARCH, + parser = Parser.Type.JSON, + search = "https://www.cantonde.com/si/common/searchInfo?json=%7B%22NAME%22%3A%22{query}%22%2C%22IN_CATEGORY%22%3A%22%22%2C%22NOT_IN_CATEGORY%22%3A%22%22%2C%22CATEGORY%22%3A%22%22%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%7D", + rules = mapOf( + // language=regexp + "https://www\\.cantonde\\.com/si/common/searchInfo\\?json=.+" to Rule( + list = Content( + expression = "$.data", + title = Selector("$.TITLE"), + dateTime = Selector("$.PUBTIME_ORDER"), + link = Selector( + "$.ID", process = Process( + script = listOf( + Script( + Script.Type.Javascript, + // language=javascript + "return `https://www.cantonde.com/si/info/detail?json=%7B%22id%22%3A%22\${text}%22%7D`", + ) + ) + ) + ) + ), + properties = mapOf("post" to "true"), + next = Selector( + "$.extra.total", process = Process( + script = listOf( + Script( + Script.Type.Javascript, + // language=javascript + "if (text && text !== '') {\n let total = parseInt(text)\n let postData = JSON.parse(params['json'])\n let current = postData['pageNo']\n let size = postData['pageSize']\n if (current * size >= total) {\n return ''\n }\n postData['pageNo'] = current + 1\n return `https://www.cantonde.com/si/common/searchInfo?json=` + encodeURIComponent(JSON.stringify(postData))\n}\nreturn ''" + ) + ) + ) + ), + ), + // language=regexp + "https://www\\.cantonde\\.com/si/info/detail\\?json=.+" to Rule( + text = Content( + expression = "$.data", + title = Selector("$.TITLE"), + content = Selector("$.CONTENT"), + dateTime = Selector( + process = Process( + script = listOf( + Script( + Script.Type.Javascript, + // language=javascript + "return params['datetime'] ?? ''", + ) + ) + ) + ), + author = Selector(process = Process(default = "广州数据交易所")) + ), + properties = mapOf("post" to "true"), + ), + ), + ) + + @BeforeTest + fun before() { + management = JvmManagement(InMemoryDatabase()) + management.importSites(setOf(site)) + management.changeDownloader("basicCacheDownloader") + management.changeDownloader( + "chromeCacheDownloader", + mapOf( + BINARY_PATH to "/Users/lanyuanxiaoyao/Downloads/chromium/128/macOS-1289987/Chromium.app/Contents/MacOS/Chromium", + DRIVER_PATH to "/Users/lanyuanxiaoyao/Downloads/chromium/128/macOS-1289987/chromedriver", + ) + ) + } + + @Test + fun test() { + val page = runBlocking { + management.parse( + code = site.code, + url = link, + ) + } + println(JSONUtil.toJsonPrettyStr(page)) + } +} \ No newline at end of file diff --git a/src/test/resources/test.http b/src/test/resources/test.http index 1de8e91..d2a46b6 100644 --- a/src/test/resources/test.http +++ b/src/test/resources/test.http @@ -67,4 +67,17 @@ Content-Type: application/json "site_id": "246", "range": "site", "recommand": 1 +} + +### Search +POST https://www.cantonde.com/si/common/searchInfo +Content-Type: application/json + +{ + "NAME": "数据要素", + "IN_CATEGORY": "", + "NOT_IN_CATEGORY": "", + "CATEGORY": "", + "pageNo": 1, + "pageSize": 10 } \ No newline at end of file