feat: 增加广州数据交易所站点
This commit is contained in:
@@ -328,11 +328,78 @@ private val 中华人民共和国国家互联网信息办公室 = Site(
|
||||
)
|
||||
)
|
||||
|
||||
/**
 * Site definition for the Guangzhou Data Exchange (https://www.cantonde.com).
 *
 * Two URL-pattern rules are registered:
 * - the search endpoint (`si/common/searchInfo`), parsed as a list of results together with a
 *   script-computed link to the next result page;
 * - the detail endpoint (`si/info/detail`), parsed as a single article.
 *
 * Both rules carry the shared `post` property.
 * NOTE(review): presumably `post` makes the downloader send the decoded `json` query parameter
 * as a POST body instead of issuing a GET; confirm against the downloader implementation.
 */
val 广州数据交易所 = Site(
    code = "9a7f1d8f-4f39-4120-adeb-7435339b97bb",
    name = "广州数据交易所",
    author = "lanyuanxiaoyao",
    home = "https://www.cantonde.com",
    icon = "https://www.cantonde.com/favicon.ico",
    description = "广州数据交易所是广东省深入贯彻落实党中央、国务院关于加快培育数据要素市场,助力数字经济高质量发展工作部署,高标准建设的新型数据交易场所。旨在为市场主体提供合规安全、集约高效的数据流通交易综合性服务。广州数据交易所作为广东省数据要素市场体系的核心枢纽,是畅通数据要素大循环的关键举措,也是推进数据要素市场化配置改革的重要载体。",
    target = Site.Target.SEARCH,
    parser = Parser.Type.JSON,
    // `{query}` is the placeholder for the search keyword inside the URL-encoded JSON payload.
    search = "https://www.cantonde.com/si/common/searchInfo?json=%7B%22NAME%22%3A%22{query}%22%2C%22IN_CATEGORY%22%3A%22%22%2C%22NOT_IN_CATEGORY%22%3A%22%22%2C%22CATEGORY%22%3A%22%22%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%7D",
    rules = mapOf(
        // Search result pages.
        // language=regexp
        "https://www\\.cantonde\\.com/si/common/searchInfo\\?json=.+" to Rule(
            list = Content(
                expression = "$.data",
                title = Selector("$.TITLE"),
                dateTime = Selector("$.PUBTIME_ORDER"),
                link = Selector(
                    "$.ID",
                    process = Process(
                        script = listOf(
                            Script(
                                Script.Type.Javascript,
                                // Wraps the record id into the URL-encoded JSON payload of the detail endpoint.
                                // language=javascript
                                "return `https://www.cantonde.com/si/info/detail?json=%7B%22id%22%3A%22\${text}%22%7D`",
                            ),
                        ),
                    ),
                ),
            ),
            properties = mapOf(post),
            next = Selector(
                "$.extra.total",
                process = Process(
                    script = listOf(
                        Script(
                            Script.Type.Javascript,
                            // Stops paging once pageNo * pageSize covers the reported total; otherwise
                            // re-emits the current payload with pageNo incremented by one.
                            // language=javascript
                            "if (text && text !== '') {\n let total = parseInt(text)\n let postData = JSON.parse(params['json'])\n let current = postData['pageNo']\n let size = postData['pageSize']\n if (current * size >= total) {\n return ''\n }\n postData['pageNo'] = current + 1\n return `https://www.cantonde.com/si/common/searchInfo?json=` + encodeURIComponent(JSON.stringify(postData))\n}\nreturn ''",
                        ),
                    ),
                ),
            ),
        ),
        // Article detail pages.
        // language=regexp
        "https://www\\.cantonde\\.com/si/info/detail\\?json=.+" to Rule(
            text = Content(
                expression = "$.data",
                title = Selector("$.TITLE"),
                content = Selector("$.CONTENT", process = Process(remove = commonRemove)),
                dateTime = Selector(
                    process = Process(
                        script = listOf(
                            Script(
                                Script.Type.Javascript,
                                // Formats the numeric `datetime` parameter as `yyyy-MM-dd HH:mm:ss`.
                                // Every component is zero-padded, and a value that does not parse to a
                                // valid date yields an empty string rather than "NaN-NaN-NaN NaN:NaN:NaN".
                                // NOTE(review): the value is assumed to be epoch milliseconds taken from
                                // PUBTIME_ORDER on the list page; confirm against a live response.
                                // language=javascript
                                "let datetimeText = params['datetime']\nif (datetimeText && datetimeText !== '') {\n let datetime = new Date(parseInt(datetimeText, 10))\n if (isNaN(datetime.getTime())) {\n return ''\n }\n let pad = (value) => String(value).padStart(2, '0')\n return `\${datetime.getFullYear()}-\${pad(datetime.getMonth() + 1)}-\${pad(datetime.getDate())} \${pad(datetime.getHours())}:\${pad(datetime.getMinutes())}:\${pad(datetime.getSeconds())}`\n}\nreturn ''",
                            ),
                        ),
                    ),
                ),
                // The detail rule selects no author field; a fixed default is used instead.
                author = Selector(process = Process(default = "广州数据交易所")),
            ),
            properties = mapOf(post),
        ),
    ),
)
|
||||
|
||||
/**
 * The site definitions collected by this file.
 *
 * `setOf` keeps the elements in the order they are listed here.
 */
val sites = setOf(
    广东政务服务和数据管理局,
    深圳市政务服务和数据管理局,
    中华人民共和国中央人民政府,
    中华人民共和国国家互联网信息办公室,
    广州数据交易所,
)
|
||||
|
||||
val keywords = setOf(
|
||||
|
||||
@@ -27,12 +27,12 @@ class TestManagement {
|
||||
fun testParse() {
|
||||
newsRunner
|
||||
.parseArticleLink(
|
||||
"74ee6b33-c1a3-41f9-b947-acd0bebd0e6e",
|
||||
"https://search.gd.gov.cn/api/search/all?json=%7B%22page%22%3A1%2C%22keywords%22%3A%22数据要素%22%2C%22advance%22%3A%22true%22%2C%22sort%22%3A%22time%22%2C%22position%22%3A%22all%22%2C%22time_to%22%3A2524579200%2C%22time_from%22%3A189273600%2C%22site_id%22%3A%22246%22%2C%22range%22%3A%22site%22%2C%22recommand%22%3A1%7D",
|
||||
"9a7f1d8f-4f39-4120-adeb-7435339b97bb",
|
||||
"https://www.cantonde.com/si/common/searchInfo?json=%7B%22NAME%22%3A%22数据要素%22%2C%22IN_CATEGORY%22%3A%22%22%2C%22NOT_IN_CATEGORY%22%3A%22%22%2C%22CATEGORY%22%3A%22%22%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%7D",
|
||||
)
|
||||
.forEach { link ->
|
||||
val article = newsRunner.parseArticle(
|
||||
"74ee6b33-c1a3-41f9-b947-acd0bebd0e6e",
|
||||
"9a7f1d8f-4f39-4120-adeb-7435339b97bb",
|
||||
link.url,
|
||||
link.title,
|
||||
link.datetime,
|
||||
|
||||
108
src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestRule.kt
Normal file
108
src/test/kotlin/com/lanyuanxiaoyao/digtal/market/TestRule.kt
Normal file
@@ -0,0 +1,108 @@
|
||||
package com.lanyuanxiaoyao.digtal.market
|
||||
|
||||
import cn.hutool.json.JSONUtil
|
||||
import com.lanyuanxiaoyao.squirrel.core.common.*
|
||||
import com.lanyuanxiaoyao.squirrel.core.jvm.BINARY_PATH
|
||||
import com.lanyuanxiaoyao.squirrel.core.jvm.DRIVER_PATH
|
||||
import com.lanyuanxiaoyao.squirrel.core.jvm.JvmManagement
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import kotlin.test.BeforeTest
|
||||
import kotlin.test.Test
|
||||
|
||||
/**
 * Manual smoke test for a single site rule: parses one detail URL of the Guangzhou Data Exchange
 * with a test-local [Site] definition and prints the parsed page as pretty JSON.
 *
 * No assertions are made; the output is meant to be inspected by hand.
 *
 * The Chromium binary and chromedriver locations can be overridden with the environment variables
 * `CHROMIUM_BINARY_PATH` and `CHROMEDRIVER_PATH`. When they are unset, the original
 * developer-machine paths are used.
 */
@ExperimentalStdlibApi
class TestRule {
    private lateinit var management: Management

    /** Detail-page URL handed to the parser in [test]. */
    private val link =
        "https://www.cantonde.com/si/info/detail?json=%7B%22id%22%3A%223010%22%7D"

    /** Test-local site definition, so rules can be iterated on without touching the shipped one. */
    private val site = Site(
        code = "9a7f1d8f-4f39-4120-adeb-7435339b97bb",
        name = "广州数据交易所",
        author = "lanyuanxiaoyao",
        home = "https://www.cantonde.com",
        icon = "https://www.cantonde.com/favicon.ico",
        description = "广州数据交易所是广东省深入贯彻落实党中央、国务院关于加快培育数据要素市场,助力数字经济高质量发展工作部署,高标准建设的新型数据交易场所。旨在为市场主体提供合规安全、集约高效的数据流通交易综合性服务。广州数据交易所作为广东省数据要素市场体系的核心枢纽,是畅通数据要素大循环的关键举措,也是推进数据要素市场化配置改革的重要载体。",
        target = Site.Target.SEARCH,
        parser = Parser.Type.JSON,
        search = "https://www.cantonde.com/si/common/searchInfo?json=%7B%22NAME%22%3A%22{query}%22%2C%22IN_CATEGORY%22%3A%22%22%2C%22NOT_IN_CATEGORY%22%3A%22%22%2C%22CATEGORY%22%3A%22%22%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%7D",
        rules = mapOf(
            // Search result pages.
            // language=regexp
            "https://www\\.cantonde\\.com/si/common/searchInfo\\?json=.+" to Rule(
                list = Content(
                    expression = "$.data",
                    title = Selector("$.TITLE"),
                    dateTime = Selector("$.PUBTIME_ORDER"),
                    link = Selector(
                        "$.ID",
                        process = Process(
                            script = listOf(
                                Script(
                                    Script.Type.Javascript,
                                    // language=javascript
                                    "return `https://www.cantonde.com/si/info/detail?json=%7B%22id%22%3A%22\${text}%22%7D`",
                                ),
                            ),
                        ),
                    ),
                ),
                properties = mapOf("post" to "true"),
                next = Selector(
                    "$.extra.total",
                    process = Process(
                        script = listOf(
                            Script(
                                Script.Type.Javascript,
                                // Returns the next page URL, or '' once pageNo * pageSize reaches the total.
                                // language=javascript
                                "if (text && text !== '') {\n let total = parseInt(text)\n let postData = JSON.parse(params['json'])\n let current = postData['pageNo']\n let size = postData['pageSize']\n if (current * size >= total) {\n return ''\n }\n postData['pageNo'] = current + 1\n return `https://www.cantonde.com/si/common/searchInfo?json=` + encodeURIComponent(JSON.stringify(postData))\n}\nreturn ''",
                            ),
                        ),
                    ),
                ),
            ),
            // Article detail pages.
            // language=regexp
            "https://www\\.cantonde\\.com/si/info/detail\\?json=.+" to Rule(
                text = Content(
                    expression = "$.data",
                    title = Selector("$.TITLE"),
                    content = Selector("$.CONTENT"),
                    dateTime = Selector(
                        process = Process(
                            script = listOf(
                                Script(
                                    Script.Type.Javascript,
                                    // Passes the `datetime` parameter through unchanged.
                                    // language=javascript
                                    "return params['datetime'] ?? ''",
                                ),
                            ),
                        ),
                    ),
                    author = Selector(process = Process(default = "广州数据交易所")),
                ),
                properties = mapOf("post" to "true"),
            ),
        ),
    )

    @BeforeTest
    fun before() {
        management = JvmManagement(InMemoryDatabase())
        management.importSites(setOf(site))
        // NOTE(review): two downloaders are selected back to back; presumably the second call
        // supersedes the first. Confirm which one is intended for this test.
        management.changeDownloader("basicCacheDownloader")
        management.changeDownloader(
            "chromeCacheDownloader",
            mapOf(
                // Environment overrides keep the test runnable on machines other than the author's.
                BINARY_PATH to (System.getenv(BINARY_PATH_ENV) ?: DEFAULT_BINARY_PATH),
                DRIVER_PATH to (System.getenv(DRIVER_PATH_ENV) ?: DEFAULT_DRIVER_PATH),
            ),
        )
    }

    @Test
    fun test() {
        val page = runBlocking {
            management.parse(
                code = site.code,
                url = link,
            )
        }
        println(JSONUtil.toJsonPrettyStr(page))
    }

    private companion object {
        /** Environment variable that overrides the Chromium binary location. */
        const val BINARY_PATH_ENV = "CHROMIUM_BINARY_PATH"

        /** Environment variable that overrides the chromedriver location. */
        const val DRIVER_PATH_ENV = "CHROMEDRIVER_PATH"

        /** Fallback locations: the paths originally hard-coded in this test. */
        const val DEFAULT_BINARY_PATH =
            "/Users/lanyuanxiaoyao/Downloads/chromium/128/macOS-1289987/Chromium.app/Contents/MacOS/Chromium"
        const val DEFAULT_DRIVER_PATH =
            "/Users/lanyuanxiaoyao/Downloads/chromium/128/macOS-1289987/chromedriver"
    }
}
|
||||
@@ -68,3 +68,16 @@ Content-Type: application/json
|
||||
"range": "site",
|
||||
"recommand": 1
|
||||
}
|
||||
|
||||
### Search
# Keyword search request against the cantonde.com searchInfo endpoint (first page, 10 results per page).
POST https://www.cantonde.com/si/common/searchInfo
Content-Type: application/json

{
  "NAME": "数据要素",
  "IN_CATEGORY": "",
  "NOT_IN_CATEGORY": "",
  "CATEGORY": "",
  "pageNo": 1,
  "pageSize": 10
}
|
||||
Reference in New Issue
Block a user