|
|
|
|
@@ -1,3 +1,5 @@
|
|
|
|
|
@file:Suppress("NonAsciiCharacters", "ObjectPropertyName")
|
|
|
|
|
|
|
|
|
|
package com.lanyuanxiaoyao.digtal.market
|
|
|
|
|
|
|
|
|
|
import com.lanyuanxiaoyao.squirrel.core.common.Content
|
|
|
|
|
@@ -25,236 +27,288 @@ private val titleScript = Script(
|
|
|
|
|
script = "if (text && text !== '') {\n return text\n} else if(params['title']) {\n return params['title']\n} else {\n return text\n}",
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
val sites = setOf(
|
|
|
|
|
Site(
|
|
|
|
|
code = "74ee6b33-c1a3-41f9-b947-acd0bebd0e6e",
|
|
|
|
|
name = "广东政务服务和数据管理局",
|
|
|
|
|
home = "https://zfsg.gd.gov.cn",
|
|
|
|
|
parser = Parser.Type.CSS,
|
|
|
|
|
author = "lanyuanxiaoyao",
|
|
|
|
|
target = Site.Target.TEXT,
|
|
|
|
|
downloader = Downloader.Type.BROWSER,
|
|
|
|
|
tags = mapOf(
|
|
|
|
|
"数据要闻" to "https://zfsg.gd.gov.cn/xxfb/ywsd/index.html",
|
|
|
|
|
"省局要闻" to "https://zfsg.gd.gov.cn/xxfb/sjyw/index.html",
|
|
|
|
|
"动态新闻" to "https://zfsg.gd.gov.cn/xxfb/dtxw/index.html",
|
|
|
|
|
"媒体报道" to "https://zfsg.gd.gov.cn/xxfb/mtbd/index.html",
|
|
|
|
|
"政务文件" to "https://zfsg.gd.gov.cn/zwgk/wjk/index.html",
|
|
|
|
|
"政策解读" to "https://zfsg.gd.gov.cn/zwgk/zcjd2/index.html",
|
|
|
|
|
),
|
|
|
|
|
rules = mapOf(
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https*://zfsg\\.gd\\.gov\\.cn/(xxfb|zwgk)/.+/index(_\\d*)*\\.html" to Rule(
|
|
|
|
|
list = Content(
|
|
|
|
|
expression = "ul.newList > li",
|
|
|
|
|
title = Selector(".til > a"),
|
|
|
|
|
dateTime = Selector(".time"),
|
|
|
|
|
link = Selector(".til > a", "href"),
|
|
|
|
|
),
|
|
|
|
|
next = Selector(".page > a.next", "href")
|
|
|
|
|
),
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https*://zfsg\\.gd\\.gov\\.cn/(xxfb|zwgk)/.+/content/post_\\d+\\.html" to Rule(
|
|
|
|
|
text = Content(
|
|
|
|
|
expression = ".Con",
|
|
|
|
|
title = Selector("h3.zw-title"),
|
|
|
|
|
author = Selector(".zw-info > .ly", process = Process(remove = listOf("来源\\s*:\\s*"))),
|
|
|
|
|
dateTime = Selector(".zw-info > .time", process = Process(remove = listOf("时间\\s*:\\s*"))),
|
|
|
|
|
content = Selector(".zw"),
|
|
|
|
|
extra = mapOf(
|
|
|
|
|
"source" to Selector(".zw", properties = mapOf(html))
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https://mp\\.weixin\\.qq\\.com/s/.+" to Rule(
|
|
|
|
|
text = Content(
|
|
|
|
|
expression = "#page-content",
|
|
|
|
|
title = Selector("#activity-name"),
|
|
|
|
|
author = Selector("#js_name"),
|
|
|
|
|
dateTime = Selector("#publish_time"),
|
|
|
|
|
content = Selector("#js_content"),
|
|
|
|
|
extra = mapOf(
|
|
|
|
|
"source" to Selector("#js_content", properties = mapOf(html))
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https*://zfsg\\.gd\\.gov\\.cn/hdjlpt/yjzj/answer/.+" to Rule(
|
|
|
|
|
text = Content(
|
|
|
|
|
expression = "#article-container",
|
|
|
|
|
title = Selector("#article-title"),
|
|
|
|
|
author = Selector("#article-source", process = Process(remove = listOf("发布机构:"))),
|
|
|
|
|
dateTime = Selector("#article-published_at", process = Process(remove = listOf("发布时间:"))),
|
|
|
|
|
content = Selector("#article-content"),
|
|
|
|
|
extra = mapOf(
|
|
|
|
|
"source" to Selector("#article-content", properties = mapOf(html))
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https*://zfsg\\.gd\\.gov\\.cn/hdjlpt/live/index.php\\?pid=\\d+" to Rule(
|
|
|
|
|
text = Content(
|
|
|
|
|
expression = "body",
|
|
|
|
|
title = Selector(".title-page .txt > span"),
|
|
|
|
|
author = Selector(process = Process(default = "广东政务服务和数据管理局")),
|
|
|
|
|
dateTime = Selector(process = Process(script = listOf(timeScript))),
|
|
|
|
|
content = Selector(".content"),
|
|
|
|
|
extra = mapOf(
|
|
|
|
|
"source" to Selector(".content", properties = mapOf(html))
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
)
|
|
|
|
|
private val 广东政务服务和数据管理局 = Site(
|
|
|
|
|
code = "74ee6b33-c1a3-41f9-b947-acd0bebd0e6e",
|
|
|
|
|
name = "广东政务服务和数据管理局",
|
|
|
|
|
home = "https://zfsg.gd.gov.cn",
|
|
|
|
|
parser = Parser.Type.CSS,
|
|
|
|
|
author = "lanyuanxiaoyao",
|
|
|
|
|
target = Site.Target.TEXT,
|
|
|
|
|
downloader = Downloader.Type.BROWSER,
|
|
|
|
|
tags = mapOf(
|
|
|
|
|
"数据要闻" to "https://zfsg.gd.gov.cn/xxfb/ywsd/index.html",
|
|
|
|
|
"省局要闻" to "https://zfsg.gd.gov.cn/xxfb/sjyw/index.html",
|
|
|
|
|
"动态新闻" to "https://zfsg.gd.gov.cn/xxfb/dtxw/index.html",
|
|
|
|
|
"媒体报道" to "https://zfsg.gd.gov.cn/xxfb/mtbd/index.html",
|
|
|
|
|
"政务文件" to "https://zfsg.gd.gov.cn/zwgk/wjk/index.html",
|
|
|
|
|
"政策解读" to "https://zfsg.gd.gov.cn/zwgk/zcjd2/index.html",
|
|
|
|
|
),
|
|
|
|
|
Site(
|
|
|
|
|
code = "189a0e12-9319-4a4b-bf3b-34a6282e6f68",
|
|
|
|
|
name = "深圳市政务服务和数据管理局",
|
|
|
|
|
home = "https://www.sz.gov.cn/szzsj/gkmlpt/index",
|
|
|
|
|
icon = "https://www.sz.gov.cn/favicon.ico",
|
|
|
|
|
parser = Parser.Type.CSS,
|
|
|
|
|
author = "lanyuanxiaoyao",
|
|
|
|
|
target = Site.Target.TEXT,
|
|
|
|
|
downloader = Downloader.Type.HTTP,
|
|
|
|
|
properties = mapOf(iframe),
|
|
|
|
|
tags = mapOf(
|
|
|
|
|
"工作动态" to "http://www.sz.gov.cn/szzsj/gkmlpt/api/all/19236?page=1",
|
|
|
|
|
rules = mapOf(
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https*://zfsg\\.gd\\.gov\\.cn/(xxfb|zwgk)/.+/index(_\\d*)*\\.html" to Rule(
|
|
|
|
|
list = Content(
|
|
|
|
|
expression = "ul.newList > li",
|
|
|
|
|
title = Selector(".til > a"),
|
|
|
|
|
dateTime = Selector(".time"),
|
|
|
|
|
link = Selector(".til > a", "href"),
|
|
|
|
|
),
|
|
|
|
|
next = Selector(".page > a.next", "href")
|
|
|
|
|
),
|
|
|
|
|
rules = mapOf(
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https*://www\\.sz\\.gov\\.cn/szzsj/gkmlpt/api/all/19236\\?page=\\d+" to Rule(
|
|
|
|
|
parser = Parser.Type.JSON,
|
|
|
|
|
list = Content(
|
|
|
|
|
expression = "$.articles",
|
|
|
|
|
title = Selector("$.title"),
|
|
|
|
|
dateTime = Selector(
|
|
|
|
|
"$.first_publish_time",
|
|
|
|
|
process = Process(
|
|
|
|
|
script = listOf(
|
|
|
|
|
Script(
|
|
|
|
|
Script.Type.Javascript,
|
|
|
|
|
// language=javascript
|
|
|
|
|
script = "let datetime = new Date(text * 1000)\nreturn `\${datetime.getFullYear()}-\${datetime.getMonth() + 1}-\${datetime.getDate()} \${datetime.getHours()}:\${datetime.getMinutes()}:\${datetime.getSeconds()}`"
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
link = Selector("$.url", process = Process(replace = listOf(Replace("https", "http")))),
|
|
|
|
|
),
|
|
|
|
|
next = Selector(
|
|
|
|
|
"$.total",
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https*://zfsg\\.gd\\.gov\\.cn/(xxfb|zwgk)/.+/content/post_\\d+\\.html" to Rule(
|
|
|
|
|
text = Content(
|
|
|
|
|
expression = ".Con",
|
|
|
|
|
title = Selector("h3.zw-title"),
|
|
|
|
|
author = Selector(".zw-info > .ly", process = Process(remove = listOf("来源\\s*:\\s*"))),
|
|
|
|
|
dateTime = Selector(".zw-info > .time", process = Process(remove = listOf("时间\\s*:\\s*"))),
|
|
|
|
|
content = Selector(".zw"),
|
|
|
|
|
extra = mapOf(
|
|
|
|
|
"source" to Selector(".zw", properties = mapOf(html))
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https://mp\\.weixin\\.qq\\.com/s/.+" to Rule(
|
|
|
|
|
text = Content(
|
|
|
|
|
expression = "#page-content",
|
|
|
|
|
title = Selector("#activity-name"),
|
|
|
|
|
author = Selector("#js_name"),
|
|
|
|
|
dateTime = Selector("#publish_time"),
|
|
|
|
|
content = Selector("#js_content"),
|
|
|
|
|
extra = mapOf(
|
|
|
|
|
"source" to Selector("#js_content", properties = mapOf(html))
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https*://zfsg\\.gd\\.gov\\.cn/hdjlpt/yjzj/answer/.+" to Rule(
|
|
|
|
|
text = Content(
|
|
|
|
|
expression = "#article-container",
|
|
|
|
|
title = Selector("#article-title"),
|
|
|
|
|
author = Selector("#article-source", process = Process(remove = listOf("发布机构:"))),
|
|
|
|
|
dateTime = Selector("#article-published_at", process = Process(remove = listOf("发布时间:"))),
|
|
|
|
|
content = Selector("#article-content"),
|
|
|
|
|
extra = mapOf(
|
|
|
|
|
"source" to Selector("#article-content", properties = mapOf(html))
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https*://zfsg\\.gd\\.gov\\.cn/hdjlpt/live/index.php\\?pid=\\d+" to Rule(
|
|
|
|
|
text = Content(
|
|
|
|
|
expression = "body",
|
|
|
|
|
title = Selector(".title-page .txt > span"),
|
|
|
|
|
author = Selector(process = Process(default = "广东政务服务和数据管理局")),
|
|
|
|
|
dateTime = Selector(process = Process(script = listOf(timeScript))),
|
|
|
|
|
content = Selector(".content"),
|
|
|
|
|
extra = mapOf(
|
|
|
|
|
"source" to Selector(".content", properties = mapOf(html))
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
private val 深圳市政务服务和数据管理局 = Site(
|
|
|
|
|
code = "189a0e12-9319-4a4b-bf3b-34a6282e6f68",
|
|
|
|
|
name = "深圳市政务服务和数据管理局",
|
|
|
|
|
home = "https://www.sz.gov.cn/szzsj/gkmlpt/index",
|
|
|
|
|
icon = "https://www.sz.gov.cn/favicon.ico",
|
|
|
|
|
parser = Parser.Type.CSS,
|
|
|
|
|
author = "lanyuanxiaoyao",
|
|
|
|
|
target = Site.Target.TEXT,
|
|
|
|
|
downloader = Downloader.Type.HTTP,
|
|
|
|
|
properties = mapOf(iframe),
|
|
|
|
|
tags = mapOf(
|
|
|
|
|
"工作动态" to "http://www.sz.gov.cn/szzsj/gkmlpt/api/all/19236?page=1",
|
|
|
|
|
),
|
|
|
|
|
rules = mapOf(
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https*://www\\.sz\\.gov\\.cn/szzsj/gkmlpt/api/all/19236\\?page=\\d+" to Rule(
|
|
|
|
|
parser = Parser.Type.JSON,
|
|
|
|
|
list = Content(
|
|
|
|
|
expression = "$.articles",
|
|
|
|
|
title = Selector("$.title"),
|
|
|
|
|
dateTime = Selector(
|
|
|
|
|
"$.first_publish_time",
|
|
|
|
|
process = Process(
|
|
|
|
|
script = listOf(
|
|
|
|
|
Script(
|
|
|
|
|
Script.Type.Javascript,
|
|
|
|
|
// language=javascript
|
|
|
|
|
script = "let total = parseInt(text)\nlet base = 'http://www.sz.gov.cn/szzsj/gkmlpt/api/all/19236?page='\nlet count = parseInt(params['page'] ?? '1')\nif (count * 100 >= total) {\n return ''\n}\nreturn `\${base}\${count + 1}`\n"
|
|
|
|
|
script = "let datetime = new Date(text * 1000)\nreturn `\${datetime.getFullYear()}-\${datetime.getMonth() + 1}-\${datetime.getDate()} \${datetime.getHours()}:\${datetime.getMinutes()}:\${datetime.getSeconds()}`"
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
link = Selector("$.url", process = Process(replace = listOf(Replace("https", "http")))),
|
|
|
|
|
),
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https*://www\\.sz\\.gov\\.cn/szzsj/gkmlpt/content/\\d+/\\d+/post_\\d+\\.html" to Rule(
|
|
|
|
|
downloader = Downloader.Type.BROWSER,
|
|
|
|
|
text = Content(
|
|
|
|
|
expression = ".content-container",
|
|
|
|
|
title = Selector(".content-box .content h1.title"),
|
|
|
|
|
author = Selector("td.first:contains(发布机构) + td > span"),
|
|
|
|
|
dateTime = Selector("td.second:contains(成文日期) + td > span"),
|
|
|
|
|
content = Selector(".content .article-content"),
|
|
|
|
|
extra = mapOf(
|
|
|
|
|
"source" to Selector(".content .article-content", properties = mapOf(html))
|
|
|
|
|
),
|
|
|
|
|
next = Selector(
|
|
|
|
|
"$.total",
|
|
|
|
|
process = Process(
|
|
|
|
|
script = listOf(
|
|
|
|
|
Script(
|
|
|
|
|
Script.Type.Javascript,
|
|
|
|
|
// language=javascript
|
|
|
|
|
script = "let total = parseInt(text)\nlet base = 'http://www.sz.gov.cn/szzsj/gkmlpt/api/all/19236?page='\nlet count = parseInt(params['page'] ?? '1')\nif (count * 100 >= total) {\n return ''\n}\nreturn `\${base}\${count + 1}`\n"
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https*://www\\.sz\\.gov\\.cn/szzsj/gkmlpt/content/\\d+/\\d+/post_\\d+\\.html" to Rule(
|
|
|
|
|
downloader = Downloader.Type.BROWSER,
|
|
|
|
|
text = Content(
|
|
|
|
|
expression = ".content-container",
|
|
|
|
|
title = Selector(".content-box .content h1.title"),
|
|
|
|
|
author = Selector("td.first:contains(发布机构) + td > span"),
|
|
|
|
|
dateTime = Selector("td.second:contains(成文日期) + td > span"),
|
|
|
|
|
content = Selector(".content .article-content"),
|
|
|
|
|
extra = mapOf(
|
|
|
|
|
"source" to Selector(".content .article-content", properties = mapOf(html))
|
|
|
|
|
),
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
/**
 * Crawl definition for www.gov.cn (中华人民共和国中央人民政府).
 *
 * Every tag is a keyword search against the `sousuoht.www.gov.cn` endpoint; the search
 * parameters travel URL-encoded in the `json` query parameter of the tag URL.
 *
 * Three rules are declared, keyed by URL regex:
 *  1. the JSON search-result listing, including computation of the next-page URL;
 *  2. `content_<digits>.htm`/`.html` article pages on www.gov.cn;
 *  3. `qaReadDetails` Q&A pages on xcx.www.gov.cn.
 * Rules 2 and 3 override the site-level HTTP downloader with the BROWSER downloader.
 */
private val 中华人民共和国中央人民政府 = Site(
    code = "00bfe1f5-7662-4ea5-ada3-6e9dfc19f0be",
    name = "中华人民共和国中央人民政府",
    home = "https://www.gov.cn",
    icon = "https://www.gov.cn/favicon.ico",
    parser = Parser.Type.CSS,
    author = "lanyuanxiaoyao",
    target = Site.Target.TEXT,
    downloader = Downloader.Type.HTTP,
    properties = mapOf(iframe),
    // One entry per search keyword; the URLs differ only in the encoded `searchWord` value.
    tags = mapOf(
        "搜索-数据要素" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E6%95%B0%E6%8D%AE%E8%A6%81%E7%B4%A0%22%7D",
        "搜索-国家数据局" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E5%9B%BD%E5%AE%B6%E6%95%B0%E6%8D%AE%E5%B1%80%22%7D",
        "搜索-可信数据空间" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E5%8F%AF%E4%BF%A1%E6%95%B0%E6%8D%AE%E7%A9%BA%E9%97%B4%22%7D",
        "搜索-数据基础设施" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E6%95%B0%E6%8D%AE%E5%9F%BA%E7%A1%80%E8%AE%BE%E6%96%BD%22%7D",
        "搜索-数据跨境" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E6%95%B0%E6%8D%AE%E8%B7%A8%E5%A2%83%22%7D",
        "搜索-数据安全" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E6%95%B0%E6%8D%AE%E5%AE%89%E5%85%A8%22%7D",
    ),
    rules = mapOf(
        // Rule 1: search-result listing, parsed as JSON.
        // language=regexp
        "https://sousuoht\\.www\\.gov\\.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE.+" to Rule(
            parser = Parser.Type.JSON,
            headers = mapOf(
                "Content-Type" to "application/json",
                "Athenaappkey" to "be5qL61qyHKQ87BVlqy%2BwloIkpcL1tCqC4yOM%2F%2BsqaSn0NxuA4GPOWocQu96h0PUaYBe9hz3DVvjGTECMNNou6cjAlUc3UoJlviQAipc4Ha8qxmIeS1sPqdGQ3Unm49j%2BsYN5T%2BzjqKxdA2yz5qLcG5wjZR7rqYVgT98NasByMA%3D",
                "Athenaappname" to "%E5%9B%BD%E7%BD%91%E6%90%9C%E7%B4%A2",
            ),
            // Presumably switches the request to HTTP POST — confirm against the downloader.
            properties = mapOf("post" to "true"),
            list = Content(
                expression = "$.result.data.middle.list",
                title = Selector(expression = "$.title_no_tag"),
                dateTime = Selector(expression = "$.time"),
                link = Selector(expression = "$.url"),
            ),
            // The script receives the pager object as `text`, bumps `pageNo` while it is below
            // `pageCount`, and returns an empty string once the last page has been reached.
            // NOTE(review): the query is always rebuilt with searchWord '数据要素', so the
            // next-page URL for the other five tags appears to switch keyword — confirm intent.
            next = Selector(
                expression = "$.result.data.pager",
                process = Process(
                    script = listOf(
                        Script(
                            Script.Type.Javascript,
                            // language=javascript
                            script = "let query = {\n code: '17da70961a7',\n historySearchWords: [],\n dataTypeId: '107',\n orderBy: 'time',\n searchBy: 'title',\n appendixType: '',\n granularity: 'ALL',\n trackTotalHits: true,\n beginDateTime: '',\n endDateTime: '',\n isSearchForced: 0,\n filters: [],\n pageNo: 1,\n pageSize: 10,\n customFilter: {\n operator: 'and',\n properties: []\n },\n searchWord: '数据要素'\n}\nif (text && text !== '') {\n let pager = JSON.parse(text)\n let current = pager['pageNo']\n let count = pager['pageCount']\n if (current < count) {\n query.pageNo = current + 1\n } else {\n return ''\n }\n}\nreturn `https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=\${encodeURIComponent(JSON.stringify(query))}`\n",
                        ),
                    ),
                ),
            ),
        ),
        // Rule 2: article pages.
        // language=regexp
        "https*://www\\.gov\\.cn/.+/content_\\d+\\.html*" to Rule(
            downloader = Downloader.Type.BROWSER,
            text = Content(
                expression = ".content .article",
                title = Selector(
                    expression = "h1#ti",
                    process = Process(script = listOf(titleScript)),
                ),
                // Strips the "来源:" label; `default` names this site as the fallback author.
                author = Selector(
                    expression = ".pages-date > .font",
                    process = Process(
                        default = "中华人民共和国中央人民政府",
                        remove = listOf("来源:"),
                    ),
                ),
                dateTime = Selector(
                    expression = ".pages-date",
                    process = Process(script = listOf(timeScript)),
                    properties = mapOf("precision" to "true"),
                ),
                content = Selector(expression = ".pages_content"),
                extra = mapOf(
                    "source" to Selector(
                        expression = ".pages_content",
                        properties = mapOf("html" to "true"),
                    ),
                ),
            ),
        ),
        // Rule 3: policy Q&A detail pages.
        // language=regexp
        "https://xcx\\.www\\.gov\\.cn/robot/gwypolicy/#/qaReadDetails.+" to Rule(
            downloader = Downloader.Type.BROWSER,
            text = Content(
                expression = ".main-content",
                title = Selector(
                    expression = ".qa_content_box",
                    process = Process(script = listOf(titleScript)),
                ),
                author = Selector(
                    expression = ".qa-subtext-item:contains(来源)",
                    process = Process(remove = listOf("来\\s*源:")),
                ),
                // No selector expression here: the value comes from timeScript alone.
                dateTime = Selector(process = Process(script = listOf(timeScript))),
                content = Selector(expression = ".qa_content_text"),
                extra = mapOf(
                    "source" to Selector(
                        expression = ".main-content",
                        properties = mapOf("html" to "true"),
                    ),
                ),
            ),
        ),
    ),
)
|
|
|
|
|
|
|
|
|
|
private val 中华人民共和国国家互联网信息办公室 = Site(
|
|
|
|
|
code = "1df28c35-1e9e-4d58-9595-f08029b160b4",
|
|
|
|
|
name = "中华人民共和国国家互联网信息办公室",
|
|
|
|
|
home = "https://www.cac.gov.cn",
|
|
|
|
|
icon = "https://www.cac.gov.cn/favicon.ico",
|
|
|
|
|
parser = Parser.Type.CSS,
|
|
|
|
|
author = "lanyuanxiaoyao",
|
|
|
|
|
target = Site.Target.TEXT,
|
|
|
|
|
downloader = Downloader.Type.HTTP,
|
|
|
|
|
// Keyword searches on search.cac.gov.cn, one tag per keyword. The `huopro` query parameter
// appears to carry the search keyword, so each URL must use the keyword named in its own
// label. Three entries previously repeated `huopro=数据跨境` and therefore crawled the
// wrong search.
// NOTE(review): `¬pro` looks like an HTML-entity-decoded `&notpro`. The listing rule's regex
// uses the same spelling, so it is left untouched here; if it is corrected, change both together.
tags = mapOf(
    "搜索-数据要素" to "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据要素&mustpro=¬pro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
    "搜索-数据跨境" to "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据跨境&mustpro=¬pro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
    "搜索-数据基础设施" to "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据基础设施&mustpro=¬pro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
    "搜索-数据安全" to "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据安全&mustpro=¬pro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
    "搜索-数据交易" to "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据交易&mustpro=¬pro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
),
|
|
|
|
|
rules = mapOf(
|
|
|
|
|
"https://search\\.cac\\.gov\\.cn/cms/cmsadmin/infopub/gjjs\\.jsp\\?templetid=1563339473064626&pubtype=S&pubpath=portal&page=\\d*&webappcode=A09&huopro=.+&mustpro=¬pro=&inpro=&startDate=\\\$*&endDate=\\\$*&sort=1&searchfield=\\\$*&searchdir=A09" to Rule(
|
|
|
|
|
list = Content(
|
|
|
|
|
expression = ".xpage-container .list-item",
|
|
|
|
|
title = Selector("a", process = Process(remove = listOf("\\s*»\\s*"))),
|
|
|
|
|
dateTime = Selector(".search_time"),
|
|
|
|
|
link = Selector("a", "href", process = Process(prefix = "https:"))
|
|
|
|
|
),
|
|
|
|
|
next = Selector(
|
|
|
|
|
".xpage-pagination .xpage-pagination-next a:contains(下一页)",
|
|
|
|
|
"href",
|
|
|
|
|
Process(prefix = "https://search.cac.gov.cn/cms/cmsadmin/infopub/")
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
"https://www\\.cac\\.gov\\.cn/.+/c_\\d+\\.htm" to Rule(
|
|
|
|
|
text = Content(
|
|
|
|
|
expression = ".main",
|
|
|
|
|
title = Selector("h1.title"),
|
|
|
|
|
author = Selector("#source", process = Process(remove = listOf("来源:"))),
|
|
|
|
|
dateTime = Selector("#pubtime"),
|
|
|
|
|
content = Selector(".main-content"),
|
|
|
|
|
extra = mapOf(
|
|
|
|
|
"source" to Selector(".main", properties = mapOf("html" to "true"))
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
Site(
|
|
|
|
|
code = "00bfe1f5-7662-4ea5-ada3-6e9dfc19f0be",
|
|
|
|
|
name = "中华人民共和国中央人民政府",
|
|
|
|
|
home = "https://www.gov.cn",
|
|
|
|
|
icon = "https://www.gov.cn/favicon.ico",
|
|
|
|
|
parser = Parser.Type.CSS,
|
|
|
|
|
author = "lanyuanxiaoyao",
|
|
|
|
|
target = Site.Target.TEXT,
|
|
|
|
|
downloader = Downloader.Type.HTTP,
|
|
|
|
|
properties = mapOf(iframe),
|
|
|
|
|
tags = mapOf(
|
|
|
|
|
"搜索-数据要素" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E6%95%B0%E6%8D%AE%E8%A6%81%E7%B4%A0%22%7D",
|
|
|
|
|
"搜索-国家数据局" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E5%9B%BD%E5%AE%B6%E6%95%B0%E6%8D%AE%E5%B1%80%22%7D",
|
|
|
|
|
"搜索-可信数据空间" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E5%8F%AF%E4%BF%A1%E6%95%B0%E6%8D%AE%E7%A9%BA%E9%97%B4%22%7D",
|
|
|
|
|
"搜索-数据基础设施" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E6%95%B0%E6%8D%AE%E5%9F%BA%E7%A1%80%E8%AE%BE%E6%96%BD%22%7D",
|
|
|
|
|
"搜索-数据跨境" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E6%95%B0%E6%8D%AE%E8%B7%A8%E5%A2%83%22%7D",
|
|
|
|
|
"搜索-数据安全" to "https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=%7B%22code%22%3A%2217da70961a7%22%2C%22historySearchWords%22%3A%5B%5D%2C%22dataTypeId%22%3A%22107%22%2C%22orderBy%22%3A%22time%22%2C%22searchBy%22%3A%22title%22%2C%22appendixType%22%3A%22%22%2C%22granularity%22%3A%22ALL%22%2C%22trackTotalHits%22%3Atrue%2C%22beginDateTime%22%3A%22%22%2C%22endDateTime%22%3A%22%22%2C%22isSearchForced%22%3A0%2C%22filters%22%3A%5B%5D%2C%22pageNo%22%3A1%2C%22pageSize%22%3A10%2C%22customFilter%22%3A%7B%22operator%22%3A%22and%22%2C%22properties%22%3A%5B%5D%7D%2C%22searchWord%22%3A%22%E6%95%B0%E6%8D%AE%E5%AE%89%E5%85%A8%22%7D",
|
|
|
|
|
),
|
|
|
|
|
rules = mapOf(
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https://sousuoht\\.www\\.gov\\.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE.+" to Rule(
|
|
|
|
|
parser = Parser.Type.JSON,
|
|
|
|
|
headers = mapOf(
|
|
|
|
|
"Content-Type" to "application/json",
|
|
|
|
|
"Athenaappkey" to "be5qL61qyHKQ87BVlqy%2BwloIkpcL1tCqC4yOM%2F%2BsqaSn0NxuA4GPOWocQu96h0PUaYBe9hz3DVvjGTECMNNou6cjAlUc3UoJlviQAipc4Ha8qxmIeS1sPqdGQ3Unm49j%2BsYN5T%2BzjqKxdA2yz5qLcG5wjZR7rqYVgT98NasByMA%3D",
|
|
|
|
|
"Athenaappname" to "%E5%9B%BD%E7%BD%91%E6%90%9C%E7%B4%A2",
|
|
|
|
|
),
|
|
|
|
|
properties = mapOf("post" to "true"),
|
|
|
|
|
list = Content(
|
|
|
|
|
expression = "$.result.data.middle.list",
|
|
|
|
|
title = Selector("$.title_no_tag"),
|
|
|
|
|
dateTime = Selector("$.time"),
|
|
|
|
|
link = Selector("$.url"),
|
|
|
|
|
),
|
|
|
|
|
next = Selector(
|
|
|
|
|
expression = "$.result.data.pager",
|
|
|
|
|
process = Process(
|
|
|
|
|
script = listOf(
|
|
|
|
|
Script(
|
|
|
|
|
Script.Type.Javascript,
|
|
|
|
|
// language=javascript
|
|
|
|
|
script = "let query = {\n code: '17da70961a7',\n historySearchWords: [],\n dataTypeId: '107',\n orderBy: 'time',\n searchBy: 'title',\n appendixType: '',\n granularity: 'ALL',\n trackTotalHits: true,\n beginDateTime: '',\n endDateTime: '',\n isSearchForced: 0,\n filters: [],\n pageNo: 1,\n pageSize: 10,\n customFilter: {\n operator: 'and',\n properties: []\n },\n searchWord: '数据要素'\n}\nif (text && text !== '') {\n let pager = JSON.parse(text)\n let current = pager['pageNo']\n let count = pager['pageCount']\n if (current < count) {\n query.pageNo = current + 1\n } else {\n return ''\n }\n}\nreturn `https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE?json=\${encodeURIComponent(JSON.stringify(query))}`\n"
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https*://www\\.gov\\.cn/.+/content_\\d+\\.html*" to Rule(
|
|
|
|
|
downloader = Downloader.Type.BROWSER,
|
|
|
|
|
text = Content(
|
|
|
|
|
expression = ".content .article",
|
|
|
|
|
title = Selector("h1#ti", process = Process(script = listOf(titleScript))),
|
|
|
|
|
author = Selector(
|
|
|
|
|
".pages-date > .font",
|
|
|
|
|
process = Process(
|
|
|
|
|
default = "中华人民共和国中央人民政府",
|
|
|
|
|
remove = listOf("来源:")
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
dateTime = Selector(
|
|
|
|
|
".pages-date",
|
|
|
|
|
process = Process(script = listOf(timeScript)),
|
|
|
|
|
properties = mapOf("precision" to "true")
|
|
|
|
|
),
|
|
|
|
|
content = Selector(".pages_content"),
|
|
|
|
|
extra = mapOf(
|
|
|
|
|
"source" to Selector(".pages_content", properties = mapOf("html" to "true"))
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
// language=regexp
|
|
|
|
|
"https://xcx\\.www\\.gov\\.cn/robot/gwypolicy/#/qaReadDetails.+" to Rule(
|
|
|
|
|
downloader = Downloader.Type.BROWSER,
|
|
|
|
|
text = Content(
|
|
|
|
|
expression = ".main-content",
|
|
|
|
|
title = Selector(".qa_content_box", process = Process(script = listOf(titleScript))),
|
|
|
|
|
author = Selector(".qa-subtext-item:contains(来源)", process = Process(remove = listOf("来\\s*源:"))),
|
|
|
|
|
dateTime = Selector(process = Process(script = listOf(timeScript))),
|
|
|
|
|
content = Selector(".qa_content_text"),
|
|
|
|
|
extra = mapOf(
|
|
|
|
|
"source" to Selector(".main-content", properties = mapOf("html" to "true"))
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
)
|
|
|
|
|
),
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
/**
 * The site definitions this file exposes to callers; the individual definitions
 * themselves are private top-level values.
 */
val sites = setOf(
    广东政务服务和数据管理局, 深圳市政务服务和数据管理局,
    中华人民共和国中央人民政府, 中华人民共和国国家互联网信息办公室,
)
|
|
|
|
|
|