1
0

feat: 优化title和datetime从服务端进行处理

This commit is contained in:
2024-11-05 16:55:03 +08:00
parent bd67c9f830
commit 3175ab7a8c
2 changed files with 33 additions and 48 deletions

View File

@@ -17,18 +17,6 @@ private val iframe = Pair("iframe", "true")
private val post = Pair("post", "true") private val post = Pair("post", "true")
private val form = Pair("form", "true") private val form = Pair("form", "true")
/**
 * JavaScript post-processing snippet for date/time values.
 *
 * The script returns `text` when it is truthy and not the empty string; otherwise it
 * returns `params['datetime']` when that entry is truthy; otherwise it returns `text`
 * unchanged (which at that point is an empty or otherwise falsy value).
 *
 * NOTE(review): `text` and `params` are not defined in the visible code — presumably they
 * are supplied by whatever executes a [Script]; confirm against the script runner.
 */
private val timeScript = Script(
Script.Type.Javascript,
// language=javascript
script = "if (text && text !== '') {\n return text\n} else if(params['datetime']) {\n return params['datetime']\n} else {\n return text\n}",
)
/**
 * JavaScript post-processing snippet for title values.
 *
 * The script returns `text` when it is truthy and not the empty string; otherwise it
 * returns `params['title']` when that entry is truthy; otherwise it returns `text`
 * unchanged (which at that point is an empty or otherwise falsy value).
 *
 * NOTE(review): `text` and `params` are not defined in the visible code — presumably they
 * are supplied by whatever executes a [Script]; confirm against the script runner.
 */
private val titleScript = Script(
Script.Type.Javascript,
// language=javascript
script = "if (text && text !== '') {\n return text\n} else if(params['title']) {\n return params['title']\n} else {\n return text\n}",
)
// language=regexp // language=regexp
private val commonRemove = listOf( private val commonRemove = listOf(
"<.+?>", "<.+?>",
@@ -128,7 +116,6 @@ private val 广东政务服务和数据管理局 = Site(
expression = "body", expression = "body",
title = Selector(".title-page .txt > span"), title = Selector(".title-page .txt > span"),
author = Selector(process = Process(default = "广东政务服务和数据管理局")), author = Selector(process = Process(default = "广东政务服务和数据管理局")),
dateTime = Selector(process = Process(script = listOf(timeScript))),
content = Selector(".content"), content = Selector(".content"),
extra = mapOf( extra = mapOf(
"source" to Selector(".content", properties = mapOf(html)) "source" to Selector(".content", properties = mapOf(html))
@@ -254,7 +241,7 @@ private val 中华人民共和国中央人民政府 = Site(
downloader = Downloader.Type.BROWSER, downloader = Downloader.Type.BROWSER,
text = Content( text = Content(
expression = ".content .article", expression = ".content .article",
title = Selector("h1#ti", process = Process(script = listOf(titleScript))), title = Selector("h1#ti"),
author = Selector( author = Selector(
".pages-date > .font", ".pages-date > .font",
process = Process( process = Process(
@@ -262,11 +249,7 @@ private val 中华人民共和国中央人民政府 = Site(
remove = listOf("来源:") remove = listOf("来源:")
) )
), ),
dateTime = Selector( dateTime = Selector(".pages-date", properties = mapOf("precision" to "true")),
".pages-date",
process = Process(script = listOf(timeScript)),
properties = mapOf("precision" to "true")
),
content = Selector(".pages_content"), content = Selector(".pages_content"),
extra = mapOf( extra = mapOf(
"source" to Selector(".pages_content", properties = mapOf("html" to "true")) "source" to Selector(".pages_content", properties = mapOf("html" to "true"))
@@ -278,9 +261,8 @@ private val 中华人民共和国中央人民政府 = Site(
downloader = Downloader.Type.BROWSER, downloader = Downloader.Type.BROWSER,
text = Content( text = Content(
expression = ".main-content", expression = ".main-content",
title = Selector(".qa_content_box", process = Process(script = listOf(titleScript))), title = Selector(".qa_content_box"),
author = Selector(".qa-subtext-item:contains(来源)", process = Process(remove = listOf("\\s*源:"))), author = Selector(".qa-subtext-item:contains(来源)", process = Process(remove = listOf("\\s*源:"))),
dateTime = Selector(process = Process(script = listOf(timeScript))),
content = Selector(".qa_content_text"), content = Selector(".qa_content_text"),
extra = mapOf( extra = mapOf(
"source" to Selector(".main-content", properties = mapOf("html" to "true")) "source" to Selector(".main-content", properties = mapOf("html" to "true"))
@@ -377,17 +359,6 @@ val 广州数据交易所 = Site(
expression = "$.data", expression = "$.data",
title = Selector("$.TITLE"), title = Selector("$.TITLE"),
content = Selector("$.CONTENT", process = Process(remove = commonRemove)), content = Selector("$.CONTENT", process = Process(remove = commonRemove)),
dateTime = Selector(
process = Process(
script = listOf(
Script(
Script.Type.Javascript,
// language=javascript
"let datetimeText = params['datetime']\nif (datetimeText && datetimeText !== '') {\n let datetime = new Date(parseInt(params['datetime']))\n return `\${datetime.getFullYear()}-\${datetime.getMonth() + 1}-\${datetime.getDate()} \${datetime.getHours()}:\${datetime.getMinutes()}:\${datetime.getSeconds()}`\n}\nreturn ''",
)
)
)
),
author = Selector(process = Process(default = "广州数据交易所")) author = Selector(process = Process(default = "广州数据交易所"))
), ),
properties = mapOf(post), properties = mapOf(post),
@@ -425,13 +396,17 @@ val 北京市政务服务和数据管理局 = Site(
), ),
link = Selector("$.data.url") link = Selector("$.data.url")
), ),
next = Selector("$.totalHits", process = Process(script = listOf( next = Selector(
Script( "$.totalHits", process = Process(
Script.Type.Javascript, script = listOf(
// language=javascript Script(
"if (text && text !== '') {\n let total = parseInt(text)\n let current = parseInt(params['page'] ?? '1')\n let size = parseInt(params['pageSize'] ?? 20)\n if (current * size >= total) {\n return ''\n }\n return params['url'].replace(/page=\\d+/, 'page=' + (current + 1))\n} else {\n return ''\n}" Script.Type.Javascript,
)) // language=javascript
)) "if (text && text !== '') {\n let total = parseInt(text)\n let current = parseInt(params['page'] ?? '1')\n let size = parseInt(params['pageSize'] ?? 20)\n if (current * size >= total) {\n return ''\n }\n return params['url'].replace(/page=\\d+/, 'page=' + (current + 1))\n} else {\n return ''\n}"
)
)
)
)
), ),
// language=regexp // language=regexp
"https://zwfwj\\.beijing\\.gov\\.cn/.+t\\d+_\\d+.html" to Rule( "https://zwfwj\\.beijing\\.gov\\.cn/.+t\\d+_\\d+.html" to Rule(

View File

@@ -109,15 +109,25 @@ class NewsRunner : Runner {
} }
(page["text"] as? Map<*, *>)?.let { text -> (page["text"] as? Map<*, *>)?.let { text ->
val createTime = try { val createTime = try {
DateUtil var datetimeText = text["datetime"] as? String
.parse( if (datetimeText.isNullOrBlank()) {
(text["datetime"] as String?)?.trim(), datetimeText = datetime
"yyyy-MM-dd HH:mm:ss", }
"yyyy-MM-dd", if (datetimeText?.matches(Regex("\\d{10}")) == true) {
"yyyy年MM月dd日 HH:mm", DateUtil.date(datetimeText.toLong()).toTimestamp()
"yyyy-MM-dd HH:mm", } else if (datetimeText?.matches(Regex("\\d{7}")) == true) {
) DateUtil.date(datetimeText.toLong() * 1000).toTimestamp()
.toTimestamp() } else {
DateUtil
.parse(
(text["datetime"] as String?)?.trim(),
"yyyy-MM-dd HH:mm:ss",
"yyyy-MM-dd",
"yyyy年MM月dd日 HH:mm",
"yyyy-MM-dd HH:mm",
)
.toTimestamp()
}
} catch (e: Exception) { } catch (e: Exception) {
logger.error("Parse ${text["datetime"]} error", e) logger.error("Parse ${text["datetime"]} error", e)
null null