feat: 增加spring环境的测试
This commit is contained in:
@@ -34,7 +34,9 @@ repositories {
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation("com.lanyuanxiaoyao:squirrel-core-jvm:1.0.0-SNAPSHOT")
|
||||
implementation("com.lanyuanxiaoyao:squirrel-core-jvm:1.0.0-SNAPSHOT") {
|
||||
exclude("commons-logging", "commons-logging")
|
||||
}
|
||||
implementation("dev.failsafe:failsafe:3.3.2")
|
||||
implementation("cn.bigmodel.openapi:oapi-java-sdk:release-V4-2.3.0")
|
||||
implementation("com.baidubce:qianfan:0.1.1")
|
||||
@@ -61,6 +63,7 @@ dependencies {
|
||||
implementation("org.jetbrains.kotlin:kotlin-reflect")
|
||||
// implementation("org.jetbrains.kotlin:kotlin-scripting-jsr223")
|
||||
|
||||
testImplementation("org.springframework.boot:spring-boot-starter-test")
|
||||
testImplementation("org.jetbrains.kotlin:kotlin-test")
|
||||
}
|
||||
|
||||
|
||||
@@ -3,8 +3,6 @@ package com.lanyuanxiaoyao.digtal.market
|
||||
import com.lanyuanxiaoyao.digtal.market.runner.NewsRunner
|
||||
import com.lanyuanxiaoyao.digtal.market.runner.PushRunner
|
||||
import com.lanyuanxiaoyao.squirrel.core.common.Management
|
||||
import com.lanyuanxiaoyao.squirrel.core.jvm.BINARY_PATH
|
||||
import com.lanyuanxiaoyao.squirrel.core.jvm.DRIVER_PATH
|
||||
import com.lanyuanxiaoyao.squirrel.core.jvm.JvmManagement
|
||||
import com.lanyuanxiaoyao.squirrel.core.jvm.LocalFileDatabase
|
||||
import jakarta.annotation.Resource
|
||||
@@ -20,7 +18,6 @@ import org.springframework.context.ApplicationListener
|
||||
import org.springframework.context.annotation.Bean
|
||||
import org.springframework.context.annotation.Configuration
|
||||
import org.springframework.context.event.ContextClosedEvent
|
||||
import org.springframework.scheduling.annotation.EnableScheduling
|
||||
import org.springframework.web.servlet.config.annotation.CorsRegistry
|
||||
import org.springframework.web.servlet.config.annotation.WebMvcConfigurer
|
||||
|
||||
@@ -41,7 +38,7 @@ data class MailProperties @ConstructorBinding constructor(
|
||||
val targets: List<String>,
|
||||
)
|
||||
|
||||
@EnableScheduling
|
||||
// @EnableScheduling
|
||||
@OptIn(ExperimentalStdlibApi::class)
|
||||
@ConfigurationPropertiesScan
|
||||
@SpringBootApplication
|
||||
@@ -73,7 +70,7 @@ class Configuration {
|
||||
@Bean
|
||||
fun jvmManagement(
|
||||
driverProperties: DriverProperties,
|
||||
databaseProperties: DatabaseProperties
|
||||
databaseProperties: DatabaseProperties,
|
||||
): Management = JvmManagement(
|
||||
database = LocalFileDatabase(databaseProperties.jsonPath)
|
||||
).also { management ->
|
||||
@@ -84,13 +81,7 @@ class Configuration {
|
||||
management.changeDownloader("basicCacheDownloader")
|
||||
}
|
||||
if (information.browserDownloaderName.isBlank()) {
|
||||
management.changeDownloader(
|
||||
"chromeCacheDownloader",
|
||||
mapOf(
|
||||
BINARY_PATH to driverProperties.binaryPath,
|
||||
DRIVER_PATH to driverProperties.driverPath,
|
||||
)
|
||||
)
|
||||
management.changeDownloader("htmlUnitCacheDownloader")
|
||||
}
|
||||
}
|
||||
management.importSites(sites)
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
@file:Suppress("NonAsciiCharacters", "ObjectPropertyName")
|
||||
|
||||
package com.lanyuanxiaoyao.digtal.market
|
||||
|
||||
import com.lanyuanxiaoyao.squirrel.core.common.Content
|
||||
@@ -25,8 +27,7 @@ private val titleScript = Script(
|
||||
script = "if (text && text !== '') {\n return text\n} else if(params['title']) {\n return params['title']\n} else {\n return text\n}",
|
||||
)
|
||||
|
||||
val sites = setOf(
|
||||
Site(
|
||||
private val 广东政务服务和数据管理局 = Site(
|
||||
code = "74ee6b33-c1a3-41f9-b947-acd0bebd0e6e",
|
||||
name = "广东政务服务和数据管理局",
|
||||
home = "https://zfsg.gd.gov.cn",
|
||||
@@ -106,8 +107,9 @@ val sites = setOf(
|
||||
)
|
||||
),
|
||||
)
|
||||
),
|
||||
Site(
|
||||
)
|
||||
|
||||
private val 深圳市政务服务和数据管理局 = Site(
|
||||
code = "189a0e12-9319-4a4b-bf3b-34a6282e6f68",
|
||||
name = "深圳市政务服务和数据管理局",
|
||||
home = "https://www.sz.gov.cn/szzsj/gkmlpt/index",
|
||||
@@ -169,8 +171,9 @@ val sites = setOf(
|
||||
)
|
||||
)
|
||||
)
|
||||
),
|
||||
Site(
|
||||
)
|
||||
|
||||
private val 中华人民共和国中央人民政府 = Site(
|
||||
code = "00bfe1f5-7662-4ea5-ada3-6e9dfc19f0be",
|
||||
name = "中华人民共和国中央人民政府",
|
||||
home = "https://www.gov.cn",
|
||||
@@ -256,5 +259,56 @@ val sites = setOf(
|
||||
)
|
||||
),
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
/**
 * Builds the search.cac.gov.cn first-page search URL for [keyword].
 *
 * The keyword is placed in the `huopro` query parameter; every other parameter is fixed.
 *
 * NOTE(review): the `¬pro=` fragment looks like `&notpro=` after an HTML-entity decode
 * (`&not` -> `¬`). It is kept unchanged so these URLs keep matching the list rule's pattern
 * below — confirm against the live search URL before changing it.
 */
private fun cacSearchUrl(keyword: String): String =
    "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=$keyword&mustpro=¬pro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09"

/**
 * Site definition for www.cac.gov.cn.
 *
 * `tags` maps a label to a keyword search URL; each tag searches for the keyword it is named
 * after (previously three of the tags reused the "数据跨境" keyword by copy-paste).
 * `rules` holds two URL patterns: the first matches search result pages and describes the list
 * items plus the next-page link, the second matches article pages and describes the text fields.
 */
private val 中华人民共和国国家互联网信息办公室 = Site(
    code = "1df28c35-1e9e-4d58-9595-f08029b160b4",
    name = "中华人民共和国国家互联网信息办公室",
    home = "https://www.cac.gov.cn",
    icon = "https://www.cac.gov.cn/favicon.ico",
    parser = Parser.Type.CSS,
    author = "lanyuanxiaoyao",
    target = Site.Target.TEXT,
    downloader = Downloader.Type.HTTP,
    tags = mapOf(
        "搜索-数据要素" to cacSearchUrl("数据要素"),
        "搜索-数据跨境" to cacSearchUrl("数据跨境"),
        "搜索-数据基础设施" to cacSearchUrl("数据基础设施"),
        "搜索-数据安全" to cacSearchUrl("数据安全"),
        "搜索-数据交易" to cacSearchUrl("数据交易"),
    ),
    rules = mapOf(
        // Search result pages: any page number and any keyword.
        "https://search\\.cac\\.gov\\.cn/cms/cmsadmin/infopub/gjjs\\.jsp\\?templetid=1563339473064626&pubtype=S&pubpath=portal&page=\\d*&webappcode=A09&huopro=.+&mustpro=¬pro=&inpro=&startDate=\\\$*&endDate=\\\$*&sort=1&searchfield=\\\$*&searchdir=A09" to Rule(
            list = Content(
                expression = ".xpage-container .list-item",
                title = Selector("a", process = Process(remove = listOf("\\s*»\\s*"))),
                dateTime = Selector(".search_time"),
                // The extracted href gets an "https:" prefix.
                link = Selector("a", "href", process = Process(prefix = "https:")),
            ),
            // The next-page href gets the search endpoint's base path as prefix.
            next = Selector(
                ".xpage-pagination .xpage-pagination-next a:contains(下一页)",
                "href",
                Process(prefix = "https://search.cac.gov.cn/cms/cmsadmin/infopub/"),
            ),
        ),
        // Article pages.
        "https://www\\.cac\\.gov\\.cn/.+/c_\\d+\\.htm" to Rule(
            text = Content(
                expression = ".main",
                title = Selector("h1.title"),
                author = Selector("#source", process = Process(remove = listOf("来源:"))),
                dateTime = Selector("#pubtime"),
                content = Selector(".main-content"),
                extra = mapOf(
                    "source" to Selector(".main", properties = mapOf("html" to "true")),
                ),
            ),
        ),
    ),
)
|
||||
|
||||
/** The site definitions declared in this file, collected in declaration order. */
val sites: Set<Site> = linkedSetOf(
    广东政务服务和数据管理局,
    深圳市政务服务和数据管理局,
    中华人民共和国中央人民政府,
    中华人民共和国国家互联网信息办公室,
)
|
||||
|
||||
@@ -125,7 +125,7 @@ class NewsRunner : Runner {
|
||||
description = null,
|
||||
score = null,
|
||||
createTime = createTime,
|
||||
pushed = null,
|
||||
pushed = false,
|
||||
)
|
||||
} ?: return null
|
||||
}
|
||||
@@ -159,7 +159,7 @@ class NewsRunner : Runner {
|
||||
}
|
||||
}
|
||||
next = page["next"] as String?
|
||||
} while (recursive and !next.isNullOrBlank())
|
||||
} while (recursive && !next.isNullOrBlank())
|
||||
return links
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
package com.lanyuanxiaoyao.digtal.market
|
||||
|
||||
import cn.hutool.json.JSONUtil
|
||||
import com.lanyuanxiaoyao.digtal.market.runner.NewsRunner
|
||||
import com.lanyuanxiaoyao.squirrel.core.common.Management
|
||||
import jakarta.annotation.Resource
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.slf4j.LoggerFactory
|
||||
import org.springframework.boot.test.context.SpringBootTest
|
||||
|
||||
/**
 * Spring-context tests that run [NewsRunner] parsing against fixed www.cac.gov.cn URLs.
 *
 * Both tests only log what was parsed; neither asserts on the result.
 */
@SpringBootTest(
    webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
    classes = [Application::class],
)
@ExperimentalStdlibApi
class TestManagement {
    private val logger = LoggerFactory.getLogger(javaClass)

    @Resource
    private lateinit var management: Management

    @Resource
    private lateinit var newsRunner: NewsRunner

    /** Parses the "数据要素" search listing and logs datetime, url and title of every returned link. */
    @Test
    fun testParseList() {
        val links = newsRunner.parseArticleLink(
            "1df28c35-1e9e-4d58-9595-f08029b160b4",
            "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据要素&mustpro=¬pro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
            true,
        )
        links.forEach { link ->
            logger.info("{} {} {}", link.datetime, link.url, link.title)
        }
    }

    /** Parses one fixed article page and, when a result comes back, logs it as pretty-printed JSON. */
    @Test
    fun testParseArticle() {
        val article = newsRunner.parseArticle(
            "1df28c35-1e9e-4d58-9595-f08029b160b4",
            "https://www.cac.gov.cn/2024-10/14/c_1730595202555062.htm",
            "no title",
            "no datetime",
            "",
        ) ?: return // nothing parsed: nothing to log
        logger.info("{}", JSONUtil.toJsonPrettyStr(article))
    }
}
|
||||
Reference in New Issue
Block a user