feat: 增加spring环境的测试
This commit is contained in:
@@ -34,7 +34,9 @@ repositories {
|
|||||||
}
|
}
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation("com.lanyuanxiaoyao:squirrel-core-jvm:1.0.0-SNAPSHOT")
|
implementation("com.lanyuanxiaoyao:squirrel-core-jvm:1.0.0-SNAPSHOT") {
|
||||||
|
exclude("commons-logging", "commons-logging")
|
||||||
|
}
|
||||||
implementation("dev.failsafe:failsafe:3.3.2")
|
implementation("dev.failsafe:failsafe:3.3.2")
|
||||||
implementation("cn.bigmodel.openapi:oapi-java-sdk:release-V4-2.3.0")
|
implementation("cn.bigmodel.openapi:oapi-java-sdk:release-V4-2.3.0")
|
||||||
implementation("com.baidubce:qianfan:0.1.1")
|
implementation("com.baidubce:qianfan:0.1.1")
|
||||||
@@ -61,6 +63,7 @@ dependencies {
|
|||||||
implementation("org.jetbrains.kotlin:kotlin-reflect")
|
implementation("org.jetbrains.kotlin:kotlin-reflect")
|
||||||
// implementation("org.jetbrains.kotlin:kotlin-scripting-jsr223")
|
// implementation("org.jetbrains.kotlin:kotlin-scripting-jsr223")
|
||||||
|
|
||||||
|
testImplementation("org.springframework.boot:spring-boot-starter-test")
|
||||||
testImplementation("org.jetbrains.kotlin:kotlin-test")
|
testImplementation("org.jetbrains.kotlin:kotlin-test")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -3,8 +3,6 @@ package com.lanyuanxiaoyao.digtal.market
|
|||||||
import com.lanyuanxiaoyao.digtal.market.runner.NewsRunner
|
import com.lanyuanxiaoyao.digtal.market.runner.NewsRunner
|
||||||
import com.lanyuanxiaoyao.digtal.market.runner.PushRunner
|
import com.lanyuanxiaoyao.digtal.market.runner.PushRunner
|
||||||
import com.lanyuanxiaoyao.squirrel.core.common.Management
|
import com.lanyuanxiaoyao.squirrel.core.common.Management
|
||||||
import com.lanyuanxiaoyao.squirrel.core.jvm.BINARY_PATH
|
|
||||||
import com.lanyuanxiaoyao.squirrel.core.jvm.DRIVER_PATH
|
|
||||||
import com.lanyuanxiaoyao.squirrel.core.jvm.JvmManagement
|
import com.lanyuanxiaoyao.squirrel.core.jvm.JvmManagement
|
||||||
import com.lanyuanxiaoyao.squirrel.core.jvm.LocalFileDatabase
|
import com.lanyuanxiaoyao.squirrel.core.jvm.LocalFileDatabase
|
||||||
import jakarta.annotation.Resource
|
import jakarta.annotation.Resource
|
||||||
@@ -20,7 +18,6 @@ import org.springframework.context.ApplicationListener
|
|||||||
import org.springframework.context.annotation.Bean
|
import org.springframework.context.annotation.Bean
|
||||||
import org.springframework.context.annotation.Configuration
|
import org.springframework.context.annotation.Configuration
|
||||||
import org.springframework.context.event.ContextClosedEvent
|
import org.springframework.context.event.ContextClosedEvent
|
||||||
import org.springframework.scheduling.annotation.EnableScheduling
|
|
||||||
import org.springframework.web.servlet.config.annotation.CorsRegistry
|
import org.springframework.web.servlet.config.annotation.CorsRegistry
|
||||||
import org.springframework.web.servlet.config.annotation.WebMvcConfigurer
|
import org.springframework.web.servlet.config.annotation.WebMvcConfigurer
|
||||||
|
|
||||||
@@ -41,7 +38,7 @@ data class MailProperties @ConstructorBinding constructor(
|
|||||||
val targets: List<String>,
|
val targets: List<String>,
|
||||||
)
|
)
|
||||||
|
|
||||||
@EnableScheduling
|
// @EnableScheduling
|
||||||
@OptIn(ExperimentalStdlibApi::class)
|
@OptIn(ExperimentalStdlibApi::class)
|
||||||
@ConfigurationPropertiesScan
|
@ConfigurationPropertiesScan
|
||||||
@SpringBootApplication
|
@SpringBootApplication
|
||||||
@@ -73,7 +70,7 @@ class Configuration {
|
|||||||
@Bean
|
@Bean
|
||||||
fun jvmManagement(
|
fun jvmManagement(
|
||||||
driverProperties: DriverProperties,
|
driverProperties: DriverProperties,
|
||||||
databaseProperties: DatabaseProperties
|
databaseProperties: DatabaseProperties,
|
||||||
): Management = JvmManagement(
|
): Management = JvmManagement(
|
||||||
database = LocalFileDatabase(databaseProperties.jsonPath)
|
database = LocalFileDatabase(databaseProperties.jsonPath)
|
||||||
).also { management ->
|
).also { management ->
|
||||||
@@ -84,13 +81,7 @@ class Configuration {
|
|||||||
management.changeDownloader("basicCacheDownloader")
|
management.changeDownloader("basicCacheDownloader")
|
||||||
}
|
}
|
||||||
if (information.browserDownloaderName.isBlank()) {
|
if (information.browserDownloaderName.isBlank()) {
|
||||||
management.changeDownloader(
|
management.changeDownloader("htmlUnitCacheDownloader")
|
||||||
"chromeCacheDownloader",
|
|
||||||
mapOf(
|
|
||||||
BINARY_PATH to driverProperties.binaryPath,
|
|
||||||
DRIVER_PATH to driverProperties.driverPath,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
management.importSites(sites)
|
management.importSites(sites)
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
@file:Suppress("NonAsciiCharacters", "ObjectPropertyName")
|
||||||
|
|
||||||
package com.lanyuanxiaoyao.digtal.market
|
package com.lanyuanxiaoyao.digtal.market
|
||||||
|
|
||||||
import com.lanyuanxiaoyao.squirrel.core.common.Content
|
import com.lanyuanxiaoyao.squirrel.core.common.Content
|
||||||
@@ -25,8 +27,7 @@ private val titleScript = Script(
|
|||||||
script = "if (text && text !== '') {\n return text\n} else if(params['title']) {\n return params['title']\n} else {\n return text\n}",
|
script = "if (text && text !== '') {\n return text\n} else if(params['title']) {\n return params['title']\n} else {\n return text\n}",
|
||||||
)
|
)
|
||||||
|
|
||||||
val sites = setOf(
|
private val 广东政务服务和数据管理局 = Site(
|
||||||
Site(
|
|
||||||
code = "74ee6b33-c1a3-41f9-b947-acd0bebd0e6e",
|
code = "74ee6b33-c1a3-41f9-b947-acd0bebd0e6e",
|
||||||
name = "广东政务服务和数据管理局",
|
name = "广东政务服务和数据管理局",
|
||||||
home = "https://zfsg.gd.gov.cn",
|
home = "https://zfsg.gd.gov.cn",
|
||||||
@@ -106,8 +107,9 @@ val sites = setOf(
|
|||||||
)
|
)
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
),
|
)
|
||||||
Site(
|
|
||||||
|
private val 深圳市政务服务和数据管理局 = Site(
|
||||||
code = "189a0e12-9319-4a4b-bf3b-34a6282e6f68",
|
code = "189a0e12-9319-4a4b-bf3b-34a6282e6f68",
|
||||||
name = "深圳市政务服务和数据管理局",
|
name = "深圳市政务服务和数据管理局",
|
||||||
home = "https://www.sz.gov.cn/szzsj/gkmlpt/index",
|
home = "https://www.sz.gov.cn/szzsj/gkmlpt/index",
|
||||||
@@ -169,8 +171,9 @@ val sites = setOf(
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
),
|
)
|
||||||
Site(
|
|
||||||
|
private val 中华人民共和国中央人民政府 = Site(
|
||||||
code = "00bfe1f5-7662-4ea5-ada3-6e9dfc19f0be",
|
code = "00bfe1f5-7662-4ea5-ada3-6e9dfc19f0be",
|
||||||
name = "中华人民共和国中央人民政府",
|
name = "中华人民共和国中央人民政府",
|
||||||
home = "https://www.gov.cn",
|
home = "https://www.gov.cn",
|
||||||
@@ -256,5 +259,56 @@ val sites = setOf(
|
|||||||
)
|
)
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
),
|
)
|
||||||
|
|
||||||
|
/**
 * Builds the first-page search URL on search.cac.gov.cn for [keyword].
 *
 * Only the `huopro` query parameter differs between the search tags; every other
 * parameter, including the literal `$$$` values, is kept exactly as in the
 * hand-written URLs this helper replaces.
 */
private fun cacSearchUrl(keyword: String): String =
    "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=$keyword&mustpro=&notpro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09"

/**
 * Site definition for www.cac.gov.cn, crawled through its keyword search pages.
 *
 * Each tag maps a search topic to the search URL for that same keyword, so the
 * tag name and the `huopro` value always agree.
 */
private val 中华人民共和国国家互联网信息办公室 = Site(
    code = "1df28c35-1e9e-4d58-9595-f08029b160b4",
    name = "中华人民共和国国家互联网信息办公室",
    home = "https://www.cac.gov.cn",
    icon = "https://www.cac.gov.cn/favicon.ico",
    parser = Parser.Type.CSS,
    author = "lanyuanxiaoyao",
    target = Site.Target.TEXT,
    downloader = Downloader.Type.HTTP,
    tags = mapOf(
        "搜索-数据要素" to cacSearchUrl("数据要素"),
        "搜索-数据跨境" to cacSearchUrl("数据跨境"),
        "搜索-数据基础设施" to cacSearchUrl("数据基础设施"),
        "搜索-数据安全" to cacSearchUrl("数据安全"),
        "搜索-数据交易" to cacSearchUrl("数据交易"),
    ),
    rules = mapOf(
        // Search result pages: any page number and any keyword.
        "https://search\\.cac\\.gov\\.cn/cms/cmsadmin/infopub/gjjs\\.jsp\\?templetid=1563339473064626&pubtype=S&pubpath=portal&page=\\d*&webappcode=A09&huopro=.+&mustpro=&notpro=&inpro=&startDate=\\\$*&endDate=\\\$*&sort=1&searchfield=\\\$*&searchdir=A09" to Rule(
            list = Content(
                expression = ".xpage-container .list-item",
                title = Selector("a", process = Process(remove = listOf("\\s*»\\s*"))),
                dateTime = Selector(".search_time"),
                // The `https:` prefix is prepended to each extracted href.
                link = Selector("a", "href", process = Process(prefix = "https:"))
            ),
            next = Selector(
                ".xpage-pagination .xpage-pagination-next a:contains(下一页)",
                "href",
                Process(prefix = "https://search.cac.gov.cn/cms/cmsadmin/infopub/")
            )
        ),
        // Article pages on www.cac.gov.cn.
        "https://www\\.cac\\.gov\\.cn/.+/c_\\d+\\.htm" to Rule(
            text = Content(
                expression = ".main",
                title = Selector("h1.title"),
                author = Selector("#source", process = Process(remove = listOf("来源:"))),
                dateTime = Selector("#pubtime"),
                content = Selector(".main-content"),
                extra = mapOf(
                    "source" to Selector(".main", properties = mapOf("html" to "true"))
                )
            )
        )
    )
)
|
||||||
|
|
||||||
|
val sites = setOf(
|
||||||
|
广东政务服务和数据管理局,
|
||||||
|
深圳市政务服务和数据管理局,
|
||||||
|
中华人民共和国中央人民政府,
|
||||||
|
中华人民共和国国家互联网信息办公室,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -125,7 +125,7 @@ class NewsRunner : Runner {
|
|||||||
description = null,
|
description = null,
|
||||||
score = null,
|
score = null,
|
||||||
createTime = createTime,
|
createTime = createTime,
|
||||||
pushed = null,
|
pushed = false,
|
||||||
)
|
)
|
||||||
} ?: return null
|
} ?: return null
|
||||||
}
|
}
|
||||||
@@ -159,7 +159,7 @@ class NewsRunner : Runner {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
next = page["next"] as String?
|
next = page["next"] as String?
|
||||||
} while (recursive and !next.isNullOrBlank())
|
} while (recursive && !next.isNullOrBlank())
|
||||||
return links
|
return links
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,48 @@
|
|||||||
|
package com.lanyuanxiaoyao.digtal.market
|
||||||
|
|
||||||
|
import cn.hutool.json.JSONUtil
|
||||||
|
import com.lanyuanxiaoyao.digtal.market.runner.NewsRunner
|
||||||
|
import com.lanyuanxiaoyao.squirrel.core.common.Management
|
||||||
|
import jakarta.annotation.Resource
|
||||||
|
import org.junit.jupiter.api.Test
|
||||||
|
import org.slf4j.LoggerFactory
|
||||||
|
import org.springframework.boot.test.context.SpringBootTest
|
||||||
|
|
||||||
|
/**
 * Smoke tests that run [NewsRunner] inside the full Spring application context
 * against the live cac.gov.cn site.
 *
 * The tests only log what was parsed; they make no assertions on the content.
 */
@SpringBootTest(
    classes = [Application::class],
    webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
)
@ExperimentalStdlibApi
class TestManagement {
    private val logger = LoggerFactory.getLogger(javaClass)

    // Not referenced by the tests below; kept so the injected Management bean stays part of the fixture.
    @Resource
    private lateinit var management: Management

    @Resource
    private lateinit var newsRunner: NewsRunner

    /** Parses a search result list for the keyword "数据要素" and logs every entry found. */
    @Test
    fun testParseList() {
        newsRunner
            .parseArticleLink(
                CAC_SITE_CODE,
                "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据要素&mustpro=&notpro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
                true, // NOTE(review): presumably the "recursive" (follow next page) flag — confirm against NewsRunner
            )
            .forEach { logger.info("{} {} {}", it.datetime, it.url, it.title) }
    }

    /** Parses one article page and logs it as pretty-printed JSON; logs nothing when the result is null. */
    @Test
    fun testParseArticle() {
        newsRunner
            .parseArticle(
                CAC_SITE_CODE,
                "https://www.cac.gov.cn/2024-10/14/c_1730595202555062.htm",
                "no title",
                "no datetime",
                "",
            )
            ?.let { logger.info("{}", JSONUtil.toJsonPrettyStr(it)) }
    }

    private companion object {
        /** Site code that both tests pass to the runner as the site identifier. */
        const val CAC_SITE_CODE = "1df28c35-1e9e-4d58-9595-f08029b160b4"
    }
}
|
||||||
Reference in New Issue
Block a user