1
0

feat: 增加spring环境的测试

This commit is contained in:
2024-11-04 09:23:38 +08:00
parent a89f4ab8b1
commit 5829b6e145
5 changed files with 329 additions and 233 deletions

View File

@@ -34,7 +34,9 @@ repositories {
} }
dependencies { dependencies {
implementation("com.lanyuanxiaoyao:squirrel-core-jvm:1.0.0-SNAPSHOT") implementation("com.lanyuanxiaoyao:squirrel-core-jvm:1.0.0-SNAPSHOT") {
exclude("commons-logging", "commons-logging")
}
implementation("dev.failsafe:failsafe:3.3.2") implementation("dev.failsafe:failsafe:3.3.2")
implementation("cn.bigmodel.openapi:oapi-java-sdk:release-V4-2.3.0") implementation("cn.bigmodel.openapi:oapi-java-sdk:release-V4-2.3.0")
implementation("com.baidubce:qianfan:0.1.1") implementation("com.baidubce:qianfan:0.1.1")
@@ -61,6 +63,7 @@ dependencies {
implementation("org.jetbrains.kotlin:kotlin-reflect") implementation("org.jetbrains.kotlin:kotlin-reflect")
// implementation("org.jetbrains.kotlin:kotlin-scripting-jsr223") // implementation("org.jetbrains.kotlin:kotlin-scripting-jsr223")
testImplementation("org.springframework.boot:spring-boot-starter-test")
testImplementation("org.jetbrains.kotlin:kotlin-test") testImplementation("org.jetbrains.kotlin:kotlin-test")
} }

View File

@@ -3,8 +3,6 @@ package com.lanyuanxiaoyao.digtal.market
import com.lanyuanxiaoyao.digtal.market.runner.NewsRunner import com.lanyuanxiaoyao.digtal.market.runner.NewsRunner
import com.lanyuanxiaoyao.digtal.market.runner.PushRunner import com.lanyuanxiaoyao.digtal.market.runner.PushRunner
import com.lanyuanxiaoyao.squirrel.core.common.Management import com.lanyuanxiaoyao.squirrel.core.common.Management
import com.lanyuanxiaoyao.squirrel.core.jvm.BINARY_PATH
import com.lanyuanxiaoyao.squirrel.core.jvm.DRIVER_PATH
import com.lanyuanxiaoyao.squirrel.core.jvm.JvmManagement import com.lanyuanxiaoyao.squirrel.core.jvm.JvmManagement
import com.lanyuanxiaoyao.squirrel.core.jvm.LocalFileDatabase import com.lanyuanxiaoyao.squirrel.core.jvm.LocalFileDatabase
import jakarta.annotation.Resource import jakarta.annotation.Resource
@@ -20,7 +18,6 @@ import org.springframework.context.ApplicationListener
import org.springframework.context.annotation.Bean import org.springframework.context.annotation.Bean
import org.springframework.context.annotation.Configuration import org.springframework.context.annotation.Configuration
import org.springframework.context.event.ContextClosedEvent import org.springframework.context.event.ContextClosedEvent
import org.springframework.scheduling.annotation.EnableScheduling
import org.springframework.web.servlet.config.annotation.CorsRegistry import org.springframework.web.servlet.config.annotation.CorsRegistry
import org.springframework.web.servlet.config.annotation.WebMvcConfigurer import org.springframework.web.servlet.config.annotation.WebMvcConfigurer
@@ -41,7 +38,7 @@ data class MailProperties @ConstructorBinding constructor(
val targets: List<String>, val targets: List<String>,
) )
@EnableScheduling // @EnableScheduling
@OptIn(ExperimentalStdlibApi::class) @OptIn(ExperimentalStdlibApi::class)
@ConfigurationPropertiesScan @ConfigurationPropertiesScan
@SpringBootApplication @SpringBootApplication
@@ -73,7 +70,7 @@ class Configuration {
@Bean @Bean
fun jvmManagement( fun jvmManagement(
driverProperties: DriverProperties, driverProperties: DriverProperties,
databaseProperties: DatabaseProperties databaseProperties: DatabaseProperties,
): Management = JvmManagement( ): Management = JvmManagement(
database = LocalFileDatabase(databaseProperties.jsonPath) database = LocalFileDatabase(databaseProperties.jsonPath)
).also { management -> ).also { management ->
@@ -84,13 +81,7 @@ class Configuration {
management.changeDownloader("basicCacheDownloader") management.changeDownloader("basicCacheDownloader")
} }
if (information.browserDownloaderName.isBlank()) { if (information.browserDownloaderName.isBlank()) {
management.changeDownloader( management.changeDownloader("htmlUnitCacheDownloader")
"chromeCacheDownloader",
mapOf(
BINARY_PATH to driverProperties.binaryPath,
DRIVER_PATH to driverProperties.driverPath,
)
)
} }
} }
management.importSites(sites) management.importSites(sites)

View File

@@ -1,3 +1,5 @@
@file:Suppress("NonAsciiCharacters", "ObjectPropertyName")
package com.lanyuanxiaoyao.digtal.market package com.lanyuanxiaoyao.digtal.market
import com.lanyuanxiaoyao.squirrel.core.common.Content import com.lanyuanxiaoyao.squirrel.core.common.Content
@@ -25,8 +27,7 @@ private val titleScript = Script(
script = "if (text && text !== '') {\n return text\n} else if(params['title']) {\n return params['title']\n} else {\n return text\n}", script = "if (text && text !== '') {\n return text\n} else if(params['title']) {\n return params['title']\n} else {\n return text\n}",
) )
val sites = setOf( private val 广东政务服务和数据管理局 = Site(
Site(
code = "74ee6b33-c1a3-41f9-b947-acd0bebd0e6e", code = "74ee6b33-c1a3-41f9-b947-acd0bebd0e6e",
name = "广东政务服务和数据管理局", name = "广东政务服务和数据管理局",
home = "https://zfsg.gd.gov.cn", home = "https://zfsg.gd.gov.cn",
@@ -106,8 +107,9 @@ val sites = setOf(
) )
), ),
) )
), )
Site(
private val 深圳市政务服务和数据管理局 = Site(
code = "189a0e12-9319-4a4b-bf3b-34a6282e6f68", code = "189a0e12-9319-4a4b-bf3b-34a6282e6f68",
name = "深圳市政务服务和数据管理局", name = "深圳市政务服务和数据管理局",
home = "https://www.sz.gov.cn/szzsj/gkmlpt/index", home = "https://www.sz.gov.cn/szzsj/gkmlpt/index",
@@ -169,8 +171,9 @@ val sites = setOf(
) )
) )
) )
), )
Site(
private val 中华人民共和国中央人民政府 = Site(
code = "00bfe1f5-7662-4ea5-ada3-6e9dfc19f0be", code = "00bfe1f5-7662-4ea5-ada3-6e9dfc19f0be",
name = "中华人民共和国中央人民政府", name = "中华人民共和国中央人民政府",
home = "https://www.gov.cn", home = "https://www.gov.cn",
@@ -256,5 +259,56 @@ val sites = setOf(
) )
), ),
) )
), )
/**
 * Site definition for the Cyberspace Administration of China (https://www.cac.gov.cn).
 *
 * [Site.tags] maps a tag name to the keyword-search URL used as the crawl entry point for
 * that tag; the keyword is carried in the `huopro` query parameter and must match the tag
 * name, otherwise several tags silently crawl the same result list.
 *
 * [Site.rules] holds two URL-pattern keyed rules:
 * - the search result page on `search.cac.gov.cn`, parsed as a list with a "next page" link;
 * - the article page on `www.cac.gov.cn` (`.../c_<digits>.htm`), parsed as text content.
 */
private val 中华人民共和国国家互联网信息办公室 = Site(
    code = "1df28c35-1e9e-4d58-9595-f08029b160b4",
    name = "中华人民共和国国家互联网信息办公室",
    home = "https://www.cac.gov.cn",
    icon = "https://www.cac.gov.cn/favicon.ico",
    parser = Parser.Type.CSS,
    author = "lanyuanxiaoyao",
    target = Site.Target.TEXT,
    downloader = Downloader.Type.HTTP,
    tags = mapOf(
        // Every URL below is identical except for the `huopro` search keyword.
        "搜索-数据要素" to "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据要素&mustpro=&notpro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
        "搜索-数据跨境" to "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据跨境&mustpro=&notpro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
        "搜索-数据基础设施" to "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据基础设施&mustpro=&notpro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
        "搜索-数据安全" to "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据安全&mustpro=&notpro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
        "搜索-数据交易" to "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据交易&mustpro=&notpro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
    ),
    rules = mapOf(
        // Search result list: any page number and any non-empty keyword.
        "https://search\\.cac\\.gov\\.cn/cms/cmsadmin/infopub/gjjs\\.jsp\\?templetid=1563339473064626&pubtype=S&pubpath=portal&page=\\d*&webappcode=A09&huopro=.+&mustpro=&notpro=&inpro=&startDate=\\\$*&endDate=\\\$*&sort=1&searchfield=\\\$*&searchdir=A09" to Rule(
            list = Content(
                expression = ".xpage-container .list-item",
                title = Selector("a", process = Process(remove = listOf("\\s*»\\s*"))),
                dateTime = Selector(".search_time"),
                // Result links are protocol-relative, so the scheme is prepended.
                link = Selector("a", "href", process = Process(prefix = "https:"))
            ),
            // The pagination href is relative to the search endpoint's directory.
            next = Selector(
                ".xpage-pagination .xpage-pagination-next a:contains(下一页)",
                "href",
                Process(prefix = "https://search.cac.gov.cn/cms/cmsadmin/infopub/")
            )
        ),
        // Article detail page.
        "https://www\\.cac\\.gov\\.cn/.+/c_\\d+\\.htm" to Rule(
            text = Content(
                expression = ".main",
                title = Selector("h1.title"),
                author = Selector("#source", process = Process(remove = listOf("来源:"))),
                dateTime = Selector("#pubtime"),
                content = Selector(".main-content"),
                extra = mapOf(
                    "source" to Selector(".main", properties = mapOf("html" to "true"))
                )
            )
        )
    )
)
/** Every site definition declared in this file, collected into one set. */
val sites: Set<Site> = setOf(
    广东政务服务和数据管理局,
    深圳市政务服务和数据管理局,
    中华人民共和国中央人民政府,
    中华人民共和国国家互联网信息办公室,
)

View File

@@ -125,7 +125,7 @@ class NewsRunner : Runner {
description = null, description = null,
score = null, score = null,
createTime = createTime, createTime = createTime,
pushed = null, pushed = false,
) )
} ?: return null } ?: return null
} }
@@ -159,7 +159,7 @@ class NewsRunner : Runner {
} }
} }
next = page["next"] as String? next = page["next"] as String?
} while (recursive and !next.isNullOrBlank()) } while (recursive && !next.isNullOrBlank())
return links return links
} }
} }

View File

@@ -0,0 +1,48 @@
package com.lanyuanxiaoyao.digtal.market
import cn.hutool.json.JSONUtil
import com.lanyuanxiaoyao.digtal.market.runner.NewsRunner
import com.lanyuanxiaoyao.squirrel.core.common.Management
import jakarta.annotation.Resource
import org.junit.jupiter.api.Test
import org.slf4j.LoggerFactory
import org.springframework.boot.test.context.SpringBootTest
/**
 * Smoke tests that boot the full [Application] context and drive [NewsRunner]
 * against the www.cac.gov.cn site definition.
 *
 * Neither test asserts anything: each one only logs what the runner returned, so a
 * test fails only if the call itself throws.
 * NOTE(review): both calls presumably download from the real site — confirm before
 * enabling these in CI.
 */
@SpringBootTest(
    classes = [Application::class],
    webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
)
@ExperimentalStdlibApi
class TestManagement {
    private val logger = LoggerFactory.getLogger(javaClass)

    // Not referenced by any test; kept so the context must still provide a Management bean.
    @Resource
    private lateinit var management: Management

    @Resource
    private lateinit var newsRunner: NewsRunner

    /** Parses the "数据要素" search result list and logs every link that was found. */
    @Test
    fun testParseList() {
        val links = newsRunner.parseArticleLink(
            CAC_SITE_CODE,
            "https://search.cac.gov.cn/cms/cmsadmin/infopub/gjjs.jsp?templetid=1563339473064626&pubtype=S&pubpath=portal&page=1&webappcode=A09&huopro=数据要素&mustpro=&notpro=&inpro=&startDate=\$\$\$&endDate=\$\$\$&sort=1&searchfield=\$\$\$&searchdir=A09",
            true,
        )
        for (link in links) {
            logger.info("{} {} {}", link.datetime, link.url, link.title)
        }
    }

    /** Parses one known article page and logs it as pretty-printed JSON when a result comes back. */
    @Test
    fun testParseArticle() {
        val article = newsRunner.parseArticle(
            CAC_SITE_CODE,
            "https://www.cac.gov.cn/2024-10/14/c_1730595202555062.htm",
            "no title",
            "no datetime",
            "",
        )
        if (article != null) {
            logger.info("{}", JSONUtil.toJsonPrettyStr(article))
        }
    }

    private companion object {
        /** Site code passed to both runner calls; same value as the www.cac.gov.cn `Site.code`. */
        const val CAC_SITE_CODE = "1df28c35-1e9e-4d58-9595-f08029b160b4"
    }
}