1
0

Compare commits

...

29 Commits

Author SHA1 Message Date
1ae75a3e6c 增加数据采集 2025-09-17 18:35:03 +08:00
d4a5a8f586 设计止损函数 2025-02-23 22:18:37 +08:00
5dcc00d9b2 修正MACD策略参数 2025-02-21 11:56:28 +08:00
edec606449 增加批量回测函数 2025-02-19 23:18:19 +08:00
2b01c56471 修正策略的编写 2025-02-19 18:43:14 +08:00
b8f97f7203 新增backtesting框架用于回测 2025-02-18 18:01:01 +08:00
5489a0bb4b 完成macd回测 2025-02-17 22:35:46 +08:00
3f8b210564 尝试在策略内部使用talib计算指标 2025-02-17 18:28:08 +08:00
9689625fc1 完成macd回测 2025-02-16 23:24:34 +08:00
46a539cf72 尝试构建回测模块 2025-02-13 18:21:54 +08:00
e1eba0559e 学习使用backtrader 2025-02-11 22:56:21 +08:00
5236a0f6f9 尝试构建回测模块 2025-02-11 17:35:00 +08:00
e856588b6e 进行了一些尝试 2025-02-10 23:22:55 +08:00
7092fd65c4 优化代码 2025-02-10 17:28:30 +08:00
32f563518d 进行了一番计算 2025-01-22 16:03:28 +08:00
2901905e20 更新依赖信息 2025-01-22 09:00:45 +08:00
f34ce1374b K线分析 2025-01-21 23:19:07 +08:00
576d9abc3c 完成金字塔选股 2025-01-20 18:14:42 +08:00
bdacbcde9d 完善金字塔选股策略计算 2025-01-20 17:16:34 +08:00
063ece34c9 优化选股计算 2025-01-20 00:49:23 +08:00
c711a8c94a 明确策略开发方向 2025-01-17 18:20:28 +08:00
bd1131ee06 优化代码 2025-01-16 17:00:48 +08:00
b74e33b37b 增加结构化策略 2025-01-15 18:43:23 +08:00
a89c60227c 优化财报获取 2025-01-14 22:48:14 +08:00
b78cc1dd23 优化依赖版本 2025-01-14 21:55:07 +08:00
b2b2c10e56 增加回测判断 2025-01-14 10:58:16 +08:00
3e36da8c04 实现日线函数拟合 2025-01-13 23:40:43 +08:00
74415118ba 优化数据更新 2025-01-13 18:24:28 +08:00
64ea792444 优化金字塔选股结果格式 2025-01-13 14:16:47 +08:00
18 changed files with 16107 additions and 41689 deletions

3
.gitignore vendored
View File

@@ -116,4 +116,5 @@ cython_debug/
# Custom
temp/
temp/**/*
temp/**/*
.idea/**/csv-editor.xml

5
.idea/codeStyles/codeStyleConfig.xml generated Normal file
View File

@@ -0,0 +1,5 @@
<component name="ProjectCodeStyleConfiguration">
<state>
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
</state>
</component>

6
.idea/vcs.xml generated
View File

@@ -1,5 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CommitMessageInspectionProfile">
<profile version="1.0">
<inspection_tool class="CommitFormat" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="CommitNamingConvention" enabled="true" level="WARNING" enabled_by_default="true" />
</profile>
</component>
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>

10
note.md Normal file
View File

@@ -0,0 +1,10 @@
talib安装
macOS
```
poetry run pip install TA-Lib
```
Windows
> https://github.com/cgohlke/talib-build
```
poetry run pip install ta_lib-0.6.3-cp312-cp312-win_amd64.whl
```

3948
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -14,6 +14,9 @@ jupyter = "^1.1.1"
matplotlib = "^3.10.0"
prophet = "^1.1.6"
mplfinance = "^0.12.10b0"
scipy = "^1.15.1"
backtrader = "^1.9.78.123"
backtesting = "^0.6.1"
[build-system]

44
strategy.py Normal file
View File

@@ -0,0 +1,44 @@
import pandas as pd
class Selector:
    """Base stock selector.

    Subclasses override :meth:`select` to narrow down a list of stock codes.
    The base implementation accepts every code.
    """

    def select(self, codes: list[str], df: pd.DataFrame) -> list[str]:
        """Return the codes accepted by this selector.

        Args:
            codes: Candidate stock codes.
            df: Data the selector may inspect; unused by the base class.

        Returns:
            ``codes`` unchanged (the very same list object, not a copy).
        """
        return codes
class Strategy:
    """Stock-selection pipeline that applies a list of selectors in order."""

    def __init__(self, selectors: list["Selector"]):
        """Remember the selectors to run.

        Args:
            selectors: Objects exposing ``select(codes, df)``; they are applied
                in list order.
        """
        self.selectors = selectors

    def select(self, codes: list[str], df: pd.DataFrame) -> list[str]:
        """Run every selector in turn and return the surviving codes.

        Each selector receives the codes kept by the previous one, so the
        result is the set of codes accepted by all selectors.

        Args:
            codes: Candidate stock codes.
            df: Data passed through unchanged to every selector.

        Returns:
            A new list with the codes that passed every selector. With no
            selectors configured this is simply a copy of ``codes``.
        """
        selected = codes
        # ``self.selectors`` is a list, so each selector must be invoked
        # individually; calling ``.select`` on the list itself would fail.
        for selector in self.selectors:
            selected = selector.select(selected, df)
        return list(selected)
class PeriodSelector(Selector):
    """Keep only codes that have more than ``period`` rows in the data."""

    def __init__(self, period: int = 5):
        """Configure the minimum row count.

        Args:
            period: A code is kept only when it has strictly more than this
                many rows in the frame passed to :meth:`select`.
        """
        self.__period = period

    def select(self, codes: list[str], df: pd.DataFrame) -> list[str]:
        """Return the codes whose row count in ``df`` exceeds the period.

        Args:
            codes: Candidate stock codes.
            df: Frame with a ``code`` column; rows are counted per code.

        Returns:
            The codes from ``codes`` (original order preserved) that have more
            than ``period`` rows. Codes absent from ``df`` count as zero rows
            and are therefore dropped instead of raising ``KeyError``.
        """
        row_counts = df.groupby("code").size()
        return [
            code
            for code in codes
            if row_counts.get(code, 0) > self.__period
        ]
class PyramidSelector(Selector):
    """Score stocks from financial-statement ratios (work in progress).

    The selector currently computes per-row ROE / ROA and a per-code ROE
    score on a private working copy of the data, but the score is not yet
    used for filtering: :meth:`select` still returns every code.
    """

    # (inclusive lower bound, exclusive upper bound or None, score points)
    # for the average-ROE tiers. Values between tiers (e.g. below 10) score 0.
    # NOTE(review): the bounds look like percentages (35 == 35%) while ``roe``
    # below is a plain ratio of the two columns - confirm the intended units.
    _ROE_SCORE_TIERS = (
        (35, None, 550),
        (30, 35, 500),
        (25, 30, 450),
        (15, 25, 300),
        (10, 15, 250),
    )

    def select(self, codes: list[str], df: pd.DataFrame) -> list[str]:
        """Compute ROE/ROA based scores for ``codes`` and return the codes.

        Args:
            codes: Candidate stock codes.
            df: Frame with the columns ``code``, ``net_income``,
                ``total_stockholder_interest`` and ``total_assets``.
                Presumably one row per code and reporting period, ordered
                chronologically within each code - verify against the caller,
                because the "previous period" values rely on row order.

        Returns:
            The result of the base-class ``select`` (all codes); the computed
            scores are not applied yet. ``df`` itself is never modified.
        """
        # Work on an explicit copy so the column assignments below neither
        # touch the caller's frame nor trigger chained-assignment warnings.
        target_df = df[df["code"].isin(codes)].copy()
        target_df["score"] = 0
        group_df = target_df.groupby("code")

        # ROE = net income / average of previous and current equity.
        target_df["prev_total_stockholder_interest"] = group_df["total_stockholder_interest"].shift(1)
        average_equity = (
            target_df["prev_total_stockholder_interest"] + target_df["total_stockholder_interest"]
        ) / 2
        target_df["roe"] = target_df["net_income"] / average_equity

        # Average ROE per code (not across all codes), broadcast to each row.
        target_df["average_roe"] = target_df.groupby("code")["roe"].transform("mean")

        average_roe = target_df["average_roe"]
        for lower, upper, points in self._ROE_SCORE_TIERS:
            in_tier = average_roe >= lower
            if upper is not None:
                in_tier &= average_roe < upper
            # Only the score column of matching rows is incremented.
            target_df.loc[in_tier, "score"] += points

        # ROA = net income / average of previous and current total assets.
        target_df["prev_total_assets"] = group_df["total_assets"].shift(1)
        average_assets = (target_df["prev_total_assets"] + target_df["total_assets"]) / 2
        target_df["roa"] = target_df["net_income"] / average_assets

        return super().select(codes, df)

3456
回测/backtesting.ipynb Normal file

File diff suppressed because one or more lines are too long

403
回测/backtrader.ipynb Normal file

File diff suppressed because one or more lines are too long

587
材料准备/ta-lib.ipynb Normal file
View File

@@ -0,0 +1,587 @@
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-02-11T06:31:19.433336Z",
"start_time": "2025-02-11T06:31:18.884130Z"
}
},
"source": [
"import pandas as pd\n",
"\n",
"# source_df = \\\n",
"# pd.read_csv(\"C:\\\\Users\\\\lanyuanxiaoyao\\\\SynologyDrive\\\\data\\\\Tushare\\\\日线行情 1990-2024\\\\分组行情\\\\000001.SZ.csv\") \\\n",
"source_df = pd.read_csv(\"/Users/lanyuanxiaoyao/SynologyDrive/data/Tushare/日线行情 1990-2024/分组行情/600519.SH.csv\") \\\n",
" [[\"trade_date\", \"vol\", \"open_qfq\", \"close_qfq\", \"high_qfq\", \"low_qfq\", \"macd\", \"macd_dif\", \"macd_dea\"]]\n",
"df = pd.DataFrame()\n",
"df[[\"date\", \"volume\", \"open\", \"close\", \"high\", \"low\", \"macd\", \"macd_dif\", \"macd_dea\"]] = \\\n",
" source_df[[\"trade_date\", \"vol\", \"open_qfq\", \"close_qfq\", \"high_qfq\", \"low_qfq\", \"macd\", \"macd_dif\", \"macd_dea\"]]\n",
"df[\"datetime\"] = pd.to_datetime(df[\"date\"], format=\"%Y%m%d\")\n",
"df[\"datetime_text\"] = df[\"datetime\"].apply(lambda x: x.strftime(\"%Y%m%d\"))\n",
"df.sort_values(by='datetime', inplace=True)\n",
"df"
],
"outputs": [
{
"data": {
"text/plain": [
" date volume open close high low \\\n",
"2187 20010827 406318.00 4.23675 4.36443 4.63820 4.03295 \n",
"2188 20010828 129647.79 4.29568 4.52525 4.54244 4.24902 \n",
"2189 20010829 53252.75 4.53999 4.46632 4.54244 4.43195 \n",
"2190 20010830 48013.06 4.45405 4.55472 4.60505 4.41967 \n",
"2191 20010831 23231.48 4.56086 4.54367 4.61856 4.51789 \n",
"... ... ... ... ... ... ... \n",
"2182 20241225 17123.39 1538.80000 1530.00000 1538.80000 1526.10000 \n",
"2183 20241226 18286.51 1534.00000 1527.79000 1538.78000 1523.00000 \n",
"2184 20241227 20759.32 1528.90000 1528.97000 1536.00000 1519.50000 \n",
"2185 20241230 25129.82 1533.97000 1525.00000 1543.96000 1525.00000 \n",
"2186 20241231 39354.45 1525.40000 1524.00000 1545.00000 1522.01000 \n",
"\n",
" macd macd_dif macd_dea datetime datetime_text \n",
"2187 0.000 0.000 0.000 2001-08-27 20010827 \n",
"2188 0.021 0.013 0.003 2001-08-28 20010828 \n",
"2189 0.025 0.018 0.006 2001-08-29 20010829 \n",
"2190 0.037 0.029 0.010 2001-08-30 20010830 \n",
"2191 0.042 0.036 0.016 2001-08-31 20010831 \n",
"... ... ... ... ... ... \n",
"2182 5.430 5.063 2.348 2024-12-25 20241225 \n",
"2183 4.192 4.968 2.872 2024-12-26 20241226 \n",
"2184 3.295 4.931 3.283 2024-12-27 20241227 \n",
"2185 1.993 4.529 3.533 2024-12-30 20241230 \n",
"2186 0.880 4.083 3.643 2024-12-31 20241231 \n",
"\n",
"[5591 rows x 11 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>volume</th>\n",
" <th>open</th>\n",
" <th>close</th>\n",
" <th>high</th>\n",
" <th>low</th>\n",
" <th>macd</th>\n",
" <th>macd_dif</th>\n",
" <th>macd_dea</th>\n",
" <th>datetime</th>\n",
" <th>datetime_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2187</th>\n",
" <td>20010827</td>\n",
" <td>406318.00</td>\n",
" <td>4.23675</td>\n",
" <td>4.36443</td>\n",
" <td>4.63820</td>\n",
" <td>4.03295</td>\n",
" <td>0.000</td>\n",
" <td>0.000</td>\n",
" <td>0.000</td>\n",
" <td>2001-08-27</td>\n",
" <td>20010827</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2188</th>\n",
" <td>20010828</td>\n",
" <td>129647.79</td>\n",
" <td>4.29568</td>\n",
" <td>4.52525</td>\n",
" <td>4.54244</td>\n",
" <td>4.24902</td>\n",
" <td>0.021</td>\n",
" <td>0.013</td>\n",
" <td>0.003</td>\n",
" <td>2001-08-28</td>\n",
" <td>20010828</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2189</th>\n",
" <td>20010829</td>\n",
" <td>53252.75</td>\n",
" <td>4.53999</td>\n",
" <td>4.46632</td>\n",
" <td>4.54244</td>\n",
" <td>4.43195</td>\n",
" <td>0.025</td>\n",
" <td>0.018</td>\n",
" <td>0.006</td>\n",
" <td>2001-08-29</td>\n",
" <td>20010829</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2190</th>\n",
" <td>20010830</td>\n",
" <td>48013.06</td>\n",
" <td>4.45405</td>\n",
" <td>4.55472</td>\n",
" <td>4.60505</td>\n",
" <td>4.41967</td>\n",
" <td>0.037</td>\n",
" <td>0.029</td>\n",
" <td>0.010</td>\n",
" <td>2001-08-30</td>\n",
" <td>20010830</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2191</th>\n",
" <td>20010831</td>\n",
" <td>23231.48</td>\n",
" <td>4.56086</td>\n",
" <td>4.54367</td>\n",
" <td>4.61856</td>\n",
" <td>4.51789</td>\n",
" <td>0.042</td>\n",
" <td>0.036</td>\n",
" <td>0.016</td>\n",
" <td>2001-08-31</td>\n",
" <td>20010831</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2182</th>\n",
" <td>20241225</td>\n",
" <td>17123.39</td>\n",
" <td>1538.80000</td>\n",
" <td>1530.00000</td>\n",
" <td>1538.80000</td>\n",
" <td>1526.10000</td>\n",
" <td>5.430</td>\n",
" <td>5.063</td>\n",
" <td>2.348</td>\n",
" <td>2024-12-25</td>\n",
" <td>20241225</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2183</th>\n",
" <td>20241226</td>\n",
" <td>18286.51</td>\n",
" <td>1534.00000</td>\n",
" <td>1527.79000</td>\n",
" <td>1538.78000</td>\n",
" <td>1523.00000</td>\n",
" <td>4.192</td>\n",
" <td>4.968</td>\n",
" <td>2.872</td>\n",
" <td>2024-12-26</td>\n",
" <td>20241226</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2184</th>\n",
" <td>20241227</td>\n",
" <td>20759.32</td>\n",
" <td>1528.90000</td>\n",
" <td>1528.97000</td>\n",
" <td>1536.00000</td>\n",
" <td>1519.50000</td>\n",
" <td>3.295</td>\n",
" <td>4.931</td>\n",
" <td>3.283</td>\n",
" <td>2024-12-27</td>\n",
" <td>20241227</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2185</th>\n",
" <td>20241230</td>\n",
" <td>25129.82</td>\n",
" <td>1533.97000</td>\n",
" <td>1525.00000</td>\n",
" <td>1543.96000</td>\n",
" <td>1525.00000</td>\n",
" <td>1.993</td>\n",
" <td>4.529</td>\n",
" <td>3.533</td>\n",
" <td>2024-12-30</td>\n",
" <td>20241230</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2186</th>\n",
" <td>20241231</td>\n",
" <td>39354.45</td>\n",
" <td>1525.40000</td>\n",
" <td>1524.00000</td>\n",
" <td>1545.00000</td>\n",
" <td>1522.01000</td>\n",
" <td>0.880</td>\n",
" <td>4.083</td>\n",
" <td>3.643</td>\n",
" <td>2024-12-31</td>\n",
" <td>20241231</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5591 rows × 11 columns</p>\n",
"</div>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 2
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T06:33:12.365001Z",
"start_time": "2025-02-11T06:33:12.329227Z"
}
},
"cell_type": "code",
"source": [
"import talib\n",
"\n",
"dif, dea, hist = talib.MACD(df[\"close\"], fastperiod=12, slowperiod=26, signalperiod=9)\n",
"\n",
"# 将结果添加到数据框\n",
"df['DIF'] = dif\n",
"df['DEA'] = dea\n",
"df['MACD'] = hist\n",
"\n",
"#根据close列计算macd值\n",
"talib.SMA(df[\"close\"], timeperiod=26, step=10)\n",
"\n",
"df"
],
"id": "72b1c0c3f57e8b8e",
"outputs": [
{
"data": {
"text/plain": [
" date volume open close high low \\\n",
"2187 20010827 406318.00 4.23675 4.36443 4.63820 4.03295 \n",
"2188 20010828 129647.79 4.29568 4.52525 4.54244 4.24902 \n",
"2189 20010829 53252.75 4.53999 4.46632 4.54244 4.43195 \n",
"2190 20010830 48013.06 4.45405 4.55472 4.60505 4.41967 \n",
"2191 20010831 23231.48 4.56086 4.54367 4.61856 4.51789 \n",
"... ... ... ... ... ... ... \n",
"2182 20241225 17123.39 1538.80000 1530.00000 1538.80000 1526.10000 \n",
"2183 20241226 18286.51 1534.00000 1527.79000 1538.78000 1523.00000 \n",
"2184 20241227 20759.32 1528.90000 1528.97000 1536.00000 1519.50000 \n",
"2185 20241230 25129.82 1533.97000 1525.00000 1543.96000 1525.00000 \n",
"2186 20241231 39354.45 1525.40000 1524.00000 1545.00000 1522.01000 \n",
"\n",
" macd macd_dif macd_dea datetime datetime_text DIF DEA \\\n",
"2187 0.000 0.000 0.000 2001-08-27 20010827 NaN NaN \n",
"2188 0.021 0.013 0.003 2001-08-28 20010828 NaN NaN \n",
"2189 0.025 0.018 0.006 2001-08-29 20010829 NaN NaN \n",
"2190 0.037 0.029 0.010 2001-08-30 20010830 NaN NaN \n",
"2191 0.042 0.036 0.016 2001-08-31 20010831 NaN NaN \n",
"... ... ... ... ... ... ... ... \n",
"2182 5.430 5.063 2.348 2024-12-25 20241225 5.062711 2.347629 \n",
"2183 4.192 4.968 2.872 2024-12-26 20241226 4.967756 2.871654 \n",
"2184 3.295 4.931 3.283 2024-12-27 20241227 4.930880 3.283499 \n",
"2185 1.993 4.529 3.533 2024-12-30 20241230 4.529101 3.532620 \n",
"2186 0.880 4.083 3.643 2024-12-31 20241231 4.082931 3.642682 \n",
"\n",
" MACD \n",
"2187 NaN \n",
"2188 NaN \n",
"2189 NaN \n",
"2190 NaN \n",
"2191 NaN \n",
"... ... \n",
"2182 2.715082 \n",
"2183 2.096102 \n",
"2184 1.647381 \n",
"2185 0.996481 \n",
"2186 0.440249 \n",
"\n",
"[5591 rows x 14 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>volume</th>\n",
" <th>open</th>\n",
" <th>close</th>\n",
" <th>high</th>\n",
" <th>low</th>\n",
" <th>macd</th>\n",
" <th>macd_dif</th>\n",
" <th>macd_dea</th>\n",
" <th>datetime</th>\n",
" <th>datetime_text</th>\n",
" <th>DIF</th>\n",
" <th>DEA</th>\n",
" <th>MACD</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2187</th>\n",
" <td>20010827</td>\n",
" <td>406318.00</td>\n",
" <td>4.23675</td>\n",
" <td>4.36443</td>\n",
" <td>4.63820</td>\n",
" <td>4.03295</td>\n",
" <td>0.000</td>\n",
" <td>0.000</td>\n",
" <td>0.000</td>\n",
" <td>2001-08-27</td>\n",
" <td>20010827</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2188</th>\n",
" <td>20010828</td>\n",
" <td>129647.79</td>\n",
" <td>4.29568</td>\n",
" <td>4.52525</td>\n",
" <td>4.54244</td>\n",
" <td>4.24902</td>\n",
" <td>0.021</td>\n",
" <td>0.013</td>\n",
" <td>0.003</td>\n",
" <td>2001-08-28</td>\n",
" <td>20010828</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2189</th>\n",
" <td>20010829</td>\n",
" <td>53252.75</td>\n",
" <td>4.53999</td>\n",
" <td>4.46632</td>\n",
" <td>4.54244</td>\n",
" <td>4.43195</td>\n",
" <td>0.025</td>\n",
" <td>0.018</td>\n",
" <td>0.006</td>\n",
" <td>2001-08-29</td>\n",
" <td>20010829</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2190</th>\n",
" <td>20010830</td>\n",
" <td>48013.06</td>\n",
" <td>4.45405</td>\n",
" <td>4.55472</td>\n",
" <td>4.60505</td>\n",
" <td>4.41967</td>\n",
" <td>0.037</td>\n",
" <td>0.029</td>\n",
" <td>0.010</td>\n",
" <td>2001-08-30</td>\n",
" <td>20010830</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2191</th>\n",
" <td>20010831</td>\n",
" <td>23231.48</td>\n",
" <td>4.56086</td>\n",
" <td>4.54367</td>\n",
" <td>4.61856</td>\n",
" <td>4.51789</td>\n",
" <td>0.042</td>\n",
" <td>0.036</td>\n",
" <td>0.016</td>\n",
" <td>2001-08-31</td>\n",
" <td>20010831</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2182</th>\n",
" <td>20241225</td>\n",
" <td>17123.39</td>\n",
" <td>1538.80000</td>\n",
" <td>1530.00000</td>\n",
" <td>1538.80000</td>\n",
" <td>1526.10000</td>\n",
" <td>5.430</td>\n",
" <td>5.063</td>\n",
" <td>2.348</td>\n",
" <td>2024-12-25</td>\n",
" <td>20241225</td>\n",
" <td>5.062711</td>\n",
" <td>2.347629</td>\n",
" <td>2.715082</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2183</th>\n",
" <td>20241226</td>\n",
" <td>18286.51</td>\n",
" <td>1534.00000</td>\n",
" <td>1527.79000</td>\n",
" <td>1538.78000</td>\n",
" <td>1523.00000</td>\n",
" <td>4.192</td>\n",
" <td>4.968</td>\n",
" <td>2.872</td>\n",
" <td>2024-12-26</td>\n",
" <td>20241226</td>\n",
" <td>4.967756</td>\n",
" <td>2.871654</td>\n",
" <td>2.096102</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2184</th>\n",
" <td>20241227</td>\n",
" <td>20759.32</td>\n",
" <td>1528.90000</td>\n",
" <td>1528.97000</td>\n",
" <td>1536.00000</td>\n",
" <td>1519.50000</td>\n",
" <td>3.295</td>\n",
" <td>4.931</td>\n",
" <td>3.283</td>\n",
" <td>2024-12-27</td>\n",
" <td>20241227</td>\n",
" <td>4.930880</td>\n",
" <td>3.283499</td>\n",
" <td>1.647381</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2185</th>\n",
" <td>20241230</td>\n",
" <td>25129.82</td>\n",
" <td>1533.97000</td>\n",
" <td>1525.00000</td>\n",
" <td>1543.96000</td>\n",
" <td>1525.00000</td>\n",
" <td>1.993</td>\n",
" <td>4.529</td>\n",
" <td>3.533</td>\n",
" <td>2024-12-30</td>\n",
" <td>20241230</td>\n",
" <td>4.529101</td>\n",
" <td>3.532620</td>\n",
" <td>0.996481</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2186</th>\n",
" <td>20241231</td>\n",
" <td>39354.45</td>\n",
" <td>1525.40000</td>\n",
" <td>1524.00000</td>\n",
" <td>1545.00000</td>\n",
" <td>1522.01000</td>\n",
" <td>0.880</td>\n",
" <td>4.083</td>\n",
" <td>3.643</td>\n",
" <td>2024-12-31</td>\n",
" <td>20241231</td>\n",
" <td>4.082931</td>\n",
" <td>3.642682</td>\n",
" <td>0.440249</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5591 rows × 14 columns</p>\n",
"</div>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 3
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -4,7 +4,11 @@
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-01-13T01:30:08.643756Z",
"start_time": "2025-01-13T01:30:06.177472Z"
}
},
"source": [
"import pandas as pd\n",
@@ -13,10 +17,15 @@
"ts_pro = ts.pro_api(token=\"64ebff4fa679167600b905ee45dd88e76f3963c0ff39157f3f085f0e\")"
],
"outputs": [],
"execution_count": null
"execution_count": 1
},
{
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-13T01:30:08.652416Z",
"start_time": "2025-01-13T01:30:08.646084Z"
}
},
"cell_type": "code",
"source": [
"def get_balance_sheet_df(start_year, end_year):\n",
@@ -59,55 +68,272 @@
],
"id": "14a28ff4952f0df8",
"outputs": [],
"execution_count": null
"execution_count": 2
},
{
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-13T09:08:04.607819Z",
"start_time": "2025-01-13T09:08:04.602640Z"
}
},
"cell_type": "code",
"source": [
"start_year = 2014\n",
"start_year = 1990\n",
"end_year = 2024"
],
"id": "dc68cde196159626",
"outputs": [],
"execution_count": null
"execution_count": 11
},
{
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-13T09:09:36.004427Z",
"start_time": "2025-01-13T09:08:33.797018Z"
}
},
"cell_type": "code",
"source": [
"# 财务负债表\n",
"balance_sheet_df = clean_df(get_balance_sheet_df(start_year, end_year))\n",
"balance_sheet_df.to_csv(\"../temp/balance_sheet.csv\", index=False)"
"balance_sheet_df.to_csv(f\"/Users/lanyuanxiaoyao/SynologyDrive/data/Tushare/财务报表/资产负债表{start_year}-{end_year}.csv\", index=False)"
],
"id": "33cd797a12ad567e",
"outputs": [],
"execution_count": null
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pull balance sheet: 1990\n",
"Pull balance sheet: 1991\n",
"Pull balance sheet: 1992\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/7h/w0cmp4zj6mn9br_6nyj310m40000gn/T/ipykernel_50121/709533518.py:8: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
" result = pd.concat([result, temp], ignore_index=True)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pull balance sheet: 1993\n",
"Pull balance sheet: 1994\n",
"Pull balance sheet: 1995\n",
"Pull balance sheet: 1996\n",
"Pull balance sheet: 1997\n",
"Pull balance sheet: 1998\n",
"Pull balance sheet: 1999\n",
"Pull balance sheet: 2000\n",
"Pull balance sheet: 2001\n",
"Pull balance sheet: 2002\n",
"Pull balance sheet: 2003\n",
"Pull balance sheet: 2004\n",
"Pull balance sheet: 2005\n",
"Pull balance sheet: 2006\n",
"Pull balance sheet: 2007\n",
"Pull balance sheet: 2008\n",
"Pull balance sheet: 2009\n",
"Pull balance sheet: 2010\n",
"Pull balance sheet: 2011\n",
"Pull balance sheet: 2012\n",
"Pull balance sheet: 2013\n",
"Pull balance sheet: 2014\n",
"Pull balance sheet: 2015\n",
"Pull balance sheet: 2016\n",
"Pull balance sheet: 2017\n",
"Pull balance sheet: 2018\n",
"Pull balance sheet: 2019\n",
"Pull balance sheet: 2020\n",
"Pull balance sheet: 2021\n",
"Pull balance sheet: 2022\n",
"Pull balance sheet: 2023\n",
"Pull balance sheet: 2024\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/7h/w0cmp4zj6mn9br_6nyj310m40000gn/T/ipykernel_50121/709533518.py:8: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
" result = pd.concat([result, temp], ignore_index=True)\n"
]
}
],
"execution_count": 12
},
{
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-13T09:11:33.507055Z",
"start_time": "2025-01-13T09:10:33.447304Z"
}
},
"cell_type": "code",
"source": [
"income_df = clean_df(get_income_df(start_year, end_year))\n",
"income_df.to_csv(\"../temp/income.csv\", index=False)"
"income_df.to_csv(f\"/Users/lanyuanxiaoyao/SynologyDrive/data/Tushare/财务报表/利润表{start_year}-{end_year}.csv\", index=False)"
],
"id": "17306c1524f5e173",
"outputs": [],
"execution_count": null
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pull income: 1990\n",
"Pull income: 1991\n",
"Pull income: 1992\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/7h/w0cmp4zj6mn9br_6nyj310m40000gn/T/ipykernel_50121/709533518.py:19: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
" result = pd.concat([result, temp], ignore_index=True)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pull income: 1993\n",
"Pull income: 1994\n",
"Pull income: 1995\n",
"Pull income: 1996\n",
"Pull income: 1997\n",
"Pull income: 1998\n",
"Pull income: 1999\n",
"Pull income: 2000\n",
"Pull income: 2001\n",
"Pull income: 2002\n",
"Pull income: 2003\n",
"Pull income: 2004\n",
"Pull income: 2005\n",
"Pull income: 2006\n",
"Pull income: 2007\n",
"Pull income: 2008\n",
"Pull income: 2009\n",
"Pull income: 2010\n",
"Pull income: 2011\n",
"Pull income: 2012\n",
"Pull income: 2013\n",
"Pull income: 2014\n",
"Pull income: 2015\n",
"Pull income: 2016\n",
"Pull income: 2017\n",
"Pull income: 2018\n",
"Pull income: 2019\n",
"Pull income: 2020\n",
"Pull income: 2021\n",
"Pull income: 2022\n",
"Pull income: 2023\n",
"Pull income: 2024\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/7h/w0cmp4zj6mn9br_6nyj310m40000gn/T/ipykernel_50121/709533518.py:19: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
" result = pd.concat([result, temp], ignore_index=True)\n"
]
}
],
"execution_count": 13
},
{
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-13T09:13:32.890801Z",
"start_time": "2025-01-13T09:12:46.032342Z"
}
},
"cell_type": "code",
"source": [
"cash_flow_df = clean_df(get_cash_flow_df(start_year, end_year))\n",
"cash_flow_df.to_csv(\"../temp/cash_flow.csv\", index=False)"
"cash_flow_df.to_csv(f\"/Users/lanyuanxiaoyao/SynologyDrive/data/Tushare/财务报表/现金流量表{start_year}-{end_year}.csv\", index=False)"
],
"id": "334dbe20f2047a1e",
"outputs": [],
"execution_count": null
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pull cash flow: 1990\n",
"Pull cash flow: 1991\n",
"Pull cash flow: 1992\n",
"Pull cash flow: 1993\n",
"Pull cash flow: 1994\n",
"Pull cash flow: 1995\n",
"Pull cash flow: 1996\n",
"Pull cash flow: 1997\n",
"Pull cash flow: 1998\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/7h/w0cmp4zj6mn9br_6nyj310m40000gn/T/ipykernel_50121/709533518.py:30: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
" result = pd.concat([result, temp], ignore_index=True)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pull cash flow: 1999\n",
"Pull cash flow: 2000\n",
"Pull cash flow: 2001\n",
"Pull cash flow: 2002\n",
"Pull cash flow: 2003\n",
"Pull cash flow: 2004\n",
"Pull cash flow: 2005\n",
"Pull cash flow: 2006\n",
"Pull cash flow: 2007\n",
"Pull cash flow: 2008\n",
"Pull cash flow: 2009\n",
"Pull cash flow: 2010\n",
"Pull cash flow: 2011\n",
"Pull cash flow: 2012\n",
"Pull cash flow: 2013\n",
"Pull cash flow: 2014\n",
"Pull cash flow: 2015\n",
"Pull cash flow: 2016\n",
"Pull cash flow: 2017\n",
"Pull cash flow: 2018\n",
"Pull cash flow: 2019\n",
"Pull cash flow: 2020\n",
"Pull cash flow: 2021\n",
"Pull cash flow: 2022\n",
"Pull cash flow: 2023\n",
"Pull cash flow: 2024\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/7h/w0cmp4zj6mn9br_6nyj310m40000gn/T/ipykernel_50121/709533518.py:30: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
" result = pd.concat([result, temp], ignore_index=True)\n"
]
}
],
"execution_count": 15
},
{
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-13T01:46:57.717599Z",
"start_time": "2025-01-13T01:46:46.377806Z"
}
},
"cell_type": "code",
"source": [
"finance_df = pd.merge(balance_sheet_df, income_df, on=[\"ts_code\", \"end_date\"])\n",
@@ -116,13 +342,13 @@
],
"id": "f8bea62f377b5e2",
"outputs": [],
"execution_count": null
"execution_count": 7
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-12T12:04:00.424439Z",
"start_time": "2025-01-12T12:04:00.364557Z"
"end_time": "2025-01-13T01:46:57.814028Z",
"start_time": "2025-01-13T01:46:57.728111Z"
}
},
"cell_type": "code",
@@ -512,12 +738,408 @@
"</div>"
]
},
"execution_count": 29,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 29
"execution_count": 8
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-13T09:37:00.340262Z",
"start_time": "2025-01-13T09:36:59.184110Z"
}
},
"cell_type": "code",
"source": [
"import akshare as ak\n",
"\n",
"# ak.stock_balance_sheet_by_yearly_em(\"SZ000001\")\n",
"# df = ak.stock_financial_report_sina(\"sz000001\", symbol=\"资产负债表\")\n",
"df = ak.stock_financial_benefit_ths(\"000001\")\n",
"# df[df[\"报告日\"].str.endswith(\"1231\")]\n",
"df"
],
"id": "582154dbc28b6bd",
"outputs": [
{
"data": {
"text/plain": [
" 报告期 报表核心指标 *净利润 *营业总收入 *营业支出 *归属于母公司所有者的净利润 \\\n",
"0 2024-09-30 397.29亿 1115.82亿 637.13亿 397.29亿 \n",
"1 2024-06-30 258.79亿 771.32亿 450.45亿 258.79亿 \n",
"2 2024-03-31 149.32亿 387.70亿 202.16亿 149.32亿 \n",
"3 2023-12-31 464.55亿 1646.99亿 1067.71亿 464.55亿 \n",
"4 2023-09-30 396.35亿 1276.34亿 785.87亿 396.35亿 \n",
".. ... ... ... ... ... ... \n",
"110 1993-06-30 1.37亿 2.66亿 False 1.37亿 \n",
"111 1992-12-31 1.72亿 4.76亿 False 1.72亿 \n",
"112 1991-12-31 1.13亿 3.35亿 False 1.13亿 \n",
"113 1990-12-31 7087.50万 False False 7087.50万 \n",
"114 1989-12-31 4302.00万 False False 4302.00万 \n",
"\n",
" *扣除非经常性损益后的净利润 报表全部指标 一、营业总收入 其中:营业收入 ... 少数股东损益 扣除非经常性损益后的利润 六、每股收益 \\\n",
"0 397.48亿 1115.82亿 1115.82亿 ... False 397.48亿 \n",
"1 258.80亿 771.32亿 771.32亿 ... False 258.80亿 \n",
"2 149.06亿 387.70亿 387.70亿 ... False 149.06亿 \n",
"3 464.31亿 1646.99亿 1646.99亿 ... False 464.31亿 \n",
"4 395.68亿 1276.34亿 1276.34亿 ... False 395.68亿 \n",
".. ... ... ... ... ... ... ... ... \n",
"110 False 2.66亿 2.66亿 ... False False \n",
"111 False 4.76亿 4.76亿 ... False False \n",
"112 False 3.35亿 3.35亿 ... False False \n",
"113 False False False ... False False \n",
"114 False False False ... False False \n",
"\n",
" (一)基本每股收益 (二)稀释每股收益 七、其他综合收益 归属母公司所有者的其他综合收益 八、综合收益总额 归属于母公司股东的综合收益总额 \\\n",
"0 1.94 1.94 -7.86亿 -7.86亿 389.43亿 False \n",
"1 1.23 1.23 -3.56亿 -3.56亿 255.23亿 False \n",
"2 0.66 0.66 3.45亿 3.45亿 152.77亿 False \n",
"3 2.25 2.25 -3.72亿 -3.72亿 460.83亿 False \n",
"4 1.94 1.94 -8.38亿 -8.38亿 387.97亿 False \n",
".. ... ... ... ... ... ... \n",
"110 False False False False False False \n",
"111 False False False False False False \n",
"112 False False False False False False \n",
"113 False False False False False False \n",
"114 False False False False False False \n",
"\n",
" 归属于少数股东的综合收益总额 \n",
"0 False \n",
"1 False \n",
"2 False \n",
"3 False \n",
"4 False \n",
".. ... \n",
"110 False \n",
"111 False \n",
"112 False \n",
"113 False \n",
"114 False \n",
"\n",
"[115 rows x 43 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>报告期</th>\n",
" <th>报表核心指标</th>\n",
" <th>*净利润</th>\n",
" <th>*营业总收入</th>\n",
" <th>*营业支出</th>\n",
" <th>*归属于母公司所有者的净利润</th>\n",
" <th>*扣除非经常性损益后的净利润</th>\n",
" <th>报表全部指标</th>\n",
" <th>一、营业总收入</th>\n",
" <th>其中:营业收入</th>\n",
" <th>...</th>\n",
" <th>少数股东损益</th>\n",
" <th>扣除非经常性损益后的利润</th>\n",
" <th>六、每股收益</th>\n",
" <th>(一)基本每股收益</th>\n",
" <th>(二)稀释每股收益</th>\n",
" <th>七、其他综合收益</th>\n",
" <th>归属母公司所有者的其他综合收益</th>\n",
" <th>八、综合收益总额</th>\n",
" <th>归属于母公司股东的综合收益总额</th>\n",
" <th>归属于少数股东的综合收益总额</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2024-09-30</td>\n",
" <td></td>\n",
" <td>397.29亿</td>\n",
" <td>1115.82亿</td>\n",
" <td>637.13亿</td>\n",
" <td>397.29亿</td>\n",
" <td>397.48亿</td>\n",
" <td></td>\n",
" <td>1115.82亿</td>\n",
" <td>1115.82亿</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>397.48亿</td>\n",
" <td></td>\n",
" <td>1.94</td>\n",
" <td>1.94</td>\n",
" <td>-7.86亿</td>\n",
" <td>-7.86亿</td>\n",
" <td>389.43亿</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2024-06-30</td>\n",
" <td></td>\n",
" <td>258.79亿</td>\n",
" <td>771.32亿</td>\n",
" <td>450.45亿</td>\n",
" <td>258.79亿</td>\n",
" <td>258.80亿</td>\n",
" <td></td>\n",
" <td>771.32亿</td>\n",
" <td>771.32亿</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>258.80亿</td>\n",
" <td></td>\n",
" <td>1.23</td>\n",
" <td>1.23</td>\n",
" <td>-3.56亿</td>\n",
" <td>-3.56亿</td>\n",
" <td>255.23亿</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2024-03-31</td>\n",
" <td></td>\n",
" <td>149.32亿</td>\n",
" <td>387.70亿</td>\n",
" <td>202.16亿</td>\n",
" <td>149.32亿</td>\n",
" <td>149.06亿</td>\n",
" <td></td>\n",
" <td>387.70亿</td>\n",
" <td>387.70亿</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>149.06亿</td>\n",
" <td></td>\n",
" <td>0.66</td>\n",
" <td>0.66</td>\n",
" <td>3.45亿</td>\n",
" <td>3.45亿</td>\n",
" <td>152.77亿</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2023-12-31</td>\n",
" <td></td>\n",
" <td>464.55亿</td>\n",
" <td>1646.99亿</td>\n",
" <td>1067.71亿</td>\n",
" <td>464.55亿</td>\n",
" <td>464.31亿</td>\n",
" <td></td>\n",
" <td>1646.99亿</td>\n",
" <td>1646.99亿</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>464.31亿</td>\n",
" <td></td>\n",
" <td>2.25</td>\n",
" <td>2.25</td>\n",
" <td>-3.72亿</td>\n",
" <td>-3.72亿</td>\n",
" <td>460.83亿</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2023-09-30</td>\n",
" <td></td>\n",
" <td>396.35亿</td>\n",
" <td>1276.34亿</td>\n",
" <td>785.87亿</td>\n",
" <td>396.35亿</td>\n",
" <td>395.68亿</td>\n",
" <td></td>\n",
" <td>1276.34亿</td>\n",
" <td>1276.34亿</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>395.68亿</td>\n",
" <td></td>\n",
" <td>1.94</td>\n",
" <td>1.94</td>\n",
" <td>-8.38亿</td>\n",
" <td>-8.38亿</td>\n",
" <td>387.97亿</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>110</th>\n",
" <td>1993-06-30</td>\n",
" <td></td>\n",
" <td>1.37亿</td>\n",
" <td>2.66亿</td>\n",
" <td>False</td>\n",
" <td>1.37亿</td>\n",
" <td>False</td>\n",
" <td></td>\n",
" <td>2.66亿</td>\n",
" <td>2.66亿</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>111</th>\n",
" <td>1992-12-31</td>\n",
" <td></td>\n",
" <td>1.72亿</td>\n",
" <td>4.76亿</td>\n",
" <td>False</td>\n",
" <td>1.72亿</td>\n",
" <td>False</td>\n",
" <td></td>\n",
" <td>4.76亿</td>\n",
" <td>4.76亿</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>112</th>\n",
" <td>1991-12-31</td>\n",
" <td></td>\n",
" <td>1.13亿</td>\n",
" <td>3.35亿</td>\n",
" <td>False</td>\n",
" <td>1.13亿</td>\n",
" <td>False</td>\n",
" <td></td>\n",
" <td>3.35亿</td>\n",
" <td>3.35亿</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>113</th>\n",
" <td>1990-12-31</td>\n",
" <td></td>\n",
" <td>7087.50万</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>7087.50万</td>\n",
" <td>False</td>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>114</th>\n",
" <td>1989-12-31</td>\n",
" <td></td>\n",
" <td>4302.00万</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>4302.00万</td>\n",
" <td>False</td>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>115 rows × 43 columns</p>\n",
"</div>"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 21
}
],
"metadata": {

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

25
财报筛选/loader.py Normal file
View File

@@ -0,0 +1,25 @@
import os.path
import pandas as pd
# Root directory of the local Tushare data; the loader functions in this
# module build their CSV paths by joining sub-paths onto it.
finance_root = "/Users/lanyuanxiaoyao/SynologyDrive/data/Tushare"
# Windows-style alternative root, kept disabled for reference.
# finance_root = "C:\\Users\\lanyuanxiaoyao\\Documents\\Tushare"
def load_balance_sheet():
    """Read the balance-sheet CSV into a DataFrame.

    The file is ``财务报表/资产负债表1990-2024.csv`` located under
    ``finance_root``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    csv_path = os.path.join(finance_root, "财务报表", "资产负债表1990-2024.csv")
    return pd.read_csv(csv_path)
def load_income():
    """Read the income-statement CSV into a DataFrame.

    The file is ``财务报表/利润表1990-2024.csv`` located under
    ``finance_root``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    csv_path = os.path.join(finance_root, "财务报表", "利润表1990-2024.csv")
    return pd.read_csv(csv_path)
def load_cashflow():
    """Read the cash-flow-statement CSV into a DataFrame.

    The file is ``财务报表/现金流量表1990-2024.csv`` located under
    ``finance_root``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    csv_path = os.path.join(finance_root, "财务报表", "现金流量表1990-2024.csv")
    return pd.read_csv(csv_path)
def load_finance():
    """Combine the three financial statements into a single DataFrame.

    The balance sheet is merged with the income statement, and that result
    is merged with the cash-flow statement. Both merges match rows on the
    ``ts_code`` / ``end_date`` column pair and use ``pd.merge``'s default
    join type.

    Returns:
        The merged DataFrame.
    """
    join_keys = ["ts_code", "end_date"]
    # Load order (balance sheet, income, cash flow) is kept as-is so the
    # left/right roles in each merge stay the same.
    balance_and_income = pd.merge(load_balance_sheet(), load_income(), on=join_keys)
    return pd.merge(balance_and_income, load_cashflow(), on=join_keys)

View File

@@ -0,0 +1,159 @@
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true
},
"source": [
"import pandas as pd\n",
"import tushare as ts\n",
"\n",
"ts_pro = ts.pro_api(token=\"64ebff4fa679167600b905ee45dd88e76f3963c0ff39157f3f085f0e\")"
],
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"sse_df = ts_pro.trade_cal(exchange=\"SSE\")[[\"cal_date\", \"is_open\"]]\n",
"szse_df = ts_pro.trade_cal(exchange=\"SZSE\")[[\"cal_date\", \"is_open\"]]\n",
"merge_df = pd.merge(sse_df, szse_df, on=\"cal_date\", how=\"left\")\n",
"merge_df = merge_df.fillna(value=0)\n",
"merge_df[\"is_open\"] = (merge_df[\"is_open_x\"] + merge_df[\"is_open_y\"]) > 0\n",
"trade_date_df = merge_df[[\"cal_date\", \"is_open\"]]\n",
"trade_date_df"
],
"id": "fe5b7f88c1739a13",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import os\n",
"import time\n",
"\n",
"for year in range(1990, 2025):\n",
" target_path = f\"../temp/stk_factor/{year}.csv\"\n",
" # 判断target_path文件是否存在\n",
" if os.path.exists(target_path):\n",
" continue\n",
" result_df = pd.DataFrame()\n",
" for trade_date in trade_date_df[(trade_date_df[\"cal_date\"].str.startswith(f\"{year}\")) & trade_date_df[\"is_open\"]][\n",
" \"cal_date\"].values:\n",
" time.sleep(1)\n",
" print(f\"{trade_date} downloading...\")\n",
" stk_df = ts_pro.stk_factor(trade_date=trade_date)\n",
" result_df = pd.concat([result_df, stk_df])\n",
" result_df.sort_values(by=[\"trade_date\", \"ts_code\"], ascending=True, inplace=True)\n",
" result_df.to_csv(f\"/Users/lanyuanxiaoyao/SynologyDrive/data/Tushare/日线行情/{year}.csv\", index=False)"
],
"id": "bde839364048fb89",
"outputs": [],
"execution_count": null
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-14T02:29:02.535840Z",
"start_time": "2025-01-14T02:28:19.578528Z"
}
},
"cell_type": "code",
"source": [
"root = \"/Users/lanyuanxiaoyao/SynologyDrive/data/Tushare/日线行情\"\n",
"\n",
"\n",
"def get_daily_df(start_year, end_year):\n",
" daily_df = pd.read_csv(f\"{root}/{start_year}.csv\")\n",
" for year in range(start_year + 1, end_year + 1):\n",
" daily_df = pd.concat([daily_df, pd.read_csv(f\"{root}/{year}.csv\")])\n",
" return daily_df"
],
"id": "d0701612c56add14",
"outputs": [],
"execution_count": 39
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": "daily_df = get_daily_df(2019, 2024)",
"id": "ffc7072a7c5a44f9"
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"\n",
"\n",
"def is_ascending(code, year):\n",
" daily_df = get_daily_df(year, year)\n",
" daily_df = daily_df[daily_df[\"ts_code\"] == code][[\"trade_date\", \"close_qfq\"]]\n",
" daily_df[\"trade_date\"] = pd.to_datetime(daily_df[\"trade_date\"], format=\"%Y%m%d\")\n",
" daily_df[\"trade_date_timestamp\"] = daily_df[\"trade_date\"].astype(np.int64)\n",
" daily_df.sort_values(by=\"trade_date\", ascending=True, inplace=True)\n",
"\n",
" first = daily_df[daily_df[\"trade_date_timestamp\"] == daily_df[\"trade_date_timestamp\"].min()][\"close_qfq\"].values[0]\n",
" last = daily_df[daily_df[\"trade_date_timestamp\"] == daily_df[\"trade_date_timestamp\"].max()][\"close_qfq\"].values[0]\n",
"\n",
" print(first, last)\n",
"\n",
" coefficients = np.polyfit(daily_df[\"trade_date_timestamp\"], daily_df[\"close_qfq\"], 1)\n",
"\n",
" print(coefficients)\n",
" fn = np.poly1d(coefficients)\n",
" daily_df[\"close_predict\"] = fn(daily_df[\"trade_date_timestamp\"])\n",
"\n",
" plt.figure(figsize=(25, 8))\n",
" plt.scatter(daily_df[\"trade_date\"], daily_df[\"close_qfq\"])\n",
" plt.plot(daily_df[\"trade_date\"], daily_df[\"close_predict\"], color=\"orange\")\n",
"\n",
" plt.gcf().autofmt_xdate()\n",
" plt.show()\n",
"\n",
" return bool((coefficients[0] > 0) and (last > first))\n",
"\n",
"\n",
"print(is_ascending(\"000001.SZ\", 2020))\n",
"print(is_ascending(\"000001.SZ\", 2021))\n",
"print(is_ascending(\"000001.SZ\", 2022))\n",
"print(is_ascending(\"000001.SZ\", 2023))\n",
"print(is_ascending(\"000001.SZ\", 2024))"
],
"id": "e58494f608dcb055",
"outputs": [],
"execution_count": null
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,113 @@
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-01-16T07:57:04.498115Z",
"start_time": "2025-01-16T07:57:00.401734Z"
}
},
"source": [
"import pandas as pd\n",
"import loader as ld\n",
"\n",
"source_finance_df = ld.load_finance()\n",
"finance_df = pd.DataFrame()\n",
"finance_df[[\n",
" \"code\",\n",
" # 年份\n",
" \"year\",\n",
" # 股东权益合计(含少数股东权益)\n",
" \"total_stockholder_interest\",\n",
" # 净利润\n",
" \"net_income\",\n",
" # 总资产\n",
" \"total_assets\",\n",
" # 营业总收入\n",
" \"total_revenue\",\n",
" # 存货\n",
" \"inventories\",\n",
" # 应收账款\n",
" \"accounts_receivable\",\n",
" # 营业成本\n",
" \"operating_costs\",\n",
" # 营业利润\n",
" \"operating_profit\",\n",
" # 现金与现金等价物\n",
" \"cash\",\n",
" # 营业活动现金流量净值\n",
" \"operating_net_cash_flow\",\n",
"]] = source_finance_df[[\n",
" \"ts_code\",\n",
" \"end_date\",\n",
" \"total_hldr_eqy_inc_min_int\",\n",
" \"n_income\",\n",
" \"total_assets\",\n",
" \"total_revenue\",\n",
" \"inventories\",\n",
" \"accounts_receiv\",\n",
" \"oper_cost\",\n",
" \"operate_profit\",\n",
" \"money_cap\",\n",
" \"n_cashflow_act\",\n",
"]]"
],
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-16T07:57:04.552251Z",
"start_time": "2025-01-16T07:57:04.499999Z"
}
},
"cell_type": "code",
"source": [
"from strategy import PeriodSelector\n",
"\n",
"filter_df = finance_df[(2010 < finance_df[\"year\"]) & (finance_df[\"year\"] < 2025)]\n",
"codes = filter_df[\"code\"].unique().tolist()\n",
"print(len(codes))\n",
"codes = PeriodSelector().select(codes, filter_df)\n",
"print(len(codes))"
],
"id": "26539ccd8ce0fb9e",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5715\n",
"4892\n"
]
}
],
"execution_count": 2
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff