diff --git a/selector.py b/selector.py new file mode 100644 index 0000000..52fa701 --- /dev/null +++ b/selector.py @@ -0,0 +1,15 @@ +import pandas as pd + + +class Selector: + def select(self, codes: [str], df: pd.DataFrame) -> [str]: + return codes + + +class PeriodSelector(Selector): + def __init__(self, period: int = 5): + self.__period = period + + def select(self, codes: [str], df: pd.DataFrame) -> [str]: + size_df = df.groupby("code").size() + return list(filter(lambda code: size_df[code] > self.__period, codes)) diff --git a/财报筛选/loader.py b/财报筛选/loader.py index 59b78c1..1853cf5 100644 --- a/财报筛选/loader.py +++ b/财报筛选/loader.py @@ -1,18 +1,20 @@ +import os.path + import pandas as pd -finance_root = "C:\\Users\\lanyuanxiaoyao\\Documents\\Tushare" +finance_root = "/Users/lanyuanxiaoyao/SynologyDrive/data/Tushare" def load_balance_sheet(): - return pd.read_csv(f"{finance_root}\\财务报表\\资产负债表1990-2024.csv") + return pd.read_csv(os.path.join(finance_root, "财务报表", "资产负债表1990-2024.csv")) def load_income(): - return pd.read_csv(f"{finance_root}\\财务报表\\利润表1990-2024.csv") + return pd.read_csv(os.path.join(finance_root, "财务报表", "利润表1990-2024.csv")) def load_cashflow(): - return pd.read_csv(f"{finance_root}\\财务报表\\现金流量表1990-2024.csv") + return pd.read_csv(os.path.join(finance_root, "财务报表", "现金流量表1990-2024.csv")) def load_finance(): diff --git a/财报筛选/选股回测.ipynb b/财报筛选/选股回测.ipynb index 4404b24..ff5ce56 100644 --- a/财报筛选/选股回测.ipynb +++ b/财报筛选/选股回测.ipynb @@ -102,12 +102,12 @@ " daily_df[\"trade_date\"] = pd.to_datetime(daily_df[\"trade_date\"], format=\"%Y%m%d\")\n", " daily_df[\"trade_date_timestamp\"] = daily_df[\"trade_date\"].astype(np.int64)\n", " daily_df.sort_values(by=\"trade_date\", ascending=True, inplace=True)\n", - " \n", - " first = daily_df[daily_df[\"trade_date_timestamp\"] == daily_df[\"trade_date_timestamp\"].min()][\"close_qfq\"].values[0]\n", - " last = daily_df[daily_df[\"trade_date_timestamp\"] == daily_df[\"trade_date_timestamp\"].max()][\"close_qfq\"].values[0]\n", - " \n", + "\n", + " first = daily_df[daily_df[\"trade_date_timestamp\"] == daily_df[\"trade_date_timestamp\"].min()][\"close_qfq\"].values[0]\n", + " last = daily_df[daily_df[\"trade_date_timestamp\"] == daily_df[\"trade_date_timestamp\"].max()][\"close_qfq\"].values[0]\n", + "\n", " print(first, last)\n", - " \n", + "\n", " coefficients = np.polyfit(daily_df[\"trade_date_timestamp\"], daily_df[\"close_qfq\"], 1)\n", "\n", " print(coefficients)\n", diff --git a/财报筛选/选股测试.ipynb b/财报筛选/选股测试.ipynb new file mode 100644 index 0000000..a0a5149 --- /dev/null +++ b/财报筛选/选股测试.ipynb @@ -0,0 +1,113 @@ +{ + "cells": [ + { + "cell_type": "code", + "id": "initial_id", + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2025-01-15T09:42:10.585456Z", + "start_time": "2025-01-15T09:42:06.693869Z" + } + }, + "source": [ + "import pandas as pd\n", + "import loader as ld\n", + "\n", + "source_finance_df = ld.load_finance()\n", + "finance_df = pd.DataFrame()\n", + "finance_df[[\n", + " \"code\",\n", + " # 年份\n", + " \"year\",\n", + " # 股东权益合计(含少数股东权益)\n", + " \"total_stockholder_interest\",\n", + " # 净利润\n", + " \"net_income\",\n", + " # 总资产\n", + " \"total_assets\",\n", + " # 营业总收入\n", + " \"total_revenue\",\n", + " # 存货\n", + " \"inventories\",\n", + " # 应收账款\n", + " \"accounts_receivable\",\n", + " # 营业成本\n", + " \"operating_costs\",\n", + " # 营业利润\n", + " \"operating_profit\",\n", + " # 现金与现金等价物\n", + " \"cash\",\n", + " # 营业活动现金流量净值\n", + " \"operating_net_cash_flow\",\n", + "]] = source_finance_df[[\n", + " \"ts_code\",\n", + " \"end_date\",\n", + " \"total_hldr_eqy_inc_min_int\",\n", + " \"n_income\",\n", + " \"total_assets\",\n", + " \"total_revenue\",\n", + " \"inventories\",\n", + " \"accounts_receiv\",\n", + " \"oper_cost\",\n", + " \"operate_profit\",\n", + " \"money_cap\",\n", + " \"n_cashflow_act\",\n", + "]]" + ], + "outputs": [], + "execution_count": 17 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-15T09:45:06.335656Z", + "start_time": "2025-01-15T09:45:06.291888Z" + } + }, + "cell_type": "code", + "source": [ + "from selector import PeriodSelector\n", + "\n", + "filter_df = finance_df[(2010 < finance_df[\"year\"]) & (finance_df[\"year\"] < 2025)]\n", + "codes = filter_df[\"code\"].unique().tolist()\n", + "print(len(codes))\n", + "codes = PeriodSelector().select(codes, filter_df)\n", + "print(len(codes))" + ], + "id": "26539ccd8ce0fb9e", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5715\n", + "4892\n" + ] + } + ], + "execution_count": 24 + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}