# NOTE(review): the setup cell passes a literal TuShare token to ts.pro_api().
# Treat that token as leaked: rotate it and load the replacement from an
# environment variable or a secrets store instead of committing it.


def _fetch_annual_reports(fetch, fields, start_year, end_year):
    """Fetch the ``<year>1231`` report for every year in the range and stack them.

    Args:
        fetch: Callable accepting ``period`` and ``fields`` keyword arguments and
            returning a DataFrame (for example ``ts_pro.income_vip``).
        fields: List of column names to request.
        start_year: First year of the range, inclusive.
        end_year: Last year of the range, inclusive.

    Returns:
        A single DataFrame with a fresh RangeIndex. The columns listed in
        ``fields`` come first, in the given order (filled with NaN when a
        response did not contain them); any other returned columns follow.
        When no period yields rows, an empty DataFrame with ``fields`` as its
        columns is returned.
    """
    field_arg = ",".join(fields)
    frames = []
    for year in range(start_year, end_year + 1):
        frame = fetch(period=f"{year}1231", fields=field_arg)
        # Skip empty responses: concatenating empty frames is deprecated in
        # pandas and was the source of the FutureWarning this cell used to emit.
        if frame is not None and not frame.empty:
            frames.append(frame)

    if not frames:
        return pd.DataFrame(columns=fields)

    # Concatenate once instead of growing the frame inside the loop.
    combined = pd.concat(frames, ignore_index=True)
    extra = [col for col in combined.columns if col not in fields]
    return combined.reindex(columns=[*fields, *extra])


def get_blancesheet_df(start_year, end_year, client=None):
    """Download year-end balance-sheet rows for ``start_year``..``end_year``.

    Args:
        start_year: First year, inclusive.
        end_year: Last year, inclusive.
        client: Object exposing ``balancesheet_vip(period=..., fields=...)``.
            Defaults to the notebook-level ``ts_pro`` client.

    Returns:
        DataFrame with the requested balance-sheet columns, one row per
        record returned by the API.
    """
    fields = ["ts_code", "end_date", "total_assets", "total_hldr_eqy_inc_min_int", "money_cap", "accounts_receiv",
              "inventories"]
    api = ts_pro if client is None else client
    return _fetch_annual_reports(api.balancesheet_vip, fields, start_year, end_year)


def get_income_df(start_year, end_year, client=None):
    """Download year-end income-statement rows for ``start_year``..``end_year``.

    Args:
        start_year: First year, inclusive.
        end_year: Last year, inclusive.
        client: Object exposing ``income_vip(period=..., fields=...)``.
            Defaults to the notebook-level ``ts_pro`` client.

    Returns:
        DataFrame with the requested income-statement columns, one row per
        record returned by the API.
    """
    fields = ["ts_code", "end_date", "n_income", "revenue", "total_revenue", "oper_cost", "operate_profit", "total_opcost"]
    api = ts_pro if client is None else client
    return _fetch_annual_reports(api.income_vip, fields, start_year, end_year)


def clean_df(df):
    """Drop repeated (ts_code, end_date) rows and shorten ``end_date`` to 4 characters.

    The first occurrence of each (ts_code, end_date) pair is kept. ``end_date``
    is expected to be a string column; its first four characters (the year in
    a ``YYYYMMDD`` value) replace the original value. The input frame is not
    modified.

    Args:
        df: DataFrame containing at least ``ts_code`` and ``end_date`` columns.

    Returns:
        A new DataFrame; surviving rows keep their original index labels.
    """
    deduped = df.drop_duplicates(subset=["ts_code", "end_date"])
    # assign() builds a new frame, avoiding chained-assignment warnings.
    return deduped.assign(end_date=deduped["end_date"].str[:4])
# Join balance-sheet and income rows that share the same ts_code and end_date
# (pd.merge defaults to an inner join, so unmatched rows are dropped).
stock_df = pd.merge(blancesheet_df, income_df, on=["ts_code", "end_date"])

# Spot-check a single company. Sort by end_date explicitly so that shift(1)
# below pairs each row with the preceding period instead of relying on the
# order in which the yearly downloads happened to be concatenated.
test_df = stock_df[stock_df["ts_code"] == "000672.SZ"].sort_values("end_date").copy()

# roe = n_income / mean(current and previous row's total_hldr_eqy_inc_min_int).
# The first row has no predecessor, so its roe is NaN.
# NOTE(review): if a year is missing for this company, shift(1) averages with a
# non-adjacent year — confirm the periods are contiguous before trusting roe.
equity = test_df["total_hldr_eqy_inc_min_int"]
test_df["roe"] = test_df["n_income"] / ((equity + equity.shift(1)) / 2)
test_df[["ts_code", "end_date", "roe"]]
0.105235\n", + "28907 000672.SZ 2023 0.074670" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ts_codeend_dateroe
3346000672.SZ2018NaN
8529000672.SZ20190.508885
12623000672.SZ20200.316230
17853000672.SZ20210.278297
22036000672.SZ20220.105235
28907000672.SZ20230.074670
\n", + "
" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 68 } ], "metadata": {