# NOTE(review): the setup cell passes a literal API token string to
# ts.pro_api(). Load it from an environment variable (os.environ) instead and
# revoke the committed token.

# Columns requested from the balance-sheet endpoint.
BALANCE_SHEET_FIELDS = [
    "ts_code", "end_date", "total_assets", "total_hldr_eqy_inc_min_int",
    "money_cap", "accounts_receiv", "accounts_receiv_bill",
    "oth_rcv_total", "inventories",
]

# Columns requested from the income-statement endpoint.
INCOME_FIELDS = [
    "ts_code", "end_date", "n_income", "revenue", "total_revenue",
    "oper_cost", "operate_profit", "total_cogs",
]


def _fetch_annual_reports(fetch, fields, start_year, end_year):
    """Call ``fetch`` once per year-end period and stack the results.

    Args:
        fetch: Callable accepting ``period`` and ``fields`` keyword arguments
            and returning a DataFrame (e.g. ``ts_pro.income_vip``).
        fields: Column names to request; joined with commas for the call.
        start_year: First year to request (inclusive).
        end_year: Last year to request (inclusive).

    Returns:
        One DataFrame with a fresh RangeIndex whose leading columns are
        ``fields`` in order. Empty (with those columns) when nothing came back.
    """
    field_arg = ",".join(fields)
    frames = []
    for year in range(start_year, end_year + 1):
        frame = fetch(period=f"{year}1231", fields=field_arg)
        # Empty responses are skipped: concatenating empty frames is what
        # triggered the pandas FutureWarning about dtype inference.
        if frame is not None and not frame.empty:
            frames.append(frame)

    if not frames:
        return pd.DataFrame(columns=fields)

    # Concatenate once instead of growing a frame inside the loop.
    combined = pd.concat(frames, ignore_index=True)
    # Keep the requested columns first (and present), then any extras.
    extras = [name for name in combined.columns if name not in fields]
    return combined.reindex(columns=list(fields) + extras)


def get_blancesheet_df(start_year, end_year):
    """Fetch balance-sheet rows for every ``<year>1231`` period in the range.

    Uses the module-level ``ts_pro`` client's ``balancesheet_vip`` endpoint.
    Both year bounds are inclusive.
    """
    return _fetch_annual_reports(
        ts_pro.balancesheet_vip, BALANCE_SHEET_FIELDS, start_year, end_year
    )


def get_income_df(start_year, end_year):
    """Fetch income-statement rows for every ``<year>1231`` period in the range.

    Uses the module-level ``ts_pro`` client's ``income_vip`` endpoint.
    Both year bounds are inclusive.
    """
    return _fetch_annual_reports(
        ts_pro.income_vip, INCOME_FIELDS, start_year, end_year
    )


def clean_df(df):
    """Return a de-duplicated copy of ``df`` with ``end_date`` cut to 4 chars.

    Rows are de-duplicated on (``ts_code``, ``end_date``), keeping the first
    occurrence; ``end_date`` is then replaced by its first four characters.
    The input frame is not modified.
    """
    deduped = df.drop_duplicates(subset=["ts_code", "end_date"])
    # .assign builds a new frame, avoiding a write into the drop_duplicates
    # result (which can raise SettingWithCopyWarning).
    return deduped.assign(end_date=deduped["end_date"].str[:4])


# In a notebook kernel __name__ is "__main__", so this still runs as a cell;
# the guard only keeps the definitions importable without calling the API.
if __name__ == "__main__":
    blancesheet_df = clean_df(get_blancesheet_df(2018, 2024))
    income_df = clean_df(get_income_df(2018, 2024))
def _previous_by_stock(df, column):
    """Return ``column`` from each stock's preceding ``end_date`` row.

    The result is a NumPy array in the same row order as ``df``; a stock's
    earliest row gets NaN. Rows are ordered by ``end_date`` within each
    ``ts_code`` here, so the caller's row order does not matter. Works
    positionally, so ``df`` may carry any index (including duplicates).
    """
    positional = df.reset_index(drop=True)
    ordered = positional.sort_values("end_date", kind="stable")
    previous = ordered.groupby("ts_code", sort=False)[column].shift(1)
    # sort_index() puts the shifted values back into df's original row order.
    return previous.sort_index().to_numpy()


def _average_balance(df, column):
    """Mean of ``column`` and its previous-period value for the same stock."""
    return (_previous_by_stock(df, column) + df[column]) / 2


def cal_roe(df):
    """Add ``roe`` to ``df`` in place.

    ``roe`` = ``n_income`` / average of the current and previous-period
    ``total_hldr_eqy_inc_min_int``, rounded to 4 decimals. The first row of
    every ``ts_code`` has no previous period and therefore gets NaN.
    """
    # Group the frame that was passed in, not a notebook-global frame.
    average_equity = _average_balance(df, "total_hldr_eqy_inc_min_int")
    df["roe"] = (df["n_income"] / average_equity).round(4)


def cal_roa(df):
    """Add ``roa`` to ``df`` in place.

    ``roa`` = ``n_income`` / average of the current and previous-period
    ``total_assets``, rounded to 4 decimals.
    """
    average_assets = _average_balance(df, "total_assets")
    df["roa"] = (df["n_income"] / average_assets).round(4)


# Total asset turnover
def cal_total_assets_turnover(df):
    """Add ``total_assets_turnover`` to ``df`` in place.

    ``total_revenue`` / average of the current and previous-period
    ``total_assets``, rounded to 2 decimals.
    """
    average_assets = _average_balance(df, "total_assets")
    df["total_assets_turnover"] = (df["total_revenue"] / average_assets).round(2)


# Cash ratio
def cal_cash_ratio(df):
    """Add ``cash_ratio`` (``money_cap`` / ``total_assets``, 4 decimals) in place."""
    df["cash_ratio"] = (df["money_cap"] / df["total_assets"]).round(4)


# Gross margin
def cal_gross_profit(df):
    """Add ``gross_profit`` in place.

    (``total_revenue`` - ``oper_cost``) / ``total_revenue``, 4 decimals.
    """
    revenue = df["total_revenue"]
    df["gross_profit"] = ((revenue - df["oper_cost"]) / revenue).round(4)


# Operating margin
def cal_operating_profit(df):
    """Add ``operating_profit`` (``operate_profit`` / ``total_revenue``, 4 decimals) in place."""
    df["operating_profit"] = (df["operate_profit"] / df["total_revenue"]).round(4)


# Operating safety margin
def cal_operating_safety_margin(df):
    """Add ``operating_safety_margin`` in place.

    ``operating_profit`` / ``gross_profit``, rounded to 4 decimals. If either
    input column is missing it is computed first (and left on ``df``), so this
    no longer depends on the caller having run the other functions beforehand.
    """
    if "gross_profit" not in df.columns:
        cal_gross_profit(df)
    if "operating_profit" not in df.columns:
        cal_operating_profit(df)
    ratio = df["operating_profit"] / df["gross_profit"]
    df["operating_safety_margin"] = ratio.round(4)


# Receivables collection days
def cal_collection_cash_period(df):
    """Add ``collection_cash_period`` in place.

    360 / (``total_revenue`` / ``accounts_receiv``), rounded to 2 decimals.
    """
    receivable_turns = df["total_revenue"] / df["accounts_receiv"]
    df["collection_cash_period"] = (360 / receivable_turns).round(2)


# Inventory days
def cal_sales_period(df):
    """Add ``sales_period`` in place.

    360 / (``oper_cost`` / ``inventories``), rounded to 2 decimals.
    """
    inventory_turns = df["oper_cost"] / df["inventories"]
    df["sales_period"] = (360 / inventory_turns).round(2)


def build_indicator_table(balance_df, income_df):
    """Merge the two statements and compute every indicator defined above.

    Args:
        balance_df: Balance-sheet frame keyed by ``ts_code`` / ``end_date``.
        income_df: Income-statement frame with the same key columns.

    Returns:
        A new frame: the inner join of both inputs plus one column per
        indicator. Neither input is modified.

    Raises:
        pandas.errors.MergeError: if either input repeats a
            (``ts_code``, ``end_date``) key.
    """
    table = pd.merge(
        balance_df,
        income_df,
        on=["ts_code", "end_date"],
        validate="one_to_one",
    )
    steps = (
        cal_roe,
        cal_roa,
        cal_total_assets_turnover,
        cal_cash_ratio,
        cal_gross_profit,
        cal_operating_profit,
        cal_operating_safety_margin,
        cal_collection_cash_period,
        cal_sales_period,
    )
    for compute in steps:
        compute(table)
    return table


# In a notebook kernel __name__ is "__main__", so this still runs as a cell;
# the guard only keeps the definitions importable on their own.
if __name__ == "__main__":
    stock_df = build_indicator_table(blancesheet_df, income_df)
    # display() is provided as a builtin by the IPython kernel.
    display(
        stock_df[
            ["ts_code", "end_date", "collection_cash_period", "sales_period", "oper_cost", "total_cogs"]
        ]
    )
137.55 7.666165e+08 \n", "3 688739.SH 2018 124.23 314.40 2.063751e+08 \n", "4 688257.SH 2018 101.39 148.16 4.092993e+08 \n", "... ... ... ... ... ... \n", "31118 873896.BJ 2023 40.83 1.20 2.687648e+08 \n", "31119 873911.BJ 2023 41.27 377.77 1.362851e+08 \n", "31120 873755.BJ 2023 63.15 155.20 1.354382e+08 \n", "31121 603091.SH 2023 36.89 93.67 9.023004e+08 \n", "31122 301622.SZ 2023 134.91 116.69 7.020213e+08 \n", "\n", " total_cogs \n", "0 3.352792e+08 \n", "1 3.321345e+08 \n", "2 1.110642e+09 \n", "3 6.802215e+08 \n", "4 5.354176e+08 \n", "... ... \n", "31118 3.891025e+08 \n", "31119 2.304407e+08 \n", "31120 1.716143e+08 \n", "31121 1.057946e+09 \n", "31122 8.257937e+08 \n", "\n", "[31123 rows x 6 columns]" ], "text/html": [ "
| \n", " | ts_code | \n", "end_date | \n", "collection_cash_period | \n", "sales_period | \n", "oper_cost | \n", "total_cogs | \n", "
|---|---|---|---|---|---|---|
| 0 | \n", "301093.SZ | \n", "2018 | \n", "93.76 | \n", "137.78 | \n", "2.160737e+08 | \n", "3.352792e+08 | \n", "
| 1 | \n", "301092.SZ | \n", "2018 | \n", "67.58 | \n", "141.15 | \n", "2.604383e+08 | \n", "3.321345e+08 | \n", "
| 2 | \n", "688280.SH | \n", "2018 | \n", "212.32 | \n", "137.55 | \n", "7.666165e+08 | \n", "1.110642e+09 | \n", "
| 3 | \n", "688739.SH | \n", "2018 | \n", "124.23 | \n", "314.40 | \n", "2.063751e+08 | \n", "6.802215e+08 | \n", "
| 4 | \n", "688257.SH | \n", "2018 | \n", "101.39 | \n", "148.16 | \n", "4.092993e+08 | \n", "5.354176e+08 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 31118 | \n", "873896.BJ | \n", "2023 | \n", "40.83 | \n", "1.20 | \n", "2.687648e+08 | \n", "3.891025e+08 | \n", "
| 31119 | \n", "873911.BJ | \n", "2023 | \n", "41.27 | \n", "377.77 | \n", "1.362851e+08 | \n", "2.304407e+08 | \n", "
| 31120 | \n", "873755.BJ | \n", "2023 | \n", "63.15 | \n", "155.20 | \n", "1.354382e+08 | \n", "1.716143e+08 | \n", "
| 31121 | \n", "603091.SH | \n", "2023 | \n", "36.89 | \n", "93.67 | \n", "9.023004e+08 | \n", "1.057946e+09 | \n", "
| 31122 | \n", "301622.SZ | \n", "2023 | \n", "134.91 | \n", "116.69 | \n", "7.020213e+08 | \n", "8.257937e+08 | \n", "
31123 rows × 6 columns
\n", "