{
"cells": [
{
"metadata": {},
"cell_type": "raw",
"source": "课程地址:https://www.bilibili.com/video/BV18M4y1x7FC",
"id": "3513184200ddb334"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:43:42.452488Z",
"start_time": "2024-12-29T05:43:42.445497Z"
}
},
"cell_type": "code",
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"%matplotlib inline\n",
"%config InlineBackend.figure_format = 'svg'"
],
"id": "e4ed301b47929483",
"outputs": [],
"execution_count": 185
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:43:42.480654Z",
"start_time": "2024-12-29T05:43:42.463155Z"
}
},
"cell_type": "code",
"source": [
"# Load two columns of the price file, name them, parse the dates and use them as the index.\n",
"# pandas applies `usecols` and `names` before `index_col`, so the index can be referred to by its new name.\n",
"df = (\n",
"    pd.read_csv(\n",
"        '../document/SZ000001.csv',\n",
"        usecols=[1, 4],\n",
"        names=['date', 'price'],\n",
"        index_col='date',\n",
"        parse_dates=True,\n",
"        date_format='%Y-%m-%d',\n",
"    )\n",
"    # Partial-string slicing needs a monotonic DatetimeIndex, so sort before slicing.\n",
"    .sort_index()\n",
"    # Keep only the 2023-2024 rows.\n",
"    .loc['2023':'2024']\n",
")\n",
"df"
],
"id": "525040c36ae50cdc",
"outputs": [
{
"data": {
"text/plain": [
" price\n",
"date \n",
"2023-01-03 12.52\n",
"2023-01-04 13.07\n",
"2023-01-05 13.23\n",
"2023-01-06 13.37\n",
"2023-01-09 13.55\n",
"... ...\n",
"2024-12-23 11.73\n",
"2024-12-24 11.86\n",
"2024-12-25 11.92\n",
"2024-12-26 11.86\n",
"2024-12-27 11.82\n",
"\n",
"[482 rows x 1 columns]"
],
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" price | \n",
"
\n",
" \n",
" | date | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 2023-01-03 | \n",
" 12.52 | \n",
"
\n",
" \n",
" | 2023-01-04 | \n",
" 13.07 | \n",
"
\n",
" \n",
" | 2023-01-05 | \n",
" 13.23 | \n",
"
\n",
" \n",
" | 2023-01-06 | \n",
" 13.37 | \n",
"
\n",
" \n",
" | 2023-01-09 | \n",
" 13.55 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
"
\n",
" \n",
" | 2024-12-23 | \n",
" 11.73 | \n",
"
\n",
" \n",
" | 2024-12-24 | \n",
" 11.86 | \n",
"
\n",
" \n",
" | 2024-12-25 | \n",
" 11.92 | \n",
"
\n",
" \n",
" | 2024-12-26 | \n",
" 11.86 | \n",
"
\n",
" \n",
" | 2024-12-27 | \n",
" 11.82 | \n",
"
\n",
" \n",
"
\n",
"
482 rows × 1 columns
\n",
"
"
]
},
"execution_count": 186,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 186
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:43:42.605385Z",
"start_time": "2024-12-29T05:43:42.515740Z"
}
},
"cell_type": "code",
"source": [
"# DataFrame.plot draws one line per column; with several columns they share the same axes.\n",
"df.plot(subplots=False, figsize=(10, 6))"
],
"id": "41129f84b03175a6",
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 187,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
""
],
"image/svg+xml": "\n\n\n"
},
"metadata": {},
"output_type": "display_data"
}
],
"execution_count": 187
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:43:42.638722Z",
"start_time": "2024-12-29T05:43:42.629987Z"
}
},
"cell_type": "code",
"source": [
"# Summary statistics, rounded to two decimal places.\n",
"df.describe().round(decimals=2)"
],
"id": "6956cbc9873a8e7a",
"outputs": [
{
"data": {
"text/plain": [
" price\n",
"count 482.00\n",
"mean 10.45\n",
"std 1.27\n",
"min 8.07\n",
"25% 9.64\n",
"50% 10.25\n",
"75% 11.39\n",
"max 13.90"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" price | \n",
"
\n",
" \n",
" \n",
" \n",
" | count | \n",
" 482.00 | \n",
"
\n",
" \n",
" | mean | \n",
" 10.45 | \n",
"
\n",
" \n",
" | std | \n",
" 1.27 | \n",
"
\n",
" \n",
" | min | \n",
" 8.07 | \n",
"
\n",
" \n",
" | 25% | \n",
" 9.64 | \n",
"
\n",
" \n",
" | 50% | \n",
" 10.25 | \n",
"
\n",
" \n",
" | 75% | \n",
" 11.39 | \n",
"
\n",
" \n",
" | max | \n",
" 13.90 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 188,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 188
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:43:42.741755Z",
"start_time": "2024-12-29T05:43:42.734722Z"
}
},
"cell_type": "code",
"source": [
"# Compute several aggregate statistics at once; callables may be passed as well as names.\n",
"df.agg(['min', 'max', 'std', 'mean', 'median'])"
],
"id": "4fae9992ff3b233a",
"outputs": [
{
"data": {
"text/plain": [
" price\n",
"min 8.070000\n",
"max 13.900000\n",
"std 1.273458\n",
"mean 10.454066\n",
"median 10.250000"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" price | \n",
"
\n",
" \n",
" \n",
" \n",
" | min | \n",
" 8.070000 | \n",
"
\n",
" \n",
" | max | \n",
" 13.900000 | \n",
"
\n",
" \n",
" | std | \n",
" 1.273458 | \n",
"
\n",
" \n",
" | mean | \n",
" 10.454066 | \n",
"
\n",
" \n",
" | median | \n",
" 10.250000 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 189,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 189
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:43:42.811128Z",
"start_time": "2024-12-29T05:43:42.802866Z"
}
},
"cell_type": "code",
"source": [
"# Difference between each row and the previous one (day-over-day change).\n",
"# Pass e.g. periods=3 to compare with the value three rows earlier.\n",
"df.diff(periods=1)"
],
"id": "9a126039c26384bf",
"outputs": [
{
"data": {
"text/plain": [
" price\n",
"date \n",
"2023-01-03 NaN\n",
"2023-01-04 0.55\n",
"2023-01-05 0.16\n",
"2023-01-06 0.14\n",
"2023-01-09 0.18\n",
"... ...\n",
"2024-12-23 0.11\n",
"2024-12-24 0.13\n",
"2024-12-25 0.06\n",
"2024-12-26 -0.06\n",
"2024-12-27 -0.04\n",
"\n",
"[482 rows x 1 columns]"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" price | \n",
"
\n",
" \n",
" | date | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 2023-01-03 | \n",
" NaN | \n",
"
\n",
" \n",
" | 2023-01-04 | \n",
" 0.55 | \n",
"
\n",
" \n",
" | 2023-01-05 | \n",
" 0.16 | \n",
"
\n",
" \n",
" | 2023-01-06 | \n",
" 0.14 | \n",
"
\n",
" \n",
" | 2023-01-09 | \n",
" 0.18 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
"
\n",
" \n",
" | 2024-12-23 | \n",
" 0.11 | \n",
"
\n",
" \n",
" | 2024-12-24 | \n",
" 0.13 | \n",
"
\n",
" \n",
" | 2024-12-25 | \n",
" 0.06 | \n",
"
\n",
" \n",
" | 2024-12-26 | \n",
" -0.06 | \n",
"
\n",
" \n",
" | 2024-12-27 | \n",
" -0.04 | \n",
"
\n",
" \n",
"
\n",
"
482 rows × 1 columns
\n",
"
"
]
},
"execution_count": 190,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 190
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:43:42.933849Z",
"start_time": "2024-12-29T05:43:42.925610Z"
}
},
"cell_type": "code",
"source": [
"# Day-over-day growth rate, rounded to three decimal places.\n",
"df.pct_change().round(decimals=3)"
],
"id": "268f80de9fd5d9ef",
"outputs": [
{
"data": {
"text/plain": [
" price\n",
"date \n",
"2023-01-03 NaN\n",
"2023-01-04 0.044\n",
"2023-01-05 0.012\n",
"2023-01-06 0.011\n",
"2023-01-09 0.013\n",
"... ...\n",
"2024-12-23 0.009\n",
"2024-12-24 0.011\n",
"2024-12-25 0.005\n",
"2024-12-26 -0.005\n",
"2024-12-27 -0.003\n",
"\n",
"[482 rows x 1 columns]"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" price | \n",
"
\n",
" \n",
" | date | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 2023-01-03 | \n",
" NaN | \n",
"
\n",
" \n",
" | 2023-01-04 | \n",
" 0.044 | \n",
"
\n",
" \n",
" | 2023-01-05 | \n",
" 0.012 | \n",
"
\n",
" \n",
" | 2023-01-06 | \n",
" 0.011 | \n",
"
\n",
" \n",
" | 2023-01-09 | \n",
" 0.013 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
"
\n",
" \n",
" | 2024-12-23 | \n",
" 0.009 | \n",
"
\n",
" \n",
" | 2024-12-24 | \n",
" 0.011 | \n",
"
\n",
" \n",
" | 2024-12-25 | \n",
" 0.005 | \n",
"
\n",
" \n",
" | 2024-12-26 | \n",
" -0.005 | \n",
"
\n",
" \n",
" | 2024-12-27 | \n",
" -0.003 | \n",
"
\n",
" \n",
"
\n",
"
482 rows × 1 columns
\n",
"
"
]
},
"execution_count": 191,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 191
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:43:43.094945Z",
"start_time": "2024-12-29T05:43:43.061508Z"
}
},
"cell_type": "code",
"source": [
"# `kind` (or the matching .plot.<kind> accessor) selects the chart type.\n",
"df.pct_change().mean().plot.bar(figsize=(10, 6))"
],
"id": "58412eac5f71069c",
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 192,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
""
],
"image/svg+xml": "\n\n\n"
},
"metadata": {},
"output_type": "display_data"
}
],
"execution_count": 192
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:43:43.253586Z",
"start_time": "2024-12-29T05:43:43.176762Z"
}
},
"cell_type": "code",
"source": [
"# Cumulative growth via continuously compounded (log) returns:\n",
"#   1. df.div(df.shift(1)) divides each price by the previous row's price;\n",
"#   2. log returns are additive, so cumsum accumulates them over time;\n",
"#   3. np.exp converts the accumulated log return back to a growth factor.\n",
"df.div(df.shift(1)).apply(np.log).cumsum().apply(np.exp).plot(figsize=(10, 6))"
],
"id": "a3b0cb1549229355",
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 193,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
""
],
"image/svg+xml": "\n\n\n"
},
"metadata": {},
"output_type": "display_data"
}
],
"execution_count": 193
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:43:43.289315Z",
"start_time": "2024-12-29T05:43:43.280775Z"
}
},
"cell_type": "code",
"source": [
"# Resample the time series to month-end frequency and average each month.\n",
"# (resample's `label` argument, unused here, picks which bin edge labels each period.)\n",
"df.resample(rule='ME').mean()"
],
"id": "586fbb01abef8112",
"outputs": [
{
"data": {
"text/plain": [
" price\n",
"date \n",
"2023-01-31 13.515000\n",
"2023-02-28 12.755500\n",
"2023-03-31 11.854783\n",
"2023-04-30 11.318947\n",
"2023-05-31 11.214000\n",
"2023-06-30 10.500000\n",
"2023-07-31 10.502857\n",
"2023-08-31 10.733478\n",
"2023-09-30 10.305500\n",
"2023-10-31 9.781176\n",
"2023-11-30 9.291818\n",
"2023-12-31 8.334286\n",
"2024-01-31 8.316818\n",
"2024-02-29 9.127333\n",
"2024-03-31 9.482381\n",
"2024-04-30 9.546500\n",
"2024-05-31 10.195000\n",
"2024-06-30 9.887368\n",
"2024-07-31 9.958696\n",
"2024-08-31 9.918636\n",
"2024-09-30 9.980000\n",
"2024-10-31 11.807222\n",
"2024-11-30 11.521429\n",
"2024-12-31 11.659500"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" price | \n",
"
\n",
" \n",
" | date | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 2023-01-31 | \n",
" 13.515000 | \n",
"
\n",
" \n",
" | 2023-02-28 | \n",
" 12.755500 | \n",
"
\n",
" \n",
" | 2023-03-31 | \n",
" 11.854783 | \n",
"
\n",
" \n",
" | 2023-04-30 | \n",
" 11.318947 | \n",
"
\n",
" \n",
" | 2023-05-31 | \n",
" 11.214000 | \n",
"
\n",
" \n",
" | 2023-06-30 | \n",
" 10.500000 | \n",
"
\n",
" \n",
" | 2023-07-31 | \n",
" 10.502857 | \n",
"
\n",
" \n",
" | 2023-08-31 | \n",
" 10.733478 | \n",
"
\n",
" \n",
" | 2023-09-30 | \n",
" 10.305500 | \n",
"
\n",
" \n",
" | 2023-10-31 | \n",
" 9.781176 | \n",
"
\n",
" \n",
" | 2023-11-30 | \n",
" 9.291818 | \n",
"
\n",
" \n",
" | 2023-12-31 | \n",
" 8.334286 | \n",
"
\n",
" \n",
" | 2024-01-31 | \n",
" 8.316818 | \n",
"
\n",
" \n",
" | 2024-02-29 | \n",
" 9.127333 | \n",
"
\n",
" \n",
" | 2024-03-31 | \n",
" 9.482381 | \n",
"
\n",
" \n",
" | 2024-04-30 | \n",
" 9.546500 | \n",
"
\n",
" \n",
" | 2024-05-31 | \n",
" 10.195000 | \n",
"
\n",
" \n",
" | 2024-06-30 | \n",
" 9.887368 | \n",
"
\n",
" \n",
" | 2024-07-31 | \n",
" 9.958696 | \n",
"
\n",
" \n",
" | 2024-08-31 | \n",
" 9.918636 | \n",
"
\n",
" \n",
" | 2024-09-30 | \n",
" 9.980000 | \n",
"
\n",
" \n",
" | 2024-10-31 | \n",
" 11.807222 | \n",
"
\n",
" \n",
" | 2024-11-30 | \n",
" 11.521429 | \n",
"
\n",
" \n",
" | 2024-12-31 | \n",
" 11.659500 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 194,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 194
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:43:43.336423Z",
"start_time": "2024-12-29T05:43:43.330685Z"
}
},
"cell_type": "code",
"source": [
"# Rolling-window statistic: mean over the latest 5 rows, rounded to two decimals.\n",
"df.rolling(5).mean().round(decimals=2)"
],
"id": "37b699c8884de4f0",
"outputs": [
{
"data": {
"text/plain": [
" price\n",
"date \n",
"2023-01-03 NaN\n",
"2023-01-04 NaN\n",
"2023-01-05 NaN\n",
"2023-01-06 NaN\n",
"2023-01-09 13.15\n",
"... ...\n",
"2024-12-23 11.62\n",
"2024-12-24 11.69\n",
"2024-12-25 11.74\n",
"2024-12-26 11.80\n",
"2024-12-27 11.84\n",
"\n",
"[482 rows x 1 columns]"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" price | \n",
"
\n",
" \n",
" | date | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 2023-01-03 | \n",
" NaN | \n",
"
\n",
" \n",
" | 2023-01-04 | \n",
" NaN | \n",
"
\n",
" \n",
" | 2023-01-05 | \n",
" NaN | \n",
"
\n",
" \n",
" | 2023-01-06 | \n",
" NaN | \n",
"
\n",
" \n",
" | 2023-01-09 | \n",
" 13.15 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
"
\n",
" \n",
" | 2024-12-23 | \n",
" 11.62 | \n",
"
\n",
" \n",
" | 2024-12-24 | \n",
" 11.69 | \n",
"
\n",
" \n",
" | 2024-12-25 | \n",
" 11.74 | \n",
"
\n",
" \n",
" | 2024-12-26 | \n",
" 11.80 | \n",
"
\n",
" \n",
" | 2024-12-27 | \n",
" 11.84 | \n",
"
\n",
" \n",
"
\n",
"
482 rows × 1 columns
\n",
"
"
]
},
"execution_count": 195,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 195
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:43:43.482871Z",
"start_time": "2024-12-29T05:43:43.395745Z"
}
},
"cell_type": "code",
"source": [
"# Moving averages of the price over 10-row and 30-row windows, stored as m10 / m30.\n",
"for span in (10, 30):\n",
"    df[f'm{span}'] = df['price'].rolling(window=span).mean()\n",
"df.plot(figsize=(20, 10))"
],
"id": "e6395d0a50951195",
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 196,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
""
],
"image/svg+xml": "\n\n\n"
},
"metadata": {},
"output_type": "display_data"
}
],
"execution_count": 196
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:43:43.690226Z",
"start_time": "2024-12-29T05:43:43.562721Z"
}
},
"cell_type": "code",
"source": [
"# Crossover signal: +1 when the 10-row average is above the 30-row average, otherwise -1.\n",
"# Comparisons against NaN evaluate to False, so a bare np.where would report -1 during the\n",
"# warm-up rows where a moving average does not exist yet; leave those rows as NaN instead.\n",
"both_defined = df[['m10', 'm30']].notna().all(axis=1)\n",
"df['check'] = np.where(both_defined, np.where(df['m10'] > df['m30'], 1.0, -1.0), np.nan)\n",
"# Draw `check` against a secondary y-axis so its -1/+1 range does not flatten the price lines.\n",
"df.plot(figsize=(20, 10), secondary_y='check')"
],
"id": "d4729649db0e2d0d",
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 197,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
""
],
"image/svg+xml": "\n\n\n"
},
"metadata": {},
"output_type": "display_data"
}
],
"execution_count": 197
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:43:43.807150Z",
"start_time": "2024-12-29T05:43:43.696734Z"
}
},
"cell_type": "code",
"source": [
"# Columns go on the primary (left) axis unless listed in secondary_y.\n",
"df.loc[:, ['m10', 'm30']].plot(secondary_y='m30', figsize=(10, 6))"
],
"id": "3c6ee8f302e52d71",
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 198,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
""
],
"image/svg+xml": "\n\n\n"
},
"metadata": {},
"output_type": "display_data"
}
],
"execution_count": 198
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:48:28.066351Z",
"start_time": "2024-12-29T05:48:27.860390Z"
}
},
"cell_type": "code",
"source": [
"# Pairwise scatter plots of the two moving averages, with histograms on the diagonal.\n",
"df_test = df.loc[:, ['m10', 'm30']].copy()\n",
"pd.plotting.scatter_matrix(\n",
"    frame=df_test,\n",
"    figsize=(18, 10),\n",
"    diagonal='hist',\n",
"    hist_kwds={'bins': 100},\n",
"    alpha=0.5,\n",
")"
],
"id": "1ed94510b4e8746c",
"outputs": [
{
"data": {
"text/plain": [
"array([[,\n",
" ],\n",
" [,\n",
" ]], dtype=object)"
]
},
"execution_count": 206,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
""
],
"image/svg+xml": "\n\n\n"
},
"metadata": {},
"output_type": "display_data"
}
],
"execution_count": 206
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:52:08.565634Z",
"start_time": "2024-12-29T05:52:08.557923Z"
}
},
"cell_type": "code",
"source": [
"# Fit a polynomial regression of m30 on m10.\n",
"df_test.dropna(inplace=True)  # drop the rows where a moving average is still NaN\n",
"reg = np.polyfit(x=df_test['m10'], y=df_test['m30'], deg=3)  # deg: highest power of the polynomial\n",
"reg"
],
"id": "aae5696d251e43fd",
"outputs": [
{
"data": {
"text/plain": [
"array([ 0.01308501, -0.37629985, 4.52481977, -10.67969834])"
]
},
"execution_count": 215,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 215
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-29T05:52:10.430030Z",
"start_time": "2024-12-29T05:52:10.358538Z"
}
},
"cell_type": "code",
"source": "df_test.plot(kind='scatter', x='m10', y='m30', figsize=(18, 10)).plot(df_test['m10'], np.polyval(reg, df_test['m10']), color='red')",
"id": "489b794346056bb0",
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 216,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
""
],
"image/svg+xml": "\n\n\n"
},
"metadata": {},
"output_type": "display_data"
}
],
"execution_count": 216
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}