cumcm-2024-e/notebook/issues_1/cleaning.ipynb

520 lines
1.2 MiB
Plaintext
Raw Normal View History

2024-09-11 12:38:38 +08:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "initial_id",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-08T09:41:41.074231Z",
"start_time": "2024-09-08T09:41:41.044494Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"from typing import List, Dict, Union\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import klib as kl\n",
"import missingno as mns\n",
"import os\n",
"import warnings\n",
"\n",
"\n",
"os.environ['KERAS_BACKEND']='tensorflow'\n",
"os.environ[\"CUDA_VISIBLE_DE VICES\"]=\"0\"\n",
"os.environ[\"KMP_DUPLICATE_LIB_OK\"]=\"TRUE\"\n",
"warnings.filterwarnings('ignore')\n",
"plt.rcParams['font.sans-serif'] = ['Kaiti']\n",
"plt.rcParams['axes.unicode_minus'] = False\n",
"PIC_PATH = \"../../models/image/cleaning/\"\n",
"DATA_PATH = '../../data/data/'\n",
"RESULT_PATH = '../../data/summary/cleaning/'\n",
"MODEL_PATH = '../../models/cleaning/'"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "fce34e6f181696a1",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-08T09:41:41.104699Z",
"start_time": "2024-09-08T09:41:41.090180Z"
}
},
"outputs": [],
"source": [
"import pathlib2 as pl2\n",
"import shutil\n",
"\n",
"def creat_dir():\n",
" pic_path = pl2.Path(PIC_PATH)\n",
" if os.path.exists(PIC_PATH):\n",
" shutil.rmtree(PIC_PATH)\n",
" pic_path.mkdir(parents=True, exist_ok=True)\n",
" if not os.path.exists(RESULT_PATH):\n",
" os.mkdir(RESULT_PATH)\n",
" if not os.path.exists(MODEL_PATH):\n",
" os.mkdir(MODEL_PATH)\n",
"\n",
"creat_dir()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "47986cc296c6c67b",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-08T09:41:41.120399Z",
"start_time": "2024-09-08T09:41:41.105873Z"
}
},
"outputs": [],
"source": [
"figure_count = 0\n",
"\n",
"def create_figure(figure_name: str, dpi=800):\n",
" global figure_count\n",
" figure_count += 1\n",
" plt.savefig(PIC_PATH + f'/figure{figure_count}_{figure_name}.png', dpi=dpi)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5895b73198de6066",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-08T09:41:52.005356Z",
"start_time": "2024-09-08T09:41:41.121545Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>方向</th>\n",
" <th>时间</th>\n",
" <th>车牌号</th>\n",
" <th>交叉口</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3</td>\n",
" <td>2024-04-03T14:39:08.632</td>\n",
" <td>AF5B7CEM</td>\n",
" <td>环西路-纬中路</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>2024-04-03T17:45:32.316</td>\n",
" <td>BK2IA84</td>\n",
" <td>环西路-纬中路</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>2024-04-03T15:09:04.087</td>\n",
" <td>AF4EC7FK</td>\n",
" <td>环西路-纬中路</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2</td>\n",
" <td>2024-04-03T11:49:03.665</td>\n",
" <td>AF4MBB6</td>\n",
" <td>环西路-纬中路</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>3</td>\n",
" <td>2024-04-01T11:47:49.391</td>\n",
" <td>CBA7KCG</td>\n",
" <td>环西路-纬中路</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8844991</th>\n",
" <td>2</td>\n",
" <td>2024-05-06T19:03:30.000</td>\n",
" <td>无车牌</td>\n",
" <td>经中路-环南路</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8844992</th>\n",
" <td>1</td>\n",
" <td>2024-05-04T12:21:28.000</td>\n",
" <td>无车牌</td>\n",
" <td>经中路-环南路</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8844993</th>\n",
" <td>1</td>\n",
" <td>2024-05-06T16:55:47.000</td>\n",
" <td>无车牌</td>\n",
" <td>经中路-环南路</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8844994</th>\n",
" <td>2</td>\n",
" <td>2024-05-06T07:30:43.000</td>\n",
" <td>无车牌</td>\n",
" <td>经中路-环南路</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8844995</th>\n",
" <td>2</td>\n",
" <td>2024-05-06T14:12:02.000</td>\n",
" <td>无车牌</td>\n",
" <td>经中路-环南路</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>8844996 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" 方向 时间 车牌号 交叉口\n",
"0 3 2024-04-03T14:39:08.632 AF5B7CEM 环西路-纬中路\n",
"1 1 2024-04-03T17:45:32.316 BK2IA84 环西路-纬中路\n",
"2 3 2024-04-03T15:09:04.087 AF4EC7FK 环西路-纬中路\n",
"3 2 2024-04-03T11:49:03.665 AF4MBB6 环西路-纬中路\n",
"4 3 2024-04-01T11:47:49.391 CBA7KCG 环西路-纬中路\n",
"... .. ... ... ...\n",
"8844991 2 2024-05-06T19:03:30.000 无车牌 经中路-环南路\n",
"8844992 1 2024-05-04T12:21:28.000 无车牌 经中路-环南路\n",
"8844993 1 2024-05-06T16:55:47.000 无车牌 经中路-环南路\n",
"8844994 2 2024-05-06T07:30:43.000 无车牌 经中路-环南路\n",
"8844995 2 2024-05-06T14:12:02.000 无车牌 经中路-环南路\n",
"\n",
"[8844996 rows x 4 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.read_csv(DATA_PATH + '附件2.csv', encoding='gb2312')\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "47f20bd786c86d34",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-08T09:41:57.924391Z",
"start_time": "2024-09-08T09:41:52.007352Z"
}
},
"outputs": [],
"source": [
"# 索引设置为时间\n",
"data.index = data['时间']\n",
"# 按时间和车牌号排序\n",
"data.drop(['时间', '车牌号'], axis=1, inplace=True)\n",
"# 索引转换为时间格式\n",
"data.index = pd.to_datetime(data.index)\n",
"# 按方向进行分组\n",
"group = data.query('交叉口 == \"经中路-纬中路\"').groupby('方向')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "85a9a0106850c012",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-08T09:41:58.519749Z",
"start_time": "2024-09-08T09:41:57.926380Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"{3: 车流量\n",
" 时间 \n",
" 2024-04-01 00:00:00 52\n",
" 2024-04-01 01:00:00 47\n",
" 2024-04-01 02:00:00 30\n",
" 2024-04-01 03:00:00 24\n",
" 2024-04-01 04:00:00 29\n",
" ... ...\n",
" 2024-05-06 19:00:00 299\n",
" 2024-05-06 20:00:00 221\n",
" 2024-05-06 21:00:00 190\n",
" 2024-05-06 22:00:00 115\n",
" 2024-05-06 23:00:00 91\n",
" \n",
" [864 rows x 1 columns],\n",
" 1: 车流量\n",
" 时间 \n",
" 2024-04-01 00:00:00 114\n",
" 2024-04-01 01:00:00 76\n",
" 2024-04-01 02:00:00 33\n",
" 2024-04-01 03:00:00 14\n",
" 2024-04-01 04:00:00 23\n",
" ... ...\n",
" 2024-05-06 19:00:00 532\n",
" 2024-05-06 20:00:00 451\n",
" 2024-05-06 21:00:00 341\n",
" 2024-05-06 22:00:00 239\n",
" 2024-05-06 23:00:00 166\n",
" \n",
" [864 rows x 1 columns],\n",
" 2: 车流量\n",
" 时间 \n",
" 2024-04-18 16:00:00 412\n",
" 2024-04-18 17:00:00 277\n",
" 2024-04-18 18:00:00 431\n",
" 2024-04-18 19:00:00 344\n",
" 2024-04-18 20:00:00 394\n",
" ... ...\n",
" 2024-05-06 19:00:00 300\n",
" 2024-05-06 20:00:00 298\n",
" 2024-05-06 21:00:00 198\n",
" 2024-05-06 22:00:00 154\n",
" 2024-05-06 23:00:00 109\n",
" \n",
" [440 rows x 1 columns],\n",
" 4: 车流量\n",
" 时间 \n",
" 2024-04-01 00:00:00 118\n",
" 2024-04-01 01:00:00 78\n",
" 2024-04-01 02:00:00 40\n",
" 2024-04-01 03:00:00 47\n",
" 2024-04-01 04:00:00 82\n",
" ... ...\n",
" 2024-05-06 19:00:00 544\n",
" 2024-05-06 20:00:00 469\n",
" 2024-05-06 21:00:00 370\n",
" 2024-05-06 22:00:00 240\n",
" 2024-05-06 23:00:00 163\n",
" \n",
" [864 rows x 1 columns]}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"unique = data['方向'].unique()\n",
"dct = dict()\n",
"for i in unique:\n",
" dct[i] = group.get_group(i).resample('h').count().drop('方向', axis=1)\n",
" dct[i].columns = ['车流量']\n",
" dct[i].to_csv(RESULT_PATH + f'经中路-纬中路-{i}.csv')\n",
"dct"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "92b126e68be96650",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-08T09:41:59.430059Z",
"start_time": "2024-09-08T09:41:58.520947Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"{1: '由东向西', 2: '由西向东', 3: '由南向北', 4: '由北向南'}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ordinal = pd.read_excel(DATA_PATH + '附件1.xlsx', sheet_name=0, index_col=0)\n",
"ordinal = ordinal['方向']\n",
"ordinal = ordinal.to_dict()\n",
"ordinal"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "6accb909e375e84f",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-08T09:42:04.669398Z",
"start_time": "2024-09-08T09:41:59.433059Z"
}
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjUAAAN6CAYAAADGtcsXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXRkdZk//vfdaknSqexJp1d6b7qRTRaVRaAVhcnQSguyKOA2gI7jj9EZ0TMNzDDCaWRARRk3BoWvTreOAtIgCMywKLQ0S9MJ3UBDp5d0Z09Vttrv/f1xl6pKqm6qKpVU3dT7dU4OSa0XyCdVdZ/P87wFTdM0EBERERERERERERERlTix2AdARERERERERERERESUDRY1iIiIiIiIiIiIiIjIEVjUICIiIiIiIiIiIiIiR2BRg4iIiIiIiIiIiIiIHIFFDSIiIiIiIiIiIiIicgQWNYiIiIiIiIiIiIiIyBFY1CAiIiIiIiIiIiIiIkdgUYOIiIiIiIiIiIiIiBxBLvYBTEVVVRw5cgTz5s2DIAjFPhwiIiIiIiIiIiIiIiowTdMwMjKC1tZWiGLmfoySL2ocOXIEixYtKvZhEBERERERERERERHRDDt06BAWLlyY8fqSL2rMmzcPgP4vUl1dXeSjKQ3j48Cpp+rf//WvQEVFcY+HqNRwjRDZ4xohssc1QmSPa4TIHtcIkT2uESLKZHh4GIsWLbJqApmUfFHDHDlVXV3NooZBkoBDh/Tv580DKiuLezxEpYZrhMge1wiRPa4RIntcI0T2uEaI7HGNENFUpoqhYFA4ERERERERERERERE5AosaRERERERERERERETkCCxqEBERERERERERERGRI5R8pgYRERERERERERERUbZUVUUkEin2YdAEiqJAkqRpPw6LGkREREREREREREQ0J0QiEezfvx+qqhb7UCiNmpoatLS0TBkGbodFDQcSBODYYxPfE1EqrhEie1wjRPa4RojscY0Q2eMaIbLHNUIzSdM0HD16FJIkYdGiRRBFpi+UCk3TMD4+jt7eXgDA/Pnz834sQdM0rVAHNhOGh4fh8/kQCARQXV1d7MMhIiIiIiIiIiIiohIUjUaxb98+tLa2wufzFftwKI2BgQH09vZi1apVk0ZRZVsLYKmKiIiIiIiIiIiIiBwvHo8DAFwuV5GPhDKpqKgAoBeg8sWiBhERERERERERERHNGdPJa6CZVYj/NyxqOND4OLBunf41Pl7soyEqPVwjRPa4RojscY0Q2eMaIbLHNUJkj2uEaGbFYrGc76NpGko8pSIFixoOpGnAm2/qXw76XSOaNVwjRPa4RojscY0Q2eMaIbLHNUJkj2uEnCCuanjx3QE8/HoXXnx3AHF1dn9Z//KXv2Dt2rVWqHYu/vmf/xm33nprTvd54YUXsGbNGnR1deX8fKqqor+/P+f7TYc8q89GRERERERERERERFSi/th+FLf84U0cDYSsy+b7PLip7Vh8bP38WTmGU045BZ2dnZDl3E/fx2IxrF+/Pqf7bN++Heeeey5aW1uzvs8LL7yA5cuXo7m5GSeccAK2b9+O448/PtfDzQs7NYiIiIiIiIiIiIio7P2x/Siue/DVlIIGAHQHQrjuwVfxx/ajs3IciqKgqakJdXV1Wd+nvb0dW7duxTPPPIPvf//7+PCHP4xXXnllyvt1dXWhr68PP/rRj3LKu3j55Zdx9913QxRFtLa2orGxMev7Thc7NYiIiIiIiIiIiIhoztE0DcFoPKvbxlUNNz3SgXSDpjQAAoCbH3kTH1rRAEmc+uS/V5FyKhLs3LkTa9asgaIoeO+99yAIAu666y6Ew2H09PRg+/btuOWWW3DZZZelvX9zczP6+/uxcOFCPPTQQ3C73QgGg9i9ezeOO+64tPdRVRX33nsvtmzZgtdeew3t7e04ePAgjh49iieeeAI33HADrr/++rT3XbJkCSRJAgBUVlZaXR4PP/wwzjvvPFRVVWX9754rFjWIiIiIiIiIiIiIaM4JRuM4dvMTBXksDUD3cAjH3fxkVrd/81/PR4Ur+9PvP/zhDxEIBLBx40bMnz8fFRUVuO666+DxeADoHRXbt2/PWNQIh8Po7u5GLBbDN77xDaxduxajo6N4+OGHMW/ePDz++OPWbV955RVs2bIF69atg8fjwUMPPYSVK1fi/PPPR3NzMwDggQcewK233pq2qKGqKgYGBjBv3jwAsAoYO3fuxJe+9CXcc889+NSnPpX1v3uuWNQgIiIiIiIiIiIiIiqiM888Ez09PfjsZz8LAKiurrYKGgDg8Xhw7rnnptznnXfewWWXXYYrr7wSw8PDOPXUU/HLX/4SH/nIR/D9738fALBmzRps2bIl5X4nnXQSBgcHsXnzZuuyAwcO4O6778YZZ5yBCy+8EAsXLsRpp52Wcr/HHnsMP/jBD7B69WpUV1dj+fLlAABRFPGLX/wChw8fRkdHBxoaGgr3HyYNFjUcSBCAJUsS3xNRKq4RIntcI0T2uEaI7HGNENnjGiGyxzVCs8mrSHjzX8/P6rZ/3T+Iq//r5Slvd/81p+DUY6bOuvAqUlbPa1qxYgX27t2LJ598Eq+++io6OzuxYcMGLF++HD/+8Y8xPDyMZcuWpdxn5cqVWLx4Mb72ta/B7/fj97//Pbq7u3HMMcfgvvvuw+c+9zlUV1fjlFNOSbmfIAiora0FANx00004ePAgLrroImiahp/+9Ke48MILIQjCpEyPCy64ABdccAEA4P/+7/9w4MABAHo4+UUXXYSampqc/p3zxaKGA1VUAJ2dxT4KotLFNUJkj2uEyB7XCJE9rhEie1wjRPa4Rmg2CYKQ9QioM1c2Yr7Pg+5AKG2uhgCgxefBmSsbs8rUyNXixYvh9/uxevVq1NfX49FHH8VTTz1lXd/f348FCxZMup9ZeHj11Vexc+dOPPvss/jv//5vqwAyPj5ujYlK54orrsB9992HjRs3YmhoCGeffTYAIBQKoaKiIu19VFXFiy++iPnz5wMAampqJhU0BgYGUF9fn/1/gByIM/KoREREREREREREREQOIYkCbmo7FoBewEhm/nxT27EzUtAAgAULFmBkZARLlizBySefPClo++DBg2mLGvG4HoS+YsUKnHnmmRgaGsJdd92Fv//7v0d3dzf8fj9keXJhx+Vywe/3Y9WqVXj33XexefNmvPHGG1ahYnBwENXV1Sn3CQaD+K//+i/8f//f/4d58+ZZQeiqqqbcrre3Fy+++GL+/zGmwE4NIiIiIiIiIiIiIip7H1s/H/deeRJu+cObOBoIWZe3+Dy4qe1YfGz9/Bl7bkVREIlEcOedd+Lw4cOIRCK49tpr0d3dDZ/PB0mSUjI2TAMDA+ju7sZjjz2GE088EevWrcPPf/5z6/quri643e5J9xMEAYODg6ipqcHPfvYz+Hw+AEAkEgEAdHR0YPXq1dbt4/E4Nm7ciPPOOw/f+973cN9990FRFOuxXnrpJZx44okYHBzEHXfcgaVLlxbyP08KFjUcKBgEzjpL//655wCvt7jHQ1RquEaI7HGNENnjGiGyxzVCZI9rhMge1wiVuo+tn4+PHNuCv+4fRO9ICE3zPDj1mLoZ69BI9oUvfAFnn332pLFPmzdvxqZNm9LeR5IktLS04NprrwUAK6Q7FArhO9/5Dn7729/im9/85qT7NTQ0WJ0YZkEDAIaHh3HVVVfh2WefxcsvJzJGJEnCE088Yf18+PBhvP/97wcA/Pu//ztuuukm7N27F6qqYv78+bj00kvz+U+QFUHTtHQjwkrG8PAwfD4fAoHApHaXcjU2BpjdR6OjQGVlcY+HqNRwjRDZ4xohssc1QmSPa4TIHtcIkT2uEZpJoVAI+/fvxzHHHJO2q8Gp3n77bVRWVqYdP7Vv3z6sWLHC+vmPf/wjPvaxjwEAdu3ahbPPPhtvv/02mpqaUu43NjaGygwL8LHHHsODDz6IX/3qVxmP6a233kJtbe2kx52K3f+jbGsB7NQgIiIiIiIiIiIiIipRq1atynhdckEDgFXQAIDjjz8ee/bsSVt4yFTQAIALLrgAF1xwge0
"text/plain": [
"<Figure size 1600x900 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(16, 9))\n",
"\n",
"for i, style in zip(unique, ('-o', '-s', '-^', '-*')):\n",
" plt.plot(dct[i][dct[i].index < '2024-04-30'][-24 * 7: ], style, label=ordinal.get(i))\n",
"\n",
"line = dct[1][dct[1].index < '2024-04-30'][-24 * 7: ].index[::24]\n",
"for xc in line:\n",
" plt.axvline(x=xc, color='blue', linestyle='--')\n",
"\n",
"plt.legend()\n",
"plt.tight_layout()\n",
"create_figure('query_time')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "3b4fefbe42c44d8f",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-08T09:42:11.092698Z",
"start_time": "2024-09-08T09:42:04.670390Z"
}
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjUAAAN6CAYAAADGtcsXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeZwldXX//3fdvZfp7tk3hn0HlSWAQRFFFAUJ5CuJCn5FlCggMfmamGgWEGOQL0pEUYkb+kVjQP0ZiQ4IQRMSHNkRmWFRYYYZhlm7p7unt7tU1e+PWm7d27eXobtv1ef26/l49KNv32WmpmvOrVufU+ccy3VdVwAAAAAAAAAAAAmXinsDAAAAAAAAAAAApoOkBgAAAAAAAAAAMAJJDQAAAAAAAAAAYASSGgAAAAAAAAAAwAgkNQAAAAAAAAAAgBFIagAAAAAAAAAAACOQ1AAAAAAAAAAAAEYgqQEAAAAAAAAAAIyQiXsDZsJxHL300ktasGCBLMuKe3MAAAAAAAAAAMDL4Lqu9u7dq1WrVimVmrgew+ikxksvvaQ1a9bEvRkAAAAAAAAAAGAWbNmyRfvtt9+Ejxud1FiwYIEk7x/Z1dUV89bMvZER6eSTvdsPPSS1t8e7PUDSECPA1IgTYHLECDA14gSYHDECTI4YATCRwcFBrVmzJlz3n4jRSY2g5VRXV9e8SGqk09KWLd7tBQukjo54twdIGmIEmBpxAkyOGAGmRpwAkyNGgMkRIwCmMtWoCQaFAwAAAAAAAAAAI5DUAAAAAAAAAAAARiCpAQAAAAAAAAAAjGD0TA0AAAAAAAAAAKIcx1GpVIp7M1Anm80qnU7P+M8hqQEAAAAAAAAAaAmlUkkbN26U4zhxbwoa6Onp0YoVK6YcBj4ZkhoGsSzp6KOrtwHUIkaAqREnwOSIEWBqxAkwOWIEmBwxgrnkuq62bdumdDqtNWvWKJVi+kJSuK6rkZER7dy5U5K0cuXKl/1nWa7rurO1Yc02ODio7u5uDQwMqKurK+7NAQAAAAAAAADEpFwu63e/+51WrVql7u7uuDcHDfT29mrnzp06/PDDx7Wimu56P6kqAAAAAAAAAIDxbNuWJOVyuZi3BBNpb2+X5CWgXi6SGgAAAAAAAACAljGTeQ2YW7Oxb0hqGGRkRDrmGO9rZCTurQGShxgBpkacAJMjRoCpESfA5IgRYHLECDC3KpXKPr/GdV2ZNKWCpIZBXFd66invy6D/Y0DTECPA1IgTYHLECDA14gSYHDECTI4YgQlsx9Uvn+vVHb/aql8+1yvbae5/1nXr1umoo44Kh2rvi7/+67/Wpz71qX16zf33368jjzxSW7du3ee/z3Ec7d69e59fNxOZpv5tAAAAAAAAAAAk1E/Xb9M1P35K2wbGwvtWdhd09blH6y3HrmzKNpx00knatGmTMpl9X76vVCo69thj9+k1a9eu1RlnnKFVq1ZN+zX333+/DjnkEC1fvlzHHXec1q5dq1e96lX7urkvC5UaAAAAAAAAAIB576frt+ny7zxWk9CQpO0DY7r8O4/pp+u3NWU7stmsli1bpkWLFk37NevXr9ftt9+un//85/rCF76g17/+9Xr00UenfN3WrVu1a9cuffnLX96neRcPP/ywbrzxRqVSKa1atUpLly6d9mtnikoNAAAAAAAAAEDLcV1Xo2V7Ws+1HVdX//sGNWo05UqyJH3i35/Saw5donRq6sX/tmx6n5IEjzzyiI488khls1k9//zzsixLn/vc51QsFrVjxw6tXbtW11xzjd71rnc1fP3y5cu1e/du7bfffvrRj36kfD6v0dFRPfnkk3rFK17R8DWO4+jmm2/W9ddfr8cff1zr16/X5s2btW3bNt199936yEc+oiuuuKLhaw844ACl02lJUkdHR1jlcccdd+iNb3yjOjs7p/1v31ckNQAAAAAAAAAALWe0bOvoq+6elT/LlbR9cEyv+MQ903r+U588S+256S+/f+lLX9LAwIDOP/98rVy5Uu3t7br88stVKBQkeRUVa9eunTCpUSwWtX37dlUqFX30ox/VUUcdpaGhId1xxx1asGCB7rrrrvC5jz76qK6//nodc8wxKhQK+tGPfqTDDjtMZ511lpYvXy5J+va3v61PfepTDZMajuOot7dXCxYskKQwgfHII4/oAx/4gL74xS/qj/7oj6b9b99XJDUAAAAAAAAAAIjRaaedph07dug973mPJKmrqytMaEhSoVDQGWecUfOa3/72t3rXu96ld7/73RocHNTJJ5+sW2+9VW9605v0hS98QZJ05JFH6vrrr6953QknnKC+vj5dddVV4X0vvPCCbrzxRr32ta/VOeeco/3220+nnHJKzevuvPNO3XTTTTriiCPU1dWlQw45RJKUSqX0//7f/9OLL76oDRs2aMmSJbP3i2mApIZBLEs64IDqbQC1iBFgasQJMDliBJgacQJMjhgBJkeMoJnasmk99cmzpvXchzb26b3ffHjK533rkpN08kFTz7poy6an9fcGDj30UD3zzDO655579Nhjj2nTpk0688wzdcghh+grX/mKBgcHdfDBB9e85rDDDtP++++vP//zP1d/f7/+7d/+Tdu3b9dBBx2kW265Re973/vU1dWlk046qeZ1lmVp4cKFkqSrr75amzdv1nnnnSfXdfW1r31N55xzjizLGjfT4+yzz9bZZ58tSfqv//ovvfDCC5K84eTnnXeeenp69unf/HKR1DBIe7u0aVPcWwEkFzECTI04ASZHjABTI06AyREjwOSIETSTZVnTbgF12mFLtbK7oO0DYw3naliSVnQXdNphS6c1U2Nf7b///urv79cRRxyhxYsX6yc/+Ynuvffe8PHdu3dr9erV414XJB4ee+wxPfLII7rvvvt02223hQmQkZGRsE1UIxdddJFuueUWnX/++dqzZ49OP/10SdLY2Jja29sbvsZxHP3yl7/UypUrJUk9PT3jEhq9vb1avHjx9H8B+yA1J38qAAAAAAAAAACGSKcsXX3u0ZK8BEZU8PPV5x49JwkNSVq9erX27t2rAw44QCeeeOK4QdubN29umNSwbW8Q+qGHHqrTTjtNe/bs0ec+9zn96Z/+qbZv367+/n5lMuMTO7lcTv39/Tr88MP13HPP6aqrrtKvf/3rMFHR19enrq6umteMjo7qm9/8pv7P//k/WrBgQTgI3XGcmuft3LlTv/zlL1/+L2MKVGoAAAAAAAAAAOa9txy7Uje/+wRd8+OntG1gLLx/RXdBV597tN5y7Mo5+7uz2axKpZJuuOEGvfjiiyqVSrrsssu0fft2dXd3K51O18zYCPT29mr79u268847dfzxx+uYY47RN77xjfDxrVu3Kp/Pj3udZVnq6+tTT0+Pvv71r6u7u1uSVCqVJEkbNmzQEUccET7ftm2df/75euMb36jPf/7zuuWWW5TNZsM/64EHHtDxxx+vvr4+feYzn9GBBx44m7+eGiQ1DDI6Kr3udd7t//5vqa0t3u0BkoYYAaZGnACTI0aAqREnwOSIEWByxAiS7i3HrtSbjl6hhzb2aefeMS1bUNDJBy2aswqNqEsvvVSnn376uLZPV111lS644IKGr0mn01qxYoUuu+wySQqHdI+Njenaa6/VD37wA33sYx8b97olS5aElRhBQkOSBgcHdfHFF+u+++7Tww9XZ4yk02ndfffd4c8vvviifu/3fk+S9I//+I+6+uqr9cwzz8hxHK1cuVLveMc7Xs6vYFos13UbtQgzwuDgoLq7uzUwMDCuFKYVDQ9LQdXR0JDU0RHv9gBJQ4wAUyNOgMkRI8DUiBNgcsQIMDliBHNpbGxMGzdu1EEHHdSwqsFUv/nNb9TR0dGw/dTvfvc7HXrooeHPP/3pT/WWt7xFkvTEE0/o9NNP129+8xstW7as5nXDw8PqmCAA77zzTn3nO9/Rd7/73Qm36dlnn9XChQvH/blTmWwfTXe9n0oNAAAAAAAAAAAS6vDDD5/wsWhCQ1KY0JCkV73qVXr66acbJh4mSmhI0tlnn62zzz570m2KtqZqNgaFAwAAAAAAAADQgoLB362
"text/plain": [
"<Figure size 1600x900 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(16, 9))\n",
"\n",
"for i, style in zip(unique, ('-o', '-s', '-^', '-*')):\n",
" plt.plot(dct[i][dct[i].index < '2024-04-28'][-24 * 7: ].diff(), style, label=ordinal.get(i))\n",
" \n",
"line = dct[1][dct[1].index < '2024-04-28'][-24 * 7: ].index[::24]\n",
"for xc in line:\n",
" plt.axvline(x=xc, color='blue', linestyle='--')\n",
" \n",
"plt.legend()\n",
"plt.tight_layout()\n",
"create_figure('query_time_diff')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "1717ec72c635e36a",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-08T09:42:17.594621Z",
"start_time": "2024-09-08T09:42:11.094031Z"
}
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjYAAAN5CAYAAACrFgK6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3wUdf4/8Ndsz6b33iCBhN5BEERBUREU2+mp2PvpFUXF7+l5NvTOU+9+Z8NeEetZUZBepUkghEB672VTt8/vj9mZJJBQk53PZ/N+Ph4+Hgeb7A5zuzvz+bybIIqiCEIIIYQQQgghhBBCCCGEEA5o1D4AQgghhBBCCCGEEEIIIYSQk0WBDUIIIYQQQgghhBBCCCGEcIMCG4QQQgghhBBCCCGEEEII4QYFNgghhBBCCCGEEEIIIYQQwg0KbBBCCCGEEEIIIYQQQgghhBsU2CCEEEIIIYQQQgghhBBCCDcosEEIIYQQQgghhBBCCCGEEG5QYIMQQgghhBBCCCGEEEIIIdygwAYhhJyA3W5HWVnZGT3HsmXLUFlZedq/X1xcjM7OzjM6BkIIIYQQQoj6+mN9cab6Y33xwQcfYNeuXf10RIQQQsipocAGIYScgMvlwqhRo/Dkk0+e9nO89NJLWL58+Wn//nPPPYchQ4agvr7+tJ8DAGpqavDDDz/g+eefx5o1a87ouQghhBBCCCGnrj/WF6IooqCgACtWrMDTTz+N/Pz8U/r9/lhffPvtt1iyZMlp/z4hhBByJnRqHwAhhLDOz88PkydPht1uP63fX79+PQICArB06dLT+n23242ffvoJt9xyCyIiIk7pd9977z18//33KC8vR2NjI2prazF//nyMGTMG8fHxp3U8hBBCCCGEkNN3OuuLtrY2vPTSS9i1axfKy8vR3NwMh8OBiy66COPGjUNwcPBJP9eZrC9kFosF69evx5YtW07r9wkhhJAzRRUbhBByEiIiIhAdHX3Kv9fY2IinnnoKM2bMwL/+9S9ce+21yMjIQHR0NNauXXtSz/HWW2/h0ksvxTPPPHPKr3/TTTchLi4Od9xxB44cOYKxY8figgsuwMMPP4wRI0ac8vMRQgghhBBCztypri8CAgLw6KOPwmKx4J133kFeXh7q6+vx4IMP4p577kFkZORJP9eZrC8AoKOjA//4xz+QmpqK1atX4/bbb8fkyZMRGhqK559//rSekxBCCDlVVLFBCCEnwWAwICAgAHa7Hfn5+di9ezcKCwtxyy23ICkpqcfPvvHGG3jppZeg0Wig1Wrhcrlw/vnnIyoqCuPHj8djjz2G5ORk+Pv7n/B1KysrsW3bNrzzzjvK33V2dmLfvn3YsWMHTCYT7rzzTmg0fcepzz77bKxbtw633HILtFrtMcdLCCGEEEII8a5TWV/ItFotRo4cidzcXIwbNw46ne6Uq7BPd32xfft23HvvvWhvb0dYWBgOHDiAhx56CAaDAfPnz8fdd9+N5ORkhIeHn/rJIIQQQk6DIIqiqPZBEEIIS7Zs2YK3334bEydOhM1mQ0NDA77++mvEx8dj3LhxSE9PR2RkJJ566ink5eUhOzsbKSkpvT7XpZdeioceeghNTU0wGo04//zzT/j6DQ0N8PPzQ0dHB+68805cdNFFqKqqQkdHBxwOBzQaDZKTk2G32/H444/jsssuw4cfftjn823evBkrV67E7373O9xyyy24/vrrYbVa0dbWhtzcXEyYMAF/+9vfYDabT/eUEUIIIYQQQvrQn+uLp556ComJiQgLC8N9992HW265BR0dHWhqakJeXh6uu+463HbbbT1+p7/XFy+++CJsNhsuu+wybN26FTfffDO0Wm1/njJCCCHkhKhigxBCjjJhwgScd955+P3vf48ZM2bAbDajsrISF198Ma6++mrl57Zv3w6LxYLk5ORen+eHH35AZGQkZsyYgZUrV+KZZ545qcDG5s2bcfvtt+Mvf/kLrr/+egwfPhxpaWkwGAzH/Oy6deuOqfyoqanB/fffj7CwMISGhqK0tBQdHR2wWq1ITU3Fgw8+qPxOU1MTUlJScOjQIXz77bencpoIIYQQQgghJ+FM1he//PIL3n33XURERCAwMBAbN27EueeeC6PRiPnz52Pp0qXKOmHdunWYM2cOWlpa8Je//EV5jjNdX3RXWVmJzz//HBs3bkRTUxPuvvtu/O53v0NgYGB/nCpCCCHkpNGMDUIIOYrZbMbw4cMREhLSo4rBZDId87NTpkyBIAjH/H1BQQEee+wx/Otf/0JlZSWqqqrQ1taGO++8E9OmTUN8fDzefvvtXl//4osvRnh4OJYuXYpFixZhxIgRMBgM2L59O6666iosW7YMTqcTABAWFoYxY8b0+P3o6Gi0tbVh1KhRePbZZzFv3jxceOGFOP/88yEIQo9/R2hoKPR6Pa688srTOleEEEIIIYSQ4zuT9cWMGTOwevVq3HHHHXj66acxbNgwXH/99Rg7dix0Ol2P4ERUVBS0Wi0WLVrU4znPdH0h6+zsxLXXXot///vfaG9vR15eHsxmMx577DGcf/75SExMxO23335G54oQQgg5WRTYIISQXgwdOhRNTU3YvXs3Vq5ciZycHLzwwgsYN24cHn30UQBAS0sLhgwZ0uvv33TTTTh48CBmzpyJP/zhDzh06BAiIyNx/fXX44MPPsCuXbtw7bXX9vq7BoNBWfDs2bMHMTExePrpp1FRUYHY2Fj897//hcViAQAIgoCQkJBjnuPSSy9FRUUFAGDr1q0499xzAQBnnXUWkpOTcc4558DtdqO+vh4Wi4UCG4QQQgghhAyg011f+Pn54fzzz1fu7UtLS5Geno709HQUFRUhMTERS5YsAQDk5uZi6tSpSE1N7fEc/bG+AIBnn30WmzZtwg033IBrrrkGX331FcLDw7FgwQK8/PLL2LFjB/75z3/22zkjhBBCjodaURFCSC+SkpLQ3NwMi8WCkpIS+Pn54eqrr8ZVV12FqKgoAFKv2rFjx/b6+48//jgyMjKQmJgIADh06BCefvppzJw586Re39/fH+3t7Zg4cSJSUlJw2223ISYmBjabDeHh4cpQPpvN1utsjHHjxmHDhg1oaGhAWloa0tPTAQBPPPEEnnjiCeXntm/fjjFjxtB8DUIIIYQQQgbQmawvxo0bh+LiYmzevBl//vOfIQgC9Ho9vvvuux4/t337dkybNq3X1z/T9QUALFq0CFdccQXGjRun/N2GDRswZ86c0zklhBBCyBmhig1CCOlFYmIimpubcdVVV+Ghhx5CamoqRo4cqSw6AKCqqgoJCQm9/v55552HVatWKX92u91KefehQ4fw0EMP4cCBA32+fmhoKGprawEAF110EdasWYPly5fj7bffhp+fn/JzFosFAQEBx/z+iBEjUFRUBJPJhAcffBAAYLfbUVJSgl27diErKwsAsHr1akycOPFkTwshhBBCCCHkNJzJ+mLkyJEoKCjAhAkTcPHFFwOA0gpqx44dKC4uBnD8e/szXV8A0qyQHTt29Pg7u90OALBarfjTn/7UYw1ECCGEDCSq2CCEkF6kpqaiqKhI+bPT6URhYSFaWlpgsViwaNEi5OTkICUlpdffv/vuuxEZGan82eVyweVyAQAyMzNhMBgwfvx4fPfdd7jooouO+X2z2Yy6ujqkpqZiwYIF+OWXXzBr1iwsWrSox/NWVlYiLCzsmN8PCAiAxWLBggULEBgYCI1GA61WC7PZDKPRiAMHDuDCCy/EF198gZdffvk0zxIhhBBCCCHkZJzJ+mLUqFG49dZbsXfvXgQGBkIURRgMBvj5+UGr1WLbtm246667kJOTo7SgPdqZri8A4KOPPsJHH32Eu+66S/k7m80GQJoXMm/ePCxcuBAPPfQQnnnmmdM5TYQQQshJo8AGIYT0IjMzEy+++CLy8/PR3NyM4OBglJaWwmKxoLOzE5MnT0ZHRweGDRt2zO+++eab+Pbbb7F//37l7xoaGuB2u5U/P/3007Barfj00097DWzY7Xa0tbUBkDKjJkyYoDzW2dmJ6upqhISEIDc395geurKnnnoKV1xxRa+P2e1
"text/plain": [
"<Figure size 1600x900 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from statsmodels.tsa.seasonal import STL, seasonal_decompose\n",
"\n",
"fig = plt.figure(figsize=(16, 9))\n",
"\n",
"for i in unique:\n",
" sd = seasonal_decompose(dct[i], model='additive', period=7)\n",
" df = pd.DataFrame()\n",
" df['trend'] = sd.trend\n",
" df['seasonal'] = sd.seasonal\n",
" df['resid'] = sd.resid\n",
" df.to_csv(RESULT_PATH + f'seasonal_{i}.csv')\n",
" ax = fig.add_subplot(2, 2, i)\n",
" ax.plot(sd.trend[sd.trend.index < '2024-04-30'][-24 * 7:], label='趋势项')\n",
" ax.plot(sd.seasonal[sd.trend.index < '2024-04-30'][-24 * 7:], label='季节项')\n",
" ax.plot(sd.resid[sd.trend.index < '2024-04-30'][-24 * 7:], label='残差项')\n",
" plt.title(ordinal.get(i))\n",
" plt.xticks(rotation=45)\n",
" plt.legend(bbox_to_anchor=(1.05, 0.8), loc=3, borderaxespad=0)\n",
" plt.tight_layout()\n",
"plt.title('时序分解')\n",
"create_figure('seasonal_decompose')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.19"
}
},
"nbformat": 4,
"nbformat_minor": 5
}