{ "cells": [ { "cell_type": "markdown", "id": "105fe78c-cb4e-4647-a733-bb8dc796a123", "metadata": {}, "source": [ "# Simple Time Series Decomposition" ] }, { "cell_type": "markdown", "id": "b99a4a24-bdfb-4852-a2e8-debd696752f1", "metadata": {}, "source": [ "In this notebook we will see how time series decomposition works. This is not the original breakdown of the `statsmodels` seasonal decomposition; instead, this post will help you understand each and every component of the decomposition process." ] }, { "cell_type": "markdown", "id": "a05c27b2-86d5-44c8-96f2-495a78d8cb69", "metadata": {}, "source": [ "## Load Libraries" ] }, { "cell_type": "code", "execution_count": 8, "id": "b7c3e3bc-21ec-4234-bb54-6b539b97656a", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import statsmodels as stm\n", "from statsmodels.tsa.seasonal import seasonal_decompose\n", "from matplotlib import pyplot as plt\n", "import seaborn as sns" ] }, { "cell_type": "markdown", "id": "1029a303-4985-43cd-bcdf-c9dfaa7c55a2", "metadata": {}, "source": [ "## Load dataset" ] }, { "cell_type": "code", "execution_count": 2, "id": "c3068f27-0fb4-448b-aba6-894a7fc660d8", "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"TS/data.csv\", index_col='Month')" ] }, { "cell_type": "code", "execution_count": 3, "id": "6a59c22a-c888-4a36-9106-b69ad2e965e9", "metadata": {}, "outputs": [], "source": [ "df.sort_index(inplace=True)" ] }, { "cell_type": "code", "execution_count": 4, "id": "56fbf380-ff6a-47c8-8444-47b5b31c3287", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Passengers | \n", "
|---|---|
| Month | \n", "\n", " |
| 1949-01 | \n", "112 | \n", "
| 1949-02 | \n", "118 | \n", "
| 1949-03 | \n", "132 | \n", "
| 1949-04 | \n", "129 | \n", "
| 1949-05 | \n", "121 | \n", "
| ... | \n", "... | \n", "
| 1960-08 | \n", "606 | \n", "
| 1960-09 | \n", "508 | \n", "
| 1960-10 | \n", "461 | \n", "
| 1960-11 | \n", "390 | \n", "
| 1960-12 | \n", "432 | \n", "
144 rows × 1 columns
\n", "| \n", " | Passengers | \n", "
|---|---|
| Month | \n", "\n", " |
| 1949-01 | \n", "112 | \n", "
| 1949-02 | \n", "118 | \n", "
| 1949-03 | \n", "132 | \n", "
| 1949-04 | \n", "129 | \n", "
| 1949-05 | \n", "121 | \n", "
| \n", " | index | \n", "ts | \n", "data | \n", "
|---|---|---|---|
| 0 | \n", "0 | \n", "1949-01 | \n", "112 | \n", "
| 1 | \n", "1 | \n", "1949-02 | \n", "118 | \n", "
| 2 | \n", "2 | \n", "1949-03 | \n", "132 | \n", "
| 3 | \n", "3 | \n", "1949-04 | \n", "129 | \n", "
| 4 | \n", "4 | \n", "1949-05 | \n", "121 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "
| 139 | \n", "139 | \n", "1960-08 | \n", "606 | \n", "
| 140 | \n", "140 | \n", "1960-09 | \n", "508 | \n", "
| 141 | \n", "141 | \n", "1960-10 | \n", "461 | \n", "
| 142 | \n", "142 | \n", "1960-11 | \n", "390 | \n", "
| 143 | \n", "143 | \n", "1960-12 | \n", "432 | \n", "
144 rows × 3 columns
\n", "| \n", " | index | \n", "ts | \n", "data | \n", "approx_trend | \n", "
|---|---|---|---|---|
| 0 | \n", "0 | \n", "1949-01 | \n", "112 | \n", "114.028041 | \n", "
| 1 | \n", "1 | \n", "1949-02 | \n", "118 | \n", "115.690061 | \n", "
| 2 | \n", "2 | \n", "1949-03 | \n", "132 | \n", "117.366097 | \n", "
| 3 | \n", "3 | \n", "1949-04 | \n", "129 | \n", "119.056149 | \n", "
| 4 | \n", "4 | \n", "1949-05 | \n", "121 | \n", "120.760218 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 139 | \n", "139 | \n", "1960-08 | \n", "606 | \n", "479.480046 | \n", "
| 140 | \n", "140 | \n", "1960-09 | \n", "508 | \n", "483.090345 | \n", "
| 141 | \n", "141 | \n", "1960-10 | \n", "461 | \n", "486.714660 | \n", "
| 142 | \n", "142 | \n", "1960-11 | \n", "390 | \n", "490.352992 | \n", "
| 143 | \n", "143 | \n", "1960-12 | \n", "432 | \n", "494.005340 | \n", "
144 rows × 4 columns
\n", "| \n", " | index | \n", "ts | \n", "data | \n", "approx_trend | \n", "
|---|---|---|---|---|
| 0 | \n", "0 | \n", "1949-01 | \n", "112 | \n", "114.028041 | \n", "
| 1 | \n", "1 | \n", "1949-02 | \n", "118 | \n", "115.690061 | \n", "
| 2 | \n", "2 | \n", "1949-03 | \n", "132 | \n", "117.366097 | \n", "
| 3 | \n", "3 | \n", "1949-04 | \n", "129 | \n", "119.056149 | \n", "
| 4 | \n", "4 | \n", "1949-05 | \n", "121 | \n", "120.760218 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 139 | \n", "139 | \n", "1960-08 | \n", "606 | \n", "479.480046 | \n", "
| 140 | \n", "140 | \n", "1960-09 | \n", "508 | \n", "483.090345 | \n", "
| 141 | \n", "141 | \n", "1960-10 | \n", "461 | \n", "486.714660 | \n", "
| 142 | \n", "142 | \n", "1960-11 | \n", "390 | \n", "490.352992 | \n", "
| 143 | \n", "143 | \n", "1960-12 | \n", "432 | \n", "494.005340 | \n", "
144 rows × 4 columns
\n", "| \n", " | index | \n", "ts | \n", "data | \n", "approx_trend | \n", "detrended | \n", "date | \n", "
|---|---|---|---|---|---|---|
| 0 | \n", "0 | \n", "1949-01 | \n", "112 | \n", "114.028041 | \n", "-2.028041 | \n", "1949-01-01 | \n", "
| 1 | \n", "1 | \n", "1949-02 | \n", "118 | \n", "115.690061 | \n", "2.309939 | \n", "1949-02-01 | \n", "
| 2 | \n", "2 | \n", "1949-03 | \n", "132 | \n", "117.366097 | \n", "14.633903 | \n", "1949-03-01 | \n", "
| 3 | \n", "3 | \n", "1949-04 | \n", "129 | \n", "119.056149 | \n", "9.943851 | \n", "1949-04-01 | \n", "
| 4 | \n", "4 | \n", "1949-05 | \n", "121 | \n", "120.760218 | \n", "0.239782 | \n", "1949-05-01 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 139 | \n", "139 | \n", "1960-08 | \n", "606 | \n", "479.480046 | \n", "126.519954 | \n", "1960-08-01 | \n", "
| 140 | \n", "140 | \n", "1960-09 | \n", "508 | \n", "483.090345 | \n", "24.909655 | \n", "1960-09-01 | \n", "
| 141 | \n", "141 | \n", "1960-10 | \n", "461 | \n", "486.714660 | \n", "-25.714660 | \n", "1960-10-01 | \n", "
| 142 | \n", "142 | \n", "1960-11 | \n", "390 | \n", "490.352992 | \n", "-100.352992 | \n", "1960-11-01 | \n", "
| 143 | \n", "143 | \n", "1960-12 | \n", "432 | \n", "494.005340 | \n", "-62.005340 | \n", "1960-12-01 | \n", "
144 rows × 6 columns
\n", "| \n", " | index | \n", "ts | \n", "data | \n", "approx_trend | \n", "detrended | \n", "date | \n", "month | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "0 | \n", "1949-01 | \n", "112 | \n", "114.028041 | \n", "-2.028041 | \n", "1949-01-01 | \n", "1 | \n", "
| 1 | \n", "1 | \n", "1949-02 | \n", "118 | \n", "115.690061 | \n", "2.309939 | \n", "1949-02-01 | \n", "2 | \n", "
| 2 | \n", "2 | \n", "1949-03 | \n", "132 | \n", "117.366097 | \n", "14.633903 | \n", "1949-03-01 | \n", "3 | \n", "
| 3 | \n", "3 | \n", "1949-04 | \n", "129 | \n", "119.056149 | \n", "9.943851 | \n", "1949-04-01 | \n", "4 | \n", "
| 4 | \n", "4 | \n", "1949-05 | \n", "121 | \n", "120.760218 | \n", "0.239782 | \n", "1949-05-01 | \n", "5 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 139 | \n", "139 | \n", "1960-08 | \n", "606 | \n", "479.480046 | \n", "126.519954 | \n", "1960-08-01 | \n", "8 | \n", "
| 140 | \n", "140 | \n", "1960-09 | \n", "508 | \n", "483.090345 | \n", "24.909655 | \n", "1960-09-01 | \n", "9 | \n", "
| 141 | \n", "141 | \n", "1960-10 | \n", "461 | \n", "486.714660 | \n", "-25.714660 | \n", "1960-10-01 | \n", "10 | \n", "
| 142 | \n", "142 | \n", "1960-11 | \n", "390 | \n", "490.352992 | \n", "-100.352992 | \n", "1960-11-01 | \n", "11 | \n", "
| 143 | \n", "143 | \n", "1960-12 | \n", "432 | \n", "494.005340 | \n", "-62.005340 | \n", "1960-12-01 | \n", "12 | \n", "
144 rows × 7 columns
\n", "| \n", " | index | \n", "ts | \n", "data | \n", "approx_trend | \n", "detrended | \n", "date | \n", "month | \n", "seasonality | \n", "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0 | \n", "1949-01 | \n", "112 | \n", "114.028041 | \n", "-2.028041 | \n", "1949-01-01 | \n", "1 | \n", "-24.062583 | \n", "
| 1 | \n", "1 | \n", "1949-02 | \n", "118 | \n", "115.690061 | \n", "2.309939 | \n", "1949-02-01 | \n", "2 | \n", "-33.399685 | \n", "
| 2 | \n", "2 | \n", "1949-03 | \n", "132 | \n", "117.366097 | \n", "14.633903 | \n", "1949-03-01 | \n", "3 | \n", "-0.834137 | \n", "
| 3 | \n", "3 | \n", "1949-04 | \n", "129 | \n", "119.056149 | \n", "9.943851 | \n", "1949-04-01 | \n", "4 | \n", "-6.532605 | \n", "
| 4 | \n", "4 | \n", "1949-05 | \n", "121 | \n", "120.760218 | \n", "0.239782 | \n", "1949-05-01 | \n", "5 | \n", "-4.411756 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 139 | \n", "139 | \n", "1960-08 | \n", "606 | \n", "479.480046 | \n", "126.519954 | \n", "1960-08-01 | \n", "8 | \n", "66.866692 | \n", "
| 140 | \n", "140 | \n", "1960-09 | \n", "508 | \n", "483.090345 | \n", "24.909655 | \n", "1960-09-01 | \n", "9 | \n", "15.514809 | \n", "
| 141 | \n", "141 | \n", "1960-10 | \n", "461 | \n", "486.714660 | \n", "-25.714660 | \n", "1960-10-01 | \n", "10 | \n", "-23.017758 | \n", "
| 142 | \n", "142 | \n", "1960-11 | \n", "390 | \n", "490.352992 | \n", "-100.352992 | \n", "1960-11-01 | \n", "11 | \n", "-59.481007 | \n", "
| 143 | \n", "143 | \n", "1960-12 | \n", "432 | \n", "494.005340 | \n", "-62.005340 | \n", "1960-12-01 | \n", "12 | \n", "-33.208273 | \n", "
144 rows × 8 columns
\n", "| \n", " | index | \n", "ts | \n", "data | \n", "approx_trend | \n", "detrended | \n", "date | \n", "month | \n", "seasonality | \n", "deseasonal | \n", "
|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0 | \n", "1949-01 | \n", "112 | \n", "114.028041 | \n", "-2.028041 | \n", "1949-01-01 | \n", "1 | \n", "-24.062583 | \n", "136.062583 | \n", "
| 1 | \n", "1 | \n", "1949-02 | \n", "118 | \n", "115.690061 | \n", "2.309939 | \n", "1949-02-01 | \n", "2 | \n", "-33.399685 | \n", "151.399685 | \n", "
| 2 | \n", "2 | \n", "1949-03 | \n", "132 | \n", "117.366097 | \n", "14.633903 | \n", "1949-03-01 | \n", "3 | \n", "-0.834137 | \n", "132.834137 | \n", "
| 3 | \n", "3 | \n", "1949-04 | \n", "129 | \n", "119.056149 | \n", "9.943851 | \n", "1949-04-01 | \n", "4 | \n", "-6.532605 | \n", "135.532605 | \n", "
| 4 | \n", "4 | \n", "1949-05 | \n", "121 | \n", "120.760218 | \n", "0.239782 | \n", "1949-05-01 | \n", "5 | \n", "-4.411756 | \n", "125.411756 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 139 | \n", "139 | \n", "1960-08 | \n", "606 | \n", "479.480046 | \n", "126.519954 | \n", "1960-08-01 | \n", "8 | \n", "66.866692 | \n", "539.133308 | \n", "
| 140 | \n", "140 | \n", "1960-09 | \n", "508 | \n", "483.090345 | \n", "24.909655 | \n", "1960-09-01 | \n", "9 | \n", "15.514809 | \n", "492.485191 | \n", "
| 141 | \n", "141 | \n", "1960-10 | \n", "461 | \n", "486.714660 | \n", "-25.714660 | \n", "1960-10-01 | \n", "10 | \n", "-23.017758 | \n", "484.017758 | \n", "
| 142 | \n", "142 | \n", "1960-11 | \n", "390 | \n", "490.352992 | \n", "-100.352992 | \n", "1960-11-01 | \n", "11 | \n", "-59.481007 | \n", "449.481007 | \n", "
| 143 | \n", "143 | \n", "1960-12 | \n", "432 | \n", "494.005340 | \n", "-62.005340 | \n", "1960-12-01 | \n", "12 | \n", "-33.208273 | \n", "465.208273 | \n", "
144 rows × 9 columns
\n", "| \n", " | index | \n", "ts | \n", "data | \n", "approx_trend | \n", "detrended | \n", "date | \n", "month | \n", "seasonality | \n", "deseasonal | \n", "trend | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0 | \n", "1949-01 | \n", "112 | \n", "114.028041 | \n", "-2.028041 | \n", "1949-01-01 | \n", "1 | \n", "-24.062583 | \n", "136.062583 | \n", "114.239650 | \n", "
| 1 | \n", "1 | \n", "1949-02 | \n", "118 | \n", "115.690061 | \n", "2.309939 | \n", "1949-02-01 | \n", "2 | \n", "-33.399685 | \n", "151.399685 | \n", "115.886544 | \n", "
| 2 | \n", "2 | \n", "1949-03 | \n", "132 | \n", "117.366097 | \n", "14.633903 | \n", "1949-03-01 | \n", "3 | \n", "-0.834137 | \n", "132.834137 | \n", "117.547711 | \n", "
| 3 | \n", "3 | \n", "1949-04 | \n", "129 | \n", "119.056149 | \n", "9.943851 | \n", "1949-04-01 | \n", "4 | \n", "-6.532605 | \n", "135.532605 | \n", "119.223151 | \n", "
| 4 | \n", "4 | \n", "1949-05 | \n", "121 | \n", "120.760218 | \n", "0.239782 | \n", "1949-05-01 | \n", "5 | \n", "-4.411756 | \n", "125.411756 | \n", "120.912865 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 139 | \n", "139 | \n", "1960-08 | \n", "606 | \n", "479.480046 | \n", "126.519954 | \n", "1960-08-01 | \n", "8 | \n", "66.866692 | \n", "539.133308 | \n", "480.054386 | \n", "
| 140 | \n", "140 | \n", "1960-09 | \n", "508 | \n", "483.090345 | \n", "24.909655 | \n", "1960-09-01 | \n", "9 | \n", "15.514809 | \n", "492.485191 | \n", "483.685287 | \n", "
| 141 | \n", "141 | \n", "1960-10 | \n", "461 | \n", "486.714660 | \n", "-25.714660 | \n", "1960-10-01 | \n", "10 | \n", "-23.017758 | \n", "484.017758 | \n", "487.330462 | \n", "
| 142 | \n", "142 | \n", "1960-11 | \n", "390 | \n", "490.352992 | \n", "-100.352992 | \n", "1960-11-01 | \n", "11 | \n", "-59.481007 | \n", "449.481007 | \n", "490.989910 | \n", "
| 143 | \n", "143 | \n", "1960-12 | \n", "432 | \n", "494.005340 | \n", "-62.005340 | \n", "1960-12-01 | \n", "12 | \n", "-33.208273 | \n", "465.208273 | \n", "494.663631 | \n", "
144 rows × 10 columns
\n", "