import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from prophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error
from pathlib import Path
import warnings
"ignore")
warnings.filterwarnings(
'ggplot')
plt.style.use('fivethirtyeight')
plt.style.use(
def mean_absolute_percentage_error(y_true, y_pred):
"""Calculates MAPE given y_true and y_pred"""
= np.array(y_true), np.array(y_pred)
y_true, y_pred return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
Hourly Time Series Forecasting using Facebook’s Prophet
Background on the Types of Time Series Data
Data
The data we will be using is hourly power consumption data from PJM. Energy consumption has some unique characteristics. It will be interesting to see how prophet picks them up.
Pulling the PJM East
which has data from 2002-2018 for the entire east region.
= 'hourly-energy-consumption'
name = Path(f'Data/{name}')
path = pd.read_csv(f'{path}/PJME_hourly.csv',
pjme =[0],
index_col=[0])
parse_dates pjme.head()
PJME_MW | |
---|---|
Datetime | |
2002-12-31 01:00:00 | 26498.0 |
2002-12-31 02:00:00 | 25147.0 |
2002-12-31 03:00:00 | 24574.0 |
2002-12-31 04:00:00 | 24393.0 |
2002-12-31 05:00:00 | 24860.0 |
= sns.color_palette()
color_pal ='.',
pjme.plot(style=(10, 5),
figsize=1,
ms=color_pal[0],
color='PJME MW')
title plt.show()
Time Series Features
from pandas.api.types import CategoricalDtype
= CategoricalDtype(categories=['Monday','Tuesday',
cat_type 'Wednesday',
'Thursday','Friday',
'Saturday','Sunday'],
=True)
ordered
def create_features(df, label=None):
"""
Creates time series features from datetime index.
"""
= df.copy()
df 'date'] = df.index
df['hour'] = df['date'].dt.hour
df['dayofweek'] = df['date'].dt.dayofweek
df['weekday'] = df['date'].dt.day_name()
df['weekday'] = df['weekday'].astype(cat_type)
df['quarter'] = df['date'].dt.quarter
df['month'] = df['date'].dt.month
df['year'] = df['date'].dt.year
df['dayofyear'] = df['date'].dt.dayofyear
df['dayofmonth'] = df['date'].dt.day
df['weekofyear'] = df['date'].dt.isocalendar().week
df['date_offset'] = (df.date.dt.month*100 + df.date.dt.day - 320)%1300
df[
'season'] = pd.cut(df['date_offset'], [0, 300, 602, 900, 1300],
df[=['Spring', 'Summer', 'Fall', 'Winter']
labels
)= df[['hour','dayofweek','quarter','month','year',
X 'dayofyear','dayofmonth','weekofyear','weekday',
'season']]
if label:
= df[label]
y return X, y
return X
= create_features(pjme, label='PJME_MW')
X, y = pd.concat([X, y], axis=1) features_and_target
= plt.subplots(figsize=(10, 5))
fig, ax =features_and_target.dropna(),
sns.boxplot(data='weekday',
x='PJME_MW',
y='season',
hue=ax,
ax=1)
linewidth'Power Use MW by Day of Week')
ax.set_title('Day of Week')
ax.set_xlabel('Energy (MW)')
ax.set_ylabel(=(1, 1))
ax.legend(bbox_to_anchor plt.show()
Train / Test Split
= '1-Jan-2015'
split_date = pjme.loc[pjme.index <= split_date].copy()
pjme_train = pjme.loc[pjme.index > split_date].copy()
pjme_test
# Plot train and test so you can see where we have split
\
pjme_test ={'PJME_MW': 'TEST SET'}) \
.rename(columns={'PJME_MW': 'TRAINING SET'}),
.join(pjme_train.rename(columns='outer') \
how=(10, 5), title='PJM East', style='.', ms=1)
.plot(figsize plt.show()
Simple Prophet Model
- Prophet model expects the dataset to be named a specific way. We will rename our dataframe columns before feeding it into the model.
- Datetime column named:
ds
- target :
y
- Datetime column named:
# Format data for prophet model using ds and y
= pjme_train.reset_index() \
pjme_train_prophet ={'Datetime':'ds',
.rename(columns'PJME_MW':'y'})
= Prophet()
model model.fit(pjme_train_prophet)
18:04:09 - cmdstanpy - INFO - Chain [1] start processing
18:04:59 - cmdstanpy - INFO - Chain [1] done processing
<prophet.forecaster.Prophet>
# Predict on test set with model
= pjme_test.reset_index() \
pjme_test_prophet ={'Datetime':'ds',
.rename(columns'PJME_MW':'y'})
= model.predict(pjme_test_prophet) pjme_test_fcst
pjme_test_fcst.head()
ds | trend | yhat_lower | yhat_upper | trend_lower | trend_upper | additive_terms | additive_terms_lower | additive_terms_upper | daily | ... | weekly | weekly_lower | weekly_upper | yearly | yearly_lower | yearly_upper | multiplicative_terms | multiplicative_terms_lower | multiplicative_terms_upper | yhat | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2015-01-01 01:00:00 | 31210.530967 | 23912.189071 | 32701.384160 | 31210.530967 | 31210.530967 | -2893.742472 | -2893.742472 | -2893.742472 | -4430.272423 | ... | 1281.328732 | 1281.328732 | 1281.328732 | 255.201219 | 255.201219 | 255.201219 | 0.0 | 0.0 | 0.0 | 28316.788495 |
1 | 2015-01-01 02:00:00 | 31210.494154 | 22326.307825 | 31018.012373 | 31210.494154 | 31210.494154 | -4398.239425 | -4398.239425 | -4398.239425 | -5927.272577 | ... | 1272.574102 | 1272.574102 | 1272.574102 | 256.459050 | 256.459050 | 256.459050 | 0.0 | 0.0 | 0.0 | 26812.254729 |
2 | 2015-01-01 03:00:00 | 31210.457342 | 21462.567951 | 30599.849415 | 31210.457342 | 31210.457342 | -5269.974485 | -5269.974485 | -5269.974485 | -6790.346308 | ... | 1262.613389 | 1262.613389 | 1262.613389 | 257.758434 | 257.758434 | 257.758434 | 0.0 | 0.0 | 0.0 | 25940.482857 |
3 | 2015-01-01 04:00:00 | 31210.420529 | 21734.347259 | 30591.022280 | 31210.420529 | 31210.420529 | -5411.456410 | -5411.456410 | -5411.456410 | -6922.126021 | ... | 1251.570211 | 1251.570211 | 1251.570211 | 259.099400 | 259.099400 | 259.099400 | 0.0 | 0.0 | 0.0 | 25798.964119 |
4 | 2015-01-01 05:00:00 | 31210.383716 | 22005.992294 | 31027.473128 | 31210.383716 | 31210.383716 | -4737.018106 | -4737.018106 | -4737.018106 | -6237.080479 | ... | 1239.580401 | 1239.580401 | 1239.580401 | 260.481971 | 260.481971 | 260.481971 | 0.0 | 0.0 | 0.0 | 26473.365610 |
5 rows × 22 columns
= plt.subplots(figsize=(10, 5))
fig, ax = model.plot(pjme_test_fcst, ax=ax)
fig 'Prophet Forecast')
ax.set_title( plt.show()
= model.plot_components(pjme_test_fcst)
fig plt.show()
Compare Forecast to Actuals
# Plot the forecast with the actuals
= plt.subplots(figsize=(15, 5))
f, ax 'PJME_MW'], color='r')
ax.scatter(pjme_test.index, pjme_test[= model.plot(pjme_test_fcst, ax=ax) fig
'PJME_MW'].head() pjme_test[
Datetime
2015-12-31 01:00:00 24305.0
2015-12-31 02:00:00 23156.0
2015-12-31 03:00:00 22514.0
2015-12-31 04:00:00 22330.0
2015-12-31 05:00:00 22773.0
Name: PJME_MW, dtype: float64
= plt.subplots(figsize=(10, 5))
fig, ax 'PJME_MW'], color='r')
ax.scatter(pjme_test.index, pjme_test[
= model.plot(pjme_test_fcst, ax=ax)
fig =pd.to_datetime('12-01-2014'),
ax.set_xbound(lower=pd.to_datetime('02-01-2015'))
upper0, 60000)
ax.set_ylim(
ax.legend()= plt.suptitle('January 2015 Forecast vs Actuals') plot
# Plot the forecast with the actuals
= plt.subplots(figsize=(15, 5))
f, ax 'PJME_MW'], color='r')
ax.scatter(pjme_test.index, pjme_test[= model.plot(pjme_test_fcst, ax=ax)
fig = pd.to_datetime('2015-01-05'), upper=pd.to_datetime('2015-01-08'))
ax.set_xbound(lower0, 60000)
ax.set_ylim('First Week of January Forecast vs Actuals')
ax.set_title( plt.show()
Evaluate the model with Error Metrics
=pjme_test['PJME_MW'],
np.sqrt(mean_squared_error(y_true=pjme_test_fcst['yhat'])) y_pred
6616.966074225221
=pjme_test['PJME_MW'],
mean_absolute_error(y_true=pjme_test_fcst['yhat']) y_pred
5181.911537928106
=pjme_test['PJME_MW'],
mean_absolute_percentage_error(y_true=pjme_test_fcst['yhat']) y_pred
16.512003880182647
Adding Holidays
Next we will see if adding holiday indicators will help the accuracy of the model. Prophet comes with a Holiday Effects parameter that can be provided to the model prior to training.
We will use the built in pandas USFederalHolidayCalendar to pull the list of holidays
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
= calendar()
cal
= cal.holidays(start=pjme.index.min(),
holidays =pjme.index.max(),
end=True)
return_name= pd.DataFrame(data=holidays,
holiday_df =['holiday'])
columns= holiday_df.reset_index().rename(columns={'index':'ds'}) holiday_df
= Prophet(holidays=holiday_df)
model_with_holidays model_with_holidays.fit(pjme_train_prophet)
18:05:15 - cmdstanpy - INFO - Chain [1] start processing
18:06:36 - cmdstanpy - INFO - Chain [1] done processing
<prophet.forecaster.Prophet>
# Predict on training set with model
= \
pjme_test_fcst_with_hols =pjme_test_prophet) model_with_holidays.predict(df
= model_with_holidays.plot_components(
fig
pjme_test_fcst_with_hols) plt.show()
= plt.subplots(figsize=(10, 5))
fig, ax 'PJME_MW'], color='r')
ax.scatter(pjme_test.index, pjme_test[= model.plot(pjme_test_fcst_with_hols, ax=ax)
fig =pd.to_datetime('07-01-2015'),
ax.set_xbound(lower=pd.to_datetime('07-07-2015'))
upper0, 60000)
ax.set_ylim(= plt.suptitle('July 4 Predictions vs Actual') plot
=pjme_test['PJME_MW'],
np.sqrt(mean_squared_error(y_true=pjme_test_fcst_with_hols['yhat'])) y_pred
6639.587205626055
=pjme_test['PJME_MW'],
mean_absolute_error(y_true=pjme_test_fcst_with_hols['yhat']) y_pred
5201.46462763833
=pjme_test['PJME_MW'],
mean_absolute_percentage_error(y_true=pjme_test_fcst_with_hols['yhat']) y_pred
16.558807523531467
Predict into the Future
We can use the built in make_future_dataframe
method to build our future dataframe and make predictions.
= model.make_future_dataframe(periods=365*24, freq='h', include_history=False)
future = model_with_holidays.predict(future) forecast
'ds','yhat']].head() forecast[[
ds | yhat | |
---|---|---|
0 | 2015-01-01 01:00:00 | 25567.271675 |
1 | 2015-01-01 02:00:00 | 24065.351481 |
2 | 2015-01-01 03:00:00 | 23195.828972 |
3 | 2015-01-01 04:00:00 | 23056.212620 |
4 | 2015-01-01 05:00:00 | 23732.192245 |