import pandas as pd
from statsforecast import StatsForecast
Nixtla - ML Forecast
df = pd.read_csv('https://datasets-nixtla.s3.amazonaws.com/air-passengers.csv', parse_dates=['ds'])
df.head()
| | unique_id | ds | y |
---|---|---|---|
0 | AirPassengers | 1949-01-01 | 112 |
1 | AirPassengers | 1949-02-01 | 118 |
2 | AirPassengers | 1949-03-01 | 132 |
3 | AirPassengers | 1949-04-01 | 129 |
4 | AirPassengers | 1949-05-01 | 121 |
df['unique_id'].value_counts()
unique_id
AirPassengers 144
Name: count, dtype: int64
StatsForecast.plot(df, engine='plotly')
from mlforecast import MLForecast
from mlforecast.target_transforms import Differences
from sklearn.linear_model import LinearRegression
fcst = MLForecast(
    models=LinearRegression(),
    freq='MS',  # our series has a monthly frequency
    lags=[12],
    target_transforms=[Differences([1])]
)
fcst.fit(df)
MLForecast(models=[LinearRegression], freq=<MonthBegin>, lag_features=['lag12'], date_features=[], num_threads=1)
preds = fcst.predict(12)
preds
| | unique_id | ds | LinearRegression |
---|---|---|---|
0 | AirPassengers | 1961-01-01 | 444.656555 |
1 | AirPassengers | 1961-02-01 | 417.470764 |
2 | AirPassengers | 1961-03-01 | 446.903076 |
3 | AirPassengers | 1961-04-01 | 491.014160 |
4 | AirPassengers | 1961-05-01 | 502.622253 |
5 | AirPassengers | 1961-06-01 | 568.751465 |
6 | AirPassengers | 1961-07-01 | 660.044312 |
7 | AirPassengers | 1961-08-01 | 643.343323 |
8 | AirPassengers | 1961-09-01 | 540.666748 |
9 | AirPassengers | 1961-10-01 | 491.462799 |
10 | AirPassengers | 1961-11-01 | 417.095245 |
11 | AirPassengers | 1961-12-01 | 461.206299 |
StatsForecast.plot(df, preds, engine='plotly')