!pip install pystan
!pip install fbprophet

Requirement already satisfied: pystan in c:\users\dell\anaconda3\lib\site-packages (2.19.1.1)
Requirement already satisfied: Cython!=0.25.1,>=0.22 in c:\users\dell\anaconda3\lib\site-packages (from pystan) (0.29.24)
Requirement already satisfied: numpy>=1.7 in c:\users\dell\anaconda3\lib\site-packages (from pystan) (1.20.3)
Requirement already satisfied: fbprophet in c:\users\dell\anaconda3\lib\site-packages (0.7.1)
Requirement already satisfied: pystan>=2.14 in c:\users\dell\anaconda3\lib\site-packages (from fbprophet) (2.19.1.1)
Requirement already satisfied: Cython>=0.22 in c:\users\dell\anaconda3\lib\site-packages (from fbprophet) (0.29.24)
Collecting cmdstanpy==0.9.5
  Using cached cmdstanpy-0.9.5-py3-none-any.whl (37 kB)
Requirement already satisfied: holidays>=0.10.2 in c:\users\dell\anaconda3\lib\site-packages (from fbprophet) (0.11.3.1)
Requirement already satisfied: python-dateutil>=2.8.0 in c:\users\dell\anaconda3\lib\site-packages (from fbprophet) (2.8.2)
Requirement already satisfied: pandas>=1.0.4 in c:\users\dell\anaconda3\lib\site-packages (from fbprophet) (1.3.4)
Collecting setuptools-git>=1.2
  Using cached setuptools_git-1.2-py2.py3-none-any.whl (10 kB)
Requirement already satisfied: matplotlib>=2.0.0 in c:\users\dell\anaconda3\lib\site-packages (from fbprophet) (3.4.3)
Requirement already satisfied: numpy>=1.15.4 in c:\users\dell\anaconda3\lib\site-packages (from fbprophet) (1.20.3)
Requirement already satisfied: LunarCalendar>=0.0.9 in c:\users\dell\anaconda3\lib\site-packages (from fbprophet) (0.0.9)
Requirement already satisfied: convertdate>=2.1.2 in c:\users\dell\anaconda3\lib\site-packages (from fbprophet) (2.3.2)
Requirement already satisfied: tqdm>=4.36.1 in c:\users\dell\anaconda3\lib\site-packages (from fbprophet) (4.62.3)
Requirement already satisfied: pymeeus<=1,>=0.3.13 in c:\users\dell\anaconda3\lib\site-packages (from convertdate>=2.1.2->fbprophet) (0.5.11)
Requirement already satisfied: pytz>=2014.10 in c:\users\dell\anaconda3\lib\site-packages (from convertdate>=2.1.2->fbprophet) (2021.3)
Requirement already satisfied: korean-lunar-calendar in c:\users\dell\anaconda3\lib\site-packages (from holidays>=0.10.2->fbprophet) (0.2.1)
Requirement already satisfied: hijri-converter in c:\users\dell\anaconda3\lib\site-packages (from holidays>=0.10.2->fbprophet) (2.2.2)
Requirement already satisfied: ephem>=3.7.5.3 in c:\users\dell\anaconda3\lib\site-packages (from LunarCalendar>=0.0.9->fbprophet) (3.7.7.1)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\dell\anaconda3\lib\site-packages (from matplotlib>=2.0.0->fbprophet) (1.3.1)
Requirement already satisfied: pyparsing>=2.2.1 in c:\users\dell\anaconda3\lib\site-packages (from matplotlib>=2.0.0->fbprophet) (3.0.4)
Requirement already satisfied: cycler>=0.10 in c:\users\dell\anaconda3\lib\site-packages (from matplotlib>=2.0.0->fbprophet) (0.10.0)
Requirement already satisfied: pillow>=6.2.0 in c:\users\dell\anaconda3\lib\site-packages (from matplotlib>=2.0.0->fbprophet) (8.4.0)
Requirement already satisfied: six in c:\users\dell\anaconda3\lib\site-packages (from cycler>=0.10->matplotlib>=2.0.0->fbprophet) (1.16.0)
Requirement already satisfied: colorama in c:\users\dell\anaconda3\lib\site-packages (from tqdm>=4.36.1->fbprophet) (0.4.4)
Installing collected packages: setuptools-git, cmdstanpy
Successfully installed cmdstanpy-0.9.5 setuptools-git-1.2

import fbprophet
from fbprophet import Prophet
import pandas as pd
from fbprophet.diagnostics import cross_validation
from fbprophet.plot import plot_cross_validation_metric
from fbprophet.diagnostics import performance_metrics

# List the attributes and methods exposed by the Prophet class.
dir(Prophet)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_load_stan_backend',
 'add_country_holidays',
 'add_group_component',
 'add_regressor',
 'add_seasonality',
 'construct_holiday_dataframe',
 'fit',
 'flat_growth_init',
 'flat_trend',
 'fourier_series',
 'initialize_scales',
 'linear_growth_init',
 'logistic_growth_init',
 'make_all_seasonality_features',
 'make_future_dataframe',
 'make_holiday_features',
 'make_seasonality_features',
 'parse_seasonality_args',
 'percentile',
 'piecewise_linear',
 'piecewise_logistic',
 'plot',
 'plot_components',
 'predict',
 'predict_seasonal_components',
 'predict_trend',
 'predict_uncertainty',
 'predictive_samples',
 'regressor_column_matrix',
 'sample_model',
 'sample_posterior_predictive',
 'sample_predictive_trend',
 'set_auto_seasonalities',
 'set_changepoints',
 'setup_dataframe',
 'validate_column_name',
 'validate_inputs']

# Load the case data from the local CSV file and preview the first rows.
df=pd.read_csv('covid.csv')
df.head()

Data Preprocessing

df.shape

(49068, 10)

df.dtypes

Province/State     object
Country/Region     object
Lat               float64
Long              float64
Date               object
Confirmed           int64
Deaths              int64
Recovered           int64
Active              int64
WHO Region         object
dtype: object

# 'Date' is read in as plain strings (dtype object); convert it to datetime64
# so it can be used as a time axis.
df['Date']=pd.to_datetime(df['Date'])

df.dtypes

Province/State            object
Country/Region            object
Lat                      float64
Long                     float64
Date              datetime64[ns]
Confirmed                  int64
Deaths                     int64
Recovered                  int64
Active                     int64
WHO Region                object
dtype: object

df.isnull().sum()

Province/State    34404
Country/Region        0
Lat                   0
Long                  0
Date                  0
Confirmed             0
Deaths                0
Recovered             0
Active                0
WHO Region            0
dtype: int64

We can see that the Province/State column has a lot of missing values (34,404 of 49,068 rows), so we need to handle it.

df['Date'].nunique()

188

# Collapse the per-country/province rows into a single total per date.
# The columns are selected with a list: indexing a groupby with a bare tuple
# of keys is deprecated in pandas and emits a FutureWarning.
total = (
    df.groupby(['Date'])[['Confirmed', 'Deaths', 'Recovered', 'Active']]
    .sum()
    .reset_index()
)
total

C:\Users\DELL\AppData\Local\Temp/ipykernel_15636/269672384.py:2: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
  total=df.groupby(['Date'])['Confirmed','Deaths','Recovered','Active'].sum().reset_index()

# Prophet expects the timestamp column to be named 'ds' and the target 'y';
# here the daily confirmed-case total is the series being forecast.
df_prophet=total.rename(columns={'Date':'ds','Confirmed':'y'})
df_prophet.head()

Building a Prophet Model

# Create a Prophet model with its default settings.
m=Prophet()

# Fit it on the daily totals. fit() is assumed to return the fitted model
# itself, so `model` and `m` would refer to the same object -- TODO confirm.
model=m.fit(df_prophet)

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.

model.seasonalities

OrderedDict([('weekly',
              {'period': 7,
               'fourier_order': 3,
               'prior_scale': 10.0,
               'mode': 'additive',
               'condition_name': None})])

# Build the dataframe of dates to predict on: the dates the model was fitted
# on plus 30 further daily ('D') steps after the last one.
# Returns a pandas dataframe with a single 'ds' column.
future_global=model.make_future_dataframe(periods=30,freq='D')
future_global.head()

future_global.shape

(218, 1)

df_prophet['ds'].tail()

183   2020-07-23
184   2020-07-24
185   2020-07-25
186   2020-07-26
187   2020-07-27
Name: ds, dtype: datetime64[ns]

future_global.tail()

# Forecast every date in future_global (the fitted history plus the extension).
prediction=model.predict(future_global)
prediction

# Show only the point forecast (yhat) and its lower/upper bounds for the last rows.
prediction[['ds','yhat','yhat_lower','yhat_upper']].tail()

model.plot(prediction)

This is what our prediction looks like. The overall direction of the case numbers is probably right; you can observe how the cases rise exponentially.

model.plot_components(prediction)

from fbprophet.plot import add_changepoints_to_plot

# Redraw the forecast, then overlay markers for the significant trend
# changepoints on the same axes.
fig=model.plot(prediction)

# NOTE(review): the return value is presumably bound to `a` only to keep it
# from being echoed as cell output; `a` is not used afterwards in this section.
a=add_changepoints_to_plot(fig.gca(),model,prediction)

## Cross-validate the fitted model by simulating historical forecasts.
##
## horizon='30 days' --> how far ahead each simulated forecast is evaluated.
## period='15 days'  --> spacing between successive cutoff dates.
## initial='90 days' --> amount of history kept for training before the first cutoff.
##
## Cutoffs are chosen working backwards from (end - horizon).
## When they are not given, the function's documentation sets
## period = 0.5 * horizon and initial = 3 * horizon; the values passed here
## follow those same ratios for a 30-day horizon.


df_cv=cross_validation(model,horizon='30 days',period='15 days',initial='90 days')

INFO:fbprophet:Making 5 forecasts with cutoffs between 2020-04-28 00:00:00 and 2020-06-27 00:00:00

df_cv.head()

df_cv.shape

(150, 6)

Obtaining the Performance Metrics

We use the performance_metrics utility to compute the Mean Squared Error (MSE), Root Mean Squared Error (RMSE), Mean Absolute Error (MAE), Mean Absolute Percentage Error (MAPE), and the coverage of the yhat_lower and yhat_upper estimates.

df_performance=performance_metrics(df_cv)
df_performance.head()

Plotting the metrics against the number of days in the cross-validation horizon.

# Plot RMSE, MSE and MAPE against the forecast horizon of the cross-validated
# data. Each figure is bound to its own name so that `df_performance` (the
# metrics table returned by performance_metrics) is not overwritten by a plot.
fig_rmse = plot_cross_validation_metric(df_cv, metric='rmse')

fig_mse = plot_cross_validation_metric(df_cv, metric='mse')

fig_mape = plot_cross_validation_metric(df_cv, metric='mape')

	Province/State	Country/Region	Lat	Long	Date	WHO Region
0	NaN	Afghanistan	33.93911	67.709953	2020-01-22	Eastern Mediterranean
1	NaN	Albania	41.15330	20.168300	2020-01-22	Europe
2	NaN	Algeria	28.03390	1.659600	2020-01-22	Africa
3	NaN	Andorra	42.50630	1.521800	2020-01-22	Europe
4	NaN	Angola	-11.20270	17.873900	2020-01-22	Africa

	Date	Confirmed	Deaths	Recovered	Active
0	2020-01-22	555	17	28	510
1	2020-01-23	654	18	30	606
2	2020-01-24	941	26	36	879
3	2020-01-25	1434	42	39	1353
4	2020-01-26	2118	56	52	2010
...	...	...	...	...	...
183	2020-07-23	15510481	633506	8710969	6166006
184	2020-07-24	15791645	639650	8939705	6212290
185	2020-07-25	16047190	644517	9158743	6243930
186	2020-07-26	16251796	648621	9293464	6309711
187	2020-07-27	16480485	654036	9468087	6358362

	ds	y	Deaths	Recovered	Active
0	2020-01-22	555	17	28	510
1	2020-01-23	654	18	30	606
2	2020-01-24	941	26	36	879
3	2020-01-25	1434	42	39	1353
4	2020-01-26	2118	56	52	2010

	ds
0	2020-01-22
1	2020-01-23
2	2020-01-24
3	2020-01-25
4	2020-01-26

	ds
213	2020-08-22
214	2020-08-23
215	2020-08-24
216	2020-08-25
217	2020-08-26

	ds	trend	yhat_lower	yhat_upper	trend_lower	trend_upper	additive_terms	additive_terms_lower	additive_terms_upper	weekly	weekly_lower	weekly_upper	multiplicative_terms	multiplicative_terms_lower	multiplicative_terms_upper	yhat
0	2020-01-22	-1.063986e+04	-1.178154e+05	8.425973e+04	-1.063986e+04	-1.063986e+04	-10982.447793	-10982.447793	-10982.447793	-10982.447793	-10982.447793	-10982.447793	0.0	0.0	0.0	-2.162230e+04
1	2020-01-23	-7.901275e+03	-1.173334e+05	9.708244e+04	-7.901275e+03	-7.901275e+03	-1076.670504	-1076.670504	-1076.670504	-1076.670504	-1076.670504	-1076.670504	0.0	0.0	0.0	-8.977945e+03
2	2020-01-24	-5.162694e+03	-9.789328e+04	1.039234e+05	-5.162694e+03	-5.162694e+03	10097.471121	10097.471121	10097.471121	10097.471121	10097.471121	10097.471121	0.0	0.0	0.0	4.934777e+03
3	2020-01-25	-2.424113e+03	-9.009647e+04	1.171123e+05	-2.424113e+03	-2.424113e+03	13741.427160	13741.427160	13741.427160	13741.427160	13741.427160	13741.427160	0.0	0.0	0.0	1.131731e+04
4	2020-01-26	3.144674e+02	-1.040148e+05	1.073533e+05	3.144674e+02	3.144674e+02	7236.701390	7236.701390	7236.701390	7236.701390	7236.701390	7236.701390	0.0	0.0	0.0	7.551169e+03
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
213	2020-08-22	2.144058e+07	2.106047e+07	2.180491e+07	2.106079e+07	2.175931e+07	13741.427160	13741.427160	13741.427160	13741.427160	13741.427160	13741.427160	0.0	0.0	0.0	2.145432e+07
214	2020-08-23	2.164466e+07	2.124622e+07	2.202496e+07	2.124114e+07	2.198362e+07	7236.701390	7236.701390	7236.701390	7236.701390	7236.701390	7236.701390	0.0	0.0	0.0	2.165190e+07
215	2020-08-24	2.184875e+07	2.141090e+07	2.223934e+07	2.142355e+07	2.221139e+07	-2165.964784	-2165.964784	-2165.964784	-2165.964784	-2165.964784	-2165.964784	0.0	0.0	0.0	2.184658e+07
216	2020-08-25	2.205283e+07	2.156957e+07	2.243193e+07	2.160755e+07	2.243839e+07	-16850.516589	-16850.516589	-16850.516589	-16850.516589	-16850.516589	-16850.516589	0.0	0.0	0.0	2.203598e+07
217	2020-08-26	2.225691e+07	2.177083e+07	2.266496e+07	2.179042e+07	2.266599e+07	-10982.447793	-10982.447793	-10982.447793	-10982.447793	-10982.447793	-10982.447793	0.0	0.0	0.0	2.224593e+07

	ds	yhat	yhat_lower	yhat_upper	y	cutoff
0	2020-04-29	3.201064e+06	3.195700e+06	3.206117e+06	3185195	2020-04-28
1	2020-04-30	3.285432e+06	3.279401e+06	3.291880e+06	3268876	2020-04-28
2	2020-05-01	3.369640e+06	3.361684e+06	3.378217e+06	3355922	2020-04-28
3	2020-05-02	3.450902e+06	3.438502e+06	3.463530e+06	3437608	2020-04-28
4	2020-05-03	3.530365e+06	3.512702e+06	3.547939e+06	3515244	2020-04-28

	horizon	mse	rmse	mae	mape	mdape	coverage
0	3 days	2.732177e+10	165292.978537	129431.475977	0.016894	0.017781	0.000000
1	4 days	3.783770e+10	194519.153621	151640.363022	0.019358	0.022609	0.000000
2	5 days	5.187132e+10	227752.759122	175493.706735	0.021809	0.025662	0.066667
3	6 days	6.958295e+10	263785.805797	202488.978430	0.024588	0.026660	0.133333
4	7 days	9.037899e+10	300630.981552	232841.149368	0.027870	0.031220	0.200000