import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

plt.rcParams['figure.figsize'] = (17, 5)
plt.style.use('seaborn-whitegrid') # fivethirtyeight
%config InlineBackend.figure_format = 'retina';
from IPython.core.display import display, HTML
display(HTML("<style>.container {width:70% !important;}</style>"));


%autosave 300

Autosaving every 300 seconds


import statsmodels as sm
from statsmodels.tsa.statespace.sarimax import SARIMAX
#print(f"statsmodels version: {sm.version.full_version}")


Lupa_upd = pd.read_csv(r'C:\Users\Kurt\Documents\Notebooks\XGBoost/acea-water-prediction/Lupa_updated.csv', index_col=0, parse_dates=True)
Lupa_upd.head()


Lupa_upd["Flow_Rate_Lupa"]= Lupa_upd.Flow_Rate_Lupa.resample("MS").sum()
Lupa_upd = Lupa_upd.asfreq('MS')


from statsmodels.tsa.seasonal import seasonal_decompose

# Perform additive decomposition
decomp = seasonal_decompose(Lupa_upd["Flow_Rate_Lupa"], period=12)

# Plot decomposition
decomp.plot( ); plt.tight_layout();


Lupa_upd = pd.read_csv(r'C:\Users\Kurt\Documents\Notebooks\XGBoost/acea-water-prediction/Lupa_Arrone_PETIR.csv', index_col=0, parse_dates=True)
Lupa_upd.head()


Lupa_upd["Flow_Rate_Lupa"]= Lupa_upd.Flow_Rate_Lupa.resample("MS").sum()
Lupa_upd = Lupa_upd.asfreq('MS')


from statsmodels.tsa.seasonal import seasonal_decompose

# Perform additive decomposition
decomp = seasonal_decompose(Lupa_upd["Flow_Rate_Lupa"], period=12)

# Plot decomposition
decomp.plot( ); plt.tight_layout();


from statsmodels.graphics.tsaplots import plot_acf

# Create figure and subplot
fig, ax1 = plt.subplots()

# Plot the ACF on ax1
plot_acf(Lupa_upd["Flow_Rate_Lupa"], lags=39, zero=False, ax=ax1);


from statsmodels.graphics.tsaplots import plot_acf

# Create figure and subplot
fig, ax1 = plt.subplots()

# Plot the ACF on ax1
plot_acf(Lupa_upd["Flow_Rate_Lupa"].diff().dropna(), lags=39, zero=False, ax=ax1);


# Subtract the rolling mean
Lupa_upd_2 = Lupa_upd - Lupa_upd.rolling(37).mean() # 25 ipv 15

# Drop the NaN values
Lupa_upd_2 = Lupa_upd_2.dropna()

# Create figure and subplots
fig, ax1 = plt.subplots()

# Plot the ACF
plot_acf(Lupa_upd['Flow_Rate_Lupa'], lags=38, zero=True, ax=ax1);


Lupa_upd = pd.read_csv(r'C:\Users\Kurt\Documents\Notebooks\XGBoost/acea-water-prediction/Lupa_Arrone.csv', index_col=0, parse_dates=True)
Lupa_upd.head()


Lupa_upd["Flow_Rate_Lupa"]= Lupa_upd.Flow_Rate_Lupa.resample("W").sum()
Lupa_upd = Lupa_upd.asfreq('W')


from statsmodels.tsa.seasonal import seasonal_decompose

# Perform additive decomposition
decomp = seasonal_decompose(Lupa_upd["Flow_Rate_Lupa"], period=52)

# Plot decomposition
decomp.plot( ); plt.tight_layout();


from statsmodels.graphics.tsaplots import plot_acf

# Create figure and subplot
fig, ax1 = plt.subplots()

# Plot the ACF on ax1
plot_acf(Lupa_upd["Flow_Rate_Lupa"], lags=156, zero=False, ax=ax1);


from statsmodels.graphics.tsaplots import plot_acf
# Subtract the rolling mean
Lupa_upd_2 = Lupa_upd - Lupa_upd.rolling(52).mean() # 25 ipv 15

# Drop the NaN values
Lupa_upd_2 = Lupa_upd_2.dropna()

fig, ax1 = plt.subplots()
# Plot the ACF
plot_acf(Lupa_upd_2['Flow_Rate_Lupa'], lags=158, zero=True, ax=ax1);


from statsmodels.graphics.tsaplots import plot_pacf
fig, ax1 = plt.subplots()
# Plot the PACF
plot_pacf(Lupa_upd_2['Flow_Rate_Lupa'], lags=58, zero=True, ax=ax1);

c:\program files\python38\lib\site-packages\statsmodels\regression\linear_model.py:1434: RuntimeWarning: invalid value encountered in sqrt
  return rho, np.sqrt(sigmasq)


df1 = df1.asfreq('d')
df2 = df2.asfreq('d')
df3 = df3.asfreq('d')
df1.plot();


df2.plot();


# Create a SARIMAX model
model = SARIMAX(df2, order=(2, 1, 1), seasonal_order=(1, 0, 0, 4))

# Fit the model
results = model.fit()

# Print the results summary
results.summary()


# Create a SARIMAX model
model = SARIMAX(df3, order=(1, 1, 0), seasonal_order=(0, 1, 1, 12))

# Fit the model
results = model.fit()

# Print the results summary
results.summary()


Lupa_upd.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 600 entries, 2009-01-04 to 2020-06-28
Freq: W-SUN
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Rainfall_Terni  600 non-null    float64
 1   Flow_Rate_Lupa  600 non-null    float64
 2   doy             600 non-null    int64  
 3   Month           600 non-null    int64  
 4   Year            600 non-null    int64  
dtypes: float64(2), int64(3)
memory usage: 28.1 KB


from statsmodels.graphics.tsaplots import plot_pacf

# Take the first and seasonal differences and drop NaNs
Lupa_upd_diff = Lupa_upd.Flow_Rate_Lupa.diff().diff(12).dropna()  # Flow_Rate_Lupa

# Create the figure 
fig, (ax1, ax2) = plt.subplots(2,1,figsize=(8,5.6))

# Plot the ACF on ax1
plot_acf(Lupa_upd_diff, lags=11, zero=False, ax=ax1);

# Plot the PACF on ax2
plot_pacf(Lupa_upd_diff, lags=11, zero=False, ax=ax2);
plt.tight_layout();


# Make list of lags
lags = [12, 24, 36, 48, 60]

# Create the figure 
fig, (ax1, ax2) = plt.subplots(2,1,figsize=(8,5.6))

# Plot the ACF on ax1
plot_acf(Lupa_upd_diff, lags=lags, zero=False, ax=ax1);

# Plot the PACF on ax2
plot_pacf(Lupa_upd_diff, lags=lags, zero=False, ax=ax2);
plt.tight_layout();


from statsmodels.tsa.statespace.sarimax import SARIMAX

model = SARIMAX(Lupa_upd.Flow_Rate_Lupa, order=(2,0,0), trend='ct', 
                enforce_stationarity=True, enforce_invertibility=True)

arima_results = model.fit()


arima_results.specification

{'seasonal_periods': 0,
 'measurement_error': False,
 'time_varying_regression': False,
 'simple_differencing': False,
 'enforce_stationarity': True,
 'enforce_invertibility': True,
 'hamilton_representation': False,
 'concentrate_scale': False,
 'trend_offset': 1,
 'order': (1, 1, 1),
 'seasonal_order': (0, 0, 0, 0),
 'k_diff': 1,
 'k_seasonal_diff': 0,
 'k_ar': 1,
 'k_ma': 1,
 'k_seasonal_ar': 0,
 'k_seasonal_ma': 0,
 'k_ar_params': 1,
 'k_ma_params': 1,
 'trend': 'c',
 'k_trend': 1,
 'k_exog': 0,
 'mle_regression': False,
 'state_regression': False}


Lupa_upd.tail(20)


from statsmodels.tsa.statespace.sarimax import SARIMAX
# cut off the last 20 weeks instead of take all
modelsa = SARIMAX(Lupa_upd.Flow_Rate_Lupa.loc["2014-01-01":"2020-02-10"], order=(1,0,1), seasonal_order=(1,0,0, 52), trend='c', 
                enforce_stationarity=False, enforce_invertibility=True)

sarima_results = modelsa.fit()


sarima_results.summary()


sarima_results.specification

{'seasonal_periods': 52,
 'measurement_error': False,
 'time_varying_regression': False,
 'simple_differencing': False,
 'enforce_stationarity': False,
 'enforce_invertibility': True,
 'hamilton_representation': False,
 'concentrate_scale': False,
 'trend_offset': 1,
 'order': (1, 0, 1),
 'seasonal_order': (1, 0, 0, 52),
 'k_diff': 0,
 'k_seasonal_diff': 0,
 'k_ar': 1,
 'k_ma': 1,
 'k_seasonal_ar': 52,
 'k_seasonal_ma': 0,
 'k_ar_params': 1,
 'k_ma_params': 1,
 'trend': 'c',
 'k_trend': 1,
 'k_exog': 0,
 'mle_regression': False,
 'state_regression': False}


# Create ARIMA mean forecast  25 weeks = 1/2 year
arima_pred = arima_results.get_forecast(25)
arima_mean = arima_pred.predicted_mean

# Create SARIMA mean forecast
sarima_pred = sarima_results.get_forecast(25)
sarima_mean = sarima_pred.predicted_mean
dates = Lupa_upd.index[-25:]
# Plot mean ARIMA and SARIMA predictions and observed
plt.plot(dates, sarima_mean, label='SARIMA');
plt.plot(dates, arima_mean, label='ARIMA');
plt.plot(Lupa_upd.Flow_Rate_Lupa, label='observed');
plt.legend();


Lupa_upd = pd.read_csv(r'C:\Users\Kurt\Documents\Notebooks\XGBoost/acea-water-prediction/Lupa_Arrone_PETIR.csv', index_col=0, parse_dates=True)
Lupa_upd.head()


Lupa_upd["Flow_Rate_Lupa"]= Lupa_upd.Flow_Rate_Lupa.resample("W").sum()
Lupa_upd = Lupa_upd.asfreq('W')


from statsmodels.tsa.statespace.sarimax import SARIMAX
# cut off the last 20 weeks instead of take all
modelsa = SARIMAX(Lupa_upd.Flow_Rate_Lupa.loc["2014-01-01":"2020-02-10"], order=(3, 0, 1), seasonal_order=(2, 0, 1, 52), trend='c', 
                enforce_stationarity=True, enforce_invertibility=True)

sarima_results = modelsa.fit()

c:\program files\python38\lib\site-packages\statsmodels\tsa\statespace\sarimax.py:997: UserWarning: Non-stationary starting seasonal autoregressive Using zeros as starting parameters.
  warn('Non-stationary starting seasonal autoregressive'
c:\program files\python38\lib\site-packages\statsmodels\base\model.py:566: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "


sarima_results.summary()


sarima_results.specification

{'seasonal_periods': 52,
 'measurement_error': False,
 'time_varying_regression': False,
 'simple_differencing': False,
 'enforce_stationarity': True,
 'enforce_invertibility': True,
 'hamilton_representation': False,
 'concentrate_scale': False,
 'trend_offset': 1,
 'order': (3, 0, 1),
 'seasonal_order': (2, 0, 1, 52),
 'k_diff': 0,
 'k_seasonal_diff': 0,
 'k_ar': 3,
 'k_ma': 1,
 'k_seasonal_ar': 104,
 'k_seasonal_ma': 52,
 'k_ar_params': 3,
 'k_ma_params': 1,
 'trend': 'c',
 'k_trend': 1,
 'k_exog': 0,
 'mle_regression': False,
 'state_regression': False}


# Create ARIMA mean forecast
#arima_pred = arima_results.get_forecast(25)
#arima_mean = arima_pred.predicted_mean

# Create SARIMA mean forecast
sarima_pred = sarima_results.get_forecast(30)
sarima_mean = sarima_pred.predicted_mean
dates2 = pd.date_range(Lupa_upd.index[-30] , freq="W-SUN", periods=30 ) #Lupa_upd.index[-30:] 
dates  = pd.date_range(Lupa_upd.index[-20] , freq="W-SUN", periods=30 )

# Plot mean ARIMA and SARIMA predictions and observed
plt.plot(dates, sarima_mean/7, label='SARIMA', marker="+"); 
#plt.plot(dates, arima_mean, label='ARIMA');
plt.plot( Lupa_upd.index, Lupa_upd.Flow_Rate_Lupa[-600:]/7, label='observed');
plt.xlim( pd.to_datetime("2020-01-01"))
plt.legend();


sarima_mean

2020-02-16    773.798356
2020-02-23    771.716107
2020-03-01    774.638470
2020-03-08    778.803846
2020-03-15    797.259882
2020-03-22    822.908894
2020-03-29    846.378658
2020-04-05    862.822484
2020-04-12    871.018869
2020-04-19    877.530006
2020-04-26    883.128483
2020-05-03    889.300929
2020-05-10    893.246971
2020-05-17    898.866165
2020-05-24    904.774141
2020-05-31    910.530374
2020-06-07    914.001029
2020-06-14    916.217966
2020-06-21    913.919089
2020-06-28    908.460579
2020-07-05    902.062455
2020-07-12    895.578868
2020-07-19    885.019315
2020-07-26    874.179283
2020-08-02    863.227052
2020-08-09    852.962019
2020-08-16    843.199485
2020-08-23    834.611397
2020-08-30    825.154046
2020-09-06    816.593538
Freq: W-SUN, Name: predicted_mean, dtype: float64


dates

DatetimeIndex(['2020-02-16', '2020-02-23', '2020-03-01', '2020-03-08',
               '2020-03-15', '2020-03-22', '2020-03-29', '2020-04-05',
               '2020-04-12', '2020-04-19', '2020-04-26', '2020-05-03',
               '2020-05-10', '2020-05-17', '2020-05-24', '2020-05-31',
               '2020-06-07', '2020-06-14', '2020-06-21', '2020-06-28',
               '2020-07-05', '2020-07-12', '2020-07-19', '2020-07-26',
               '2020-08-02', '2020-08-09', '2020-08-16', '2020-08-23',
               '2020-08-30', '2020-09-06'],
              dtype='datetime64[ns]', freq='W-SUN')


sarima_results.plot_diagnostics();


# Create ARIMA mean forecast
#arima_pred = arima_results.get_forecast(25)
#arima_mean = arima_pred.predicted_mean

# Create SARIMA mean forecast
sarima_pred = sarima_results.get_forecast(25)
sarima_mean = sarima_pred.predicted_mean
dates = Lupa_upd.index[-25:]
# Plot mean ARIMA and SARIMA predictions and observed
plt.plot(dates, sarima_mean, label='SARIMA');
#plt.plot(dates, arima_mean, label='ARIMA');
plt.plot(Lupa_upd.Flow_Rate_Lupa, label='observed');
plt.legend();


Lupa_upd["Flow_Rate_Lupa"]= Lupa_upd.Flow_Rate_Lupa.resample("MS").sum()
Lupa_upd = Lupa_upd.asfreq('MS')


Lupa_upd.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 138 entries, 2009-01-01 to 2020-06-01
Freq: MS
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Rainfall_Terni  138 non-null    float64
 1   Flow_Rate_Lupa  138 non-null    float64
 2   doy             138 non-null    float64
 3   Month           138 non-null    float64
 4   Year            138 non-null    float64
 5   PET             138 non-null    float64
 6   PETs            138 non-null    float64
 7   Infilt_         138 non-null    float64
 8   Infiltsum       138 non-null    float64
 9   Infilt_35       136 non-null    float64
 10  Flow_35         136 non-null    float64
 11  Net_35          136 non-null    float64
dtypes: float64(12)
memory usage: 14.0 KB


import pmdarima as pm
df1= Lupa_upd.Flow_Rate_Lupa
# Create auto_arima model
model1 = pm.auto_arima(df1,
                      seasonal=True, m=12,
                      d=0, D=0,
                       start_p=2, 
                      max_p=4, max_q=4,
                      trace=True,
                      error_action='ignore',
                      suppress_warnings=True)

# Print model summary
print(model1.summary())

Performing stepwise search to minimize aic
 ARIMA(2,0,2)(1,0,1)[12] intercept   : AIC=inf, Time=0.55 sec
 ARIMA(0,0,0)(0,0,0)[12] intercept   : AIC=1335.449, Time=0.01 sec
 ARIMA(1,0,0)(1,0,0)[12] intercept   : AIC=1086.424, Time=0.11 sec
 ARIMA(0,0,1)(0,0,1)[12] intercept   : AIC=1187.983, Time=0.15 sec
 ARIMA(0,0,0)(0,0,0)[12]             : AIC=1333.449, Time=0.01 sec
 ARIMA(1,0,0)(0,0,0)[12] intercept   : AIC=1096.166, Time=0.02 sec
 ARIMA(1,0,0)(2,0,0)[12] intercept   : AIC=1079.731, Time=0.32 sec
 ARIMA(1,0,0)(2,0,1)[12] intercept   : AIC=1066.776, Time=0.63 sec
 ARIMA(1,0,0)(1,0,1)[12] intercept   : AIC=inf, Time=0.39 sec
 ARIMA(1,0,0)(2,0,2)[12] intercept   : AIC=inf, Time=1.08 sec
 ARIMA(1,0,0)(1,0,2)[12] intercept   : AIC=1066.641, Time=0.51 sec
 ARIMA(1,0,0)(0,0,2)[12] intercept   : AIC=1087.072, Time=0.30 sec
 ARIMA(1,0,0)(0,0,1)[12] intercept   : AIC=1089.752, Time=0.12 sec
 ARIMA(0,0,0)(1,0,2)[12] intercept   : AIC=1324.782, Time=0.39 sec
 ARIMA(2,0,0)(1,0,2)[12] intercept   : AIC=1040.186, Time=0.39 sec
 ARIMA(2,0,0)(0,0,2)[12] intercept   : AIC=1038.587, Time=0.33 sec
 ARIMA(2,0,0)(0,0,1)[12] intercept   : AIC=1039.360, Time=0.13 sec
 ARIMA(2,0,0)(1,0,1)[12] intercept   : AIC=inf, Time=0.25 sec
 ARIMA(3,0,0)(0,0,2)[12] intercept   : AIC=1039.473, Time=0.40 sec
 ARIMA(2,0,1)(0,0,2)[12] intercept   : AIC=1039.155, Time=0.45 sec
 ARIMA(1,0,1)(0,0,2)[12] intercept   : AIC=1051.879, Time=0.42 sec
 ARIMA(3,0,1)(0,0,2)[12] intercept   : AIC=1041.147, Time=0.70 sec
 ARIMA(2,0,0)(0,0,2)[12]             : AIC=1036.594, Time=0.20 sec
 ARIMA(2,0,0)(0,0,1)[12]             : AIC=1037.371, Time=0.07 sec
 ARIMA(2,0,0)(1,0,2)[12]             : AIC=1038.191, Time=0.35 sec
 ARIMA(2,0,0)(1,0,1)[12]             : AIC=inf, Time=0.19 sec
 ARIMA(1,0,0)(0,0,2)[12]             : AIC=1085.111, Time=0.19 sec
 ARIMA(3,0,0)(0,0,2)[12]             : AIC=1037.480, Time=0.24 sec
 ARIMA(2,0,1)(0,0,2)[12]             : AIC=1037.162, Time=0.30 sec
 ARIMA(1,0,1)(0,0,2)[12]             : AIC=1049.902, Time=0.26 sec
 ARIMA(3,0,1)(0,0,2)[12]             : AIC=1039.154, Time=0.50 sec

Best model:  ARIMA(2,0,0)(0,0,2)[12]          
Total fit time: 9.970 seconds
                                        SARIMAX Results                                        
===============================================================================================
Dep. Variable:                                       y   No. Observations:                  138
Model:             SARIMAX(2, 0, 0)x(0, 0, [1, 2], 12)   Log Likelihood                -513.297
Date:                                 Fri, 14 May 2021   AIC                           1036.594
Time:                                         16:04:50   BIC                           1051.230
Sample:                                              0   HQIC                          1042.542
                                                 - 138                                         
Covariance Type:                                   opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          1.4369      0.075     19.062      0.000       1.289       1.585
ar.L2         -0.5649      0.082     -6.909      0.000      -0.725      -0.405
ma.S.L12       0.0391      0.072      0.541      0.589      -0.103       0.181
ma.S.L24       0.1880      0.097      1.937      0.053      -0.002       0.378
sigma2        97.1145      6.850     14.178      0.000      83.689     110.540
===================================================================================
Ljung-Box (L1) (Q):                   0.23   Jarque-Bera (JB):               245.05
Prob(Q):                              0.63   Prob(JB):                         0.00
Heteroskedasticity (H):               0.70   Skew:                             2.03
Prob(H) (two-sided):                  0.23   Kurtosis:                         8.11
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).


Lupa_upd = pd.read_csv(r'C:\Users\Kurt\Documents\Notebooks\XGBoost/acea-water-prediction/Lupa_Arrone_PETIR.csv', index_col=0, parse_dates=True)
Lupa_upd.head()


Lupa_updW= Lupa_upd.loc["2010-01-01":].Flow_Rate_Lupa.resample("W").mean()
Lupa_updW = Lupa_updW.asfreq('W')


Lupa_updW.plot(); # .Flow_Rate_Lupa


Lupa_updW.head(30)

Date
2010-01-03     88.233333
2010-01-10    107.960000
2010-01-17    142.602857
2010-01-24    156.262857
2010-01-31    158.530000
2010-02-07    163.534286
2010-02-14    175.007143
2010-02-21    185.575714
2010-02-28    202.022857
2010-03-07    219.265714
2010-03-14    234.205714
2010-03-21    240.834286
2010-03-28    241.071429
2010-04-04    240.362857
2010-04-11    238.864286
2010-04-18    236.798571
2010-04-25    232.751429
2010-05-02    228.647143
2010-05-09    223.965714
2010-05-16    223.518571
2010-05-23    245.901429
2010-05-30    263.078571
2010-06-06    263.841429
2010-06-13    257.070000
2010-06-20    248.525714
2010-06-27    240.901429
2010-07-04    233.381429
2010-07-11    224.557143
2010-07-18    216.880000
2010-07-25    208.685714
Freq: W-SUN, Name: Flow_Rate_Lupa, dtype: float64


import pmdarima as pm
df1= Lupa_updW
model2 = pm.auto_arima(df1,
                       seasonal=True, m=52,
                       d=0,
                       trend='c',
                      max_p=6, max_q=6,
                      trace=True,
                      error_action='ignore',
                      suppress_warnings=True)

# Print model summary
print(model2.summary())

Performing stepwise search to minimize aic
 ARIMA(2,0,2)(1,0,1)[52] intercept   : AIC=inf, Time=14.05 sec
 ARIMA(0,0,0)(0,0,0)[52] intercept   : AIC=8179.200, Time=0.01 sec
 ARIMA(1,0,0)(1,0,0)[52] intercept   : AIC=inf, Time=5.29 sec
 ARIMA(0,0,1)(0,0,1)[52] intercept   : AIC=inf, Time=3.22 sec
 ARIMA(0,0,0)(0,0,0)[52]             : AIC=8179.200, Time=0.01 sec
 ARIMA(0,0,0)(1,0,0)[52] intercept   : AIC=8179.181, Time=1.57 sec
 ARIMA(0,0,0)(2,0,0)[52] intercept   : AIC=8175.471, Time=20.84 sec
 ARIMA(0,0,0)(2,0,1)[52] intercept   : AIC=8173.632, Time=16.88 sec
 ARIMA(0,0,0)(1,0,1)[52] intercept   : AIC=8171.919, Time=2.13 sec
 ARIMA(0,0,0)(0,0,1)[52] intercept   : AIC=8169.920, Time=0.87 sec
 ARIMA(0,0,0)(0,0,2)[52] intercept   : AIC=8168.447, Time=17.93 sec
 ARIMA(0,0,0)(1,0,2)[52] intercept   : AIC=8170.667, Time=12.71 sec
 ARIMA(1,0,0)(0,0,2)[52] intercept   : AIC=5958.782, Time=33.54 sec
 ARIMA(1,0,0)(0,0,1)[52] intercept   : AIC=5963.467, Time=3.34 sec
 ARIMA(1,0,0)(1,0,2)[52] intercept   : AIC=inf, Time=42.05 sec
 ARIMA(1,0,0)(1,0,1)[52] intercept   : AIC=inf, Time=8.55 sec
 ARIMA(2,0,0)(0,0,2)[52] intercept   : AIC=5450.926, Time=34.57 sec
 ARIMA(2,0,0)(0,0,1)[52] intercept   : AIC=5451.637, Time=3.79 sec
 ARIMA(2,0,0)(1,0,2)[52] intercept   : AIC=inf, Time=49.52 sec
 ARIMA(2,0,0)(1,0,1)[52] intercept   : AIC=inf, Time=10.05 sec
 ARIMA(3,0,0)(0,0,2)[52] intercept   : AIC=5415.877, Time=26.82 sec
 ARIMA(3,0,0)(0,0,1)[52] intercept   : AIC=5419.025, Time=3.81 sec
 ARIMA(3,0,0)(1,0,2)[52] intercept   : AIC=inf, Time=55.36 sec
 ARIMA(3,0,0)(1,0,1)[52] intercept   : AIC=inf, Time=11.97 sec
 ARIMA(4,0,0)(0,0,2)[52] intercept   : AIC=5415.482, Time=36.56 sec
 ARIMA(4,0,0)(0,0,1)[52] intercept   : AIC=5417.910, Time=5.32 sec
 ARIMA(4,0,0)(1,0,2)[52] intercept   : AIC=inf, Time=61.79 sec
 ARIMA(4,0,0)(1,0,1)[52] intercept   : AIC=inf, Time=12.37 sec
 ARIMA(5,0,0)(0,0,2)[52] intercept   : AIC=5414.332, Time=61.16 sec
 ARIMA(5,0,0)(0,0,1)[52] intercept   : AIC=5416.990, Time=6.98 sec
 ARIMA(5,0,0)(1,0,2)[52] intercept   : AIC=5415.576, Time=66.53 sec
 ARIMA(5,0,0)(1,0,1)[52] intercept   : AIC=inf, Time=15.75 sec
 ARIMA(6,0,0)(0,0,2)[52] intercept   : AIC=5412.368, Time=41.23 sec
 ARIMA(6,0,0)(0,0,1)[52] intercept   : AIC=5415.830, Time=7.31 sec
 ARIMA(6,0,0)(1,0,2)[52] intercept   : AIC=5414.351, Time=69.59 sec
 ARIMA(6,0,0)(1,0,1)[52] intercept   : AIC=inf, Time=16.67 sec
 ARIMA(6,0,1)(0,0,2)[52] intercept   : AIC=5413.157, Time=72.34 sec
 ARIMA(5,0,1)(0,0,2)[52] intercept   : AIC=5413.951, Time=68.55 sec
 ARIMA(6,0,0)(0,0,2)[52]             : AIC=5412.368, Time=40.97 sec

Best model:  ARIMA(6,0,0)(0,0,2)[52]          
Total fit time: 962.179 seconds
                                        SARIMAX Results                                        
===============================================================================================
Dep. Variable:                                       y   No. Observations:                  548
Model:             SARIMAX(6, 0, 0)x(0, 0, [1, 2], 52)   Log Likelihood               -2696.184
Date:                                 Sun, 30 May 2021   AIC                           5412.368
Time:                                         14:44:56   BIC                           5455.431
Sample:                                              0   HQIC                          5429.199
                                                 - 548                                         
Covariance Type:                                   opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept      9.8691      3.960      2.492      0.013       2.107      17.631
ar.L1          2.0518      0.026     79.107      0.000       2.001       2.103
ar.L2         -1.3897      0.064    -21.879      0.000      -1.514      -1.265
ar.L3          0.3423      0.088      3.902      0.000       0.170       0.514
ar.L4         -0.0244      0.119     -0.204      0.838      -0.258       0.209
ar.L5          0.1006      0.111      0.910      0.363      -0.116       0.317
ar.L6         -0.0937      0.052     -1.806      0.071      -0.195       0.008
ma.S.L52      -0.0545      0.037     -1.464      0.143      -0.127       0.018
ma.S.L104      0.1176      0.046      2.557      0.011       0.027       0.208
sigma2      1082.1714     33.294     32.503      0.000    1016.916    1147.427
===================================================================================
Ljung-Box (L1) (Q):                  13.58   Jarque-Bera (JB):              4679.77
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):               0.51   Skew:                             1.77
Prob(H) (two-sided):                  0.00   Kurtosis:                        16.87
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).


# Create model for SARIMAX(p,1,q)(P,1,Q)7   with log transform this time
df2= np.log1p( Lupa_upd.Flow_Rate_Lupa)
model3 = pm.auto_arima(df2,
                      seasonal=True, m=52, trend='ct',
                      d=0, D=0,
                      start_p=1, start_q=1,
                      #max_p=1, max_q=1,
                      #max_P=1, max_Q=1,
                      trace=True,
                      error_action='ignore',
                      suppress_warnings=True)

# Print model summary
print(model3.summary())

Performing stepwise search to minimize aic
 ARIMA(1,0,1)(1,0,1)[52] intercept   : AIC=-1451.522, Time=8.24 sec
 ARIMA(0,0,0)(0,0,0)[52] intercept   : AIC=899.170, Time=0.21 sec
 ARIMA(1,0,0)(1,0,0)[52] intercept   : AIC=inf, Time=15.46 sec
 ARIMA(0,0,1)(0,0,1)[52] intercept   : AIC=114.797, Time=8.49 sec
 ARIMA(0,0,0)(0,0,0)[52]             : AIC=899.170, Time=0.21 sec
 ARIMA(1,0,1)(0,0,1)[52] intercept   : AIC=-1738.453, Time=3.80 sec
 ARIMA(1,0,1)(0,0,0)[52] intercept   : AIC=-1740.453, Time=0.15 sec
 ARIMA(1,0,1)(1,0,0)[52] intercept   : AIC=-1244.094, Time=13.91 sec
 ARIMA(0,0,1)(0,0,0)[52] intercept   : AIC=113.840, Time=0.33 sec
 ARIMA(1,0,0)(0,0,0)[52] intercept   : AIC=inf, Time=0.25 sec
 ARIMA(2,0,1)(0,0,0)[52] intercept   : AIC=-1871.215, Time=0.19 sec
 ARIMA(2,0,1)(1,0,0)[52] intercept   : AIC=-1389.063, Time=11.60 sec

c:\program files\python38\lib\site-packages\statsmodels\tsa\statespace\sarimax.py:1897: RuntimeWarning: divide by zero encountered in reciprocal
  return np.roots(self.polynomial_reduced_ma)**-1

 ARIMA(2,0,1)(0,0,1)[52] intercept   : AIC=-1869.215, Time=4.72 sec
 ARIMA(2,0,1)(1,0,1)[52] intercept   : AIC=-1578.314, Time=11.73 sec
 ARIMA(2,0,0)(0,0,0)[52] intercept   : AIC=-1876.867, Time=0.15 sec
 ARIMA(2,0,0)(1,0,0)[52] intercept   : AIC=inf, Time=10.81 sec

c:\program files\python38\lib\site-packages\statsmodels\tsa\statespace\sarimax.py:1897: RuntimeWarning: divide by zero encountered in reciprocal
  return np.roots(self.polynomial_reduced_ma)**-1

 ARIMA(2,0,0)(0,0,1)[52] intercept   : AIC=-1874.867, Time=4.72 sec
 ARIMA(2,0,0)(1,0,1)[52] intercept   : AIC=-738.314, Time=6.80 sec
 ARIMA(3,0,0)(0,0,0)[52] intercept   : AIC=-1870.884, Time=0.21 sec
 ARIMA(3,0,1)(0,0,0)[52] intercept   : AIC=-1864.760, Time=0.24 sec
 ARIMA(2,0,0)(0,0,0)[52]             : AIC=-1876.867, Time=0.16 sec

Best model:  ARIMA(2,0,0)(0,0,0)[52] intercept
Total fit time: 102.389 seconds
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                  600
Model:               SARIMAX(2, 0, 0)   Log Likelihood                 943.434
Date:                Fri, 07 May 2021   AIC                          -1876.867
Time:                        19:53:25   BIC                          -1854.883
Sample:                             0   HQIC                         -1868.309
                                - 600                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept      0.0820      0.031      2.684      0.007       0.022       0.142
drift      -3.838e-06   1.18e-05     -0.326      0.744   -2.69e-05    1.92e-05
ar.L1          1.6156      0.010    161.479      0.000       1.596       1.635
ar.L2         -0.6280      0.010    -65.138      0.000      -0.647      -0.609
sigma2         0.0022      4e-05     54.273      0.000       0.002       0.002
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):             11294.19
Prob(Q):                              0.91   Prob(JB):                         0.00
Heteroskedasticity (H):               0.77   Skew:                             2.56
Prob(H) (two-sided):                  0.07   Kurtosis:                        23.63
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).


from statsmodels.tsa.statespace.sarimax import SARIMAX
# Create model object
df1= Lupa_updW # (with PETir)
modelW = SARIMAX(df1, order=(6, 0, 0), seasonal_order=(0,0,2, 52), trend='ct')
# Fit model
results = modelW.fit()

c:\program files\python38\lib\site-packages\statsmodels\base\model.py:566: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "


modelW.score(results.params) #params_object

array([ 3.97300208e-02, -6.28339937e+00,  6.64267276e+00,  2.01247780e+01,
        3.29970397e+01,  4.16195477e+01,  4.75812399e+01,  5.07203976e+01,
        6.97140830e-01, -7.59670422e-01, -3.81762862e-01])


results.summary()


# Plot common diagnostics
plt.style.use('bmh')
results.plot_diagnostics(figsize=(18, 10));
plt.tight_layout();

c:\program files\python38\lib\site-packages\statsmodels\graphics\gofplots.py:993: UserWarning: marker is redundantly defined by the 'marker' keyword argument and the fmt string "bo" (-> marker='o'). The keyword argument will take precedence.
  ax.plot(x, y, fmt, **plot_style)


# Create forecast object
forecast_object = results.get_forecast(27)

# Extract predicted mean attribute
mean = forecast_object.predicted_mean

# Calculate the confidence intervals
conf_int = forecast_object.conf_int()

# Extract the forecast dates
dates = mean.index

plt.figure()
# Plot past levels
plt.plot(df1.index, df1, label='past');

# Plot the prediction means as line
plt.plot(dates, mean, label='predicted');
plt.plot(dates[-27:], Lupa_JULY_2020W.iloc[-27:], label='observed');
# Shade between the confidence intervals
plt.fill_between(dates, conf_int.loc[:, 'lower Flow_Rate_Lupa'], 
                 conf_int.loc[:, 'upper Flow_Rate_Lupa'], alpha=0.32);
plt.ylim( 0,)
# Plot legend
plt.legend();


conf_int


# Print last predicted mean
print(mean.iloc[-1])

# Print last confidence interval
print(conf_int.iloc[-1])

78.58600838594779
lower Flow_Rate_Lupa    -36.848152
upper Flow_Rate_Lupa    194.020169
Name: 2021-01-10 00:00:00, dtype: float64


Lupa_JULY_2020= pd.read_excel(r"../Arima/Lupa_JULY_2020.xlsx", engine='openpyxl', parse_dates=["Date"],index_col=0)
Lupa_JULY_2020.head()


Lupa_JULY_2020.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 184 entries, 2020-07-01 to 2020-12-31
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Rainfall_Terni  184 non-null    float64
 1   Flow_Rate_Lupa  184 non-null    float64
 2   doy             0 non-null      float64
 3   Month           0 non-null      float64
 4   Year            0 non-null      float64
dtypes: float64(5)
memory usage: 8.6 KB


Lupa_JULY_2020W= Lupa_JULY_2020.Flow_Rate_Lupa.resample("W").mean() #.loc["2010-01-01":]
Lupa_JULY_2020W = Lupa_JULY_2020W.asfreq('W')


Lupa_JULY_2020W

Date
2020-07-05     71.622000
2020-07-12     70.114286
2020-07-19     68.295714
2020-07-26     66.737143
2020-08-02     64.922857
2020-08-09     63.222857
2020-08-16     61.740000
2020-08-23     60.282857
2020-08-30     58.970000
2020-09-06     58.040000
2020-09-13     56.807143
2020-09-20     55.390000
2020-09-27     54.220000
2020-10-04     54.014286
2020-10-11     54.522857
2020-10-18     59.942857
2020-10-25     61.474286
2020-11-01     62.081429
2020-11-08     61.955714
2020-11-15     61.864286
2020-11-22     63.724286
2020-11-29     64.310000
2020-12-06     65.625714
2020-12-13    102.547143
2020-12-20    130.021429
2020-12-27    139.651429
2021-01-03    145.542500
Freq: W-SUN, Name: Flow_Rate_Lupa, dtype: float64


Lupa_JULY_2020W.tail()

Date
2020-12-06     65.625714
2020-12-13    102.547143
2020-12-20    130.021429
2020-12-27    139.651429
2021-01-03    145.542500
Freq: W-SUN, Name: Flow_Rate_Lupa, dtype: float64


Arrone_TpmsIrr =pd.read_csv(r"C:\Users\Kurt\Documents\Notebooks\XGBoost\acea-water-prediction\POWER_SinglePoint_Daily_20100101_20210526_042d59N_012d77E_c3befd81.csv",
                             engine='python', sep=",",encoding="UTF-8",decimal=".",header=15,na_values=["-99.00","-999"], skipfooter=2)
#Arrone_TpmsIrr['Date'] = pd.to_datetime(Arrone_TpmsIrr['DOY'], format='%j').dt.strftime('%m-%d')  #), origin=pd.to_datetime(Arrone_TpmsIrr['YEAR'])
Arrone_TpmsIrr['Date'] = pd.date_range("2010-01-01", "2021-05-24",)
Arrone_TpmsIrr.set_index(['Date'] , inplace=True)
Arrone_TpmsIrr.head()


Arrone_WindIrr =pd.read_csv(r"C:\Users\Kurt\Documents\Notebooks\XGBoost\acea-water-prediction\POWER_SinglePoint_Daily_20100101_20210527_042d59N_012d78E_fbd0a9d6.csv",
                             engine='python', sep=",",encoding="UTF-8",decimal=".",header=11,na_values=["-99.00","-999"], skipfooter=2)
#Arrone_TpmsIrr['Date'] = pd.to_datetime(Arrone_TpmsIrr['DOY'], format='%j').dt.strftime('%m-%d')  #), origin=pd.to_datetime(Arrone_TpmsIrr['YEAR'])
Arrone_WindIrr['Date'] = pd.date_range("2010-01-01", "2021-05-25",)
Arrone_WindIrr.set_index(['Date'] , inplace=True)
Arrone_WindIrr.head()


Lupa_upd.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4199 entries, 2009-01-01 to 2020-06-30
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Rainfall_Terni  4199 non-null   float64
 1   Flow_Rate_Lupa  4199 non-null   float64
 2   doy             4199 non-null   float64
 3   Month           4199 non-null   float64
 4   Year            4199 non-null   float64
 5   ET01            3834 non-null   float64
 6   Infilt_         3834 non-null   float64
 7   Infiltsum       3834 non-null   float64
 8   Rainfall_Ter    4199 non-null   float64
 9   Flow_Rate_Lup   4199 non-null   float64
 10  Infilt_m3       3834 non-null   float64
dtypes: float64(11)
memory usage: 553.7 KB


3834/7

547.7142857142857


Lupa_updWavg= Lupa_upd.loc["2010-01-01":].Flow_Rate_Lupa.resample("W").mean() #
Lupa_updWavg = Lupa_updWavg.asfreq('W')


Lupa_updWsum= Lupa_upd.loc["2010-01-01":].Flow_Rate_Lupa.resample("W").sum() #
Lupa_updWsum = Lupa_updWsum.asfreq('W')


Lupa_updWavg

Date
2010-01-03     575.84
2010-01-10     755.72
2010-01-17     998.22
2010-01-24    1093.84
2010-01-31    1109.71
               ...   
2020-05-31     586.97
2020-06-07     570.66
2020-06-14     553.24
2020-06-21     535.92
2020-06-28     519.73
Freq: W-SUN, Name: Flow_Rate_Lupa, Length: 548, dtype: float64


Lupa_updWavg.plot();


Lupa_updWsum.plot();


Lupa_updWavg.min()

204.94


import pmdarima as pm


import pmdarima as pm
df1= Lupa_updWavg   #Lupa_updW
model2 = pm.auto_arima(df1,
                       seasonal=True, m=52,
                       d=0,scoring="mse",
                       trend='c', start_p=6, start_Q=2, 
                      max_p=6, max_q=6,
                      trace=True,
                      error_action='ignore',
                      suppress_warnings=True)

# Print model summary
print(model2.summary())

Performing stepwise search to minimize aic
 ARIMA(6,0,2)(1,0,2)[52] intercept   : AIC=inf, Time=82.36 sec
 ARIMA(0,0,0)(0,0,0)[52] intercept   : AIC=8177.780, Time=0.01 sec
 ARIMA(1,0,0)(1,0,0)[52] intercept   : AIC=inf, Time=5.33 sec
 ARIMA(0,0,1)(0,0,1)[52] intercept   : AIC=inf, Time=3.19 sec
 ARIMA(0,0,0)(0,0,0)[52]             : AIC=8177.780, Time=0.01 sec
 ARIMA(0,0,0)(1,0,0)[52] intercept   : AIC=8177.693, Time=1.60 sec
 ARIMA(0,0,0)(2,0,0)[52] intercept   : AIC=8170.974, Time=34.05 sec
 ARIMA(0,0,0)(2,0,1)[52] intercept   : AIC=8171.764, Time=16.54 sec
 ARIMA(0,0,0)(1,0,1)[52] intercept   : AIC=8169.884, Time=4.06 sec
 ARIMA(0,0,0)(0,0,1)[52] intercept   : AIC=8168.070, Time=0.93 sec
 ARIMA(0,0,0)(0,0,2)[52] intercept   : AIC=8166.404, Time=16.71 sec
 ARIMA(0,0,0)(1,0,2)[52] intercept   : AIC=8168.645, Time=12.95 sec
 ARIMA(1,0,0)(0,0,2)[52] intercept   : AIC=5885.129, Time=17.77 sec
 ARIMA(1,0,0)(0,0,1)[52] intercept   : AIC=5891.341, Time=3.04 sec
 ARIMA(1,0,0)(1,0,2)[52] intercept   : AIC=inf, Time=36.53 sec
 ARIMA(1,0,0)(1,0,1)[52] intercept   : AIC=inf, Time=9.01 sec
 ARIMA(2,0,0)(0,0,2)[52] intercept   : AIC=5373.481, Time=19.96 sec
 ARIMA(2,0,0)(0,0,1)[52] intercept   : AIC=5374.949, Time=3.79 sec
 ARIMA(2,0,0)(1,0,2)[52] intercept   : AIC=inf, Time=54.00 sec
 ARIMA(2,0,0)(1,0,1)[52] intercept   : AIC=inf, Time=10.55 sec
 ARIMA(3,0,0)(0,0,2)[52] intercept   : AIC=5347.149, Time=38.56 sec
 ARIMA(3,0,0)(0,0,1)[52] intercept   : AIC=5351.036, Time=6.11 sec
 ARIMA(3,0,0)(1,0,2)[52] intercept   : AIC=inf, Time=53.78 sec
 ARIMA(3,0,0)(1,0,1)[52] intercept   : AIC=inf, Time=12.31 sec
 ARIMA(4,0,0)(0,0,2)[52] intercept   : AIC=5345.599, Time=34.39 sec
 ARIMA(4,0,0)(0,0,1)[52] intercept   : AIC=5348.485, Time=7.51 sec
 ARIMA(4,0,0)(1,0,2)[52] intercept   : AIC=5345.021, Time=67.89 sec
 ARIMA(4,0,0)(1,0,1)[52] intercept   : AIC=inf, Time=14.10 sec
 ARIMA(4,0,0)(2,0,2)[52] intercept   : AIC=inf, Time=75.88 sec
 ARIMA(4,0,0)(2,0,1)[52] intercept   : AIC=5346.773, Time=53.63 sec
 ARIMA(5,0,0)(1,0,2)[52] intercept   : AIC=5347.593, Time=28.77 sec
 ARIMA(4,0,1)(1,0,2)[52] intercept   : AIC=inf, Time=70.19 sec
 ARIMA(3,0,1)(1,0,2)[52] intercept   : AIC=5348.009, Time=55.26 sec
 ARIMA(5,0,1)(1,0,2)[52] intercept   : AIC=inf, Time=67.85 sec
 ARIMA(4,0,0)(1,0,2)[52]             : AIC=5345.021, Time=63.19 sec

Best model:  ARIMA(4,0,0)(1,0,2)[52]          
Total fit time: 982.010 seconds
                                        SARIMAX Results                                        
===============================================================================================
Dep. Variable:                                       y   No. Observations:                  548
Model:             SARIMAX(4, 0, 0)x(1, 0, [1, 2], 52)   Log Likelihood               -2663.511
Date:                                 Sat, 10 Jul 2021   AIC                           5345.021
Time:                                         20:12:50   BIC                           5383.778
Sample:                                              0   HQIC                          5360.169
                                                 - 548                                         
Covariance Type:                                   opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept      1.9087      2.143      0.891      0.373      -2.291       6.108
ar.L1          1.9663      0.027     72.886      0.000       1.913       2.019
ar.L2         -1.2747      0.061    -20.798      0.000      -1.395      -1.155
ar.L3          0.3588      0.069      5.205      0.000       0.224       0.494
ar.L4         -0.0611      0.037     -1.657      0.098      -0.133       0.011
ar.S.L52       0.7522      0.211      3.567      0.000       0.339       1.166
ma.S.L52      -0.7855      0.221     -3.551      0.000      -1.219      -0.352
ma.S.L104      0.1198      0.042      2.833      0.005       0.037       0.203
sigma2       961.9048     37.204     25.855      0.000     888.986    1034.824
===================================================================================
Ljung-Box (L1) (Q):                   0.16   Jarque-Bera (JB):              2317.83
Prob(Q):                              0.69   Prob(JB):                         0.00
Heteroskedasticity (H):               0.61   Skew:                             1.81
Prob(H) (two-sided):                  0.00   Kurtosis:                        12.40
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).


df1.tail()

Date
2020-05-31    586.97
2020-06-07    570.66
2020-06-14    553.24
2020-06-21    535.92
2020-06-28    519.73
Freq: W-SUN, Name: Flow_Rate_Lupa, dtype: float64


# Create model object
modelW = SARIMAX(df1[-548:], order=(4,0,0), seasonal_order=(0,0,2, 52), trend='c', ) # Lupa_upd.Infilt_[-549:]
# Fit model
results = modelW.fit()


results.summary()


# Plot common diagnostics
plt.style.use('bmh')
results.plot_diagnostics(figsize=(18, 10));
plt.tight_layout();

c:\program files\python38\lib\site-packages\statsmodels\graphics\gofplots.py:993: UserWarning: marker is redundantly defined by the 'marker' keyword argument and the fmt string "bo" (-> marker='o'). The keyword argument will take precedence.
  ax.plot(x, y, fmt, **plot_style)


# Create forecast object
forecast_object = results.get_forecast(27)  #, exog=Lupa_updWavg[-27:]

# Extract predicted mean attribute
mean = forecast_object.predicted_mean

# Calculate the confidence intervals
conf_int = forecast_object.conf_int()

# Extract the forecast dates
dates = mean.index

plt.figure()
# Plot past levels
plt.plot(df1.index, df1, label='past');

# Plot the prediction means as line
plt.plot(dates, mean, label='predicted');
plt.plot(dates[-27:], Lupa_JULY_2020W.iloc[-27:]*7, label='observed');
# Shade between the confidence intervals
plt.fill_between(dates, conf_int.loc[:, 'lower Flow_Rate_Lupa'], 
                 conf_int.loc[:, 'upper Flow_Rate_Lupa'], alpha=0.32);

# Plot legend
plt.legend();


# Accuracy metrics
def forecast_accuracy(forecast, actual):
    mape = np.mean(np.abs(forecast - actual)/np.abs(actual))  # MAPE
    me = np.mean(forecast - actual)             # ME
    mae = np.mean(np.abs(forecast - actual))    # MAE
    mpe = np.mean((forecast - actual)/actual)   # MPE
    rmse = np.mean((forecast - actual)**2)**.5  # RMSE
    corr = np.corrcoef(forecast, actual)[0,1]   # corr
    mins = np.amin(np.hstack([forecast.to_numpy().reshape(-1,1), 
                              actual.reshape(-1,1)]), axis=1) #[:,None] deprecat
    maxs = np.amax(np.hstack([forecast.to_numpy().reshape(-1,1), 
                              actual.reshape(-1,1)]), axis=1)
    minmax = 1 - np.mean(mins/maxs)             # minmax
    #acf1 = acf(fc-test)[1]                      # ACF1
    return({'mape':mape, 'me':me, 'mae': mae, 
            'mpe': mpe, 'rmse':rmse, #'acf1':acf1, 
            'corr':corr, 'minmax':minmax})

forecast_accuracy(mean,  Lupa_JULY_2020W.iloc[-27:].values )

{'mape': 7.714544481998472,
 'me': 512.9325501561569,
 'mae': 512.9325501561569,
 'mpe': 7.714544481998472,
 'rmse': 515.0350846562457,
 'corr': 0.5346446368516088,
 'minmax': 0.8781608046437273}


Lupa_JULY_2020W.iloc[-27:]

Date
2020-07-05     71.622000
2020-07-12     70.114286
2020-07-19     68.295714
2020-07-26     66.737143
2020-08-02     64.922857
2020-08-09     63.222857
2020-08-16     61.740000
2020-08-23     60.282857
2020-08-30     58.970000
2020-09-06     58.040000
2020-09-13     56.807143
2020-09-20     55.390000
2020-09-27     54.220000
2020-10-04     54.014286
2020-10-11     54.522857
2020-10-18     59.942857
2020-10-25     61.474286
2020-11-01     62.081429
2020-11-08     61.955714
2020-11-15     61.864286
2020-11-22     63.724286
2020-11-29     64.310000
2020-12-06     65.625714
2020-12-13    102.547143
2020-12-20    130.021429
2020-12-27    139.651429
2021-01-03    145.542500
Freq: W-SUN, Name: Flow_Rate_Lupa, dtype: float64


# Create model object
modelW = SARIMAX(df1[-548:], order=(4,0,0), seasonal_order=(1,0,2, 52), trend='c', exog=Lupa_upd.Infilt_[-548:]) # Lupa_upd.Infilt_[-549:]
# Fit model
results = modelW.fit()

c:\program files\python38\lib\site-packages\statsmodels\base\model.py:566: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "


results.summary()


results.mle_retvals

{'fopt': 4.080816593651847,
 'gopt': array([ 1.82821404e-02,  1.23265840e-01, -6.15602094e-03, -8.48193958e-02,
         1.82399695e-04,  2.41473454e-01,  4.52535076e-04, -1.25916937e-02,
        -1.69633531e-02,  5.09544353e-03,  4.44010357e-03,  1.35387310e-02]),
 'fcalls': 949,
 'warnflag': 1,
 'converged': False,
 'iterations': 50}


# Plot common diagnostics
plt.style.use('bmh')
results.plot_diagnostics(figsize=(18, 10));
plt.tight_layout();

c:\program files\python38\lib\site-packages\statsmodels\graphics\gofplots.py:993: UserWarning: marker is redundantly defined by the 'marker' keyword argument and the fmt string "bo" (-> marker='o'). The keyword argument will take precedence.
  ax.plot(x, y, fmt, **plot_style)


# Create forecast object
forecast_object = results.get_forecast(27, exog=Lupa_updWavg[-27:])

# Extract predicted mean attribute
mean = forecast_object.predicted_mean

# Calculate the confidence intervals
conf_int = forecast_object.conf_int()

# Extract the forecast dates
dates = mean.index

plt.figure()
# Plot past levels
plt.plot(df1.index, df1, label='past');

# Plot the prediction means as line
plt.plot(dates, mean, label='predicted');
plt.plot(dates[-27:], Lupa_JULY_2020W.iloc[-27:], label='observed');
# Shade between the confidence intervals
plt.fill_between(dates, conf_int.loc[:, 'lower Flow_Rate_Lupa'], 
                 conf_int.loc[:, 'upper Flow_Rate_Lupa'], alpha=0.32);

# Plot legend
plt.legend();


# Accuracy metrics
def forecast_accuracy(forecast, actual):
    mape = np.mean(np.abs(forecast - actual)/np.abs(actual))  # MAPE
    me = np.mean(forecast - actual)             # ME
    mae = np.mean(np.abs(forecast - actual))    # MAE
    mpe = np.mean((forecast - actual)/actual)   # MPE
    rmse = np.mean((forecast - actual)**2)**.5  # RMSE
    corr = np.corrcoef(forecast, actual)[0,1]   # corr
    mins = np.amin(np.hstack([forecast.to_numpy().reshape(-1,1), 
                              actual.reshape(-1,1)]), axis=1) #[:,None] deprecat
    maxs = np.amax(np.hstack([forecast.to_numpy().reshape(-1,1), 
                              actual.reshape(-1,1)]), axis=1)
    minmax = 1 - np.mean(mins/maxs)             # minmax
    #acf1 = acf(fc-test)[1]                      # ACF1
    return({'mape':mape, 'me':me, 'mae': mae, 
            'mpe': mpe, 'rmse':rmse, #'acf1':acf1, 
            'corr':corr, 'minmax':minmax})

forecast_accuracy(mean,  Lupa_JULY_2020W.iloc[-27:].values )

{'mape': 0.17037097399116238,
 'me': -107.49201504188157,
 'mae': 107.49201504188157,
 'mpe': -0.17037097399116238,
 'rmse': 182.33518448081426,
 'corr': 0.6576651441074307,
 'minmax': 0.17037097399116252}


Lupa_upd = pd.read_csv(r'C:\Users\Kurt\Documents\Notebooks\XGBoost/acea-water-prediction/Lupa_Arrone_PETIR.csv', index_col=0, parse_dates=True)
Lupa_upd.head()


Avg_Years = pd.read_csv(r'C:\Users\Kurt\Documents\Notebooks\Writing.Functions.in.Python/Exp.we.mean.over.11.years.csv', index_col=0, parse_dates=True)
Avg_Years.head()


Avg_Years.reset_index()


Lupa_updreset = Lupa_upd.reset_index()
Lupa_updreset


merger =pd.merge(Lupa_updreset, Avg_Years, left_on="doy" , right_on="doy" )


merger.set_index( ["Date"], inplace=True)
merger


merger.Flow_Rate_Lupa_y.plot();


merger.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4199 entries, 2009-01-01 to 2016-12-31
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Rainfall_Terni    4199 non-null   float64
 1   Flow_Rate_Lupa_x  4199 non-null   float64
 2   doy               4199 non-null   float64
 3   Month             4199 non-null   float64
 4   Year              4199 non-null   float64
 5   ET01              3834 non-null   float64
 6   Infilt_           3834 non-null   float64
 7   Infiltsum         3834 non-null   float64
 8   Rainfall_Ter      4199 non-null   float64
 9   Flow_Rate_Lup     4199 non-null   float64
 10  Infilt_m3         3834 non-null   float64
 11  Flow_Rate_Lupa_y  4199 non-null   float64
dtypes: float64(12)
memory usage: 586.5 KB


merger.Infilt_["2013-12-30":"2014-01-05"]

Date
2014-01-01     0.131237
2014-01-02     0.238967
2014-01-03    -0.444582
2014-01-04    14.881705
2014-01-05    -0.924697
2013-12-30     0.749527
2013-12-31     0.585341
Name: Infilt_, dtype: float64


merger.Infilt_.plot();


Infilt_Ymean = merger.loc["2010-01-01":].Infilt_.resample("YS").mean()
Infilt_Y_sum = merger.loc["2010-01-01":].Infilt_.resample("YS").sum()


Infilt_Ymean.plot();


Infilt_Y_sum.plot();


Flow_Rate_Lupa_x = merger.loc["2010-01-01":].Flow_Rate_Lupa_x.resample("W").mean()
Flow_Rate_Lupa_y = merger.loc["2010-01-01":].Flow_Rate_Lupa_y.resample("W").mean()
Flow_Rate_Lupa_x = Flow_Rate_Lupa_x.asfreq('W'); 
Flow_Rate_Lupa_y = Flow_Rate_Lupa_y.asfreq('W');


Flow_Rate_Lupa_x.plot(); # .Flow_Rate_Lupa


Flow_Rate_Lupa_y.head(30)

Date
2009-01-04    100.403429
2009-01-11    101.682713
2009-01-18    104.150091
2009-01-25    107.296756
2009-02-01    110.397278
2009-02-08    114.664694
2009-02-15    121.333226
2009-02-22    127.921822
2009-03-01    133.287361
2009-03-08    137.477994
2009-03-15    141.624792
2009-03-22    145.611805
2009-03-29    149.478973
2009-04-05    152.951083
2009-04-12    155.621041
2009-04-19    157.181894
2009-04-26    157.861809
2009-05-03    157.728591
2009-05-10    156.749340
2009-05-17    155.054932
2009-05-24    153.809920
2009-05-31    153.117541
2009-06-07    152.179773
2009-06-14    150.400433
2009-06-21    147.675442
2009-06-28    144.242281
2009-07-05    140.896495
2009-07-12    138.589053
2009-07-19    135.038480
2009-07-26    130.600481
Freq: W-SUN, Name: Flow_Rate_Lupa_y, dtype: float64


Infilt_ = merger.loc["2010-01-01":].Infilt_.resample("W").mean() # sum better than  mean? 
Infilt_ = Infilt_.asfreq('W');


import pmdarima as pm

model2_EX = pm.auto_arima(Flow_Rate_Lupa_x.loc["2010-01-01":] , exog=merge2Exog, #["Infilt_", "Flow_Rate_Lupa_y"], 
                       seasonal=True, m=52, max_order=10 ,method="bfgs",
                       d=0,
                       trend='ct',
                      max_p=7, max_q=3, start_p=6, start_P=0, start_Q=1,
                      trace=True,
                      error_action='ignore',
                      suppress_warnings=True)

# Print model summary
print(model2_EX.summary())

Performing stepwise search to minimize aic
 ARIMA(6,0,2)(0,0,1)[52] intercept   : AIC=3349.689, Time=31.77 sec
 ARIMA(0,0,0)(0,0,0)[52] intercept   : AIC=5998.543, Time=0.02 sec
 ARIMA(1,0,0)(1,0,0)[52] intercept   : AIC=inf, Time=17.56 sec
 ARIMA(0,0,1)(0,0,1)[52] intercept   : AIC=inf, Time=16.22 sec
 ARIMA(0,0,0)(0,0,0)[52]             : AIC=5998.543, Time=0.02 sec
 ARIMA(6,0,2)(0,0,0)[52] intercept   : AIC=3231.163, Time=1.24 sec
 ARIMA(6,0,2)(1,0,0)[52] intercept   : AIC=3547.445, Time=25.09 sec
 ARIMA(6,0,2)(1,0,1)[52] intercept   : AIC=inf, Time=25.56 sec
 ARIMA(5,0,2)(0,0,0)[52] intercept   : AIC=3237.909, Time=0.88 sec
 ARIMA(6,0,1)(0,0,0)[52] intercept   : AIC=3267.004, Time=0.54 sec
 ARIMA(7,0,2)(0,0,0)[52] intercept   : AIC=3238.316, Time=1.62 sec
 ARIMA(6,0,3)(0,0,0)[52] intercept   : AIC=4327.295, Time=1.40 sec
 ARIMA(5,0,1)(0,0,0)[52] intercept   : AIC=3255.592, Time=0.53 sec
 ARIMA(5,0,3)(0,0,0)[52] intercept   : AIC=5191.792, Time=1.04 sec
 ARIMA(7,0,1)(0,0,0)[52] intercept   : AIC=3253.356, Time=1.34 sec
 ARIMA(7,0,3)(0,0,0)[52] intercept   : AIC=6847.088, Time=0.23 sec
 ARIMA(6,0,2)(0,0,0)[52]             : AIC=3231.163, Time=1.29 sec

Best model:  ARIMA(6,0,2)(0,0,0)[52] intercept
Total fit time: 126.370 seconds
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                  549
Model:               SARIMAX(6, 0, 2)   Log Likelihood               -1604.581
Date:                Sat, 12 Jun 2021   AIC                           3231.163
Time:                        21:04:01   BIC                           3278.552
Sample:                             0   HQIC                          3249.683
                                - 549                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept      1.9965      1.970      1.013      0.311      -1.865       5.858
drift         -0.0009      0.003     -0.326      0.744      -0.006       0.005
ar.L1          1.1436      0.735      1.555      0.120      -0.298       2.585
ar.L2          0.0797      1.513      0.053      0.958      -2.886       3.045
ar.L3         -0.1740      1.244     -0.140      0.889      -2.611       2.264
ar.L4         -0.0826      0.546     -0.151      0.880      -1.153       0.988
ar.L5          0.0915      0.097      0.943      0.346      -0.099       0.282
ar.L6         -0.0748      0.056     -1.324      0.185      -0.185       0.036
ma.L1          0.8016      0.735      1.091      0.275      -0.638       2.241
ma.L2          0.2784      0.420      0.663      0.508      -0.545       1.102
sigma2        18.8003      0.660     28.470      0.000      17.506      20.095
===================================================================================
Ljung-Box (L1) (Q):                   0.03   Jarque-Bera (JB):              2608.09
Prob(Q):                              0.87   Prob(JB):                         0.00
Heteroskedasticity (H):               0.61   Skew:                             1.90
Prob(H) (two-sided):                  0.00   Kurtosis:                        12.98
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).


print(Flow_Rate_Lupa_x.shape, Infilt_.shape , Flow_Rate_Lupa_y.shape)

(601,) (601,) (601,)


Flow_Rate_Lupa_x.loc["2010":].head()# .reshape(-1,1)

Date
2010-01-03     82.262857
2010-01-10    107.960000
2010-01-17    142.602857
2010-01-24    156.262857
2010-01-31    158.530000
Freq: W-SUN, Name: Flow_Rate_Lupa_x, dtype: float64


Infilt_.describe() #.reshape(-1,1)

count    601.000000
mean       0.928175
std        2.709564
min       -3.234013
25%       -0.848383
50%        0.804162
75%        2.052808
max       15.660311
Name: Infilt_, dtype: float64


Flow_Rate_Lupa_y.describe() #reshape(-1,1)

count    601.000000
mean     118.561725
std       29.122899
min       73.248423
25%       90.236023
50%      119.894088
75%      147.675442
max      157.928490
Name: Flow_Rate_Lupa_y, dtype: float64


Infilt_= Infilt_.fillna( Infilt_.mean())


merge2Exog =pd.merge( Infilt_.loc["2010":] ,Flow_Rate_Lupa_y.loc["2010":], left_index=True, right_index=True )


import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning
warnings.simplefilter('ignore', ConvergenceWarning)


from statsmodels.tsa.statespace.sarimax import SARIMAX
# Create model object
model2Ex = SARIMAX(Flow_Rate_Lupa_x.loc["2010":], order=(6,0,2), seasonal_order=(0,0,2, 52), trend='ct', exog= merge2Exog,
                  missing=None ,enforce_stationarity=False, enforce_invertibility=False) #[-548:] Lupa_upd.Infilt_[-549:]
# Fit model
results = model2Ex.fit()


results.summary()


results.mle_retvals

{'fopt': 2.321971463288944,
 'gopt': array([ 8.93997454e-04,  1.00561076e+00, -1.65926532e-02,  7.96306172e-03,
        -1.06968684e-01, -1.99171147e-01, -1.50544408e-01, -2.41182781e-01,
        -8.64727706e-02, -1.72161255e-01,  2.05054504e-01, -1.84602182e-01,
         1.49385898e-02, -2.99802554e-02,  3.31623593e-02]),
 'fcalls': 928,
 'warnflag': 1,
 'converged': False,
 'iterations': 50}


# Plot common diagnostics
plt.style.use('bmh')
results.plot_diagnostics(figsize=(18, 10));
plt.tight_layout();

c:\program files\python38\lib\site-packages\statsmodels\graphics\gofplots.py:993: UserWarning: marker is redundantly defined by the 'marker' keyword argument and the fmt string "bo" (-> marker='o'). The keyword argument will take precedence.
  ax.plot(x, y, fmt, **plot_style)


# Create forecast object
forecast_object = results.get_forecast(27, exog=merge2Exog[-27:])

# Extract predicted mean attribute
mean = forecast_object.predicted_mean

# Calculate the confidence intervals
conf_int = forecast_object.conf_int()

# Extract the forecast dates
dates = mean.index

plt.figure()
# Plot past levels
plt.plot( Flow_Rate_Lupa_x.index, Flow_Rate_Lupa_x, label='past');

# Plot the prediction means as line
plt.plot(dates, mean, label='predicted');
plt.plot(dates[-27:], Lupa_JULY_2020W.iloc[-27:], label='observed');
# Shade between the confidence intervals
plt.fill_between(dates, conf_int.loc[:, 'lower Flow_Rate_Lupa_x'], 
                 conf_int.loc[:, 'upper Flow_Rate_Lupa_x'], alpha=0.32);

# Plot legend
plt.legend();


mean

2020-07-12     52.636424
2020-07-19     57.245828
2020-07-26     61.572218
2020-08-02     67.148070
2020-08-09     72.045804
2020-08-16     78.025563
2020-08-23     84.491484
2020-08-30     90.889382
2020-09-06     95.762998
2020-09-13    100.381253
2020-09-20    106.123535
2020-09-27    110.744771
2020-10-04    114.737103
2020-10-11    119.049950
2020-10-18    122.546763
2020-10-25    125.277490
2020-11-01    126.668256
2020-11-08    127.723425
2020-11-15    129.196421
2020-11-22    129.273639
2020-11-29    129.237623
2020-12-06    130.915563
2020-12-13    131.309909
2020-12-20    132.137699
2020-12-27    131.160815
2021-01-03    131.024898
2021-01-10    130.891759
Freq: W-SUN, Name: predicted_mean, dtype: float64

	coef	std err	z	P>\|z\|	[0.025	0.975]
ar.L1	0.2701	0.162	1.672	0.095	-0.047	0.587
ar.L2	0.5015	0.110	4.560	0.000	0.286	0.717
ma.L1	-0.4271	0.178	-2.401	0.016	-0.776	-0.078
ar.S.L4	0.1075	0.127	0.847	0.397	-0.141	0.356
sigma2	8.45e+04	1.63e+04	5.178	0.000	5.25e+04	1.16e+05

	AR(p)	MA(q)	ARMA(p, q)
ACF	Tails off	Cuts off after lag q	Tails off
PACF	Cuts off after lag p	Tails off	Tails off

	Rainfall_Terni	Flow_Rate_Lupa	doy	Month	Year	ET01	Infilt_	Infiltsum	Rainfall_Ter	Flow_Rate_Lup	Infilt_m3
Date
2018-11-01	21.1	2321.38	305.0	11.0	2018.0	2.058200	19.865080	2791.951025	2658600.0	7101.216	2.503000e+06
2018-12-01	0.2	2334.30	335.0	12.0	2018.0	1.675676	-0.805406	2886.626748	25200.0	6652.800	-1.014811e+05
2019-01-01	0.0	2234.34	1.0	1.0	2019.0	1.178402	-0.707041	2920.800012	0.0	6460.992	-8.908717e+04
2019-02-01	14.2	3073.87	32.0	2.0	2019.0	1.779154	13.132508	2990.949233	1789200.0	6292.512	1.654696e+06
2019-03-01	0.0	3431.76	60.0	3.0	2019.0	1.921843	-1.153106	3034.526263	0.0	9981.792	-1.452914e+05
2019-04-01	0.0	2956.20	91.0	4.0	2019.0	2.511952	-1.507171	3019.779521	0.0	9008.064	-1.899036e+05
2019-05-01	1.6	2916.13	121.0	5.0	2019.0	2.141265	0.315241	3101.896094	201600.0	7960.896	3.972035e+04
2019-06-01	0.0	3853.07	152.0	6.0	2019.0	3.055949	-1.833569	3248.954961	0.0	9913.536	-2.310298e+05
2019-07-01	0.0	3867.76	182.0	7.0	2019.0	4.990285	-2.994171	3179.285551	0.0	11346.912	-3.772656e+05
2019-08-01	0.0	3266.60	213.0	8.0	2019.0	4.759991	-2.855995	3170.883263	0.0	9967.104	-3.598553e+05
2019-09-01	2.4	2640.87	244.0	9.0	2019.0	3.499812	0.300113	3124.262731	302400.0	8225.280	3.781419e+04
2019-10-01	0.2	2306.65	274.0	10.0	2019.0	3.622014	-1.973209	3193.109184	25200.0	6933.600	-2.486243e+05
2019-11-01	1.4	2467.15	305.0	11.0	2019.0	2.251213	0.049272	3201.445983	176400.0	5943.456	6.208275e+03
2019-12-01	0.2	2948.94	335.0	12.0	2019.0	1.893429	-0.936058	3456.655100	25200.0	7839.072	-1.179433e+05
2020-01-01	0.0	3401.94	1.0	1.0	2020.0	1.528371	-0.917023	3514.129781	0.0	9324.396	-1.155449e+05
2020-02-01	2.2	3126.24	32.0	2.0	2020.0	1.810417	1.113750	3508.293249	277200.0	9649.152	1.403325e+05
2020-03-01	16.4	3193.85	61.0	3.0	2020.0	1.680146	15.391913	3530.595964	2066400.0	8936.352	1.939381e+06
2020-04-01	0.0	2938.61	92.0	4.0	2020.0	1.093987	-0.656392	3552.927070	0.0	8779.104	-8.270544e+04
2020-05-01	0.2	2737.95	122.0	5.0	2020.0	2.762967	-1.457780	3562.914119	25200.0	8120.736	-1.836803e+05
2020-06-01	0.0	2324.96	153.0	6.0	2020.0	2.974286	-1.784572	3564.007961	0.0	7126.272	-2.248561e+05

	coef	std err	z	P>\|z\|	[0.025	0.975]
intercept	404.9390	549.238	0.737	0.461	-671.548	1481.426
ar.L1	0.7770	0.182	4.273	0.000	0.421	1.133
ma.L1	0.6130	0.327	1.876	0.061	-0.027	1.253
ar.S.L52	0.0894	0.182	0.492	0.622	-0.266	0.445
sigma2	1.391e+05	6.66e+04	2.088	0.037	8521.805	2.7e+05

	coef	std err	z	P>\|z\|	[0.025	0.975]
intercept	11.8090	10.032	1.177	0.239	-7.853	31.471
ar.L1	2.1658	0.274	7.903	0.000	1.629	2.703
ar.L2	-1.5600	0.493	-3.167	0.002	-2.526	-0.594
ar.L3	0.3847	0.225	1.709	0.087	-0.056	0.826
ma.L1	-0.2327	0.293	-0.795	0.426	-0.806	0.341
ar.S.L52	-0.7254	0.380	-1.911	0.056	-1.469	0.019
ar.S.L104	0.1621	0.061	2.653	0.008	0.042	0.282
ma.S.L52	0.7832	0.435	1.800	0.072	-0.070	1.636
sigma2	761.7349	56.278	13.535	0.000	651.431	872.039

Table of Contents

Seasonal ARIMA Models¶

Seasonal time series¶

Seasonal decompose¶

first with an updated but still not stationary dataset:¶

test with an updated and made stationary dataset:¶

Seasonal ACF and PACF¶

Lupa with updated flowrate and rain data¶

Seasonal ACF and PACF¶

SARIMA models¶

Fitting SARIMA models¶

Choosing SARIMA order¶

SARIMA vs ARIMA forecasts¶

with weekly data:¶

with updated data for Arrone¶

Automation and saving¶

Automated model selection¶

Create auto_arima monthly model¶

Create model: Weekly data¶

try out with log transform this time¶

SARIMA and Box-Jenkins¶

SARIMA model diagnostics¶

SARIMA forecast¶

The SARIMAX model¶

Auto arima search with a cyclic trend¶

No exogene data¶

with exogene data¶

2 exogenous series¶

	Rainfall_Terni	Flow_Rate_Lupa	doy	Month	Year
Date
2009-01-01	2.797	NaN	1	1	2009
2009-01-02	2.797	NaN	2	1	2009
2009-01-03	2.797	NaN	3	1	2009
2009-01-04	2.797	NaN	4	1	2009
2009-01-05	2.797	NaN	5	1	2009

Dep. Variable:	Y	No. Observations:	80
Model:	SARIMAX(2, 1, 1)x(1, 0, [], 4)	Log Likelihood	-560.340
Date:	Fri, 30 Apr 2021	AIC	1130.679
Time:	10:08:50	BIC	1142.526
Sample:	01-01-2013	HQIC	1135.426
	- 03-21-2013
Covariance Type:	opg

Ljung-Box (L1) (Q):	0.00	Jarque-Bera (JB):	0.95
Prob(Q):	1.00	Prob(JB):	0.62
Heteroskedasticity (H):	0.60	Skew:	-0.07
Prob(H) (two-sided):	0.20	Kurtosis:	2.48

Ljung-Box (L1) (Q):	0.03	Jarque-Bera (JB):	0.02
Prob(Q):	0.86	Prob(JB):	0.99
Heteroskedasticity (H):	0.77	Skew:	0.02
Prob(H) (two-sided):	0.48	Kurtosis:	3.05

Ljung-Box (L1) (Q):	0.10	Jarque-Bera (JB):	4.64
Prob(Q):	0.75	Prob(JB):	0.10
Heteroskedasticity (H):	1.44	Skew:	1.12
Prob(H) (two-sided):	0.64	Kurtosis:	3.52

Ljung-Box (L1) (Q):	0.22	Jarque-Bera (JB):	2412.43
Prob(Q):	0.64	Prob(JB):	0.00
Heteroskedasticity (H):	0.62	Skew:	1.83
Prob(H) (two-sided):	0.00	Kurtosis:	12.60

	lower Flow_Rate_Lupa	upper Flow_Rate_Lupa
2020-07-05	446.023600	572.934043
2020-07-12	359.776791	650.798271
2020-07-19	272.268271	736.428413
2020-07-26	192.821390	820.543257
2020-08-02	123.417486	897.698460
2020-08-09	63.266329	970.435493
2020-08-16	8.849800	1039.411030
2020-08-23	-40.199525	1105.903595
2020-08-30	-84.750172	1168.578559
2020-09-06	-122.847268	1228.110440
2020-09-13	-155.073911	1283.056067
2020-09-20	-181.964526	1332.804092
2020-09-27	-204.292822	1377.056284
2020-10-04	-221.025445	1417.548071
2020-10-11	-234.488711	1452.689944
2020-10-18	-244.463687	1483.430472
2020-10-25	-251.755408	1509.709263
2020-11-01	-254.816524	1533.848482
2020-11-08	-261.018186	1549.270432
2020-11-15	-263.498261	1563.622559
2020-11-22	-266.593559	1573.319454
2020-11-29	-261.559762	1587.805583
2020-12-06	-261.183299	1594.933755
2020-12-13	-258.400836	1602.340292
2020-12-20	-252.104007	1611.637103
2020-12-27	-253.242811	1612.306814
2021-01-03	-251.860341	1614.668607
2021-01-10	-250.667705	1616.306043
2021-01-17	-250.218556	1616.897219
2021-01-24	-249.410454	1617.719504
2021-01-31	-245.639452	1621.501962
2021-02-07	-224.094805	1643.138096
2021-02-14	-216.327573	1651.124786
2021-02-21	-215.508371	1652.311834
2021-02-28	-217.366287	1650.969829
2021-03-07	-220.076906	1648.908204

	Rainfall_Terni	Flow_Rate_Lupa	doy	Month	Year
Date
2020-07-01	0.0	72.18	NaN	NaN	NaN
2020-07-02	0.0	71.91	NaN	NaN	NaN
2020-07-03	0.8	71.59	NaN	NaN	NaN
2020-07-04	0.8	71.22	NaN	NaN	NaN
2020-07-05	0.2	71.21	NaN	NaN	NaN

	LAT	LON	YEAR	DOY	T2M_MAX	T2M_MIN	T2M	ALLSKY_SFC_LW_DWN	RH2M	PRECTOT
Date
2010-01-01	42.58631	12.77281	2010	1	9.50	5.56	7.57	28.31	95.38	20.00
2010-01-02	42.58631	12.77281	2010	2	10.08	0.29	5.16	25.23	89.86	2.02
2010-01-03	42.58631	12.77281	2010	3	3.86	-2.43	0.12	22.25	81.25	0.58
2010-01-04	42.58631	12.77281	2010	4	3.45	-0.69	1.38	27.21	93.79	2.18
2010-01-05	42.58631	12.77281	2010	5	7.34	3.00	5.23	28.38	98.94	26.46

	LAT	LON	YEAR	DOY	ALLSKY_SFC_SW_DWN	WS10M_MAX
Date
2010-01-01	42.58611	12.77761	2010	1	2.71	7.44
2010-01-02	42.58611	12.77761	2010	2	6.25	7.77
2010-01-03	42.58611	12.77761	2010	3	7.78	3.55
2010-01-04	42.58611	12.77761	2010	4	2.96	2.92
2010-01-05	42.58611	12.77761	2010	5	1.71	3.56

	coef	std err	z	P>\|z\|	[0.025	0.975]
ar.L1	0.4236	0.090	4.719	0.000	0.248	0.600
ma.S.L12	-0.0898	0.116	-0.776	0.438	-0.317	0.137
sigma2	9347.1462	1407.490	6.641	0.000	6588.516	1.21e+04

	coef	std err	z	P>\|z\|	[0.025	0.975]
intercept	1.9734	0.806	2.449	0.014	0.394	3.552
drift	-0.0014	0.001	-1.069	0.285	-0.004	0.001
ar.L1	1.9576	0.028	70.012	0.000	1.903	2.012
ar.L2	-1.2507	0.062	-20.182	0.000	-1.372	-1.129
ar.L3	0.3226	0.083	3.906	0.000	0.161	0.484
ar.L4	-0.0419	0.112	-0.376	0.707	-0.261	0.177
ar.L5	0.0612	0.101	0.605	0.545	-0.137	0.259
ar.L6	-0.0628	0.048	-1.302	0.193	-0.157	0.032
ma.S.L52	-0.0374	0.038	-0.981	0.327	-0.112	0.037
ma.S.L104	0.1039	0.043	2.397	0.017	0.019	0.189
sigma2	20.2387	0.770	26.274	0.000	18.729	21.748

	coef	std err	z	P>\|z\|	[0.025	0.975]
intercept	8.8284	4.092	2.158	0.031	0.809	16.848
ar.L1	1.9741	0.027	72.348	0.000	1.921	2.028
ar.L2	-1.2938	0.061	-21.298	0.000	-1.413	-1.175
ar.L3	0.3915	0.068	5.750	0.000	0.258	0.525
ar.L4	-0.0830	0.037	-2.220	0.026	-0.156	-0.010
ma.S.L52	-0.0291	0.036	-0.806	0.420	-0.100	0.042
ma.S.L104	0.1014	0.043	2.371	0.018	0.018	0.185
sigma2	966.9039	34.791	27.792	0.000	898.715	1035.093

	coef	std err	z	P>\|z\|	[0.025	0.975]
intercept	16.4186	0.001	2.09e+04	0.000	16.417	16.420
drift	-0.0425	0.003	-12.243	0.000	-0.049	-0.036
Infilt_	-0.2409	0.044	-5.501	0.000	-0.327	-0.155
ar.L1	2.2321	0.019	118.034	0.000	2.195	2.269
ar.L2	-1.3655	0.028	-49.191	0.000	-1.420	-1.311
ar.L3	0.5613	0.067	8.434	0.000	0.431	0.692
ar.L4	-1.8074	0.077	-23.586	0.000	-1.958	-1.657
ar.L5	2.0234	0.055	37.096	0.000	1.917	2.130
ar.L6	-0.6465	0.057	-11.306	0.000	-0.759	-0.534
ma.S.L52	0.7692	0.071	10.874	0.000	0.631	0.908
ma.S.L104	-0.1682	0.053	-3.181	0.001	-0.272	-0.065
sigma2	195.4020	0.000	8.25e+05	0.000	195.402	195.403

Ljung-Box (L1) (Q):	0.13	Jarque-Bera (JB):	2351.38
Prob(Q):	0.72	Prob(JB):	0.00
Heteroskedasticity (H):	0.64	Skew:	1.82
Prob(H) (two-sided):	0.00	Kurtosis:	12.47

Ljung-Box (L1) (Q):	1.65	Jarque-Bera (JB):	2539.03
Prob(Q):	0.20	Prob(JB):	0.00
Heteroskedasticity (H):	0.68	Skew:	-0.99
Prob(H) (two-sided):	0.01	Kurtosis:	13.36

	Flow_Rate_Lupa
doy
1.0	99.772604
2.0	100.127876
3.0	100.413193
4.0	100.669220
5.0	100.898665

	Date	Rainfall_Terni	Flow_Rate_Lupa	doy	Month	Year	ET01	Infilt_	Infiltsum	Rainfall_Ter	Flow_Rate_Lup	Infilt_m3
0	2009-01-01	2.797	135.47	1.0	1.0	2009.0	NaN	NaN	NaN	352422.0	11704.608	NaN
1	2009-01-02	2.797	135.24	2.0	1.0	2009.0	NaN	NaN	NaN	352422.0	11684.736	NaN
2	2009-01-03	2.797	135.17	3.0	1.0	2009.0	NaN	NaN	NaN	352422.0	11678.688	NaN
3	2009-01-04	2.797	134.87	4.0	1.0	2009.0	NaN	NaN	NaN	352422.0	11652.768	NaN
4	2009-01-05	2.797	134.80	5.0	1.0	2009.0	NaN	NaN	NaN	352422.0	11646.720	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...
4194	2020-06-26	0.000	73.93	178.0	6.0	2020.0	4.171681	-2.503008	3582.172252	0.0	6387.552	-315379.064449
4195	2020-06-27	0.000	73.60	179.0	6.0	2020.0	4.449783	-2.669870	3579.502382	0.0	6359.040	-336403.599674
4196	2020-06-28	0.000	73.14	180.0	6.0	2020.0	4.513588	-2.708153	3576.794229	0.0	6319.296	-341227.242488
4197	2020-06-29	0.000	72.88	181.0	6.0	2020.0	4.510906	-2.706544	3574.087685	0.0	6296.832	-341024.517234
4198	2020-06-30	0.000	72.53	182.0	6.0	2020.0	4.882469	-2.929482	3571.158204	0.0	6266.592	-369114.672333

Dep. Variable:	Flow_Rate_Lupa_x	No. Observations:	549
Model:	SARIMAX(6, 0, 2)x(0, 0, 2, 52)	Log Likelihood	-1274.762
Date:	Sat, 12 Jun 2021	AIC	2579.525
Time:	21:07:58	BIC	2640.894
Sample:	01-03-2010	HQIC	2603.731
	- 07-05-2020
Covariance Type:	opg

Ljung-Box (L1) (Q):	15.59	Jarque-Bera (JB):	876.77
Prob(Q):	0.00	Prob(JB):	0.00
Heteroskedasticity (H):	0.59	Skew:	0.91
Prob(H) (two-sided):	0.00	Kurtosis:	9.66