import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
#from keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import tensorflow as tf
mpl.rcParams['figure.figsize'] = (16, 8)
plt.style.use("seaborn-darkgrid")  # alternative: "fivethirtyeight"
%config InlineBackend.figure_format = 'retina'
from IPython.core.display import display, HTML
display(HTML("<style>.container {width:70% !important;}</style>"));
mpl.rcParams['axes.grid'] = True
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
df = pd.read_csv('../Lupa_Arrone_PETIR.csv', index_col=0, parse_dates=True, usecols=[0,1,2,3,4,5,6,7])
import numpy as np
df.Infilt_= df.Infilt_.fillna( np.mean( df.Infilt_))
#df.Flow_35= df.Flow_35.fillna( np.mean( df.Flow_35 ))
#df.Net_35= df.Net_35.fillna( np.mean( df.Net_35))
df
| Date | Rainfall_Terni | Flow_Rate_Lupa | doy | Month | Year | ET01 | Infilt_ |
|---|---|---|---|---|---|---|---|
| 2009-01-01 | 2.797 | 135.47 | 1.0 | 1.0 | 2009.0 | NaN | 0.931444 |
| 2009-01-02 | 2.797 | 135.24 | 2.0 | 1.0 | 2009.0 | NaN | 0.931444 |
| 2009-01-03 | 2.797 | 135.17 | 3.0 | 1.0 | 2009.0 | NaN | 0.931444 |
| 2009-01-04 | 2.797 | 134.87 | 4.0 | 1.0 | 2009.0 | NaN | 0.931444 |
| 2009-01-05 | 2.797 | 134.80 | 5.0 | 1.0 | 2009.0 | NaN | 0.931444 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-06-26 | 0.000 | 73.93 | 178.0 | 6.0 | 2020.0 | 4.171681 | -2.503008 |
| 2020-06-27 | 0.000 | 73.60 | 179.0 | 6.0 | 2020.0 | 4.449783 | -2.669870 |
| 2020-06-28 | 0.000 | 73.14 | 180.0 | 6.0 | 2020.0 | 4.513588 | -2.708153 |
| 2020-06-29 | 0.000 | 72.88 | 181.0 | 6.0 | 2020.0 | 4.510906 | -2.706544 |
| 2020-06-30 | 0.000 | 72.53 | 182.0 | 6.0 | 2020.0 | 4.882469 | -2.929482 |

4199 rows × 7 columns
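Note the leading NaNs in ET01. The notebook fills `Infilt_` with its global mean; time-based interpolation would be a common alternative for a series like this. A hypothetical sketch, not what is done here:

```python
# Alternative to mean imputation (assumes the DatetimeIndex is set):
# interpolate linearly along the time axis instead of using a constant mean.
df['Infilt_'] = df['Infilt_'].interpolate(method='time')
```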
The first attempt using daily data gave poor results, so now I am going to use weekly data instead. But what weekday was 2009-01-01?
import datetime
# datetime.date(2009, 1, 1).strftime("%A") would work too
df.index[0].day_name()
'Thursday'
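Because the series starts on a Thursday, resampling with the `W-THU` anchor makes every weekly bin end on a Thursday, so the first observation lands exactly on a bin label. A minimal sketch on toy data (dates chosen to match, values assumed):

```python
import pandas as pd

# Fourteen daily observations starting on Thursday 2009-01-01.
s = pd.Series(1.0, index=pd.date_range("2009-01-01", periods=14, freq="D"))
weekly = s.resample("W-THU").sum()
print(weekly.index.day_name().unique())  # ['Thursday'] - every bin label is a Thursday
print(weekly.tolist())                   # [1.0, 7.0, 6.0] - the first bin holds only Jan 1st
```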
df=df[["Rainfall_Terni","Flow_Rate_Lupa","ET01","Infilt_"]].resample("W-THU").sum()
#df["Week"]=df.index.isocalender.week
df
| Date | Rainfall_Terni | Flow_Rate_Lupa | ET01 | Infilt_ |
|---|---|---|---|---|
| 2009-01-01 | 2.797 | 135.47 | 0.000000 | 0.931444 |
| 2009-01-08 | 19.579 | 945.79 | 0.000000 | 6.520111 |
| 2009-01-15 | 19.579 | 948.84 | 0.000000 | 6.520111 |
| 2009-01-22 | 19.579 | 951.00 | 0.000000 | 6.520111 |
| 2009-01-29 | 19.579 | 964.28 | 0.000000 | 6.520111 |
| ... | ... | ... | ... | ... |
| 2020-06-04 | 25.800 | 576.66 | 21.294532 | 13.023281 |
| 2020-06-11 | 34.200 | 561.90 | 20.436589 | 21.938047 |
| 2020-06-18 | 20.800 | 542.70 | 22.228401 | 7.462959 |
| 2020-06-25 | 0.200 | 527.10 | 26.129299 | -15.477580 |
| 2020-07-02 | 0.000 | 366.08 | 22.528427 | -13.517056 |

601 rows × 4 columns
df.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 601 entries, 2009-01-01 to 2020-07-02
Freq: W-THU
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   Rainfall_Terni  601 non-null    float64
 1   Flow_Rate_Lupa  601 non-null    float64
 2   ET01            601 non-null    float64
 3   Infilt_         601 non-null    float64
dtypes: float64(4)
memory usage: 23.5 KB
# The resample already returned a DatetimeIndex named 'Date', so no further
# pd.to_datetime() / set_index('Date') conversion is needed here (reading
# df['Date'] would raise a KeyError, since 'Date' is the index, not a column).
df[['Rainfall_Terni','Infilt_','ET01']].plot(subplots=True);
The target is the first column (Flow_Rate_Lupa)!
df["R750"]= df['Rainfall_Terni'].rolling(110, min_periods=52).sum().fillna(method= 'bfill')
df_input=df.loc["2010":][["Flow_Rate_Lupa",'R750','Infilt_','ET01']] #Flow_Rate_Lupa
df_input
| Date | Flow_Rate_Lupa | R750 | Infilt_ | ET01 |
|---|---|---|---|---|
| 2010-01-07 | 668.70 | 1003.993 | 17.692467 | 8.697554 |
| 2010-01-14 | 901.35 | 1026.904 | 17.954969 | 8.260052 |
| 2010-01-21 | 1069.68 | 1049.815 | 17.913947 | 8.328421 |
| 2010-01-28 | 1105.66 | 1072.726 | 18.794913 | 6.860145 |
| 2010-02-04 | 1125.23 | 1097.501 | 20.032784 | 7.903693 |
| ... | ... | ... | ... | ... |
| 2020-06-04 | 576.66 | 2276.400 | 13.023281 | 21.294532 |
| 2020-06-11 | 561.90 | 2281.300 | 21.938047 | 20.436589 |
| 2020-06-18 | 542.70 | 2221.900 | 7.462959 | 22.228401 |
| 2020-06-25 | 527.10 | 2185.500 | -15.477580 | 26.129299 |
| 2020-07-02 | 366.08 | 2132.000 | -13.517056 | 22.528427 |

548 rows × 4 columns
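`R750` above is a 110-week rolling rainfall total (roughly 750-770 days, which presumably motivates the name). On toy numbers (values assumed), this is how `min_periods` and the backfill interact:

```python
import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0])
r = s.rolling(3, min_periods=2).sum()     # [NaN, 3.0, 6.0, 9.0, 12.0]
print(r.fillna(method="bfill").tolist())  # [3.0, 3.0, 6.0, 9.0, 12.0] - the leading NaN is backfilled
```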
df_input[['Flow_Rate_Lupa','Infilt_','R750']].plot(subplots=True);
df_input.describe()
| | Flow_Rate_Lupa | R750 | Infilt_ | ET01 |
|---|---|---|---|---|
| count | 548.000000 | 548.000000 | 548.000000 | 548.000000 |
| mean | 822.102746 | 1910.397949 | 6.516712 | 18.865599 |
| std | 420.094965 | 269.000014 | 20.808402 | 8.485704 |
| min | 204.830000 | 1003.993000 | -23.867515 | 5.039743 |
| 25% | 513.335000 | 1715.309750 | -6.745961 | 11.515861 |
| 50% | 724.330000 | 1918.351500 | 0.843919 | 17.102134 |
| 75% | 1047.015000 | 2112.922750 | 15.869124 | 25.042909 |
| max | 1855.320000 | 2447.200000 | 158.994136 | 39.779192 |
df_input.query("Flow_Rate_Lupa > 1850")
| Date | Flow_Rate_Lupa | R750 | Infilt_ | ET01 |
|---|---|---|---|---|
| 2010-06-03 | 1855.07 | 1491.826 | 8.943818 | 20.411970 |
| 2014-02-27 | 1855.32 | 2171.595 | 16.489999 | 10.850002 |
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(df_input)
data_scaled
array([[0.28104987, 0. , 0.22727555, 0.10529274], [0.42200801, 0.01587506, 0.22871107, 0.09269893], [0.5239959 , 0.03175012, 0.22848674, 0.09466697], ..., [0.2047089 , 0.84388934, 0.17133431, 0.49478788], [0.19525717, 0.81866773, 0.04588133, 0.60707803], [0.09769826, 0.78159751, 0.05660268, 0.50342434]])
features= data_scaled
target= data_scaled[:,0]
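`MinMaxScaler` rescales every column independently to [0, 1]; a quick check of what `fit_transform` just computed:

```python
import numpy as np

x = df_input.to_numpy()
manual = (x - x.min(axis=0)) / (x.max(axis=0) - x.min(axis=0))
assert np.allclose(manual, data_scaled)  # column-wise (x - min) / (max - min)
```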
Keras provides the TimeseriesGenerator that can be used to automatically transform a univariate or multivariate time series dataset into a supervised learning problem.
There are two parts to using the TimeseriesGenerator: defining it and using it to train models.
You create an instance of the class, specify the input and output aspects of your time series problem, and it provides an instance of a Sequence class that can then be used to iterate across the inputs and outputs of the series.
In most time series prediction problems, the input and output series will be the same series.
For example:
# load data
inputs = ...
outputs = ...
# define generator
generator = TimeseriesGenerator(inputs, outputs, ...)
# iterate over the generator
for i in range(len(generator)):
    # ...
Technically, the class is not a generator in the Python sense: you cannot call next() on it. It is a Sequence that you index and iterate over.
In addition to specifying the input and output aspects of your time series problem, there are some additional parameters that you should configure; for example:
You must define a length argument based on your designed framing of the problem. That is the desired number of lag observations to use as input.
You must also define a batch size, and it must match the batch size of your model during training. If your dataset has fewer samples than your batch size, set the batch size in both the generator and the model to the total number of samples in the generator, found via its length; for example:
print(len(generator))
There are also other arguments such as defining start and end offsets into your data, the sampling rate, stride, and more. You are less likely to use these features, but you can see the full API for more details.
The samples are not shuffled by default. This is useful for some recurrent neural networks like LSTMs that maintain state across samples within a batch.
It can benefit other neural networks, such as CNNs and MLPs, to shuffle the samples when training. Shuffling can be enabled by setting the `shuffle` argument to True. This will have the effect of shuffling the samples returned in each batch.
At the time of writing, the TimeseriesGenerator is limited to one-step outputs. Multi-step time series forecasting is not supported.
Once a TimeseriesGenerator instance has been defined, it can be used to train a neural network model.
A model can be trained with the TimeseriesGenerator as a data generator by passing it directly to fit(); the older fit_generator() function is deprecated in TensorFlow 2.x, and this notebook passes the generator straight to fit() below.
fit() also accepts a steps_per_epoch argument that defines the number of batches to draw in each epoch. This can be set to the length of the TimeseriesGenerator instance to use all samples in the generator.
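Before wiring in the real features, a self-contained toy run (integer series, values assumed) shows the window-to-target pairing the generator produces:

```python
import numpy as np
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

series = np.arange(10, dtype="float32").reshape(-1, 1)
gen = TimeseriesGenerator(series, series, length=3, batch_size=2)
x, y = gen[0]
print(x.shape, y.shape)    # (2, 3, 1) (2, 1)
print(x[0].ravel(), y[0])  # [0. 1. 2.] [3.] - three lags predict the next step
```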
TimeseriesGenerator(features, target, length=2, sampling_rate=1, batch_size=1)[0]
(array([[[0.28104987, 0. , 0.22727555, 0.10529274], [0.42200801, 0.01587506, 0.22871107, 0.09269893]]]), array([0.5239959]))
x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.10, random_state=123, shuffle=False)  # shuffle=False preserves the temporal order
x_train.shape
(493, 4)
493/17
29.0
x_test.shape
(55, 4)
win_length = 50  # 720
batch_size = 17
num_features = 4  # one per input column: Flow_Rate_Lupa, R750, Infilt_, ET01
train_generator = TimeseriesGenerator(x_train, y_train, length=win_length, sampling_rate=1, batch_size=batch_size)
test_generator = TimeseriesGenerator(x_test, y_test, length=25, sampling_rate=1, batch_size=8)  # shorter window: the 55-row test set then yields 55 - 25 = 30 samples
train_generator[0]
(array([[[0.28104987, 0. , 0.22727555, 0.10529274], [0.42200801, 0.01587506, 0.22871107, 0.09269893], [0.5239959 , 0.03175012, 0.22848674, 0.09466697], ..., [0.39285303, 0.68061685, 0.34962736, 0.18116933], [0.58921896, 0.69775299, 0.22744881, 0.19108943], [0.66047053, 0.71488913, 0.24443682, 0.0420535 ]], [[0.42200801, 0.01587506, 0.22871107, 0.09269893], [0.5239959 , 0.03175012, 0.22848674, 0.09466697], [0.54579549, 0.04762518, 0.2333044 , 0.05240159], ..., [0.58921896, 0.69775299, 0.22744881, 0.19108943], [0.66047053, 0.71488913, 0.24443682, 0.0420535 ], [0.68416955, 0.73202527, 0.23409558, 0.13277729]], [[0.5239959 , 0.03175012, 0.22848674, 0.09466697], [0.54579549, 0.04762518, 0.2333044 , 0.05240159], [0.55765258, 0.06479181, 0.24007385, 0.08244087], ..., [0.66047053, 0.71488913, 0.24443682, 0.0420535 ], [0.68416955, 0.73202527, 0.23409558, 0.13277729], [0.71481196, 0.74916142, 0.23955043, 0.08492187]], ..., [[0.88382844, 0.21373718, 0.2032319 , 0.27995789], [0.87130489, 0.22908841, 0.19587447, 0.34450463], [0.85293458, 0.24443964, 0.19068582, 0.39002473], ..., [0.71824731, 0.78899008, 0.23193594, 0.10403547], [0.70437264, 0.79198895, 0.21693345, 0.23565242], [0.72473023, 0.79503495, 0.22023153, 0.20671834]], [[0.87130489, 0.22908841, 0.19587447, 0.34450463], [0.85293458, 0.24443964, 0.19068582, 0.39002473], [0.83428558, 0.26367804, 0.22360024, 0.37041274], ..., [0.70437264, 0.79198895, 0.21693345, 0.23565242], [0.72473023, 0.79503495, 0.22023153, 0.20671834], [0.72948034, 0.79808094, 0.21351057, 0.26568136]], [[0.85293458, 0.24443964, 0.19068582, 0.39002473], [0.83428558, 0.26367804, 0.22360024, 0.37041274], [0.81692104, 0.28356431, 0.22695272, 0.38585914], ..., [0.72473023, 0.79503495, 0.22023153, 0.20671834], [0.72948034, 0.79808094, 0.21351057, 0.26568136], [0.73876849, 0.79184344, 0.12920421, 0.36251652]]]), array([0.68416955, 0.71481196, 0.74385183, 0.76403977, 0.78071361, 0.78213743, 0.77426704, 0.76201007, 0.74473641, 0.73848372, 0.73175845, 0.71824731, 0.70437264, 0.72473023, 0.72948034, 0.73876849, 0.74121624]))
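As a sanity check on the windowing arithmetic: the generator carves the 493 training rows into 493 − 50 = 443 lagged samples, and with batch_size = 17 that gives ⌈443 / 17⌉ = 27 batches, which is the 27/27 that appears in the training log further down:

```python
print(len(train_generator))  # 27 = ceil((len(x_train) - win_length) / batch_size)
```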
layer = tf.keras.layers.LeakyReLU()  # negative-slope coefficient alpha defaults to 0.3
output = layer([-3.0, -1.0, 0.0, 2.0])
list(output.numpy())
[-0.90000004, -0.3, 0.0, 2.0]
layer = tf.keras.layers.LeakyReLU(alpha= 0.1)
output = layer([-3.0, -1.0, 0.0, 2.0])
list(output.numpy())
[-0.3, -0.1, 0.0, 2.0]
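In other words, the layer applies the function below element-wise; this is just a sketch of the definition, with alpha defaulting to 0.3 as in tf.keras:

```python
def leaky_relu(x, alpha=0.3):
    # identity for non-negative inputs, a small linear slope for negative ones
    return x if x >= 0 else alpha * x

print([leaky_relu(v) for v in [-3.0, -1.0, 0.0, 2.0]])  # [-0.9, -0.3, 0.0, 2.0]
```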
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(256, input_shape= (win_length, num_features), return_sequences=True,
#stateful=True, batch_input_shape= (17,win_length, num_features))
))
#model.add(tf.keras.layers.LeakyReLU(alpha=0.95))
model.add(tf.keras.layers.LSTM(128, return_sequences=True))
#model.add(tf.keras.layers.LeakyReLU(alpha=0.95))
model.add(tf.keras.layers.Dropout(0.01))
model.add(tf.keras.layers.LSTM(64, return_sequences=False))
#model.add(tf.keras.layers.Dropout(0.003))
model.add(tf.keras.layers.Flatten())  # no-op here: the preceding LSTM already returns a 2-D tensor
model.add(tf.keras.layers.Dense(1))
model.summary()
Model: "sequential_14" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= lstm_38 (LSTM) (None, 50, 256) 266240 _________________________________________________________________ lstm_39 (LSTM) (None, 50, 128) 197120 _________________________________________________________________ dropout_13 (Dropout) (None, 50, 128) 0 _________________________________________________________________ lstm_40 (LSTM) (None, 64) 49408 _________________________________________________________________ flatten (Flatten) (None, 64) 0 _________________________________________________________________ dense_11 (Dense) (None, 1) 65 ================================================================= Total params: 512,833 Trainable params: 512,833 Non-trainable params: 0 _________________________________________________________________
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
patience=10, #restore_best_weights=True,
mode='min')
model.compile(loss=tf.losses.MeanSquaredError(),
optimizer=tf.optimizers.Adam(),
metrics=[tf.metrics.MeanAbsoluteError()])
history = model.fit(train_generator, epochs=39,
validation_data=test_generator,
shuffle=False,
callbacks=[early_stopping])
Epoch 1/39
27/27 [==============================] - 6s 55ms/step - loss: 0.2008 - mean_absolute_error: 0.3652 - val_loss: 0.0068 - val_mean_absolute_error: 0.0717
Epoch 2/39
27/27 [==============================] - 0s 13ms/step - loss: 0.0565 - mean_absolute_error: 0.1976 - val_loss: 0.0033 - val_mean_absolute_error: 0.0504
Epoch 3/39
27/27 [==============================] - 0s 14ms/step - loss: 0.0320 - mean_absolute_error: 0.1428 - val_loss: 0.0066 - val_mean_absolute_error: 0.0732
Epoch 4/39
27/27 [==============================] - 0s 14ms/step - loss: 0.0300 - mean_absolute_error: 0.1366 - val_loss: 9.3717e-04 - val_mean_absolute_error: 0.0250
Epoch 5/39
27/27 [==============================] - 0s 14ms/step - loss: 0.0140 - mean_absolute_error: 0.0887 - val_loss: 0.0036 - val_mean_absolute_error: 0.0573
Epoch 6/39
27/27 [==============================] - 0s 15ms/step - loss: 0.0131 - mean_absolute_error: 0.0883 - val_loss: 0.0012 - val_mean_absolute_error: 0.0312
Epoch 7/39
27/27 [==============================] - 0s 14ms/step - loss: 0.0085 - mean_absolute_error: 0.0676 - val_loss: 8.7979e-04 - val_mean_absolute_error: 0.0283
Epoch 8/39
27/27 [==============================] - 0s 14ms/step - loss: 0.0086 - mean_absolute_error: 0.0681 - val_loss: 6.9516e-04 - val_mean_absolute_error: 0.0169
Epoch 9/39
27/27 [==============================] - 0s 13ms/step - loss: 0.0069 - mean_absolute_error: 0.0611 - val_loss: 4.9390e-04 - val_mean_absolute_error: 0.0141
Epoch 10/39
27/27 [==============================] - 0s 14ms/step - loss: 0.0107 - mean_absolute_error: 0.0755 - val_loss: 0.0022 - val_mean_absolute_error: 0.0384
Epoch 11/39
27/27 [==============================] - 0s 13ms/step - loss: 0.0081 - mean_absolute_error: 0.0728 - val_loss: 0.0012 - val_mean_absolute_error: 0.0212
Epoch 12/39
27/27 [==============================] - 0s 13ms/step - loss: 0.0061 - mean_absolute_error: 0.0605 - val_loss: 0.0024 - val_mean_absolute_error: 0.0423
Epoch 13/39
27/27 [==============================] - 0s 13ms/step - loss: 0.0057 - mean_absolute_error: 0.0588 - val_loss: 0.0013 - val_mean_absolute_error: 0.0240
Epoch 14/39
27/27 [==============================] - 0s 14ms/step - loss: 0.0090 - mean_absolute_error: 0.0748 - val_loss: 0.0065 - val_mean_absolute_error: 0.0774
Epoch 15/39
27/27 [==============================] - 0s 13ms/step - loss: 0.0089 - mean_absolute_error: 0.0756 - val_loss: 0.0024 - val_mean_absolute_error: 0.0434
Epoch 16/39
27/27 [==============================] - 0s 14ms/step - loss: 0.0133 - mean_absolute_error: 0.0940 - val_loss: 0.0077 - val_mean_absolute_error: 0.0840
Epoch 17/39
27/27 [==============================] - 0s 14ms/step - loss: 0.0064 - mean_absolute_error: 0.0631 - val_loss: 0.0036 - val_mean_absolute_error: 0.0561
Epoch 18/39
27/27 [==============================] - 0s 14ms/step - loss: 0.0053 - mean_absolute_error: 0.0551 - val_loss: 0.0026 - val_mean_absolute_error: 0.0457
Epoch 19/39
27/27 [==============================] - 0s 15ms/step - loss: 0.0041 - mean_absolute_error: 0.0481 - val_loss: 0.0016 - val_mean_absolute_error: 0.0324
model.evaluate(test_generator, verbose=0)  # returns [MSE loss, MAE], per compile()
[0.0013154871994629502, 0.023586073890328407]
(An earlier 19-epoch run scored [0.005353941582143307, 0.06212739273905754] on the same evaluation.) Training stopped at epoch 19 because the best val_loss, 4.94e-04 at epoch 9, went unbeaten for the full patience window of 10 epochs.
predictions=model.predict(test_generator)
predictions.shape[0]
30
predictions
array([[0.33575445], [0.353515 ], [0.36118737], [0.3680064 ], [0.37141764], [0.37084252], [0.36677942], [0.36417076], [0.32993746], [0.3271436 ], [0.32349548], [0.32080647], [0.31744757], [0.32060704], [0.32402036], [0.32449418], [0.3192868 ], [0.31275964], [0.30342007], [0.29141372], [0.28134808], [0.27508107], [0.26641318], [0.25555772], [0.24702673], [0.23716058], [0.22902866], [0.22481972], [0.221744 ], [0.21387284]], dtype=float32)
y_test
array([0.4363795 , 0.43808808, 0.43269574, 0.42353483, 0.41128392, 0.39203509, 0.37261662, 0.35440384, 0.33405837, 0.31336149, 0.29473671, 0.27743276, 0.26214033, 0.24555738, 0.23599052, 0.21929851, 0.20467861, 0.19371823, 0.18412108, 0.17405134, 0.18449067, 0.20781101, 0.23885028, 0.25769317, 0.26055899, 0.26130422, 0.26217669, 0.29293725, 0.32960742, 0.33618047, 0.33978016, 0.34337984, 0.34697953, 0.3467387 , 0.33857824, 0.33124103, 0.32277081, 0.31682712, 0.31693012, 0.31203461, 0.31010185, 0.30764197, 0.30219511, 0.2941793 , 0.28607868, 0.27860817, 0.26965325, 0.25835358, 0.2468782 , 0.2364207 , 0.22528461, 0.21634181, 0.2047089 , 0.19525717, 0.09769826])
x_test
array([[0.4363795 , 0.8210236 , 0.0640968 , 0.72553557], [0.43808808, 0.79746495, 0.0334708 , 0.7735271 ], [0.43269574, 0.78707143, 0.01530239, 0.86575204], [0.42353483, 0.79247606, 0.06628185, 0.79272327], [0.41128392, 0.75602945, 0.05243321, 0.67433645], [0.39203509, 0.74674458, 0.01198327, 0.9044659 ], [0.37261662, 0.78208254, 0.39556533, 0.74410795], [0.35440384, 0.77210476, 0.02267274, 0.80109193], [0.33405837, 0.76281989, 0.01328147, 0.88348157], [0.31336149, 0.75034766, 0.01739242, 0.84741623], [0.29473671, 0.7592168 , 0.22727695, 0.71405047], [0.27743276, 0.76808594, 0.12326611, 0.63822854], [0.26214033, 0.7697489 , 0.06071692, 0.58247298], [0.24555738, 0.76864026, 0.05847268, 0.66932832], [0.23599052, 0.83959335, 0.69912384, 0.5469637 ], [0.21929851, 0.85345138, 0.16549087, 0.50767142], [0.20467861, 0.82573532, 0.0785927 , 0.43524387], [0.19371823, 0.83169428, 0.1176087 , 0.45757529], [0.18412108, 0.8448594 , 0.20371664, 0.49855467], [0.17405134, 0.8217165 , 0.07604362, 0.40003552], [0.18449067, 0.89654984, 0.67315761, 0.27581309], [0.20781101, 0.93438225, 0.38773113, 0.21793259], [0.23885028, 0.97457052, 0.59966924, 0.20088416], [0.25769317, 0.97318472, 0.26355976, 0.22302683], [0.26055899, 0.97138318, 0.17156075, 0.19534888], [0.26130422, 0.95738657, 0.12237993, 0.14705059], [0.26217669, 0.95378348, 0.21506115, 0.20712441], [0.29293725, 0.97415478, 0.34290722, 0.17938728], [0.32960742, 0.95974243, 0.10521077, 0.09617574], [0.33618047, 0.94643873, 0.10300216, 0.11555188], [0.33978016, 0.94685447, 0.10319013, 0.12349807], [0.34337984, 0.95322916, 0.15311471, 0.12689041], [0.34697953, 0.95738657, 0.13364103, 0.17299495], [0.3467387 , 0.96306836, 0.13790395, 0.19316779], [0.33857824, 0.96334552, 0.09700266, 0.18737598], [0.33124103, 0.95904953, 0.16231439, 0.20930059], [0.32277081, 0.96507777, 0.14108399, 0.23243595], [0.31682712, 1. , 0.36962899, 0.18483803], [0.31693012, 0.98191528, 0.15150474, 0.21777654], [0.31203461, 0.95537716, 0.08545922, 0.26945618], [0.31010185, 0.95302129, 0.09889712, 0.16116061], [0.30764197, 0.94927963, 0.17059147, 0.20385237], [0.30219511, 0.94609228, 0.08147129, 0.28525188], [0.2941793 , 0.90825987, 0.08480941, 0.32313319], [0.28607868, 0.90036079, 0.22937605, 0.33101596], [0.27860817, 0.88657206, 0.19432144, 0.36988359], [0.26965325, 0.88463193, 0.07171964, 0.41877933], [0.25835358, 0.86176619, 0.07957634, 0.51297151], [0.2468782 , 0.86391419, 0.2759152 , 0.4792515 ], [0.2364207 , 0.86377561, 0.05854794, 0.48635854], [0.22528461, 0.88165246, 0.20174157, 0.46790579], [0.21634181, 0.88504768, 0.25049299, 0.44320927], [0.2047089 , 0.84388934, 0.17133431, 0.49478788], [0.19525717, 0.81866773, 0.04588133, 0.60707803], [0.09769826, 0.78159751, 0.05660268, 0.50342434]])
x_test[:,1:][win_length:]  # note: slicing with win_length (50) leaves only 5 of the 30 prediction rows with feature values; the test generator used a window of 25
array([[0.88165246, 0.20174157, 0.46790579], [0.88504768, 0.25049299, 0.44320927], [0.84388934, 0.17133431, 0.49478788], [0.81866773, 0.04588133, 0.60707803], [0.78159751, 0.05660268, 0.50342434]])
x_test[:,1:][win_length:].shape
(5, 3)
df_pred = pd.concat([pd.DataFrame(predictions), pd.DataFrame(x_test[:, 1:][win_length:])], axis=1)
df_pred
| | 0 | 0 | 1 | 2 |
|---|---|---|---|---|
| 0 | 0.335754 | 0.881652 | 0.201742 | 0.467906 |
| 1 | 0.353515 | 0.885048 | 0.250493 | 0.443209 |
| 2 | 0.361187 | 0.843889 | 0.171334 | 0.494788 |
| 3 | 0.368006 | 0.818668 | 0.045881 | 0.607078 |
| 4 | 0.371418 | 0.781598 | 0.056603 | 0.503424 |
| 5 | 0.370843 | NaN | NaN | NaN |
| 6 | 0.366779 | NaN | NaN | NaN |
| 7 | 0.364171 | NaN | NaN | NaN |
| 8 | 0.329937 | NaN | NaN | NaN |
| 9 | 0.327144 | NaN | NaN | NaN |
| 10 | 0.323495 | NaN | NaN | NaN |
| 11 | 0.320806 | NaN | NaN | NaN |
| 12 | 0.317448 | NaN | NaN | NaN |
| 13 | 0.320607 | NaN | NaN | NaN |
| 14 | 0.324020 | NaN | NaN | NaN |
| 15 | 0.324494 | NaN | NaN | NaN |
| 16 | 0.319287 | NaN | NaN | NaN |
| 17 | 0.312760 | NaN | NaN | NaN |
| 18 | 0.303420 | NaN | NaN | NaN |
| 19 | 0.291414 | NaN | NaN | NaN |
| 20 | 0.281348 | NaN | NaN | NaN |
| 21 | 0.275081 | NaN | NaN | NaN |
| 22 | 0.266413 | NaN | NaN | NaN |
| 23 | 0.255558 | NaN | NaN | NaN |
| 24 | 0.247027 | NaN | NaN | NaN |
| 25 | 0.237161 | NaN | NaN | NaN |
| 26 | 0.229029 | NaN | NaN | NaN |
| 27 | 0.224820 | NaN | NaN | NaN |
| 28 | 0.221744 | NaN | NaN | NaN |
| 29 | 0.213873 | NaN | NaN | NaN |
Reverse the scaling. Only the first five rows carry real feature values because of the `win_length` slice above, but `MinMaxScaler` inverts each column independently, so the NaNs in the padded columns cannot affect the predicted flow rate in column 0:
rev_trans=scaler.inverse_transform(df_pred)
rev_trans
array([[ 758.98936901, 2276.4 , 13.02328054, 21.29453244], [ 788.30297095, 2281.3 , 21.93804664, 20.43658893], [ 800.96614005, 2221.9 , 7.4629592 , 22.22840134], [ 812.22089669, 2185.5 , -15.47757967, 26.12929945], [ 817.85110335, 2132. , -13.51705632, 22.5284272 ], [ 816.90186493, nan, nan, nan], [ 810.19575963, nan, nan, nan], [ 805.89019753, nan, nan, nan], [ 749.38847512, nan, nan, nan], [ 744.77725608, nan, nan, nan], [ 738.75605056, nan, nan, nan], [ 734.31787644, nan, nan, nan], [ 728.77404467, nan, nan, nan], [ 733.98870743, nan, nan, nan], [ 739.62235728, nan, nan, nan], [ 740.4044042 , nan, nan, nan], [ 731.80965975, nan, nan, nan], [ 721.03665465, nan, nan, nan], [ 705.62178611, nan, nan, nan], [ 685.80543802, nan, nan, nan], [ 669.19219164, nan, nan, nan], [ 658.84855238, nan, nan, nan], [ 644.54229279, nan, nan, nan], [ 626.6254545 , nan, nan, nan], [ 612.54514199, nan, nan, nan], [ 596.26116299, nan, nan, nan], [ 582.83950822, nan, nan, nan], [ 575.89269932, nan, nan, nan], [ 570.81625606, nan, nan, nan], [ 557.82497551, nan, nan, nan]])
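Since the inverse transform is per column, the predicted flow could also be recovered directly from the fitted scaler's attributes; a minimal sketch:

```python
# Per-column inverse: x_orig = x_scaled * (data_max_ - data_min_) + data_min_
flow_pred = predictions.ravel() * (scaler.data_max_[0] - scaler.data_min_[0]) + scaler.data_min_[0]
print(flow_pred[:3])  # matches rev_trans[:3, 0]
```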
df_final = df_input[-predictions.shape[0]:].copy()  # last 30 weeks; .copy() so the App_Pred assignment below does not raise a SettingWithCopyWarning
df_final.count()
Flow_Rate_Lupa 30 R750 30 Infilt_ 30 ET01 30 dtype: int64
rev_trans.shape # 'numpy.ndarray' object
(30, 4)
rev_trans[:,0]
array([758.98936901, 788.30297095, 800.96614005, 812.22089669, 817.85110335, 816.90186493, 810.19575963, 805.89019753, 749.38847512, 744.77725608, 738.75605056, 734.31787644, 728.77404467, 733.98870743, 739.62235728, 740.4044042 , 731.80965975, 721.03665465, 705.62178611, 685.80543802, 669.19219164, 658.84855238, 644.54229279, 626.6254545 , 612.54514199, 596.26116299, 582.83950822, 575.89269932, 570.81625606, 557.82497551])
df_final['App_Pred'] = rev_trans[:, 0]
df_final
| Date | Flow_Rate_Lupa | R750 | Infilt_ | ET01 | App_Pred |
|---|---|---|---|---|---|
| 2019-12-12 | 636.11000 | 2385.7 | -1.488919 | 10.148199 | 758.989369 |
| 2019-12-19 | 637.55000 | 2380.5 | 15.458922 | 12.235131 | 788.302971 |
| 2019-12-26 | 688.32000 | 2409.9 | 38.837065 | 11.271558 | 800.966140 |
| 2020-01-02 | 748.84375 | 2389.1 | -4.628501 | 8.380835 | 812.220897 |
| 2020-01-09 | 759.69250 | 2369.9 | -5.032371 | 9.053951 | 817.851103 |
| 2020-01-16 | 765.63375 | 2370.5 | -4.997998 | 9.329997 | 816.901865 |
| 2020-01-23 | 771.57500 | 2379.7 | 4.131293 | 9.447846 | 810.195760 |
| 2020-01-30 | 777.51625 | 2385.7 | 0.570305 | 11.049492 | 805.890198 |
| 2020-02-06 | 777.11875 | 2393.9 | 1.349829 | 11.750285 | 749.388475 |
| 2020-02-13 | 763.65000 | 2394.3 | -6.129449 | 11.549081 | 744.777256 |
| 2020-02-20 | 751.54000 | 2388.1 | 5.813562 | 12.310730 | 738.756051 |
| 2020-02-27 | 737.56000 | 2396.8 | 1.931336 | 13.114440 | 734.317876 |
| 2020-03-05 | 727.75000 | 2447.2 | 43.723452 | 11.460914 | 728.774045 |
| 2020-03-12 | 727.92000 | 2421.1 | 3.836892 | 12.605180 | 733.988707 |
| 2020-03-19 | 719.84000 | 2382.8 | -8.240301 | 14.400502 | 739.622357 |
| 2020-03-26 | 716.65000 | 2379.4 | -5.783024 | 10.638374 | 740.404404 |
| 2020-04-02 | 712.59000 | 2374.0 | 7.327123 | 12.121462 | 731.809660 |
| 2020-04-09 | 703.60000 | 2369.4 | -8.969542 | 14.949236 | 721.036655 |
| 2020-04-16 | 690.37000 | 2314.8 | -8.359127 | 16.265212 | 705.621786 |
| 2020-04-23 | 677.00000 | 2303.4 | 18.076567 | 16.539055 | 685.805438 |
| 2020-04-30 | 664.67000 | 2283.5 | 11.666423 | 17.889295 | 669.192192 |
| 2020-05-07 | 649.89000 | 2280.7 | -10.752744 | 19.587906 | 658.848552 |
| 2020-05-14 | 631.24000 | 2247.7 | -9.316054 | 22.860091 | 644.542293 |
| 2020-05-21 | 612.30000 | 2250.8 | 26.586794 | 21.688676 | 626.625455 |
| 2020-05-28 | 595.04000 | 2250.6 | -13.161342 | 21.935571 | 612.545142 |
| 2020-06-04 | 576.66000 | 2276.4 | 13.023281 | 21.294532 | 596.261163 |
| 2020-06-11 | 561.90000 | 2281.3 | 21.938047 | 20.436589 | 582.839508 |
| 2020-06-18 | 542.70000 | 2221.9 | 7.462959 | 22.228401 | 575.892699 |
| 2020-06-25 | 527.10000 | 2185.5 | -15.477580 | 26.129299 | 570.816256 |
| 2020-07-02 | 366.08000 | 2132.0 | -13.517056 | 22.528427 | 557.824976 |
df_final[['Flow_Rate_Lupa','App_Pred']].plot();
Plotting the learning curves from the training history helps the developer make informed decisions about the architectural choices that need to be made.
loss_train = history.history['loss']
loss_val = history.history['val_loss']
epochs = range(1, len(loss_train) + 1)
plt.plot(epochs, loss_train, 'g', label='Training loss')
plt.plot(epochs, loss_val, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
history.history
{'loss': [0.12196853011846542, 0.04042739421129227, 0.032468635588884354, 0.020224202424287796, 0.012289104983210564, 0.011802777647972107, 0.007663832511752844, 0.0076283616945147514, 0.006231871899217367, 0.010474818758666515, 0.006262043956667185, 0.005870786029845476, 0.005392034072428942, 0.008328978903591633, 0.010311692021787167, 0.011460966430604458, 0.005972784943878651, 0.004674278199672699, 0.0038580279797315598],
 'mean_absolute_error': [0.26325494050979614, 0.15631504356861115, 0.1464846283197403, 0.10600168257951736, 0.08283749967813492, 0.08465701341629028, 0.06425126641988754, 0.06407380104064941, 0.05850755423307419, 0.07555001974105835, 0.06322453171014786, 0.06104826554656029, 0.05683555826544762, 0.07179701328277588, 0.07745169848203659, 0.08589889109134674, 0.060877103358507156, 0.05142376199364662, 0.046905517578125],
 'val_loss': [0.0067686052061617374, 0.003283388912677765, 0.006598788313567638, 0.0009371747728437185, 0.0036164780613034964, 0.0012093840632587671, 0.0008797855698503554, 0.00069515744689852, 0.0004939029458910227, 0.002209327183663845, 0.0012109280796721578, 0.0024367505684494972, 0.0013344347244128585, 0.006475319620221853, 0.0023659125436097383, 0.0077432855032384396, 0.0036054968368262053, 0.0025771826039999723, 0.0015962652396410704],
 'val_mean_absolute_error': [0.0717148706316948, 0.05038197711110115, 0.0731576532125473, 0.02496698871254921, 0.057277195155620575, 0.03119385987520218, 0.028266144916415215, 0.01690657250583172, 0.014115170575678349, 0.03841071203351021, 0.02118641510605812, 0.04230603203177452, 0.024027308449149132, 0.07741264998912811, 0.04340384900569916, 0.08398668467998505, 0.056145597249269485, 0.04568672552704811, 0.03239639103412628]}
mae_train = history.history['mean_absolute_error']
mae_val = history.history['val_mean_absolute_error']
epochs = range(1, len(mae_train) + 1)
plt.plot(epochs, mae_train, 'g', label='Training MAE')
plt.plot(epochs, mae_val, 'r', label='Validation MAE')
plt.title('Training and validation mean absolute error')
plt.xlabel('Epochs')
plt.ylabel('Mean absolute error')
plt.legend()
plt.show()
from sklearn.metrics import mean_absolute_percentage_error
y_true = df_final[['Flow_Rate_Lupa']]
y_pred = df_final[['App_Pred']]
mean_absolute_percentage_error(y_true, y_pred)
0.066020683748383
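For reference, `mean_absolute_percentage_error` is essentially the hand computation below (sklearn additionally guards the denominator with a small epsilon):

```python
import numpy as np

yt = df_final['Flow_Rate_Lupa'].to_numpy()
yp = df_final['App_Pred'].to_numpy()
print(np.mean(np.abs((yt - yp) / yt)))  # ≈ 0.066, matching the value above
```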
y_true = df_final['Flow_Rate_Lupa'][:25]  # drop the final five weeks
y_pred = df_final['App_Pred'][:25]
mean_absolute_percentage_error(y_true, y_pred)
0.04965943086967971
The mean absolute percentage error falls below 5% when the final five weeks, which include the abrupt drop at the end of the series, are excluded!