import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.set_option('mode.chained_assignment', None)

from keras.models import Sequential
from keras.layers import Dense, SimpleRNN
from tensorflow.keras.optimizers.legacy import RMSprop
from keras.callbacks import Callback


humidity = pd.read_csv("../weather-pogodynka/humidity.csv")
temp = pd.read_csv("../weather-pogodynka/temperature.csv")
pressure = pd.read_csv("../weather-pogodynka/pressure.csv")


humidity_ALB = humidity[['datetime','Albuquerque']]
temp_ALB = temp[['datetime','Albuquerque']]
pressure_ALB = pressure[['datetime','Albuquerque']]


#humidity_ALB.head(10)
pressure_ALB.head(10)


pressure_ALB.tail(10)


print(humidity_ALB.shape)
print(temp_ALB.shape)
print(pressure_ALB.shape)

(45253, 2)
(45253, 2)
(45253, 2)


print("How many NaN are there in the humidity dataset?",humidity_ALB.isna().sum()['Albuquerque'])
print("How many NaN are there in the temperature dataset?",temp_ALB.isna().sum()['Albuquerque'])
print("How many NaN are there in the pressure dataset?",pressure_ALB.isna().sum()['Albuquerque'])

How many NaN are there in the humidity dataset? 710
How many NaN are there in the temperature dataset? 1
How many NaN are there in the pressure dataset? 456


Tp = 7000


def plot_train_points(quantity='humidity',Tp=7000):
    plt.figure(figsize=(15,4))
    if quantity=='humidity':
        plt.title("Humidity of first {} data points".format(Tp),fontsize=16)
        plt.plot(humidity_ALB['Albuquerque'][:Tp],c='k',lw=1)
    if quantity=='temperature':
        plt.title("Temperature of first {} data points".format(Tp),fontsize=16)
        plt.plot(temp_ALB['Albuquerque'][:Tp],c='k',lw=1)
    if quantity=='pressure':
        plt.title("Pressure of selected {} data points".format(Tp),fontsize=16)
        plt.plot(pressure_ALB['Albuquerque'][16300:23300],c='k',lw=1)
    plt.grid(True)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    plt.show()


plot_train_points('humidity')


plot_train_points('temperature')


plot_train_points('pressure')


humidity_ALB.interpolate(inplace=True)
humidity_ALB.dropna(inplace=True)

temp_ALB.interpolate(inplace=True)
temp_ALB.dropna(inplace=True)

pressure_ALB.interpolate(inplace=True)
pressure_ALB.dropna(inplace=True)


print(humidity_ALB.shape)
print(temp_ALB.shape)
print(pressure_ALB.shape)

(45252, 2)
(45252, 2)
(45252, 2)


train = np.array(humidity_ALB['Albuquerque'][:Tp])
test = np.array(humidity_ALB['Albuquerque'][Tp:])

C:\Users\kompiuter\AppData\Local\Temp\ipykernel_2340\3039798577.py:1: FutureWarning: The behavior of `series[i:j]` with an integer-dtype index is deprecated. In a future version, this will be treated as *label-based* indexing, consistent with e.g. `series[i]` lookups. To retain the old behavior, use `series.iloc[i:j]`. To get the future behavior, use `series.loc[i:j]`.
  train = np.array(humidity_ALB['Albuquerque'][:Tp])
C:\Users\kompiuter\AppData\Local\Temp\ipykernel_2340\3039798577.py:2: FutureWarning: The behavior of `series[i:j]` with an integer-dtype index is deprecated. In a future version, this will be treated as *label-based* indexing, consistent with e.g. `series[i]` lookups. To retain the old behavior, use `series.iloc[i:j]`. To get the future behavior, use `series.loc[i:j]`.
  test = np.array(humidity_ALB['Albuquerque'][Tp:])


print("Train data length:", train.shape)
print("Test data length:", test.shape)

Train data length: (7000,)
Test data length: (38252,)


train=train.reshape(-1,1)
test=test.reshape(-1,1)


plt.figure(figsize=(15,4))
plt.title("Train and test data plotted together",fontsize=16)
plt.plot(np.arange(Tp),train,c='blue')
plt.plot(np.arange(Tp,45252),test,c='orange',alpha=0.7)
plt.legend(['Train','Test'])
plt.grid(True)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.show()


step = 8


# add step elements into train and test
test = np.append(test,np.repeat(test[-1,],step))
train = np.append(train,np.repeat(train[-1,],step))


print("Train data length:", train.shape)
print("Test data length:", test.shape)

Train data length: (7008,)
Test data length: (38260,)


def convertToMatrix(data, step):
    X, Y =[], []
    for i in range(len(data)-step):
        d=i+step  
        X.append(data[i:d,])
        Y.append(data[d,])
    return np.array(X), np.array(Y)


trainX,trainY =convertToMatrix(train,step)
testX,testY =convertToMatrix(test,step)


trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))


print("Training data shape:", trainX.shape,', ',trainY.shape)
print("Test data shape:", testX.shape,', ',testY.shape)

Training data shape: (7000, 1, 8) ,  (7000,)
Test data shape: (38252, 1, 8) ,  (38252,)


def build_simple_rnn(num_units=128, embedding=4,num_dense=32,lr=0.001):
    """
    Builds and compiles a simple RNN model
    Arguments:
              num_units: Number of units of a the simple RNN layer
              embedding: Embedding length
              num_dense: Number of neurons in the dense layer folloId by the RNN layer
              lr: Learning rate (uses RMSprop optimizer)
    Returns:
              A compiled Keras model.
    """
    model = Sequential()
    model.add(SimpleRNN(units=num_units, input_shape=(1,embedding), activation="relu"))
    model.add(Dense(num_dense, activation="relu"))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer=RMSprop(lr=lr),metrics=['mse'])
    
    return model


model_humidity = build_simple_rnn(num_units=128,num_dense=32,embedding=8,lr=0.0005)

c:\Users\kompiuter\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\optimizers\optimizer_v2\rmsprop.py:143: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.
  super().__init__(name, **kwargs)


model_humidity.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 simple_rnn_3 (SimpleRNN)    (None, 128)               17536     
                                                                 
 dense_6 (Dense)             (None, 32)                4128      
                                                                 
 dense_7 (Dense)             (None, 1)                 33        
                                                                 
=================================================================
Total params: 21,697
Trainable params: 21,697
Non-trainable params: 0
_________________________________________________________________


class MyCallback(Callback):
    def on_epoch_end(self, epoch, logs=None):
        if (epoch+1) % 50 == 0 and epoch>0:
            print("Epoch number {} done".format(epoch+1))


batch_size=8
num_epochs = 1000


model_humidity.fit(trainX,trainY, 
          epochs=num_epochs, 
          batch_size=batch_size, 
          callbacks=[MyCallback()],verbose=0)

Epoch number 50 done
Epoch number 100 done
Epoch number 150 done
Epoch number 200 done
Epoch number 250 done
Epoch number 300 done
Epoch number 350 done
Epoch number 400 done
Epoch number 450 done
Epoch number 500 done
Epoch number 550 done
Epoch number 600 done
Epoch number 650 done
Epoch number 700 done
Epoch number 750 done
Epoch number 800 done
Epoch number 850 done
Epoch number 900 done
Epoch number 950 done
Epoch number 1000 done

<keras.callbacks.History at 0x230b59bba30>


plt.figure(figsize=(7,5))
plt.title("RMSE loss over epochs",fontsize=16)
plt.plot(np.sqrt(model_humidity.history.history['loss']),c='k',lw=2)
plt.grid(True)
plt.xlabel("Epochs",fontsize=14)
plt.ylabel("Root-mean-squared error",fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.show()


plt.figure(figsize=(15,4))
plt.title("This is what the model saw",fontsize=18)
plt.plot(trainX[:,0][:,0],c='blue')
plt.grid(True)
plt.show()


trainPredict = model_humidity.predict(trainX)
testPredict= model_humidity.predict(testX)
predicted=np.concatenate((trainPredict,testPredict),axis=0)

219/219 [==============================] - 0s 696us/step
1196/1196 [==============================] - 1s 670us/step


plt.figure(figsize=(10,4))
plt.title("This is what the model predicted",fontsize=18)
plt.plot(testPredict,c='orange')
plt.grid(True)
plt.show()


index = humidity_ALB.index.values

plt.figure(figsize=(15,5))
plt.title("Humidity: Ground truth and prediction together",fontsize=18)
plt.plot(index,humidity_ALB['Albuquerque'],c='blue')
plt.plot(index,predicted,c='orange',alpha=0.75)
plt.legend(['True data','Predicted'],fontsize=15)
plt.axvline(x=Tp, c="r")
plt.grid(True)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.ylim(-20,120)
plt.show()


train = np.array(temp_ALB['Albuquerque'][:Tp])
test = np.array(temp_ALB['Albuquerque'][Tp:])

train=train.reshape(-1,1)
test=test.reshape(-1,1)

step = 8

# add step elements into train and test
test = np.append(test,np.repeat(test[-1,],step))
train = np.append(train,np.repeat(train[-1,],step))

trainX,trainY =convertToMatrix(train,step)
testX,testY =convertToMatrix(test,step)

trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

C:\Users\kompiuter\AppData\Local\Temp\ipykernel_2340\2750091313.py:1: FutureWarning: The behavior of `series[i:j]` with an integer-dtype index is deprecated. In a future version, this will be treated as *label-based* indexing, consistent with e.g. `series[i]` lookups. To retain the old behavior, use `series.iloc[i:j]`. To get the future behavior, use `series.loc[i:j]`.
  train = np.array(temp_ALB['Albuquerque'][:Tp])
C:\Users\kompiuter\AppData\Local\Temp\ipykernel_2340\2750091313.py:2: FutureWarning: The behavior of `series[i:j]` with an integer-dtype index is deprecated. In a future version, this will be treated as *label-based* indexing, consistent with e.g. `series[i]` lookups. To retain the old behavior, use `series.iloc[i:j]`. To get the future behavior, use `series.loc[i:j]`.
  test = np.array(temp_ALB['Albuquerque'][Tp:])


model_temp = build_simple_rnn(num_units=128,num_dense=32,embedding=8,lr=0.0005)

batch_size=8
num_epochs = 2000

model_temp.fit(trainX,trainY, 
          epochs=num_epochs, 
          batch_size=batch_size, 
          callbacks=[MyCallback()],verbose=0)

Epoch number 50 done
Epoch number 100 done
Epoch number 150 done
Epoch number 200 done
Epoch number 250 done
Epoch number 300 done
Epoch number 350 done
Epoch number 400 done
Epoch number 450 done
Epoch number 500 done
Epoch number 550 done
Epoch number 600 done
Epoch number 650 done
Epoch number 700 done
Epoch number 750 done
Epoch number 800 done
Epoch number 850 done
Epoch number 900 done
Epoch number 950 done
Epoch number 1000 done
Epoch number 1050 done
Epoch number 1100 done
Epoch number 1150 done
Epoch number 1200 done
Epoch number 1250 done
Epoch number 1300 done
Epoch number 1350 done
Epoch number 1400 done
Epoch number 1450 done
Epoch number 1500 done
Epoch number 1550 done
Epoch number 1600 done
Epoch number 1650 done
Epoch number 1700 done
Epoch number 1750 done
Epoch number 1800 done
Epoch number 1850 done
Epoch number 1900 done
Epoch number 1950 done
Epoch number 2000 done

<keras.callbacks.History at 0x230b5cc2e00>


plt.figure(figsize=(7,5))
plt.title("RMSE loss over epochs",fontsize=16)
plt.plot(np.sqrt(model_temp.history.history['loss']),c='k',lw=2)
plt.grid(True)
plt.xlabel("Epochs",fontsize=14)
plt.ylabel("Root-mean-squared error",fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.show()


trainPredict = model_temp.predict(trainX)
testPredict= model_temp.predict(testX)
predicted=np.concatenate((trainPredict,testPredict),axis=0)

219/219 [==============================] - 0s 693us/step
1196/1196 [==============================] - 1s 698us/step


index = temp_ALB.index.values

plt.figure(figsize=(15,5))
plt.title("Temperature: Ground truth and prediction together",fontsize=18)
plt.plot(index,temp_ALB['Albuquerque'],c='blue')
plt.plot(index,predicted,c='orange',alpha=0.75)
plt.legend(['True data','Predicted'],fontsize=15)
plt.axvline(x=Tp, c="r")
plt.grid(True)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.show()


train = np.array(pressure_ALB['Albuquerque'][:Tp])
test = np.array(pressure_ALB['Albuquerque'][Tp:])

train=train.reshape(-1,1)
test=test.reshape(-1,1)

step = 8

# add step elements into train and test
test = np.append(test,np.repeat(test[-1,],step))
train = np.append(train,np.repeat(train[-1,],step))

trainX,trainY =convertToMatrix(train,step)
testX,testY =convertToMatrix(test,step)

trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

C:\Users\kompiuter\AppData\Local\Temp\ipykernel_2340\2873042680.py:1: FutureWarning: The behavior of `series[i:j]` with an integer-dtype index is deprecated. In a future version, this will be treated as *label-based* indexing, consistent with e.g. `series[i]` lookups. To retain the old behavior, use `series.iloc[i:j]`. To get the future behavior, use `series.loc[i:j]`.
  train = np.array(pressure_ALB['Albuquerque'][:Tp])
C:\Users\kompiuter\AppData\Local\Temp\ipykernel_2340\2873042680.py:2: FutureWarning: The behavior of `series[i:j]` with an integer-dtype index is deprecated. In a future version, this will be treated as *label-based* indexing, consistent with e.g. `series[i]` lookups. To retain the old behavior, use `series.iloc[i:j]`. To get the future behavior, use `series.loc[i:j]`.
  test = np.array(pressure_ALB['Albuquerque'][Tp:])


model_pressure= build_simple_rnn(num_units=128,num_dense=32,embedding=8,lr=0.0005)

batch_size=8
num_epochs = 500

model_pressure.fit(trainX,trainY, 
          epochs=num_epochs, 
          batch_size=batch_size, 
          callbacks=[MyCallback()],verbose=0)

Epoch number 50 done
Epoch number 100 done
Epoch number 150 done
Epoch number 200 done
Epoch number 250 done
Epoch number 300 done
Epoch number 350 done
Epoch number 400 done
Epoch number 450 done
Epoch number 500 done

<keras.callbacks.History at 0x230bc8b7940>


plt.figure(figsize=(7,5))
plt.title("RMSE loss over epochs",fontsize=16)
plt.plot(np.sqrt(model_pressure.history.history['loss']),c='k',lw=2)
plt.grid(True)
plt.xlabel("Epochs",fontsize=14)
plt.ylabel("Root-mean-squared error",fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.show()


trainPredict = model_pressure.predict(trainX)
testPredict= model_pressure.predict(testX)
predicted=np.concatenate((trainPredict,testPredict),axis=0)

219/219 [==============================] - 0s 651us/step
1196/1196 [==============================] - 1s 665us/step


index = pressure_ALB.index.values

plt.figure(figsize=(15,5))
plt.title("Pressure: Ground truth and prediction together",fontsize=18)
plt.plot(index,pressure_ALB['Albuquerque'],c='blue')
plt.plot(index,predicted,c='orange',alpha=0.75)
plt.legend(['True data','Predicted'],fontsize=15)
plt.axvline(x=Tp, c="r")
plt.grid(True)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.show()

	datetime	Albuquerque
0	2012-10-01 12:00:00	NaN
1	2012-10-01 13:00:00	1024.0
2	2012-10-01 14:00:00	1024.0
3	2012-10-01 15:00:00	1024.0
4	2012-10-01 16:00:00	1024.0
5	2012-10-01 17:00:00	1024.0
6	2012-10-01 18:00:00	1024.0
7	2012-10-01 19:00:00	1024.0
8	2012-10-01 20:00:00	1024.0
9	2012-10-01 21:00:00	1024.0

	datetime	Albuquerque
45243	2017-11-29 15:00:00	1028.0
45244	2017-11-29 16:00:00	1028.0
45245	2017-11-29 17:00:00	1028.0
45246	2017-11-29 18:00:00	1028.0
45247	2017-11-29 19:00:00	1026.0
45248	2017-11-29 20:00:00	1025.0
45249	2017-11-29 21:00:00	1024.0
45250	2017-11-29 22:00:00	1024.0
45251	2017-11-29 23:00:00	1024.0
45252	2017-11-30 00:00:00	1024.0

x	y
1,2,3	4
2,3,4	5
3,4,5	6
...	...
7,8,9	10

WEATHER FORECAST USING RECURRENT NEURAL NETWORK (RNN) BASED ON THE DATA OF AN ALBUQUERQUE TOWN IN THE TIME PERIOD 2012-2017.¶

Data loading and pre-processing¶

There are many `NaN` values (Not a Numbers) in the dataset¶

Choosing a point in the time-series for training data¶

Interpolate data points to fill up `NaN` values¶

Train and test splits on the `Tp=7000`¶

Choose the embedding or step size¶

Converting to a multi-dimensional array¶

Modeling¶

Keras model with `SimpleRNN` layer¶

A simple Keras `Callback` class to print progress of the training at regular epoch interval¶

Batch size and number of epochs¶

Training the model¶

Plot RMSE loss over epochs¶

Result and analysis¶

What did the model see while training?¶

Now predict the future points¶

Plotting the ground truth and model predictions together¶

Modeling the temperature data¶

Modeling the wind speed data¶

WEATHER FORECAST USING RECURRENT NEURAL NETWORK (RNN) BASED ON THE DATA OF AN ALBUQUERQUE TOWN IN THE TIME PERIOD 2012-2017.¶

Data loading and pre-processing¶

There are many NaN values (Not a Numbers) in the dataset¶

Choosing a point in the time-series for training data¶

Interpolate data points to fill up NaN values¶

Train and test splits on the Tp=7000¶

Choose the embedding or step size¶

Converting to a multi-dimensional array¶

Modeling¶

Keras model with SimpleRNN layer¶

A simple Keras Callback class to print progress of the training at regular epoch interval¶

Batch size and number of epochs¶

Training the model¶

Plot RMSE loss over epochs¶

Result and analysis¶

What did the model see while training?¶

Now predict the future points¶

Plotting the ground truth and model predictions together¶

Modeling the temperature data¶

Modeling the wind speed data¶

There are many `NaN` values (Not a Numbers) in the dataset¶

Interpolate data points to fill up `NaN` values¶

Train and test splits on the `Tp=7000`¶

Keras model with `SimpleRNN` layer¶

A simple Keras `Callback` class to print progress of the training at regular epoch interval¶