Traffic prediction using LSTM
I am using an LSTM model to predict the per-second data traffic of a base station. The dataset is as follows:
The train and test predictions look as follows:
The RMSE values for the train and test scores are 32.54 and 30.03, respectively. To reduce them, I changed the look-back value to 15, 20, and 30, but the error did not decrease. Can somebody explain the reason behind this large prediction error and give some advice on how to correct it? I would love to hear your suggestions. Thank you.
My code for the LSTM model looks as follows:
def create_dataset(dataset, look_back=1):
    """Slice a series into fixed-length input windows and next-step targets.

    Only column 0 of ``dataset`` is read. Sample ``i`` pairs the window
    ``dataset[i:i + look_back, 0]`` with the target
    ``dataset[i + look_back, 0]``.

    Args:
        dataset: 2-D array indexed as ``[row, column]``.
        look_back: Number of consecutive rows in each input window.

    Returns:
        A ``(dataX, dataY)`` tuple of NumPy arrays. ``dataX`` has one row of
        ``look_back`` values per sample and ``dataY`` holds the matching
        targets. Both are empty when ``len(dataset) - look_back - 1`` is not
        positive.
    """
    # The trailing "- 1" leaves the last possible window unused. It is kept
    # on purpose: the plotting offsets later in this script are computed
    # from exactly this sample count.
    n_samples = len(dataset) - look_back - 1
    dataX = [dataset[i:i + look_back, 0] for i in range(n_samples)]
    dataY = [dataset[i + look_back, 0] for i in range(n_samples)]
    return np.array(dataX), np.array(dataY)
# Build supervised-learning samples: each X row holds look_back consecutive
# values and the matching Y entry is the value that follows the window.
look_back = 10
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
print(trainX.shape)
# create_dataset returns 2-D inputs (samples, look_back), so the window length
# lives on axis 1; indexing shape[3] raises IndexError on a 2-D array.
# A trailing axis of size 1 is added to match the LSTM's
# input_shape=(look_back, 1).
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))
from keras.layers import Bidirectional
from keras.layers import Dropout

# Three stacked LSTM layers followed by a small dense head that emits a single
# value per input window of shape (look_back, 1).
# NOTE(review): the recurrent layers use 'relu'/'sigmoid' instead of the Keras
# LSTM default activation ('tanh') -- confirm this is intended.
model = Sequential([
    LSTM(50, activation='relu', return_sequences=True, input_shape=(look_back, 1)),
    LSTM(50, activation='relu', return_sequences=True),
    # return_sequences=False: only the final step's output feeds the dense head.
    LSTM(50, activation='sigmoid', return_sequences=False),
    Dense(50),
    Dropout(0.2),
    Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()
# Rebuild the training windows and fit the model on them.
# (The former empty-list initialisations were dead code: both names are
# assigned by create_dataset on the very next statement.)
Xdata_train, Ydata_train = create_dataset(train, look_back)
# create_dataset yields a 2-D array (samples, look_back); the window length is
# shape[1]. shape[3] does not exist on a 2-D array and raises IndexError.
Xdata_train = np.reshape(Xdata_train, (Xdata_train.shape[0], Xdata_train.shape[1], 1))
# shuffle=False keeps the samples in their original order during fitting.
history = model.fit(Xdata_train, Ydata_train, batch_size=1, epochs=20, shuffle=False)
# Run the fitted model over both windowed splits.
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

# Undo the scaling on predictions and targets so the errors are reported in
# the units of the unscaled data.
# NOTE(review): wrapping the 1-D targets in a list yields a single row of
# values; this presumably relies on the scaler having been fitted on one
# feature column -- confirm.
trainPredict, testPredict = (
    scaler.inverse_transform(pred) for pred in (trainPredict, testPredict)
)
trainY, testY = (
    scaler.inverse_transform([target]) for target in (trainY, testY)
)

# Root mean squared error per split (row 0 of the targets against column 0 of
# the predictions).
train_mse = mean_squared_error(trainY[0], trainPredict[:, 0])
trainScore = math.sqrt(train_mse)
print('Train Score: %.2f RMSE' % trainScore)

test_mse = mean_squared_error(testY[0], testPredict[:, 0])
testScore = math.sqrt(test_mse)
print('Test Score: %.2f RMSE' % testScore)
# Place the train predictions on an all-NaN canvas shaped like the full
# series: the first prediction belongs to index look_back, because the first
# look_back values only serve as input to it.
trainPredictPlot = np.empty_like(dataset)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(trainPredict) + look_back, :] = trainPredict

# Same for the test predictions. The start offset skips the train predictions
# plus both look_back warm-up windows; the "+ 1" / "- 1" terms mirror the
# sample count produced by create_dataset.
testPredictPlot = np.empty_like(dataset)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(trainPredict) + (look_back * 2) + 1:len(dataset) - 1, :] = testPredict

# Plot the unscaled series together with both prediction traces.
plt.figure(figsize=(16, 8))
plt.plot(scaler.inverse_transform(dataset))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.xlabel('Time in Seconds')
plt.ylabel('Data Traffic in MB')
plt.legend(['Train', 'Train Predict', 'Test Predict'], loc='best')
# A stray, unmatched Markdown fence was fused onto this call and made the
# script a syntax error; it has been removed.
plt.show()
Topic lstm python machine-learning
Category Data Science