LSTM预测天气温度

原创已于 2024-08-29 10:07:48 修改 · 3.3k 阅读

68 ·

CC 4.0 BY-SA版权

文章标签：

#python #机器学习 #深度学习 #lstm

于 2019-04-24 10:00:19 首次发布

tensorflow 专栏收录该内容

6 篇文章

订阅专栏

文章最前：我是Octopus，这个名字来源于我的中文名--章鱼；我热爱编程、热爱算法、热爱开源。所有源码在我的个人github ；这博客是记录我学习的点点滴滴，如果您对 Python、Java、AI、算法有兴趣，可以关注我的动态，一起学习，共同进步。

文章目录：

1）导入算法依赖

利用LSTM对天气温度进行预测

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Bidirectional
from scipy.ndimage import gaussian_filter1d
from scipy.signal import medfilt
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from numpy import array
font = {'family' : 'Arial', 'weight' : 'normal','size'   : 10}
plt.rc('font', **font)

2）设置参数

n_timestamp = 10
train_days = 1500  
testing_days = 500 
n_epochs = 25
filter_on = 1


# 选择模型类型
# 1: Single cell
# 2: Stacked
# 3: Bidirectional
#
model_type = 1

3）读取数据

dataset = pd.read_csv('data/weather_temperature_yilan.csv')
if filter_on == 1:
    dataset['Temperature'] = medfilt(dataset['Temperature'], 3)
    dataset['Temperature'] = gaussian_filter1d(dataset['Temperature'], 1.2)

4）处理数据

train_set = dataset[0:train_days].reset_index(drop=True)
test_set = dataset[train_days: train_days+testing_days].reset_index(drop=True)
training_set = train_set.iloc[:, 1:2].values
testing_set = test_set.iloc[:, 1:2].values

sc = MinMaxScaler(feature_range = (0, 1))
training_set_scaled = sc.fit_transform(training_set)
testing_set_scaled = sc.fit_transform(testing_set)

def data_split(sequence, n_timestamp):
    X = []
    y = []
    for i in range(len(sequence)):
        end_ix = i + n_timestamp
        if end_ix > len(sequence)-1:
            break
        # i to end_ix as input
        # end_ix as target output
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)


X_train, y_train = data_split(training_set_scaled, n_timestamp)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test, y_test = data_split(testing_set_scaled, n_timestamp)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

if model_type == 1:
    # Single cell LSTM
    model = Sequential()
    model.add(LSTM(units = 50, activation='relu',input_shape = (X_train.shape[1], 1)))
    model.add(Dense(units = 1))
if model_type == 2:
    # Stacked LSTM
    model = Sequential()
    model.add(LSTM(50, activation='relu', return_sequences=True, input_shape=(X_train.shape[1], 1)))
    model.add(LSTM(50, activation='relu'))
    model.add(Dense(1))
if model_type == 3:
    # Bidirectional LSTM
    model = Sequential()
    model.add(Bidirectional(LSTM(50, activation='relu'), input_shape=(X_train.shape[1], 1)))
    model.add(Dense(1))

5）训练模型

model.compile(optimizer = 'adam', loss = 'mean_squared_error')
history = model.fit(X_train, y_train, epochs = n_epochs, batch_size = 32)
loss = history.history['loss']
epochs = range(len(loss))

Epoch 1/25
47/47 [==============================] - 4s 18ms/step - loss: 0.1637
Epoch 2/25
47/47 [==============================] - 1s 17ms/step - loss: 0.0119
Epoch 3/25
47/47 [==============================] - 1s 17ms/step - loss: 0.0104
Epoch 4/25
47/47 [==============================] - 1s 17ms/step - loss: 0.0099
Epoch 5/25
47/47 [==============================] - 1s 19ms/step - loss: 0.0098
Epoch 6/25
47/47 [==============================] - 1s 20ms/step - loss: 0.0094
Epoch 7/25
47/47 [==============================] - 1s 16ms/step - loss: 0.0089
Epoch 8/25
47/47 [==============================] - 1s 18ms/step - loss: 0.0084
Epoch 9/25
47/47 [==============================] - 1s 16ms/step - loss: 0.0079
Epoch 10/25
47/47 [==============================] - 1s 17ms/step - loss: 0.0073
Epoch 11/25
47/47 [==============================] - 1s 16ms/step - loss: 0.0067
Epoch 12/25
47/47 [==============================] - 1s 16ms/step - loss: 0.0065
Epoch 13/25
47/47 [==============================] - 1s 16ms/step - loss: 0.0058

6）预测数据

y_predicted = model.predict(X_test)

#
# 'De-normalize' the data
#
y_predicted_descaled = sc.inverse_transform(y_predicted)
y_train_descaled = sc.inverse_transform(y_train)
y_test_descaled = sc.inverse_transform(y_test)
y_pred = y_predicted.ravel()
y_pred = [round(yx, 2) for yx in y_pred]
y_tested = y_test.ravel()

7）结果展示

plt.figure(figsize=(8,7))

plt.subplot(3, 1, 1)
plt.plot(dataset['Temperature'], color = 'black', linewidth=1, label = 'True value')
plt.ylabel("Temperature")
plt.xlabel("Day")
plt.title("All data")


plt.subplot(3, 2, 3)
plt.plot(y_test_descaled, color = 'black', linewidth=1, label = 'True value')
plt.plot(y_predicted_descaled, color = 'red',  linewidth=1, label = 'Predicted')
plt.legend(frameon=False)
plt.ylabel("Temperature")
plt.xlabel("Day")
plt.title("Predicted data (n days)")

plt.subplot(3, 2, 4)
plt.plot(y_test_descaled[0:75], color = 'black', linewidth=1, label = 'True value')
plt.plot(y_predicted_descaled[0:75], color = 'red', label = 'Predicted')
plt.legend(frameon=False)
plt.ylabel("Temperature")
plt.xlabel("Day")
plt.title("Predicted data (first 75 days)")

plt.subplot(3, 3, 7)
plt.plot(epochs, loss, color='black')
plt.ylabel("Loss (MSE)")
plt.xlabel("Epoch")
plt.title("Training curve")

plt.subplot(3, 3, 8)
plt.plot(y_test_descaled-y_predicted_descaled, color='black')
plt.ylabel("Residual")
plt.xlabel("Day")
plt.title("Residual plot")

plt.subplot(3, 3, 9)
plt.scatter(y_predicted_descaled, y_test_descaled, s=2, color='black')
plt.ylabel("Y true")
plt.xlabel("Y predicted")
plt.title("Scatter plot")

plt.subplots_adjust(hspace = 0.5, wspace=0.3)
plt.show()



mse = mean_squared_error(y_test_descaled, y_predicted_descaled)
r2 = r2_score(y_test_descaled, y_predicted_descaled)
print("mse=" + str(round(mse,2)))
print("r2=" + str(round(r2,2)))