#!/usr/bin/env python
# coding: utf-8

# In[ ]:
# View the mounted dataset directory; changes under it are reverted each time
# the environment restarts.
get_ipython().system('ls /home/aistudio/data')

# In[ ]:
# View the personal work directory; changes under it persist across resets.
# Clean unnecessary files promptly to keep environment loading fast.
get_ipython().system('ls /home/aistudio/work')

# In[ ]:
# For a persistent package installation, install into a persistent path:
get_ipython().system('mkdir /home/aistudio/external-libraries')
get_ipython().system('pip install beautifulsoup4 -t /home/aistudio/external-libraries')

# In[1]:
# Add the persistent path to sys.path on every kernel start so the packages
# installed above stay importable.
import sys
sys.path.append('/home/aistudio/external-libraries')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import paddle
import paddle.nn as nn
from sklearn.preprocessing import MinMaxScaler
# Label is traffic_volume; features are the remaining loaded columns.
# NOTE(review): the original note said features were "is_holiday, is_weekend,
# temp, clouds_all", but the code loads holiday/is_weekend/temp only —
# verify against the dataset schema.
df = pd.read_csv('./dataset/series1.csv', usecols=['traffic_volume','temp', 'holiday', 'is_weekend'])
series_test = pd.read_csv('./dataset/series_test.csv', usecols=['temp', 'holiday', 'is_weekend']).values
all_data = df.values
# Chronological 98% / 2% train/test split (no shuffling).
split_boundary = int(all_data.shape[0] * 0.98)
train_data = all_data[: split_boundary, :]
test_data = all_data[split_boundary:, :]
plt.figure(figsize=(15, 8))
# Data visualisation (disabled).
# plt.plot(np.arange(train_data.shape[0]), train_data[:, 0], label='train data')
# plt.plot(np.arange(train_data.shape[0], train_data.shape[0] + test_data.shape[0]), test_data[:, 0], label='test data')
# plt.legend()
# Window splitting.
def split_windows(x1, y1, size):
    """Build sliding-window samples with stride 1.

    Each sample is `size` consecutive feature rows of `x1`; its label is
    column 0 of `y1` at the row immediately after the window.

    Args:
        x1: 2-D feature array, shape (n, n_features).
        y1: 2-D label array, shape (n, 1) (column 0 is read).
        size: window length in time steps.

    Returns:
        (X, Y) where X has shape (n - size, size, n_features) and Y has
        shape (n - size,).
    """
    n_windows = len(x1) - size
    feats = [x1[start:start + size, :] for start in range(n_windows)]
    labels = [y1[start + size, 0] for start in range(n_windows)]
    return np.array(feats), np.array(labels)
def split_test(data, size):
    """Slice `data` into overlapping windows of `size` rows, stride 1.

    Mirrors the feature half of split_windows, for unlabeled data.

    Args:
        data: 2-D array, shape (n, n_features).
        size: window length in time steps.

    Returns:
        Array of shape (n - size, size, n_features).
    """
    windows = [data[start:start + size, :] for start in range(len(data) - size)]
    return np.array(windows)
def fit_size(x, y):
    """Min-max scale features and labels independently.

    Args:
        x: 2-D feature array.
        y: 2-D label array of shape (n, 1).

    Returns:
        (scaled_x, scaled_y, y_scaler) — the fitted label scaler is
        returned so predictions can later be inverse-transformed; the
        feature scaler is discarded, so `x` cannot be un-scaled afterwards.
    """
    # Use the MinMaxScaler already imported at module level instead of the
    # original redundant function-local `from sklearn import preprocessing`.
    x_scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()
    x = x_scaler.fit_transform(x)
    y = y_scaler.fit_transform(y)
    return x, y, y_scaler
# Normalization: column 0 is the label, the rest are features.
train_x = train_data[:,1:]
train_y = train_data[:,0].reshape(-1, 1)
test_x = test_data[:,1:]
test_y = test_data[:,0].reshape(-1, 1)
train_x, train_y, y_MinMax = fit_size(train_x, train_y)
# NOTE(review): the test split is scaled with its OWN min/max rather than the
# training scalers, contradicting the comment below and leaking test-set
# statistics — consider reusing the scalers fitted on the training split.
test_x, test_y, y_MinMax1 = fit_size(test_x, test_y)
# scaled_train_data = scaler.fit_transform(train_data)
# (Original intent: normalise the test set with the training-set extrema to
# keep train/test distributions consistent — not what the active code does.)
# scaled_test_data = scaler.transform(test_data)
scaler = MinMaxScaler()
series_test = scaler.fit_transform(series_test)
# Train/test sliding-window construction.
window_size = 30
train_X, train_Y = split_windows(train_x,train_y, size=window_size)
test_X, test_Y = split_windows(test_x,test_y, size=window_size)
series_test = split_test(series_test, size=window_size)
print('train shape', train_X.shape, train_Y.shape)
print('test shape', test_X.shape, test_Y.shape)
print('series_test shape', series_test.shape)
# Forecast hourly values for the coming week.
def process(data, bs):
    """Group `data` along its first axis into mini-batches of size `bs`.

    The final batch keeps whatever rows remain, so it may be shorter than
    `bs`.  When that happens the batches are ragged: modern NumPy
    (>= 1.24) raises ValueError on implicit ragged-array creation, so the
    legacy behaviour (a 1-D object array of per-batch lists) is produced
    explicitly.  Evenly divisible input returns a regular
    (n_batches, bs, ...) array, exactly as before.

    Args:
        data: array with a leading sample axis (supports slicing/.tolist()).
        bs: batch size, positive int.

    Returns:
        np.ndarray of batches (regular dtype, or object dtype when ragged).
    """
    batches = [data[i:i + bs].tolist() for i in range(0, len(data), bs)]
    if len(batches) > 1 and len(batches[-1]) != len(batches[0]):
        # Ragged tail batch: build the object array element-by-element to
        # avoid NumPy's ragged-sequence ValueError.
        out = np.empty(len(batches), dtype=object)
        for k, batch in enumerate(batches):
            out[k] = batch
        return out
    return np.array(batches)
batch_size = 1024
# Manually pre-batch the training windows (the last batch may be smaller).
train_X = process(train_X, batch_size)
train_Y = process(train_Y, batch_size)
print(train_X.shape, train_Y.shape)
# NOTE(review): fea_num is 4 but train_x above appears to carry only 3
# feature columns (temp, holiday, is_weekend) — verify, since the model's
# reshape in forward() requires window_size * fea_num elements per sample.
fea_num = 4
out_fea = 30  # width of the per-time-step linear projection inside the model
class CNN_LSTM(nn.Layer):
    """CNN + stacked-LSTM regressor for sliding-window time-series forecasting.

    Each input window is linearly projected per time step, passed through a
    small 2-D convolution block, then through three LSTM layers; the final
    time step's hidden state is mapped to a single scalar prediction.

    NOTE(review): the attribute names below (proj, conv1, ...) are the
    state_dict keys used by paddle.save/paddle.load elsewhere in this
    script — do not rename them.
    """

    def __init__(self, window_size, fea_num):
        super().__init__()
        self.window_size = window_size  # time steps per input window
        self.fea_num = fea_num          # features per time step
        # Per-time-step projection: fea_num -> out_fea (module-level constant).
        self.proj = nn.Linear(in_features=fea_num, out_features=out_fea)
        # 'same' padding keeps the (window_size, out_fea) spatial extent.
        self.conv1 = nn.Conv2D(in_channels=1, out_channels=64, stride=1, kernel_size=3, padding='same')
        self.relu1 = nn.ReLU()
        # stride=1 with 'same' padding: pooling also preserves spatial size.
        self.pool = nn.MaxPool2D(kernel_size=2, stride=1, padding='same')
        self.dropout = nn.Dropout2D(0.3)
        # Stacked LSTMs narrowing 64*out_fea -> 256 -> 128 -> 64.
        self.lstm1 = nn.LSTM(input_size=64 * out_fea, hidden_size=256, num_layers=1, time_major=False)
        self.lstm2 = nn.LSTM(input_size=256, hidden_size=128, num_layers=1, time_major=False)
        self.lstm3 = nn.LSTM(input_size=128, hidden_size=64, num_layers=1, time_major=False)
        self.fc = nn.Linear(in_features=64, out_features=32)
        self.relu2 = nn.ReLU()
        self.head = nn.Linear(in_features=32, out_features=1)

    def forward(self, x):
        # x is flattened to (batch, 1, window_size, fea_num): one "image"
        # channel so Conv2D can be applied over the window.
        x = x.reshape([x.shape[0], 1, self.window_size, self.fea_num])
        x = self.proj(x)   # -> (batch, 1, window_size, out_fea)
        x = self.conv1(x)  # -> (batch, 64, window_size, out_fea)
        x = self.relu1(x)
        x = self.pool(x)
        x = self.dropout(x)
        # Flatten channels x projected-features into the per-step LSTM input.
        x = x.reshape([x.shape[0], self.window_size, -1])
        x, (h, c) = self.lstm1(x)
        x, (h, c) = self.lstm2(x)
        x, (h, c) = self.lstm3(x)
        # Keep only the last time step's hidden state.
        x = x[:, -1, :]
        x = self.fc(x)
        x = self.relu2(x)
        x = self.head(x)   # -> (batch, 1)
        return x
model = CNN_LSTM(window_size, fea_num)
# paddle.summary(model, (99, 30, 4))
# Hyper-parameters.
base_lr = 0.001
EPOCH = 100
# Cosine-annealed learning rate over the full run (one step per epoch).
lr_schedual = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=base_lr, T_max=EPOCH, verbose=True)
loss_fn = nn.MSELoss()
mae_loss = nn.L1Loss()
# NOTE(review): Accuracy is a classification metric and is never used below —
# likely dead code for this regression task.
metric = paddle.metric.Accuracy()
# opt = paddle.optimizer.SGD(parameters=model.parameters(),learning_rate=lr_schedual)
opt = paddle.optimizer.Adam(parameters=model.parameters(), learning_rate=lr_schedual, beta1=0.9, beta2=0.999)
# Per-batch training losses, collected across all epochs.
mses_train= []
# mses_eval= []
maes_train= []
# maes_eval= []
# Model training.
for epoch in range(EPOCH):
    model.train()
    loss_train = 0  # sum of per-batch MSE for this epoch (reporting only)
    for batch_id, data in enumerate(train_X):
        label = train_Y[batch_id]
        data = paddle.to_tensor(data, dtype='float32')
        label = paddle.to_tensor(label, dtype='float32')
        # Match the model's (batch, 1) output shape.
        label = label.reshape([label.shape[0], 1])
        y = model(data)
        loss = loss_fn(y, label)
        opt.clear_grad()
        loss.backward()
        opt.step()
        loss_train += loss.item()
        # MAE is tracked for reporting only; it does not drive the optimizer.
        mae = mae_loss(y,label)
        mses_train.append(loss.item())
        maes_train.append(mae.item())
    print("[TRAIN] ========epoch : {}, loss: {:.4f}==========".format(epoch + 1, loss_train))
    # Advance the cosine schedule once per epoch.
    lr_schedual.step()
    # Evaluation pass (disabled).
    # loss_eval = 0
    # # model.eval()
    # for batch_id, data in enumerate(test_X):
    #     label = test_Y[batch_id]
    #     data = paddle.to_tensor(data, dtype='float32')
    #     label = paddle.to_tensor(label, dtype='float32')
    #     label = label.reshape([label.shape[0],1])
    #     y = model(data)
    #     mae = mae_loss(y,label)
    #     loss = loss_fn(y, label)
    #     loss_eval += loss.item()
    #     mses_eval.append(loss.item())
    #     maes_eval.append(mae.item())
    # print("[EVAL] ========epoch : {}, loss: {:.4f}==========\n".format(epoch+1, loss_eval))
# Persist model weights and LR-scheduler state.
# NOTE(review): assumes the 'wind/' directory already exists — confirm.
paddle.save(model.state_dict(), 'wind/cnn_lstm1_ep{}_lr{}.params'.format(EPOCH,base_lr))
paddle.save(lr_schedual.state_dict(), 'wind/cnn_lstm1_ep{}_lr{}.pdopts'.format(EPOCH,base_lr))
# Reload the model from the checkpoint just saved.
model = CNN_LSTM(window_size, fea_num)
model_dict = paddle.load('wind/cnn_lstm1_ep{}_lr{}.params'.format(EPOCH,base_lr))
model.load_dict(model_dict)
# Convert evaluation inputs to tensors for inference.
test_X = paddle.to_tensor(test_X, dtype='float32')
# NOTE(review): the source was truncated at "series_test =" and followed by
# stray scraped page text ("评论0"), which made the file a SyntaxError.
# Completed by analogy with the test_X line above — confirm against the
# original notebook.
series_test = paddle.to_tensor(series_test, dtype='float32')