
前言: Hello大家好,我是瑟林洞仙人!这里是【财富密码】系列第1期:《LSTM大战上证指数-PyTorch版》。在这里,我将用我的“意识流”代码,手把手教会大家如何在资本市场里被割韭菜。何为“意识流”代码,简单来说,就是忘掉内存管理、忘掉代码规范、忘掉性能、忘掉测试,一路奔向需求的最终目标,逢山开路,遇水架桥。别问为啥,问就是:快!在线等!挺急的!

需求内容
一句话: 使用LSTM(长短期记忆网络)模型对单变量时间序列进行多期预测。

使用方法
将数据文件(2020.csv)和代码(lstm_series.py)放入同一位置下,在配置好的环境下使用IDE或CMD运行皆可。

链接:https://pan.baidu.com/s/1cjwRLoj6eJ0b0w0weTvGLQ 提取码:l903
通过观察源码的参数调节区域:
torch.manual_seed(10086) # 为CPU设置随机种子
model = LSTM(input_size=1, num_units=16, output_size=1, num_layers=1)
max_epochs = 10000
train_window = 10
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
future_steps = 30 # 向后预测时间长度
测试用例中为完全复现神经网络的训练结果,已设定了一个随机种子(该样例及种子需要训练1327次达到误差目标)。在该LSTM样例模型中,输入输出特征维度数均为1。隐藏层数为1和单层节点数为16,除输入输出特征维度数外,其余参数均可调节。

至于训练的“滑动窗口”大小、学习率等参数,乃至目标误差等等我都是随便选的,千万别真拿去割韭菜,就这个模型,调参调得再好也不行,以后我会专门讲原因。


环境配置
- Python3.6
Windows64位参考:https://www.python.org/ftp/python/3.6.8/python-3.6.8-amd64.exe
- numpy 1.19.0
这个随便,pip install numpy
即可
- pandas 0.25.3
pip install pandas==0.25.3
- torch 1.4.0+cpu & torchvision 0.5.0
玩具代码CPU跑跑算了(主要是穷)。pip
下载可能很慢,建议找镜像。
win64位备份:https://pan.baidu.com/s/1yrGIhXHopkftBdR5qsTkAg 提取码:d3sa
- matplotlib 3.2.2
画图,pip install matplotlib
即可
- scikit-learn 0.23.1
用于数据归一化处理,pip install scikit-learn
即可
- openpyxl 3.0.3
用于将预测结果写入Excel,pip install openpyxl
即可
- 其他
其他依赖环境scipy
、Pillow
等等,已在上述pip
过程包含,无需重复安装。

完整代码
import os
import sys
import numpy as np
import pandas as pd
import torch
from torch import nn
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
# Run relative to this script's own directory so that '2020.csv' and the
# output files resolve next to the script regardless of the launch CWD.
# Bug fix: os.chdir() returns None, so the original code appended None to
# sys.path; bind the directory first and use it for both calls.
path = sys.path[0]
os.chdir(path)
sys.path.append(path)
# 定义LSTM神经网络模型
class LSTM(nn.Module):
    """Single-target LSTM regressor: an nn.LSTM stack plus a linear read-out.

    Parameters:
    - input_size: number of input features per time step
    - num_units: hidden units in each LSTM layer
    - output_size: number of output features
    - num_layers: number of stacked LSTM layers
    """
    def __init__(self, input_size=1, num_units=1, output_size=1, num_layers=1):
        super().__init__()
        self.input_size = input_size
        self.num_units = num_units
        self.num_layers = num_layers
        self.output_size = output_size
        self.lstm = nn.LSTM(input_size, num_units, num_layers)
        self.forwardCalculation = nn.Linear(num_units, output_size)
        # Initial (h0, c0) state; the training loop resets this per window.
        # NOTE(review): the batch dimension is sized by output_size here --
        # this works because both are 1 in this script; confirm before reuse.
        state_shape = (self.num_layers, self.output_size, self.num_units)
        self.hidden_cell = (torch.zeros(*state_shape),
                            torch.zeros(*state_shape))

    def forward(self, input_seq):
        """Run the full sequence through the LSTM; return the last-step prediction."""
        steps = len(input_seq)
        shaped = input_seq.view(steps, 1, -1)  # (seq_len, batch=1, features)
        lstm_out, self.hidden_cell = self.lstm(shaped, self.hidden_cell)
        preds = self.forwardCalculation(lstm_out.view(steps, -1))
        return preds[-1]
# 用于从表格文件导入数据
def excel_import(input_file, col):
    """Load the given columns from an Excel (.xls/.xlsx) or CSV file.

    Parameters:
    - input_file: file name/path, e.g. 'data.csv'
    - col: list of column names to extract

    Returns:
    - numpy.ndarray with one column per requested name

    Raises:
    - ValueError: when the file name is neither Excel nor CSV
    """
    if '.xls' in input_file:
        # The `encoding` kwarg was removed from read_excel in pandas 1.0;
        # Excel files carry their own encoding, so it is not needed.
        in_df = pd.read_excel(input_file, sheet_name=0, header=0)
    elif '.csv' in input_file:
        in_df = pd.read_csv(input_file, sep=',', header=0, encoding='gbk')
    else:
        # Raise instead of print()+exit() so callers can handle the failure.
        raise ValueError('请确认文件格式或是否存在!')
    # DataFrame.as_matrix was removed in pandas 1.0; df[col].to_numpy()
    # is the supported replacement and yields the same ndarray.
    data_import = in_df[col].to_numpy()
    return data_import
# 构造训练数据集
def create_inout_sequences(input_data, tw):
    """Build (window, next-value) training pairs with a sliding window.

    Parameters:
    - input_data: the training series (indexable sequence)
    - tw: width of the training window

    Returns:
    - list of (seq, label) tuples where seq holds tw consecutive values
      and label is the single value immediately following them
    """
    return [
        (input_data[start:start + tw], input_data[start + tw:start + tw + 1])
        for start in range(len(input_data) - tw)
    ]
# 程序运行入口
if __name__ == '__main__':
    # --------------------------- Data import ----------------------------- #
    input_file = "2020.csv"  # source table file name
    col = ['price']  # column(s) to read from the table
    # float16 keeps memory tiny but costs precision -- fine for a toy example
    dataset = excel_import(input_file, col).astype('float16')
    # ----------------------------------------------------------------- #
    data_len = len(dataset)
    t = range(data_len)
    # Scale observations into [-1, 1] to suit the tanh-based LSTM gates.
    scaler = MinMaxScaler(feature_range=(-1, 1))
    dataset_lst = scaler.fit_transform(dataset .reshape(-1, 1))
    # Quick visual check of the raw series (disabled)
    """
    plt.figure()
    plt.plot(t, dataset_lst, label="y = "+col[-1])
    plt.legend(loc='upper right')
    plt.show()
    """
    # ---------------------------- Tuning area ---------------------------- #
    torch.manual_seed(10086)  # fix the CPU random seed for reproducibility
    model = LSTM(input_size=1, num_units=16, output_size=1, num_layers=1)
    max_epochs = 10000
    train_window = 10  # sliding-window length fed to the LSTM
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    future_steps = 30  # how many steps ahead to forecast
    print('LSTM model:', model)
    # ----------------------------------------------------------------- #
    dataset_n = torch.FloatTensor(dataset_lst).view(-1)  # flatten to a 1-D tensor
    train_inout_seq = create_inout_sequences(dataset_n, train_window)
    loss_function = nn.MSELoss()
    t_test = range(train_window, data_len)
    ts, ys = [], []  # epoch indices and loss values for the loss curve
    for epoch in range(max_epochs):
        yhat = []  # this epoch's in-sample predictions (used by the disabled plots)
        for seq, labels in train_inout_seq:
            optimizer.zero_grad()
            # Reset (h0, c0) so each training window starts from a clean state.
            model.hidden_cell=(torch.zeros(model.num_layers,model.output_size,model.num_units),
                    torch.zeros(model.num_layers,model.output_size,model.num_units))
            output = model(seq)
            loss = loss_function(output, labels)
            loss.backward()
            optimizer.step()
            yhat.extend(output.view(-1, 1).data.numpy())
        # NOTE(review): `loss` here is the LAST window's loss, not an epoch
        # average; it is also undefined if train_inout_seq is empty -- confirm
        # the data file always has more than train_window rows.
        if loss.item() < 1e-6:
            print('Epoch [{}/{}], Loss: {:.6f}'.format(epoch+1, max_epochs, loss.item()))
            print("The loss value is reached")
            break
        else:
            ts.append(epoch+1)
            ys.append(loss.item())
            print('Epoch [{}/{}], Loss: {:.6f}'.format(epoch+1, max_epochs, loss.item()))
        # Live training animation (disabled)
        """
        # GIF-LOSS
        plt.ion()
        plt.figure(1)
        plt.clf()
        plt.plot(ts, ys,'-r')
        plt.draw()
        plt.pause(0.01)
        # GIF-MODEL
        plt.figure(2)
        plt.clf()
        plt.plot(t, dataset_lst, 'b', label='y')
        plt.plot(t_test, yhat, 'm--', label='yhat')
        plt.legend(loc='upper left')
        plt.pause(0.01)
        """
    torch.save(model.state_dict(), 'model.pkl')  # persist the trained weights
    # model.load_state_dict(torch.load('model.pkl'))  # reload saved weights
    # ---------------------------- Forecast area -------------------------- #
    predict_base = dataset_n[-train_window:].tolist()
    model = model.eval()
    # Roll the window forward: each prediction is appended and fed back in
    # as input for the next step (recursive multi-step forecasting).
    for i in range(future_steps):
        seq = torch.FloatTensor(predict_base[-train_window:])
        with torch.no_grad():
            predict_base.append(model(seq).item())
    t_future = range(data_len, data_len+future_steps)
    future = predict_base[-future_steps:]
    # Map the scaled forecasts back to the original price units.
    actual_predictions = scaler.inverse_transform(np.array(future).reshape(-1, 1))
    # ----------------------------------------------------------------- #
    plt.figure(3)
    plt.plot(t, dataset_lst, 'b', label='y')
    plt.plot(t_future, future, 'm--', label='predictions')
    plt.savefig('figure_results.png')
    plt.figure(4)
    plt.plot(ts, ys,'-r')
    plt.savefig('figure_loss.png')
    future_results = pd.DataFrame(actual_predictions)
    # NOTE(review): ExcelWriter.save() and to_excel's `encoding` kwarg were
    # removed in modern pandas; this matches the pinned pandas 0.25.3 only.
    excelwriter = pd.ExcelWriter('prediction.xlsx')
    future_results.to_excel(excelwriter,'Sheet1',encoding='gbk')
    excelwriter.save()