diff --git a/BTC_LSTM.html b/BTC_LSTM.html new file mode 100644 index 0000000..6da337b --- /dev/null +++ b/BTC_LSTM.html @@ -0,0 +1,15136 @@ + + +
from __future__ import absolute_import, division, print_function, unicode_literals

# The "%tensorflow_version" line magic only exists in Colab.
# BUG FIX: the raw "%" magic syntax is a SyntaxError outside IPython, which
# makes the whole file unimportable as plain Python.  Calling the magic
# through get_ipython() keeps the original Colab behaviour while the
# except clause turns it into a silent no-op everywhere else.
try:
    get_ipython().run_line_magic("tensorflow_version", "2.x")
except Exception:
    pass

import tensorflow as tf

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

# Global plot defaults: 8x6-inch figures, no grid lines.
mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False
import pandas as pd
import numpy as np

# ---- Load the raw 1-minute candles (sheet 0) and drop the first 960 rows ----
# Raw positional columns: 0=time, 1=open, 2=high, 3=low, 4=close.
df = pd.read_excel("data_summary.xlsx", sheet_name=0)
df = pd.DataFrame(np.array(df[960:]))

frequency = 1200  # number of 1-minute rows merged into one candle

# Only complete windows are kept, matching the original
# "(i+1) % frequency == 0" loops; trailing partial rows are dropped.
n_bars = len(df) // frequency
used = n_bars * frequency


def _windows(col):
    """Return raw column `col` reshaped into (n_bars, frequency) windows."""
    return np.asarray(df[col])[:used].reshape(n_bars, frequency)


# Aggregate every window to one OHLC bar: open = first row, close and
# timestamp = last row, high = window max, low = window min.  This replaces
# five quadratic np.vstack-in-a-loop passes with vectorized reshapes.
TIME = _windows(0)[:, -1]
OPEN = _windows(1)[:, 0]
HIGH = _windows(2).max(axis=1)
LOW = _windows(3).min(axis=1)
CLOSE = _windows(4)[:, -1]

data = pd.DataFrame(np.column_stack((TIME, CLOSE, HIGH, LOW, OPEN)),
                    columns=["TIME", "C", "H", "L", "O"])
df = data

# First difference of the close, left-padded with 0 to keep the length.
rt = np.hstack((np.array([0]), np.diff(np.array(df['C']))))
df.insert(5, 'Diff_C', rt)

# Centered 5-bar moving average of the close differences.
# BUG FIX: `center` is a parameter of rolling(), not of mean();
# rolling(...).mean(center=True) is rejected by modern pandas.
# NOTE(review): assumes a centered window was intended -- confirm against
# the pandas version the notebook originally ran under.
moving_avg = df['Diff_C'].rolling(window=5, center=True).mean()
# Drop the NaN edges of the centered window and left-pad four zeros so
# MV has the same length as df (warm-up region zeroed).
MV = np.hstack((np.zeros(4), moving_avg[2:len(rt) - 2]))
df.insert(6, 'MV_C', MV)

# The single model feature: the smoothed close-difference series,
# indexed by bar timestamp.
features_considered = ['MV_C']
features = df[features_considered]
features.index = df['TIME']
dataset = features.values

TRAIN_SPLIT = (len(df) * 7) // 10  # first 70% of bars form the training set
tf.random.set_seed(13)             # reproducible shuffling / initialisation
EVALUATION_INTERVAL = 2400         # training batches per epoch (was 200)
EPOCHS = 10                        # full passes over the training data
BATCH_SIZE = 256                   # mini-batch size
BUFFER_SIZE = 10000                # shuffle buffer capacity
def multivariate_data(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False):
    """Slice (window, label) training pairs out of a time series.

    Each sample is the strided window dataset[i-history_size : i : step];
    its label is target[i+target_size] when single_step is true, otherwise
    the slice target[i : i+target_size].  Windows start at start_index and
    stop just before end_index (defaulting to len(dataset) - target_size).

    Returns a pair of numpy arrays (samples, labels).
    """
    first = start_index + history_size
    last = len(dataset) - target_size if end_index is None else end_index

    windows = []
    labels = []
    for end in range(first, last):
        # Strided window of the past `history_size` rows ending at `end`.
        windows.append(dataset[range(end - history_size, end, step)])
        if single_step:
            labels.append(target[end + target_size])
        else:
            labels.append(target[end:end + target_size])

    return np.array(windows), np.array(labels)
+
past_history = 35   # input window: the past 35 bars (tutorial default was 720)
future_target = 0   # predict the current bar (tutorial used 72, i.e. 12 steps ahead at STEP=6)
STEP = 1            # stride when sampling the window (tutorial used 6 to cut training time)

#future_target = 1
#STEP = 1


# Column 0 of `dataset` (MV_C) serves as both input feature and target.
x_train_single, y_train_single = multivariate_data(dataset, dataset[:, 0], 0,
                                                   TRAIN_SPLIT, past_history,
                                                   future_target, STEP,
                                                   single_step=True)
x_val_single, y_val_single = multivariate_data(dataset, dataset[:, 0],
                                               TRAIN_SPLIT, None, past_history,
                                               future_target, STEP,
                                               single_step=True)

# Training pipeline: cache, shuffle, batch, repeat indefinitely
# (fit() is bounded by steps_per_epoch below).
train_data_single = tf.data.Dataset.from_tensor_slices((x_train_single, y_train_single))
train_data_single = train_data_single.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat() # training data

val_data_single = tf.data.Dataset.from_tensor_slices((x_val_single, y_val_single))
val_data_single = val_data_single.batch(BATCH_SIZE).repeat() # validation data


# Single-step model: one LSTM layer (6 units) feeding a 1-unit dense output.
single_step_model = tf.keras.models.Sequential()
single_step_model.add(tf.keras.layers.LSTM(6,
                                           input_shape=x_train_single.shape[-2:])) # 6 LSTM units
single_step_model.add(tf.keras.layers.Dense(1)) # output layer: one predicted value per window

single_step_model.compile(optimizer=tf.keras.optimizers.RMSprop(), loss='mae') # RMSprop + mean-absolute-error loss

single_step_history = single_step_model.fit(train_data_single, epochs=EPOCHS, # EPOCHS = 10
                                            steps_per_epoch=EVALUATION_INTERVAL, # batches per epoch
                                            validation_data=val_data_single,
                                            validation_steps=50) # validation batches averaged per epoch
+
def plot_train_history(history, title):
    """Plot training vs. validation loss curves from a Keras History."""
    train_loss = history.history['loss']
    valid_loss = history.history['val_loss']
    epoch_axis = range(len(train_loss))

    plt.figure()
    plt.plot(epoch_axis, train_loss, 'b', label='Training loss')
    plt.plot(epoch_axis, valid_loss, 'r', label='Validation loss')
    plt.title(title)
    plt.legend()
    plt.show()
+
plot_train_history(single_step_history,
                   'Single Step Training and validation loss')

# Persist the trained model and reload it; `model` is the handle used by
# every backtest loop below.
single_step_model.save('BTC_1200.h5')
model = tf.keras.models.load_model('BTC_1200.h5')
df['TIME']#  (notebook display of the timestamp column; no effect in a script)
+
# ---- Backtest state (returns-only run; no cash accounting here) ----
R1=[0]   # per-trade returns of short trades (seed row dropped later)
R2=[0]   # per-trade returns of long trades


past_history = 35       # model input window length (must match training)
TC=100000               # total capital: 100,000

C=1                     # one contract per trade


leverage=1

cost_rate=0.0005        # proportional transaction cost per side


price_per_point=1       # value of one point (original note: 300 CNY/point; set to 1 here)

last_day=len(df['TIME'])


m=0   # trades taken
n=0   # profitable short trades
p=0   # profitable long trades
r=0   # cumulative short return (signed)
R=0   # cumulative return of longs and shorts combined
s=0   # long trades taken

I=0        # cumulative open-to-close market return
B=0        # cumulative close-to-close benchmark return
RR=[0]     # running strategy return curve
II=[0]
BB=[0]     # running benchmark curve
tt=0       # time-axis accumulator
t=0
d=0        # long entry threshold offset
d2=0       # short entry threshold offset

rr=[0]                       # per-bar long-trade returns
maximum_markdown_final=[0]   # per-bar intra-trade drawdowns
CR=1                         # compounded return factor
CRR=[0]
TCC=[0]                      # running total-capital curve

day=500   # number of most-recent bars to backtest
k=day
#for k in range(3000):
# Walk backwards through the last `day` bars; k counts bars from the end.
# NOTE(review): r1/r2 are only assigned inside their trend branch, so they
# persist across iterations (stale trades) and are undefined on the first
# iteration if that branch is not taken -- confirm intended behaviour.
while k<=day and k>0:

    # Model input: the past_history MV_C values ending just before bar -k.
    if k>0:
        data=np.array(features[-past_history-k:-1-k])
        data2=np.array(features.iloc[-1-k])
        data=np.vstack((data,data2))


    t+=1

    tt=np.vstack((tt,t))


    #print(data2)

    #j=[0,0,0,0,0]
    # Re-stack the window row by row into shape (past_history, 1), then wrap
    # it in a list so convert_to_tensor yields a (1, past_history, 1) batch.
    j=[0]

    for i in data:
        i=np.array([i])
        j=np.vstack((j,i))
        #print(j.shape)

    j=j[1:past_history+1]
    #print(j.shape)
    j=[j]


    #data.reshape((240,1))
    #print(data.shape)
    data=tf.convert_to_tensor(j)
    #print(data)


    # One-step model prediction for the current bar's MV_C.
    x=np.array(model.predict(data))

    #trend=x[0][0]-MV[-1-k]-MV[-2-k]

    #trend_real=MV[-k]-MV[-1-k]-MV[-2-k]

    #P0=np.array(df['C'])[-7-k]
    #P1=np.array(df['C'])[-6-k]
    #P2=np.array(df['C'])[-5-k]
    #P5=np.array(df['C'])[-2-k]
    #P6=np.array(df['C'])[-1-k]

    trend=x[0][0]-MV[-1-k] # sign of the predicted move

    trend_real=MV[-k]-MV[-1-k] # sign of the realised move


    # Recent closes used to build the entry line.
    P0=np.array(df['C'])[-6-k]
    P1=np.array(df['C'])[-5-k]
    P5=np.array(df['C'])[-1-k]


    #line=P6+P5+P2-P1-P0

    line=P5+P1-P0

    # Current bar's low and high (H here shadows nothing used below).
    L=np.array(df['L'])[-k]
    H=np.array(df['H'])[-k]

    # --- Short branch: model predicts a down move ---
    if trend<0:
        # Enter at the open if both yesterday's close and today's open sit
        # above the entry line by at least d2; else enter at the line if it
        # is crossed intrabar; else no trade.
        if P5-d2>line and (np.array(df['O'])[-k])-d2>line:
            r1=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
            maximum_markdown=((np.array(df['L'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])


        else:
            if L<line<H:
                r1=((np.array(df['C'])[-k])-(line))/(line) # r1 must be < 0 for the short to win
                maximum_markdown=((np.array(df['L'])[-k])-(line))/(line)


            else:
                r1=0
                maximum_markdown=0


        if r1!=0:
            m+=1
            R1=np.vstack((R1,r1))


            if r1<0:
                n+=1
                r=r+r1
                R=R-r1

        else:
            r1=0
            r=r
            R=R
            maximum_markdown=0


    # --- Long branch: model predicts an up move ---
    if trend>0:


        if P5<line-d and (np.array(df['O'])[-k])<line-d:
            r2=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
            maximum_markdown=((np.array(df['L'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])


        else:
            if L<line<H:
                r2=((np.array(df['C'])[-k])-(line))/(line) # r2 must be > 0 for the long to win
                maximum_markdown=((np.array(df['L'])[-k])-(line))/(line)

            else:
                r2=0
                maximum_markdown=0
                profit2=0


        if r2!=0:
            m+=1
            s+=1

            R2=np.vstack((R2,r2))

            if r2>0:
                p+=1
                R=R+r2
        else:
            R=R
            r2=0
            maximum_markdown=0
            profit2=0


    # Market benchmarks: open-to-close (I) and close-to-close (B) returns.
    i=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
    I=I+i

    b=((np.array(df['C'])[-k])-(np.array(df['C'])[-k-1]))/(np.array(df['C'])[-k-1])
    B=B+b


    BB=np.vstack((BB,B))
    RR=np.vstack((RR,R))
    II=np.vstack((II,I))

    maximum_markdown_final=np.vstack((maximum_markdown_final,maximum_markdown))
    rr=np.vstack((rr,r2))
+ if r1!=0:
+ CR=CR*(1+r1)
+ else:
+ CR=CR
+ if r2!=0:
+ CR=CR*(1+r1)
+ else:
+ CR=CR
+
+ CRR=np.vstack((CRR,(CR-1)*10))
+ k=k-1
+
+
+
+
+
+
# Drop the [0] seed rows used to start each vstack accumulator.
RR=RR[1:]
II=II[1:]
tt=tt[1:]

R1=R1[1:]
R2=R2[1:]


# Strategy cumulative return vs. the close-to-close benchmark.
plt.plot(tt,RR)
#plt.plot(tt,II)
BB=BB[1:]
plt.plot(tt,BB)



maximum_markdown_final=maximum_markdown_final[1:]
#plt.plot(tt,maximum_markdown_final)
final_maximum=min(maximum_markdown_final)   # worst intra-trade drawdown observed
rr=rr[1:]
maximum_loss=min(rr)                        # worst single long-trade return


print("trading dates=",m)
print("success to sell=",n)
print("success to buy=",p)
print('return from short=',-r)
print('total return of long and short=',R)
print('Time of long',s)
print('maximum markdown=',final_maximum)
print('maximum loss=',maximum_loss)

print('compound interest=',CR-1)

import numpy as np, scipy.stats as st
# 90% t-confidence intervals for the mean per-trade return of shorts/longs.
CI_R1=st.t.interval(0.9, len(R1)-1, loc=np.mean(R1), scale=st.sem(R1)) #90% Confidence interval of short
CI_R2=st.t.interval(0.9, len(R2)-1, loc=np.mean(R2), scale=st.sem(R2)) #90% Confidence interval of long

print("confidence interval of short=",CI_R1)
print("confidence interval of long=",CI_R2)
#
+
+TC=100000 #总资金10万
+
+C=1 #每次交易1手
+
+
+leverage=1
+
+cost_rate=0.0005
+
+
+price_per_point=1 #1点300元
+
+last_day=len(df['TIME'])
+
+
+m=0
+n=0
+p=0
+r=0
+R=0
+s=0
+
+I=0
+B=100000
+RR=[0]
+II=[0]
+BB=[0]
+tt=0
+t=0
+d=0
+d2=0
+
+rr=[0]
+maximum_markdown_final=[0]
+CR=1
+CRR=[0]
+TCC=[0]
+
+day=500
+k=day
+#for k in range(3000):
+while k<=day and k>0:
+
+ if k>0:
+ data=np.array(features[-past_history-k:-1-k])
+ data2=np.array(features.iloc[-1-k])
+ data=np.vstack((data,data2))
+
+
+ t+=1
+
+ tt=np.vstack((tt,t))
+
+
+ #print(data2)
+
+ #j=[0,0,0,0,0]
+ j=[0]
+
+ for i in data:
+ i=np.array([i])
+ j=np.vstack((j,i))
+ #print(j.shape)
+
+ j=j[1:past_history+1]
+ #print(j.shape)
+ j=[j]
+
+
+
+
+
+
+ #data.reshape((240,1))
+ #print(data.shape)
+ data=tf.convert_to_tensor(j)
+ #print(data)
+
+
+
+ x=np.array(model.predict(data))
+
+ #trend=x[0][0]-MV[-1-k]-MV[-2-k] #预测正负
+
+ #trend_real=MV[-k]-MV[-1-k]-MV[-2-k] #实际正负
+
+ #P0=np.array(df['C'])[-7-k]
+ #P1=np.array(df['C'])[-6-k]
+ #P2=np.array(df['C'])[-5-k]
+ #P5=np.array(df['C'])[-2-k]
+ #P6=np.array(df['C'])[-1-k]
+
+ trend=x[0][0]-MV[-1-k] #预测正负
+
+ trend_real=MV[-k]-MV[-1-k] #实际正负
+
+
+ P0=np.array(df['C'])[-6-k]
+ P1=np.array(df['C'])[-5-k]
+ P5=np.array(df['C'])[-1-k]
+
+
+
+
+
+ #line=P6+P5+P2-P1-P0
+
+ line=P5+P1-P0
+
+ L=np.array(df['L'])[-k]
+ H=np.array(df['H'])[-k]
+
+ if trend<0:
+ if P5-d2>line and (np.array(df['O'])[-k])-d2>line:
+ r1=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ maximum_markdown=((np.array(df['L'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+
+ profit1=C*(((np.array(df['O'])[-k])*price_per_point-np.array(df['C'])[-k])*price_per_point-(np.array(df['C'])[-k])*price_per_point*cost_rate-(np.array(df['O'])[-k])*price_per_point*cost_rate)
+
+ print('date:',df['TIME'][last_day-k],'buy price=',(np.array(df['O'])[-k]),"sell price=",(np.array(df['C'])[-k]),'short profit=',profit1)
+
+
+ else:
+ if L<line<H:
+ r1=((np.array(df['C'])[-k])-(line))/(line) #r1必须小于0
+ maximum_markdown=((np.array(df['L'])[-k])-(line))/(line)
+
+ profit1=C*(((line)*price_per_point-np.array(df['C'])[-k])*price_per_point-(np.array(df['C'])[-k])*price_per_point*cost_rate-(line)*price_per_point*cost_rate)
+ print('date:',df['TIME'][last_day-k],'buy price=',(line),"sell price=",(np.array(df['C'])[-k]),'short profit=',profit1)
+
+ else:
+ r1=0
+ maximum_markdown=0
+ profit1=0
+
+ print('date:',df['TIME'][last_day-k],'No transaction','profit=',0)
+
+
+ if r1!=0:
+ m+=1
+
+
+ if r1<0:
+ n+=1
+ r=r+r1
+ R=R-r1
+
+ else:
+ r1=0
+ r=r
+ R=R
+ maximum_markdown=0
+ profit1=0
+
+
+ if trend>0:
+
+
+ if P5<line-d and (np.array(df['O'])[-k])<line-d:
+ r2=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ maximum_markdown=((np.array(df['L'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+
+ profit2=C*((np.array(df['C'])[-k])*price_per_point-(np.array(df['O'])[-k])*price_per_point-(np.array(df['C'])[-k])*price_per_point*cost_rate-(np.array(df['O'])[-k])*price_per_point*cost_rate)
+
+ print('date:',df['TIME'][last_day-k],'buy price=',(np.array(df['O'])[-k]),"sell price=",(np.array(df['C'])[-k]),'long profit=',profit2)
+
+
+ else:
+ if L<line<H:
+ r2=((np.array(df['C'])[-k])-(line))/(line) #r2必须大于0
+ maximum_markdown=((np.array(df['L'])[-k])-(line))/(line)
+
+ profit2=(np.array(df['C'])[-k])*price_per_point-(line)*price_per_point-(np.array(df['C'])[-k])*price_per_point*cost_rate-(line)*price_per_point*cost_rate
+
+ print('date:',df['TIME'][last_day-k],'buy price=',(line),"sell price=",(np.array(df['C'])[-k]),'long profit=',profit2)
+
+ else:
+ r2=0
+ maximum_markdown=0
+ profit2=0
+
+ print('date:',df['TIME'][last_day-k],'No transaction','profit=',0)
+
+
+ if r2!=0:
+ m+=1
+ s+=1
+
+ if r2>0:
+ p+=1
+ R=R+r2
+ else:
+ R=R
+ r2=0
+ maximum_markdown=0
+ profit2=0
+
+
+ TC=TC+profit1+profit2
+ TCC=np.vstack((TCC,TC))
+
+ i=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ I=I+i
+
+ b=((np.array(df['C'])[-k])-(np.array(df['C'])[-k-1]))
+ B=B+b
+
+
+ BB=np.vstack((BB,B))
+ RR=np.vstack((RR,R))
+ II=np.vstack((II,I))
+
+ maximum_markdown_final=np.vstack((maximum_markdown_final,maximum_markdown))
+ rr=np.vstack((rr,r2))
+
+ if r1!=0:
+ CR=CR*(1+r1)
+ else:
+ CR=CR
+ if r2!=0:
+ CR=CR*(1+r1)
+ else:
+ CR=CR
+
+ CRR=np.vstack((CRR,(CR-1)*10))
+ k=k-1
+
+
+
+
+
+
+RR=RR[1:]
+II=II[1:]
+tt=tt[1:]
+
+TCC=TCC[1:]
+
+
+
+#plt.plot(tt,RR)
+#plt.plot(tt,II)
+plt.plot(tt,TCC)
+BB=BB[1:]
+plt.plot(tt,BB)
+
+
+
+maximum_markdown_final=maximum_markdown_final[1:]
+#plt.plot(tt,maximum_markdown_final)
+final_maximum=min(maximum_markdown_final)
+rr=rr[1:]
+maximum_loss=min(rr)
+
+
+
+
+print("Final total capital=",TC)
+past_history = 35
+
+
+
+last_day=len(df['TIME'])
+
+
+
+
+day=500
+k=day
+#for k in range(3000):
+
+h=0
+y=0
+z=0
+u=0
+
+
+
+while k<=day and k>0:
+
+ if k>0:
+ data=np.array(features[-past_history-k:-1-k])
+ data2=np.array(features.iloc[-1-k])
+ data=np.vstack((data,data2))
+
+
+ t+=1
+
+ tt=np.vstack((tt,t))
+
+
+
+ j=[0]
+
+ for i in data:
+ i=np.array([i])
+ j=np.vstack((j,i))
+ #print(j.shape)
+
+ j=j[1:past_history+1]
+ #print(j.shape)
+ j=[j]
+
+
+
+
+
+
+ #data.reshape((240,1))
+ #print(data.shape)
+ data=tf.convert_to_tensor(j)
+ #print(data)
+
+
+
+ x=np.array(model.predict(data))
+
+
+
+ trend=x[0][0]-MV[-1-k] #预测正负
+
+ trend_real=MV[-k]-MV[-1-k] #实际正负
+
+
+ if trend>0 and trend_real>0:
+ h+=1
+ y+=1
+
+ if trend>0 and trend_real<0:
+ h=h
+ y+=1
+ if trend<0 and trend_real<0:
+ z+=1
+ u+=1
+ if trend<0 and trend_real>0:
+ z=z
+ u+=1
+
+
+
+
+
+
+ k=k-1
+
+print('accuracy of buy=',h/y)
+
+print('accuracy of sell=',z/u)
+past_history = 35
+
+
+
+last_day=len(df['TIME'])
+
+
+
+
+day=500
+k=day
+#for k in range(3000):
+
+h=0
+y=0
+z=0
+u=0
+
+H=[0]
+Z=[0]
+
+a=0
+
+while k<=day and k>0:
+
+ if k>0:
+ data=np.array(features[-past_history-k:-1-k])
+ data2=np.array(features.iloc[-1-k])
+ data=np.vstack((data,data2))
+
+
+ t+=1
+
+ tt=np.vstack((tt,t))
+
+
+
+ j=[0]
+
+ for i in data:
+ i=np.array([i])
+ j=np.vstack((j,i))
+ #print(j.shape)
+
+ j=j[1:past_history+1]
+ #print(j.shape)
+ j=[j]
+
+
+
+
+
+
+ #data.reshape((240,1))
+ #print(data.shape)
+ data=tf.convert_to_tensor(j)
+ #print(data)
+
+
+
+ x=np.array(model.predict(data))
+
+
+
+ trend=x[0][0]-MV[-1-k] #预测正负
+
+ trend_real=MV[-k]-MV[-1-k] #实际正负
+
+
+ if trend>0 and trend_real>0:
+ h+=1
+ y+=1
+
+ if trend>0 and trend_real<0:
+ h=h
+ y+=1
+ if trend<0 and trend_real<0:
+ z+=1
+ u+=1
+ if trend<0 and trend_real>0:
+ z=z
+ u+=1
+
+ a+=1
+
+ if a%15==0:
+ #print("counter=",a)
+
+ H=np.vstack((H,h/y))
+ h=0
+ y=0
+ Z=np.vstack((Z,z/u))
+ z=0
+ u=0
+
+
+ k=k-1
+
# Drop the [0] seed rows used to start the vstack accumulators.
H=H[1:]
Z=Z[1:]

print('success rate of buying in every 15 units',H)
print('success rate of selling in every 15 units',Z)

from scipy import stats
import numpy as np, scipy.stats as st

# BUG FIX: outside a notebook these expressions were computed and then
# discarded (only the last cell expression displays in Jupyter); print
# them so the statistics are actually reported.
print('t test of buying success rate:', stats.ttest_1samp(H, 0.5))
print('t test of selling success rate:', stats.ttest_1samp(Z, 0.5))
print('95% CI of buying success rate:',
      st.t.interval(0.95, len(H)-1, loc=np.mean(H), scale=st.sem(H)))
print('95% CI of selling success rate:',
      st.t.interval(0.95, len(Z)-1, loc=np.mean(Z), scale=st.sem(Z)))
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+try:
+ # %tensorflow_version only exists in Colab.
+ %tensorflow_version 2.x
+except Exception:
+ pass
+import tensorflow as tf
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import pandas as pd
+
+mpl.rcParams['figure.figsize'] = (8, 6) #初始化设置,应该是展示图片大小
+
+mpl.rcParams['axes.grid'] = False
+import pandas as pd
+import numpy as np
+df= pd.read_excel("data_summary.xlsx", sheet_name=1)
+df=np.array(df)
+df=pd.DataFrame(df)
+close_price=df[4]
+
+
+CLOSE=[0]
+
+frequency=1200 #1440min合成1条k线
+
+for i in range(len(close_price)):
+ if (i+1) %frequency==0:
+ CLOSE=np.vstack((CLOSE,close_price[i]))
+
+CLOSE=CLOSE[1:]
+
+
+#open_price=df['open']
+
+open_price=df[1]
+
+
+
+OPEN=[0]
+
+for i in range(len(close_price)):
+ if (i+1) %frequency==0:
+ OPEN=np.vstack((OPEN,open_price[i-(frequency-1)]))
+
+OPEN=OPEN[1:]
+
+
+#high_price=df['high']
+high_price=df[2]
+
+
+HIGH=[0]
+
+for i in range(len(high_price)):
+ if (i+1) %frequency==0:
+ HIGH=np.vstack((HIGH,max(high_price[i-(frequency-1):i+1])))
+
+HIGH=HIGH[1:]
+
+#low_price=df['low']
+low_price=df[3]
+
+LOW=[0]
+
+for i in range(len(low_price)):
+ if (i+1) %frequency==0:
+ LOW=np.vstack((LOW,min(low_price[i-(frequency-1):i+1])))
+
+LOW=LOW[1:]
+
+#time=df['time']
+time=df[0]
+
+TIME=[0]
+
+for i in range(len(time)):
+ if (i+1) %frequency==0:
+ TIME=np.vstack((TIME,time[i]))
+
+TIME=TIME[1:]
+data=np.hstack((TIME,CLOSE,HIGH,LOW,OPEN))
+data=pd.DataFrame(data)
+data.columns = ["TIME", "C","H","L","O"]
+df=data
+zero=np.array([0])
+rt=np.diff(np.array(df['C']))
+rt=np.hstack((zero,rt))
+df.insert(5, 'Diff_C', rt)
+
+moving_avg = df['Diff_C'].rolling(window=5).mean(center=True)
+MV=moving_avg[2:len(rt)-2]
+zeros=np.zeros((4))
+MV=np.hstack((zeros,MV))
+df.insert(6, 'MV_C', MV)
+df[0:10]
+features_considered=['MV_C']
+
+features = df[features_considered]
+features.index = df['TIME'] #在前面加一列时间
+
+dataset = features.values
+TRAIN_SPLIT = (len(df)*7)//10 #first 70% of the rows in dataset will be the training dataset
+tf.random.set_seed(13) # 保持random selection每次都一样
+EVALUATION_INTERVAL = 2400 #原来是200
+EPOCHS = 10 #1个epoch等于使用训练集中的全部样本训练一次,通俗的讲epoch的值就是整个数据集被轮几次。
+BATCH_SIZE = 256 #训练的小批次, iteration=TRAIN_SPLIT/256
+BUFFER_SIZE = 10000 #缓存容量
def multivariate_data(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False):
    """Build sliding-window samples and their prediction targets.

    For every index i in [start_index + history_size, end_index) the sample
    is dataset[i - history_size : i : step]; the matching label is the
    single value target[i + target_size] (single_step) or the slice
    target[i : i + target_size].  When end_index is None it defaults to
    len(dataset) - target_size.  Both results come back as numpy arrays.
    """
    lo = start_index + history_size
    hi = end_index if end_index is not None else len(dataset) - target_size

    samples = [dataset[range(i - history_size, i, step)] for i in range(lo, hi)]
    if single_step:
        targets = [target[i + target_size] for i in range(lo, hi)]
    else:
        targets = [target[i:i + target_size] for i in range(lo, hi)]

    return np.array(samples), np.array(targets)
+
+past_history = 35 # 用过去50天的数据,本来是720
+future_target = 0 #本来是72 预测12天后的数据,或者是下一天到12天后的数据 72/6=12
+STEP = 1 #本来是6,取过去天数的样本时,是间隔着6天取的,这样可以减少训练时间
+
+#future_target = 1 # 预测12天后的数据,或者是下一天到12天后的数据 72/6=12
+#STEP = 1 #取过去天数的样本时,是间隔着6天取的,这样可以减少训练时间
+
+
+x_train_single, y_train_single = multivariate_data(dataset, dataset[:, 0], 0,
+ TRAIN_SPLIT, past_history,
+ future_target, STEP,
+ single_step=True)
+x_val_single, y_val_single = multivariate_data(dataset, dataset[:, 0],
+ TRAIN_SPLIT, None, past_history,
+ future_target, STEP,
+ single_step=True)
+
+train_data_single = tf.data.Dataset.from_tensor_slices((x_train_single, y_train_single))
+train_data_single = train_data_single.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat() #traning data
+
+val_data_single = tf.data.Dataset.from_tensor_slices((x_val_single, y_val_single))
+val_data_single = val_data_single.batch(BATCH_SIZE).repeat() #validation data
+
+
+single_step_model = tf.keras.models.Sequential()
+single_step_model.add(tf.keras.layers.LSTM(6,
+ input_shape=x_train_single.shape[-2:])) #应该是这一层有32 个神经元
+single_step_model.add(tf.keras.layers.Dense(1)) #output layer, 因为预测未来1 期的data, 所以是1个神经元
+
+single_step_model.compile(optimizer=tf.keras.optimizers.RMSprop(), loss='mae') #这里优化算法用RMSprop而不是Adam 或 Momentum
+
+single_step_history = single_step_model.fit(train_data_single, epochs=EPOCHS, #EPOCHS = 10 轮10次
+ steps_per_epoch=EVALUATION_INTERVAL, #每轮test 200次 data
+ validation_data=val_data_single,
+ validation_steps=50) #设置验证多少次数据后取平均值作为此epoch训练后的效果
+
def plot_train_history(history, title):
    """Draw the per-epoch training and validation loss on one figure."""
    losses = history.history
    xs = range(len(losses['loss']))

    plt.figure()
    plt.plot(xs, losses['loss'], 'b', label='Training loss')
    plt.plot(xs, losses['val_loss'], 'r', label='Validation loss')
    plt.title(title)
    plt.legend()
    plt.show()
+
+plot_train_history(single_step_history,
+ 'Single Step Training and validation loss')
+
+single_step_model.save('ETH_1200.h5')
+model = tf.keras.models.load_model('ETH_1200.h5')
+df['TIME']#
+
+R1=[0]
+R2=[0]
+
+
+past_history = 35
+TC=100000 #总资金10万
+
+C=1 #每次交易1手
+
+
+leverage=1
+
+cost_rate=0.0005
+
+
+price_per_point=1 #1点300元
+
+last_day=len(df['TIME'])
+
+
+m=0
+n=0
+p=0
+r=0
+R=0
+s=0
+
+I=0
+B=0
+RR=[0]
+II=[0]
+BB=[0]
+tt=0
+t=0
+d=3
+d2=3
+
+rr=[0]
+maximum_markdown_final=[0]
+CR=1
+CRR=[0]
+TCC=[0]
+
+day=600
+k=day
+#for k in range(3000):
+while k<=day and k>0:
+
+ if k>0:
+ data=np.array(features[-past_history-k:-1-k])
+ data2=np.array(features.iloc[-1-k])
+ data=np.vstack((data,data2))
+
+
+ t+=1
+
+ tt=np.vstack((tt,t))
+
+
+ #print(data2)
+
+ #j=[0,0,0,0,0]
+ j=[0]
+
+ for i in data:
+ i=np.array([i])
+ j=np.vstack((j,i))
+ #print(j.shape)
+
+ j=j[1:past_history+1]
+ #print(j.shape)
+ j=[j]
+
+
+
+
+
+
+ #data.reshape((240,1))
+ #print(data.shape)
+ data=tf.convert_to_tensor(j)
+ #print(data)
+
+
+
+ x=np.array(model.predict(data))
+
+ #trend=x[0][0]-MV[-1-k]-MV[-2-k] #预测正负
+
+ #trend_real=MV[-k]-MV[-1-k]-MV[-2-k] #实际正负
+
+ #P0=np.array(df['C'])[-7-k]
+ #P1=np.array(df['C'])[-6-k]
+ #P2=np.array(df['C'])[-5-k]
+ #P5=np.array(df['C'])[-2-k]
+ #P6=np.array(df['C'])[-1-k]
+
+ trend=x[0][0]-MV[-1-k] #预测正负
+
+ trend_real=MV[-k]-MV[-1-k] #实际正负
+
+
+ P0=np.array(df['C'])[-6-k]
+ P1=np.array(df['C'])[-5-k]
+ P5=np.array(df['C'])[-1-k]
+
+
+
+
+
+ #line=P6+P5+P2-P1-P0
+
+ line=P5+P1-P0
+
+ L=np.array(df['L'])[-k]
+ H=np.array(df['H'])[-k]
+
+ if trend<0:
+ if P5-d2>line and (np.array(df['O'])[-k])-d2>line:
+ r1=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ maximum_markdown=((np.array(df['L'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+
+
+
+
+ else:
+ if L<line<H:
+ r1=((np.array(df['C'])[-k])-(line))/(line) #r1必须小于0
+ maximum_markdown=((np.array(df['L'])[-k])-(line))/(line)
+
+
+ else:
+ r1=0
+ maximum_markdown=0
+
+
+ if r1!=0:
+ m+=1
+ R1=np.vstack((R1,r1))
+
+
+ if r1<0:
+ n+=1
+ r=r+r1
+ R=R-r1
+
+ else:
+ r1=0
+ r=r
+ R=R
+ maximum_markdown=0
+
+
+ if trend>0:
+
+
+ if P5<line-d and (np.array(df['O'])[-k])<line-d:
+ r2=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ maximum_markdown=((np.array(df['L'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+
+
+ else:
+ if L<line<H:
+ r2=((np.array(df['C'])[-k])-(line))/(line) #r2必须大于0
+ maximum_markdown=((np.array(df['L'])[-k])-(line))/(line)
+
+ else:
+ r2=0
+ maximum_markdown=0
+ profit2=0
+
+
+
+ if r2!=0:
+ m+=1
+ s+=1
+
+ R2=np.vstack((R2,r2))
+
+ if r2>0:
+ p+=1
+ R=R+r2
+ else:
+ R=R
+ r2=0
+ maximum_markdown=0
+ profit2=0
+
+
+
+
+ i=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ I=I+i
+
+ b=((np.array(df['C'])[-k])-(np.array(df['C'])[-k-1]))/(np.array(df['C'])[-k-1])
+ B=B+b
+
+
+ BB=np.vstack((BB,B))
+ RR=np.vstack((RR,R))
+ II=np.vstack((II,I))
+
+ maximum_markdown_final=np.vstack((maximum_markdown_final,maximum_markdown))
+ rr=np.vstack((rr,r2))
+
+ if r1!=0:
+ CR=CR*(1+r1)
+ else:
+ CR=CR
+ if r2!=0:
+ CR=CR*(1+r1)
+ else:
+ CR=CR
+
+ CRR=np.vstack((CRR,(CR-1)*10))
+ k=k-1
+
+
+
+
+
+
+RR=RR[1:]
+II=II[1:]
+tt=tt[1:]
+
+R1=R1[1:]
+R2=R2[1:]
+
+
+plt.plot(tt,RR)
+#plt.plot(tt,II)
+BB=BB[1:]
+plt.plot(tt,BB)
+
+
+
+maximum_markdown_final=maximum_markdown_final[1:]
+#plt.plot(tt,maximum_markdown_final)
+final_maximum=min(maximum_markdown_final)
+rr=rr[1:]
+maximum_loss=min(rr)
+
+
+print("trading dates=",m)
+print("success to sell=",n)
+print("success to buy=",p)
+print('return from short=',-r)
+print('total return of long and short=',R)
+print('Time of long',s)
+print('maximum markdown=',final_maximum)
+print('maximum loss=',maximum_loss)
+
+print('compound interest=',CR-1)
+
+import numpy as np, scipy.stats as st
+CI_R1=st.t.interval(0.9, len(R1)-1, loc=np.mean(R1), scale=st.sem(R1)) #90% Confidence interval of short
+CI_R2=st.t.interval(0.9, len(R2)-1, loc=np.mean(R2), scale=st.sem(R2)) #90% Confidence interval of long
+
+print("confidence interval of short=",CI_R1)
+print("confidence interval of long=",CI_R2)
+#
+
+TC=100000 #总资金10万
+
+C=1 #每次交易1手
+
+
+leverage=1
+
+cost_rate=0.0005
+
+
+price_per_point=1 #1点300元
+
+last_day=len(df['TIME'])
+
+
+m=0
+n=0
+p=0
+r=0
+R=0
+s=0
+
+I=0
+B=100000
+RR=[0]
+II=[0]
+BB=[0]
+tt=0
+t=0
+d=3
+d2=3
+
+rr=[0]
+maximum_markdown_final=[0]
+CR=1
+CRR=[0]
+TCC=[0]
+
+day=600
+k=day
+#for k in range(3000):
+while k<=day and k>0:
+
+ if k>0:
+ data=np.array(features[-past_history-k:-1-k])
+ data2=np.array(features.iloc[-1-k])
+ data=np.vstack((data,data2))
+
+
+ t+=1
+
+ tt=np.vstack((tt,t))
+
+
+ #print(data2)
+
+ #j=[0,0,0,0,0]
+ j=[0]
+
+ for i in data:
+ i=np.array([i])
+ j=np.vstack((j,i))
+ #print(j.shape)
+
+ j=j[1:past_history+1]
+ #print(j.shape)
+ j=[j]
+
+
+
+
+
+
+ #data.reshape((240,1))
+ #print(data.shape)
+ data=tf.convert_to_tensor(j)
+ #print(data)
+
+
+
+ x=np.array(model.predict(data))
+
+ #trend=x[0][0]-MV[-1-k]-MV[-2-k] #预测正负
+
+ #trend_real=MV[-k]-MV[-1-k]-MV[-2-k] #实际正负
+
+ #P0=np.array(df['C'])[-7-k]
+ #P1=np.array(df['C'])[-6-k]
+ #P2=np.array(df['C'])[-5-k]
+ #P5=np.array(df['C'])[-2-k]
+ #P6=np.array(df['C'])[-1-k]
+
+ trend=x[0][0]-MV[-1-k] #预测正负
+
+ trend_real=MV[-k]-MV[-1-k] #实际正负
+
+
+ P0=np.array(df['C'])[-6-k]
+ P1=np.array(df['C'])[-5-k]
+ P5=np.array(df['C'])[-1-k]
+
+
+
+
+
+ #line=P6+P5+P2-P1-P0
+
+ line=P5+P1-P0
+
+ L=np.array(df['L'])[-k]
+ H=np.array(df['H'])[-k]
+
+ if trend<0:
+ if P5-d2>line and (np.array(df['O'])[-k])-d2>line:
+ r1=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ maximum_markdown=((np.array(df['L'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+
+ profit1=C*(((np.array(df['O'])[-k])*price_per_point-np.array(df['C'])[-k])*price_per_point-(np.array(df['C'])[-k])*price_per_point*cost_rate-(np.array(df['O'])[-k])*price_per_point*cost_rate)
+
+ print('date:',df['TIME'][last_day-k],'buy price=',(np.array(df['O'])[-k]),"sell price=",(np.array(df['C'])[-k]),'short profit=',profit1)
+
+
+ else:
+ if L<line<H:
+ r1=((np.array(df['C'])[-k])-(line))/(line) #r1必须小于0
+ maximum_markdown=((np.array(df['L'])[-k])-(line))/(line)
+
+ profit1=C*(((line)*price_per_point-np.array(df['C'])[-k])*price_per_point-(np.array(df['C'])[-k])*price_per_point*cost_rate-(line)*price_per_point*cost_rate)
+ print('date:',df['TIME'][last_day-k],'buy price=',(line),"sell price=",(np.array(df['C'])[-k]),'short profit=',profit1)
+
+ else:
+ r1=0
+ maximum_markdown=0
+ profit1=0
+
+ print('date:',df['TIME'][last_day-k],'No transaction','profit=',0)
+
+
+ if r1!=0:
+ m+=1
+
+
+ if r1<0:
+ n+=1
+ r=r+r1
+ R=R-r1
+
+ else:
+ r1=0
+ r=r
+ R=R
+ maximum_markdown=0
+ profit1=0
+
+
+ if trend>0:
+
+
+ if P5<line-d and (np.array(df['O'])[-k])<line-d:
+ r2=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ maximum_markdown=((np.array(df['L'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+
+ profit2=C*((np.array(df['C'])[-k])*price_per_point-(np.array(df['O'])[-k])*price_per_point-(np.array(df['C'])[-k])*price_per_point*cost_rate-(np.array(df['O'])[-k])*price_per_point*cost_rate)
+
+ print('date:',df['TIME'][last_day-k],'buy price=',(np.array(df['O'])[-k]),"sell price=",(np.array(df['C'])[-k]),'long profit=',profit2)
+
+
+ else:
+ if L<line<H:
+ r2=((np.array(df['C'])[-k])-(line))/(line) #r2必须大于0
+ maximum_markdown=((np.array(df['L'])[-k])-(line))/(line)
+
+ profit2=(np.array(df['C'])[-k])*price_per_point-(line)*price_per_point-(np.array(df['C'])[-k])*price_per_point*cost_rate-(line)*price_per_point*cost_rate
+
+ print('date:',df['TIME'][last_day-k],'buy price=',(line),"sell price=",(np.array(df['C'])[-k]),'long profit=',profit2)
+
+ else:
+ r2=0
+ maximum_markdown=0
+ profit2=0
+
+ print('date:',df['TIME'][last_day-k],'No transaction','profit=',0)
+
+
+ if r2!=0:
+ m+=1
+ s+=1
+
+ if r2>0:
+ p+=1
+ R=R+r2
+ else:
+ R=R
+ r2=0
+ maximum_markdown=0
+ profit2=0
+
+
+ TC=TC+profit1+profit2
+ TCC=np.vstack((TCC,TC))
+
+ i=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ I=I+i
+
+ b=((np.array(df['C'])[-k])-(np.array(df['C'])[-k-1]))
+ B=B+b
+
+
+ BB=np.vstack((BB,B))
+ RR=np.vstack((RR,R))
+ II=np.vstack((II,I))
+
+ maximum_markdown_final=np.vstack((maximum_markdown_final,maximum_markdown))
+ rr=np.vstack((rr,r2))
+
+ if r1!=0:
+ CR=CR*(1+r1)
+ else:
+ CR=CR
+ if r2!=0:
+ CR=CR*(1+r1)
+ else:
+ CR=CR
+
+ CRR=np.vstack((CRR,(CR-1)*10))
+ k=k-1
+
+
+
+
+
+
+RR=RR[1:]
+II=II[1:]
+tt=tt[1:]
+
+TCC=TCC[1:]
+
+
+
+#plt.plot(tt,RR)
+#plt.plot(tt,II)
+plt.plot(tt,TCC)
+BB=BB[1:]
+plt.plot(tt,BB)
+
+
+
+maximum_markdown_final=maximum_markdown_final[1:]
+#plt.plot(tt,maximum_markdown_final)
+final_maximum=min(maximum_markdown_final)
+rr=rr[1:]
+maximum_loss=min(rr)
+
+
+
+
+print("Final total capital=",TC)
+past_history = 35
+
+
+
+last_day=len(df['TIME'])
+
+
+
+
+day=600
+k=day
+#for k in range(3000):
+
+h=0
+y=0
+z=0
+u=0
+
+
+
+while k<=day and k>0:
+
+ if k>0:
+ data=np.array(features[-past_history-k:-1-k])
+ data2=np.array(features.iloc[-1-k])
+ data=np.vstack((data,data2))
+
+
+ t+=1
+
+ tt=np.vstack((tt,t))
+
+
+
+ j=[0]
+
+ for i in data:
+ i=np.array([i])
+ j=np.vstack((j,i))
+ #print(j.shape)
+
+ j=j[1:past_history+1]
+ #print(j.shape)
+ j=[j]
+
+
+
+
+
+
+ #data.reshape((240,1))
+ #print(data.shape)
+ data=tf.convert_to_tensor(j)
+ #print(data)
+
+
+
+ x=np.array(model.predict(data))
+
+
+
+ trend=x[0][0]-MV[-1-k] #预测正负
+
+ trend_real=MV[-k]-MV[-1-k] #实际正负
+
+
+ if trend>0 and trend_real>0:
+ h+=1
+ y+=1
+
+ if trend>0 and trend_real<0:
+ h=h
+ y+=1
+ if trend<0 and trend_real<0:
+ z+=1
+ u+=1
+ if trend<0 and trend_real>0:
+ z=z
+ u+=1
+
+
+
+
+
+
+ k=k-1
+
+print('accuracy of buy=',h/y)
+
+print('accuracy of sell=',z/u)
+past_history = 35
+
+
+
+last_day=len(df['TIME'])
+
+
+
+
+day=500
+k=day
+#for k in range(3000):
+
+h=0
+y=0
+z=0
+u=0
+
+H=[0]
+Z=[0]
+
+a=0
+
+while k<=day and k>0:
+
+ if k>0:
+ data=np.array(features[-past_history-k:-1-k])
+ data2=np.array(features.iloc[-1-k])
+ data=np.vstack((data,data2))
+
+
+ t+=1
+
+ tt=np.vstack((tt,t))
+
+
+
+ j=[0]
+
+ for i in data:
+ i=np.array([i])
+ j=np.vstack((j,i))
+ #print(j.shape)
+
+ j=j[1:past_history+1]
+ #print(j.shape)
+ j=[j]
+
+
+
+
+
+
+ #data.reshape((240,1))
+ #print(data.shape)
+ data=tf.convert_to_tensor(j)
+ #print(data)
+
+
+
+ x=np.array(model.predict(data))
+
+
+
+ trend=x[0][0]-MV[-1-k] #预测正负
+
+ trend_real=MV[-k]-MV[-1-k] #实际正负
+
+
+ if trend>0 and trend_real>0:
+ h+=1
+ y+=1
+
+ if trend>0 and trend_real<0:
+ h=h
+ y+=1
+ if trend<0 and trend_real<0:
+ z+=1
+ u+=1
+ if trend<0 and trend_real>0:
+ z=z
+ u+=1
+
    a+=1

    # Every 15 bars, record the window's buy/sell hit rates and reset the
    # window counters.
    if a%15==0:
        #print("counter=",a)
        if y==0:
            # HACK: when no buy signal occurred in this window a hard-coded
            # fallback rate is recorded instead of skipping the window.
            # NOTE(review): 0.6440129449838188 looks like a previously
            # observed overall hit rate pasted in as a prior -- confirm, and
            # consider recording np.nan (or skipping) instead.
            H=np.vstack((H,0.6440129449838188))
        else:
            H=np.vstack((H,h/y))


        h=0
        y=0
        # NOTE(review): unlike H above, this division is unguarded -- if no
        # sell signal occurred in the window (u==0) it raises
        # ZeroDivisionError.
        Z=np.vstack((Z,z/u))


        z=0
        u=0


    k=k-1
+
+H=H[1:]
+Z=Z[1:]
+
+print('success rate of buying in every 15 units',H)
+print('success rate of selling in every 15 units',Z)
+from scipy import stats
+stats.ttest_1samp(H,0.5) #t test of buying success rate
+stats.ttest_1samp(Z,0.5) #t test of selling success rate
+import numpy as np, scipy.stats as st
+st.t.interval(0.95, len(H)-1, loc=np.mean(H), scale=st.sem(H)) #95% Confidence interval of buying success rate
+import numpy as np, scipy.stats as st
+st.t.interval(0.95, len(Z)-1, loc=np.mean(Z), scale=st.sem(Z)) #95% Confidence interval of selling success rate
+
+
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+try:
+ # %tensorflow_version only exists in Colab.
+ %tensorflow_version 2.x
+except Exception:
+ pass
+import tensorflow as tf
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import pandas as pd
+
+mpl.rcParams['figure.figsize'] = (8, 6) #初始化设置,应该是展示图片大小
+
+mpl.rcParams['axes.grid'] = False
+
+df= pd.read_excel("data_summary.xlsx", sheet_name=2)
+df=np.array(df)
+df=pd.DataFrame(df)
+close_price=df[4]
+
+
+CLOSE=[0]
+
+frequency=1200 #1440min合成1条k线
+
+for i in range(len(close_price)):
+ if (i+1) %frequency==0:
+ CLOSE=np.vstack((CLOSE,close_price[i]))
+
+CLOSE=CLOSE[1:]
+
+
+#open_price=df['open']
+
+open_price=df[1]
+
+
+
+OPEN=[0]
+
+for i in range(len(close_price)):
+ if (i+1) %frequency==0:
+ OPEN=np.vstack((OPEN,open_price[i-(frequency-1)]))
+
+OPEN=OPEN[1:]
+
+
+#high_price=df['high']
+high_price=df[2]
+
+
+HIGH=[0]
+
+for i in range(len(high_price)):
+ if (i+1) %frequency==0:
+ HIGH=np.vstack((HIGH,max(high_price[i-(frequency-1):i+1])))
+
+HIGH=HIGH[1:]
+
+#low_price=df['low']
+low_price=df[3]
+
+LOW=[0]
+
+for i in range(len(low_price)):
+ if (i+1) %frequency==0:
+ LOW=np.vstack((LOW,min(low_price[i-(frequency-1):i+1])))
+
+LOW=LOW[1:]
+
+#time=df['time']
+time=df[0]
+
+TIME=[0]
+
+for i in range(len(time)):
+ if (i+1) %frequency==0:
+ TIME=np.vstack((TIME,time[i]))
+
+TIME=TIME[1:]
+
+data=np.hstack((TIME,CLOSE,HIGH,LOW,OPEN))
+data=pd.DataFrame(data)
+data.columns = ["TIME", "C","H","L","O"]
+df=data
+zero=np.array([0])
+rt=np.diff(np.array(df['C']))
+rt=np.hstack((zero,rt))
+df.insert(5, 'Diff_C', rt)
+
+moving_avg = df['Diff_C'].rolling(window=5).mean(center=True)
+MV=moving_avg[2:len(rt)-2]
+zeros=np.zeros((4))
+MV=np.hstack((zeros,MV))
+df.insert(6, 'MV_C', MV)
+features_considered=['MV_C']
+
+features = df[features_considered]
+features.index = df['TIME'] #在前面加一列时间
+
+dataset = features.values
+TRAIN_SPLIT = (len(df)*7)//10 #first 70% of the rows in dataset will be the training dataset
+tf.random.set_seed(13) # 保持random selection每次都一样
+EVALUATION_INTERVAL = 600 #原来是200
+EPOCHS = 10 #1个epoch等于使用训练集中的全部样本训练一次,通俗的讲epoch的值就是整个数据集被轮几次。
+BATCH_SIZE = 256 #训练的小批次, iteration=TRAIN_SPLIT/256
+BUFFER_SIZE = 10000 #缓存容量
def multivariate_data(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False):
    """Build (history-window, label) training pairs from a time series.

    Parameters
    ----------
    dataset : array-like
        Full feature array; rows are time steps.
    target : array-like
        1-D series labels are drawn from.
    start_index, end_index : int or None
        Label-position range; ``None`` for end means up to
        ``len(dataset) - target_size``.
    history_size : int
        Number of past steps in each input window.
    target_size : int
        Forecast horizon (offset of the label past the window end).
    step : int
        Sampling stride inside the history window (thins the window to
        cut training time).
    single_step : bool
        If True each label is one future value; otherwise a slice of
        ``target``.

    Returns
    -------
    tuple of np.ndarray
        Stacked input windows and their labels.
    """
    windows = []
    labels = []

    first_label = start_index + history_size
    last_label = end_index if end_index is not None else len(dataset) - target_size

    for pos in range(first_label, last_label):
        # strided look-back window ending just before `pos`
        windows.append(dataset[range(pos - history_size, pos, step)])
        if single_step:
            labels.append(target[pos + target_size])
        else:
            labels.append(target[pos:pos + target_size])

    return np.array(windows), np.array(labels)
+
+past_history = 35 # 用过去50天的数据,本来是720
+future_target = 0 #本来是72 预测12天后的数据,或者是下一天到12天后的数据 72/6=12
+STEP = 1 #本来是6,取过去天数的样本时,是间隔着6天取的,这样可以减少训练时间
+
+#future_target = 1 # 预测12天后的数据,或者是下一天到12天后的数据 72/6=12
+#STEP = 1 #取过去天数的样本时,是间隔着6天取的,这样可以减少训练时间
+
+
+x_train_single, y_train_single = multivariate_data(dataset, dataset[:, 0], 0,
+ TRAIN_SPLIT, past_history,
+ future_target, STEP,
+ single_step=True)
+x_val_single, y_val_single = multivariate_data(dataset, dataset[:, 0],
+ TRAIN_SPLIT, None, past_history,
+ future_target, STEP,
+ single_step=True)
+
+train_data_single = tf.data.Dataset.from_tensor_slices((x_train_single, y_train_single))
+train_data_single = train_data_single.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat() #traning data
+
+val_data_single = tf.data.Dataset.from_tensor_slices((x_val_single, y_val_single))
+val_data_single = val_data_single.batch(BATCH_SIZE).repeat() #validation data
+
+
+single_step_model = tf.keras.models.Sequential()
+single_step_model.add(tf.keras.layers.LSTM(3,
+ input_shape=x_train_single.shape[-2:])) #应该是这一层有32 个神经元
+single_step_model.add(tf.keras.layers.Dense(1)) #output layer, 因为预测未来1 期的data, 所以是1个神经元
+
+single_step_model.compile(optimizer=tf.keras.optimizers.RMSprop(), loss='mae') #这里优化算法用RMSprop而不是Adam 或 Momentum
+
+single_step_history = single_step_model.fit(train_data_single, epochs=EPOCHS, #EPOCHS = 10 轮10次
+ steps_per_epoch=EVALUATION_INTERVAL, #每轮test 200次 data
+ validation_data=val_data_single,
+ validation_steps=50) #设置验证多少次数据后取平均值作为此epoch训练后的效果
+
def plot_train_history(history, title):
    """Plot training vs. validation loss curves from a Keras History object."""
    train_loss = history.history['loss']
    valid_loss = history.history['val_loss']
    xs = range(len(train_loss))

    plt.figure()
    plt.plot(xs, train_loss, 'b', label='Training loss')
    plt.plot(xs, valid_loss, 'r', label='Validation loss')
    plt.title(title)
    plt.legend()
    plt.show()
+
+plot_train_history(single_step_history,
+ 'Single Step Training and validation loss')
+single_step_model.save('LTC_1200.h5')
+model = tf.keras.models.load_model('LTC_1200.h5')
+df['TIME']#
+
+R1=[0]
+R2=[0]
+
+
+past_history = 35
+TC=100000 #总资金10万
+
+C=1 #每次交易1手
+
+
+leverage=1
+
+cost_rate=0.0005
+
+
+price_per_point=1 #1点300元
+
+last_day=len(df['TIME'])
+
+
+m=0
+n=0
+p=0
+r=0
+R=0
+s=0
+
+I=0
+B=0
+RR=[0]
+II=[0]
+BB=[0]
+tt=0
+t=0
+d=0
+d2=0
+
+rr=[0]
+maximum_markdown_final=[0]
+CR=1
+CRR=[0]
+TCC=[0]
+
+day=300
+k=day
+#for k in range(3000):
+while k<=day and k>0:
+
+ if k>0:
+ data=np.array(features[-past_history-k:-1-k])
+ data2=np.array(features.iloc[-1-k])
+ data=np.vstack((data,data2))
+
+
+ t+=1
+
+ tt=np.vstack((tt,t))
+
+
+ #print(data2)
+
+ #j=[0,0,0,0,0]
+ j=[0]
+
+ for i in data:
+ i=np.array([i])
+ j=np.vstack((j,i))
+ #print(j.shape)
+
+ j=j[1:past_history+1]
+ #print(j.shape)
+ j=[j]
+
+
+
+
+
+
+ #data.reshape((240,1))
+ #print(data.shape)
+ data=tf.convert_to_tensor(j)
+ #print(data)
+
+
+
+ x=np.array(model.predict(data))
+
+ #trend=x[0][0]-MV[-1-k]-MV[-2-k] #预测正负
+
+ #trend_real=MV[-k]-MV[-1-k]-MV[-2-k] #实际正负
+
+ #P0=np.array(df['C'])[-7-k]
+ #P1=np.array(df['C'])[-6-k]
+ #P2=np.array(df['C'])[-5-k]
+ #P5=np.array(df['C'])[-2-k]
+ #P6=np.array(df['C'])[-1-k]
+
+ trend=x[0][0]-MV[-1-k] #预测正负
+
+ trend_real=MV[-k]-MV[-1-k] #实际正负
+
+
+ P0=np.array(df['C'])[-6-k]
+ P1=np.array(df['C'])[-5-k]
+ P5=np.array(df['C'])[-1-k]
+
+
+
+
+
+ #line=P6+P5+P2-P1-P0
+
+ line=P5+P1-P0
+
+ L=np.array(df['L'])[-k]
+ H=np.array(df['H'])[-k]
+
+ if trend<0:
+ if P5-d2>line and (np.array(df['O'])[-k])-d2>line:
+ r1=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ maximum_markdown=((np.array(df['L'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+
+
+
+
+ else:
+ if L<line<H:
+ r1=((np.array(df['C'])[-k])-(line))/(line) #r1必须小于0
+ maximum_markdown=((np.array(df['L'])[-k])-(line))/(line)
+
+
+ else:
+ r1=0
+ maximum_markdown=0
+
+
+ if r1!=0:
+ m+=1
+ R1=np.vstack((R1,r1))
+
+
+ if r1<0:
+ n+=1
+ r=r+r1
+ R=R-r1
+
+ else:
+ r1=0
+ r=r
+ R=R
+ maximum_markdown=0
+
+
+ if trend>0:
+
+
+ if P5<line-d and (np.array(df['O'])[-k])<line-d:
+ r2=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ maximum_markdown=((np.array(df['L'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+
+
+ else:
+ if L<line<H:
+ r2=((np.array(df['C'])[-k])-(line))/(line) #r2必须大于0
+ maximum_markdown=((np.array(df['L'])[-k])-(line))/(line)
+
+ else:
+ r2=0
+ maximum_markdown=0
+ profit2=0
+
+
+
+ if r2!=0:
+ m+=1
+ s+=1
+
+ R2=np.vstack((R2,r2))
+
+ if r2>0:
+ p+=1
+ R=R+r2
+ else:
+ R=R
+ r2=0
+ maximum_markdown=0
+ profit2=0
+
+
+
+
+ i=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ I=I+i
+
+ b=((np.array(df['C'])[-k])-(np.array(df['C'])[-k-1]))/(np.array(df['C'])[-k-1])
+ B=B+b
+
+
+ BB=np.vstack((BB,B))
+ RR=np.vstack((RR,R))
+ II=np.vstack((II,I))
+
+ maximum_markdown_final=np.vstack((maximum_markdown_final,maximum_markdown))
+ rr=np.vstack((rr,r2))
+
+ if r1!=0:
+ CR=CR*(1+r1)
+ else:
+ CR=CR
+ if r2!=0:
+ CR=CR*(1+r1)
+ else:
+ CR=CR
+
+ CRR=np.vstack((CRR,(CR-1)*10))
+ k=k-1
+
+
+
+
+
+
+RR=RR[1:]
+II=II[1:]
+tt=tt[1:]
+
+R1=R1[1:]
+R2=R2[1:]
+
+
+plt.plot(tt,RR)
+#plt.plot(tt,II)
+BB=BB[1:]
+plt.plot(tt,BB)
+
+
+
+maximum_markdown_final=maximum_markdown_final[1:]
+#plt.plot(tt,maximum_markdown_final)
+final_maximum=min(maximum_markdown_final)
+rr=rr[1:]
+maximum_loss=min(rr)
+
+
+print("trading dates=",m)
+print("success to sell=",n)
+print("success to buy=",p)
+print('return from short=',-r)
+print('total return of long and short=',R)
+print('Time of long',s)
+print('maximum markdown=',final_maximum)
+print('maximum loss=',maximum_loss)
+
+print('compound interest=',CR-1)
+
+import numpy as np, scipy.stats as st
+CI_R1=st.t.interval(0.9, len(R1)-1, loc=np.mean(R1), scale=st.sem(R1)) #90% Confidence interval of short
+CI_R2=st.t.interval(0.9, len(R2)-1, loc=np.mean(R2), scale=st.sem(R2)) #90% Confidence interval of long
+
+print("confidence interval of short=",CI_R1)
+print("confidence interval of long=",CI_R2)
+#
+
+TC=100000 #总资金10万
+
+C=1 #每次交易1手
+
+
+leverage=1
+
+cost_rate=0.0005
+
+
+price_per_point=1 #1点300元
+
+last_day=len(df['TIME'])
+
+
+m=0
+n=0
+p=0
+r=0
+R=0
+s=0
+
+I=0
+B=100000
+RR=[0]
+II=[0]
+BB=[0]
+tt=0
+t=0
+d=0
+d2=0
+
+rr=[0]
+maximum_markdown_final=[0]
+CR=1
+CRR=[0]
+TCC=[0]
+
+day=300
+k=day
+#for k in range(3000):
+while k<=day and k>0:
+
+ if k>0:
+ data=np.array(features[-past_history-k:-1-k])
+ data2=np.array(features.iloc[-1-k])
+ data=np.vstack((data,data2))
+
+
+ t+=1
+
+ tt=np.vstack((tt,t))
+
+
+ #print(data2)
+
+ #j=[0,0,0,0,0]
+ j=[0]
+
+ for i in data:
+ i=np.array([i])
+ j=np.vstack((j,i))
+ #print(j.shape)
+
+ j=j[1:past_history+1]
+ #print(j.shape)
+ j=[j]
+
+
+
+
+
+
+ #data.reshape((240,1))
+ #print(data.shape)
+ data=tf.convert_to_tensor(j)
+ #print(data)
+
+
+
+ x=np.array(model.predict(data))
+
+ #trend=x[0][0]-MV[-1-k]-MV[-2-k] #预测正负
+
+ #trend_real=MV[-k]-MV[-1-k]-MV[-2-k] #实际正负
+
+ #P0=np.array(df['C'])[-7-k]
+ #P1=np.array(df['C'])[-6-k]
+ #P2=np.array(df['C'])[-5-k]
+ #P5=np.array(df['C'])[-2-k]
+ #P6=np.array(df['C'])[-1-k]
+
+ trend=x[0][0]-MV[-1-k] #预测正负
+
+ trend_real=MV[-k]-MV[-1-k] #实际正负
+
+
+ P0=np.array(df['C'])[-6-k]
+ P1=np.array(df['C'])[-5-k]
+ P5=np.array(df['C'])[-1-k]
+
+
+
+
+
+ #line=P6+P5+P2-P1-P0
+
+ line=P5+P1-P0
+
+ L=np.array(df['L'])[-k]
+ H=np.array(df['H'])[-k]
+
+ if trend<0:
+ if P5-d2>line and (np.array(df['O'])[-k])-d2>line:
+ r1=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ maximum_markdown=((np.array(df['L'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+
+ profit1=C*(((np.array(df['O'])[-k])*price_per_point-np.array(df['C'])[-k])*price_per_point-(np.array(df['C'])[-k])*price_per_point*cost_rate-(np.array(df['O'])[-k])*price_per_point*cost_rate)
+
+ print('date:',df['TIME'][last_day-k],'buy price=',(np.array(df['O'])[-k]),"sell price=",(np.array(df['C'])[-k]),'short profit=',profit1)
+
+
+ else:
+ if L<line<H:
+ r1=((np.array(df['C'])[-k])-(line))/(line) #r1必须小于0
+ maximum_markdown=((np.array(df['L'])[-k])-(line))/(line)
+
+ profit1=C*(((line)*price_per_point-np.array(df['C'])[-k])*price_per_point-(np.array(df['C'])[-k])*price_per_point*cost_rate-(line)*price_per_point*cost_rate)
+ print('date:',df['TIME'][last_day-k],'buy price=',(line),"sell price=",(np.array(df['C'])[-k]),'short profit=',profit1)
+
+ else:
+ r1=0
+ maximum_markdown=0
+ profit1=0
+
+ print('date:',df['TIME'][last_day-k],'No transaction','profit=',0)
+
+
+ if r1!=0:
+ m+=1
+
+
+ if r1<0:
+ n+=1
+ r=r+r1
+ R=R-r1
+
+ else:
+ r1=0
+ r=r
+ R=R
+ maximum_markdown=0
+ profit1=0
+
+
+ if trend>0:
+
+
+ if P5<line-d and (np.array(df['O'])[-k])<line-d:
+ r2=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ maximum_markdown=((np.array(df['L'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+
+ profit2=C*((np.array(df['C'])[-k])*price_per_point-(np.array(df['O'])[-k])*price_per_point-(np.array(df['C'])[-k])*price_per_point*cost_rate-(np.array(df['O'])[-k])*price_per_point*cost_rate)
+
+ print('date:',df['TIME'][last_day-k],'buy price=',(np.array(df['O'])[-k]),"sell price=",(np.array(df['C'])[-k]),'long profit=',profit2)
+
+
+ else:
+ if L<line<H:
+ r2=((np.array(df['C'])[-k])-(line))/(line) #r2必须大于0
+ maximum_markdown=((np.array(df['L'])[-k])-(line))/(line)
+
+ profit2=(np.array(df['C'])[-k])*price_per_point-(line)*price_per_point-(np.array(df['C'])[-k])*price_per_point*cost_rate-(line)*price_per_point*cost_rate
+
+ print('date:',df['TIME'][last_day-k],'buy price=',(line),"sell price=",(np.array(df['C'])[-k]),'long profit=',profit2)
+
+ else:
+ r2=0
+ maximum_markdown=0
+ profit2=0
+
+ print('date:',df['TIME'][last_day-k],'No transaction','profit=',0)
+
+
+ if r2!=0:
+ m+=1
+ s+=1
+
+ if r2>0:
+ p+=1
+ R=R+r2
+ else:
+ R=R
+ r2=0
+ maximum_markdown=0
+ profit2=0
+
+
+ TC=TC+profit1+profit2
+ TCC=np.vstack((TCC,TC))
+
+ i=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ I=I+i
+
+ b=((np.array(df['C'])[-k])-(np.array(df['C'])[-k-1]))
+ B=B+b
+
+
+ BB=np.vstack((BB,B))
+ RR=np.vstack((RR,R))
+ II=np.vstack((II,I))
+
+ maximum_markdown_final=np.vstack((maximum_markdown_final,maximum_markdown))
+ rr=np.vstack((rr,r2))
+
+ if r1!=0:
+ CR=CR*(1+r1)
+ else:
+ CR=CR
+ if r2!=0:
+ CR=CR*(1+r1)
+ else:
+ CR=CR
+
+ CRR=np.vstack((CRR,(CR-1)*10))
+ k=k-1
+
+
+
+
+
+
+RR=RR[1:]
+II=II[1:]
+tt=tt[1:]
+
+TCC=TCC[1:]
+
+
+
+#plt.plot(tt,RR)
+#plt.plot(tt,II)
+plt.plot(tt,TCC)
+BB=BB[1:]
+plt.plot(tt,BB)
+
+
+
+maximum_markdown_final=maximum_markdown_final[1:]
+#plt.plot(tt,maximum_markdown_final)
+final_maximum=min(maximum_markdown_final)
+rr=rr[1:]
+maximum_loss=min(rr)
+
+
+
+
+print("Final total capital=",TC)
+past_history = 35
+
+
+
+last_day=len(df['TIME'])
+
+
+
+
+day=300
+k=day
+#for k in range(3000):
+
+h=0
+y=0
+z=0
+u=0
+
+
+
+while k<=day and k>0:
+
+ if k>0:
+ data=np.array(features[-past_history-k:-1-k])
+ data2=np.array(features.iloc[-1-k])
+ data=np.vstack((data,data2))
+
+
+ t+=1
+
+ tt=np.vstack((tt,t))
+
+
+
+ j=[0]
+
+ for i in data:
+ i=np.array([i])
+ j=np.vstack((j,i))
+ #print(j.shape)
+
+ j=j[1:past_history+1]
+ #print(j.shape)
+ j=[j]
+
+
+
+
+
+
+ #data.reshape((240,1))
+ #print(data.shape)
+ data=tf.convert_to_tensor(j)
+ #print(data)
+
+
+
+ x=np.array(model.predict(data))
+
+
+
+ trend=x[0][0]-MV[-1-k] #预测正负
+
+ trend_real=MV[-k]-MV[-1-k] #实际正负
+
+
+ if trend>0 and trend_real>0:
+ h+=1
+ y+=1
+
+ if trend>0 and trend_real<0:
+ h=h
+ y+=1
+ if trend<0 and trend_real<0:
+ z+=1
+ u+=1
+ if trend<0 and trend_real>0:
+ z=z
+ u+=1
+
+
+
+
+
+
+ k=k-1
+
+print('accuracy of buy=',h/y)
+
+print('accuracy of sell=',z/u)
+past_history = 35
+
+
+
+last_day=len(df['TIME'])
+
+
+
+
+day=300
+k=day
+#for k in range(3000):
+
+h=0
+y=0
+z=0
+u=0
+
+H=[0]
+Z=[0]
+
+a=0
+
+while k<=day and k>0:
+
+ if k>0:
+ data=np.array(features[-past_history-k:-1-k])
+ data2=np.array(features.iloc[-1-k])
+ data=np.vstack((data,data2))
+
+
+ t+=1
+
+ tt=np.vstack((tt,t))
+
+
+
+ j=[0]
+
+ for i in data:
+ i=np.array([i])
+ j=np.vstack((j,i))
+ #print(j.shape)
+
+ j=j[1:past_history+1]
+ #print(j.shape)
+ j=[j]
+
+
+
+
+
+
+ #data.reshape((240,1))
+ #print(data.shape)
+ data=tf.convert_to_tensor(j)
+ #print(data)
+
+
+
+ x=np.array(model.predict(data))
+
+
+
+ trend=x[0][0]-MV[-1-k] #预测正负
+
+ trend_real=MV[-k]-MV[-1-k] #实际正负
+
+
+ if trend>0 and trend_real>0:
+ h+=1
+ y+=1
+
+ if trend>0 and trend_real<0:
+ h=h
+ y+=1
+ if trend<0 and trend_real<0:
+ z+=1
+ u+=1
+ if trend<0 and trend_real>0:
+ z=z
+ u+=1
+
+ a+=1
+
+ if a%15==0:
+ #print("counter=",a)
+
+ H=np.vstack((H,h/y))
+ h=0
+ y=0
+ Z=np.vstack((Z,z/u))
+ z=0
+ u=0
+
+
+ k=k-1
+
+H=H[1:]
+Z=Z[1:]
+
+print('success rate of buying in every 15 units',H)
+print('success rate of selling in every 15 units',Z)
+from scipy import stats
+stats.ttest_1samp(H,0.5) #t test of buying success rate
+stats.ttest_1samp(Z,0.5) #t test of selling success rate
+import numpy as np, scipy.stats as st
+st.t.interval(0.95, len(H)-1, loc=np.mean(H), scale=st.sem(H)) #95% Confidence interval of buying success rate
+import numpy as np, scipy.stats as st
+st.t.interval(0.95, len(Z)-1, loc=np.mean(Z), scale=st.sem(Z)) #95% Confidence interval of selling success rate
+
+
+
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+try:
+ # %tensorflow_version only exists in Colab.
+ %tensorflow_version 2.x
+except Exception:
+ pass
+import tensorflow as tf
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import pandas as pd
+
+df= pd.read_excel("data_summary.xlsx", sheet_name=3)
+
+
+df=np.array(df)
+df=pd.DataFrame(df)
+close_price=df[4]
+
+
+CLOSE=[0]
+
+frequency=1200 #1440min合成1条k线
+
+for i in range(len(close_price)):
+ if (i+1) %frequency==0:
+ CLOSE=np.vstack((CLOSE,close_price[i]))
+
+CLOSE=CLOSE[1:]
+
+
+#open_price=df['open']
+
+open_price=df[1]
+
+
+
+OPEN=[0]
+
+for i in range(len(close_price)):
+ if (i+1) %frequency==0:
+ OPEN=np.vstack((OPEN,open_price[i-(frequency-1)]))
+
+OPEN=OPEN[1:]
+
+
+#high_price=df['high']
+high_price=df[2]
+
+
+HIGH=[0]
+
+for i in range(len(high_price)):
+ if (i+1) %frequency==0:
+ HIGH=np.vstack((HIGH,max(high_price[i-(frequency-1):i+1])))
+
+HIGH=HIGH[1:]
+
+#low_price=df['low']
+low_price=df[3]
+
+LOW=[0]
+
+for i in range(len(low_price)):
+ if (i+1) %frequency==0:
+ LOW=np.vstack((LOW,min(low_price[i-(frequency-1):i+1])))
+
+LOW=LOW[1:]
+
+#time=df['time']
+time=df[0]
+
+TIME=[0]
+
+for i in range(len(time)):
+ if (i+1) %frequency==0:
+ TIME=np.vstack((TIME,time[i]))
+
+TIME=TIME[1:]
+
+data=np.hstack((TIME,CLOSE,HIGH,LOW,OPEN))
+data=pd.DataFrame(data)
+data.columns = ["TIME", "C","H","L","O"]
+df=data
+zero=np.array([0])
+rt=np.diff(np.array(df['C']))
+rt=np.hstack((zero,rt))
+df.insert(5, 'Diff_C', rt)
+
+moving_avg = df['Diff_C'].rolling(window=5).mean(center=True)
+MV=moving_avg[2:len(rt)-2]
+zeros=np.zeros((4))
+MV=np.hstack((zeros,MV))
+df.insert(6, 'MV_C', MV)
+features_considered=['MV_C']
+
+features = df[features_considered]
+features.index = df['TIME'] #在前面加一列时间
+
+dataset = features.values
+TRAIN_SPLIT = (len(df)*7)//10 #first 70% of the rows in dataset will be the training dataset
+tf.random.set_seed(13) # 保持random selection每次都一样
+EVALUATION_INTERVAL = 400 #原来是200
+EPOCHS = 10 #1个epoch等于使用训练集中的全部样本训练一次,通俗的讲epoch的值就是整个数据集被轮几次。
+BATCH_SIZE = 256 #训练的小批次, iteration=TRAIN_SPLIT/256
+BUFFER_SIZE = 10000 #缓存容量
def multivariate_data(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False):
    """Slice a time series into strided look-back windows plus labels.

    ``dataset`` rows are time steps; ``target`` is the 1-D label series.
    Labels are taken for positions ``start_index + history_size`` up to
    ``end_index`` (or ``len(dataset) - target_size`` when ``end_index`` is
    None). Each window covers the ``history_size`` steps before its label
    position, sampled every ``step`` rows. With ``single_step`` the label is
    the single value ``target_size`` steps ahead; otherwise it is the slice
    ``target[pos:pos + target_size]``.

    Returns a ``(windows, labels)`` pair of NumPy arrays.
    """
    samples, outputs = [], []

    lo = start_index + history_size
    hi = len(dataset) - target_size if end_index is None else end_index

    for cursor in range(lo, hi):
        picks = range(cursor - history_size, cursor, step)
        samples.append(dataset[picks])
        outputs.append(target[cursor + target_size] if single_step
                       else target[cursor:cursor + target_size])

    return np.array(samples), np.array(outputs)
+
+past_history = 35 # 用过去50天的数据,本来是720
+future_target = 0 #本来是72 预测12天后的数据,或者是下一天到12天后的数据 72/6=12
+STEP = 1 #本来是6,取过去天数的样本时,是间隔着6天取的,这样可以减少训练时间
+
+#future_target = 1 # 预测12天后的数据,或者是下一天到12天后的数据 72/6=12
+#STEP = 1 #取过去天数的样本时,是间隔着6天取的,这样可以减少训练时间
+
+
+x_train_single, y_train_single = multivariate_data(dataset, dataset[:, 0], 0,
+ TRAIN_SPLIT, past_history,
+ future_target, STEP,
+ single_step=True)
+x_val_single, y_val_single = multivariate_data(dataset, dataset[:, 0],
+ TRAIN_SPLIT, None, past_history,
+ future_target, STEP,
+ single_step=True)
+
+train_data_single = tf.data.Dataset.from_tensor_slices((x_train_single, y_train_single))
+train_data_single = train_data_single.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat() #traning data
+
+val_data_single = tf.data.Dataset.from_tensor_slices((x_val_single, y_val_single))
+val_data_single = val_data_single.batch(BATCH_SIZE).repeat() #validation data
+
+
+single_step_model = tf.keras.models.Sequential()
+single_step_model.add(tf.keras.layers.LSTM(3,
+ input_shape=x_train_single.shape[-2:])) #应该是这一层有32 个神经元
+single_step_model.add(tf.keras.layers.Dense(1)) #output layer, 因为预测未来1 期的data, 所以是1个神经元
+
+single_step_model.compile(optimizer=tf.keras.optimizers.RMSprop(), loss='mae') #这里优化算法用RMSprop而不是Adam 或 Momentum
+
+single_step_history = single_step_model.fit(train_data_single, epochs=EPOCHS, #EPOCHS = 10 轮10次
+ steps_per_epoch=EVALUATION_INTERVAL, #每轮test 200次 data
+ validation_data=val_data_single,
+ validation_steps=50) #设置验证多少次数据后取平均值作为此epoch训练后的效果
+
def plot_train_history(history, title):
    """Show the training- and validation-loss curves of a Keras fit run."""
    losses = history.history['loss']
    val_losses = history.history['val_loss']

    plt.figure()
    for series, color, label in ((losses, 'b', 'Training loss'),
                                 (val_losses, 'r', 'Validation loss')):
        plt.plot(range(len(losses)), series, color, label=label)
    plt.title(title)
    plt.legend()
    plt.show()
+
+plot_train_history(single_step_history,
+ 'Single Step Training and validation loss')
+
+single_step_model.save('XRP_1200.h5')
+model = tf.keras.models.load_model('XRP_1200.h5')
+df['TIME']#
+
+R1=[0]
+R2=[0]
+
+
+past_history = 35
+TC=100000 #总资金10万
+
+C=1 #每次交易1手
+
+
+leverage=1
+
+cost_rate=0.0005
+
+
+price_per_point=1 #1点300元
+
+last_day=len(df['TIME'])
+
+
+m=0
+n=0
+p=0
+r=0
+R=0
+s=0
+
+I=0
+B=0
+RR=[0]
+II=[0]
+BB=[0]
+tt=0
+t=0
+d=0
+d2=0
+
+rr=[0]
+maximum_markdown_final=[0]
+CR=1
+CRR=[0]
+TCC=[0]
+
+day=500
+k=day
+#for k in range(3000):
+while k<=day and k>0:
+
+ if k>0:
+ data=np.array(features[-past_history-k:-1-k])
+ data2=np.array(features.iloc[-1-k])
+ data=np.vstack((data,data2))
+
+
+ t+=1
+
+ tt=np.vstack((tt,t))
+
+
+ #print(data2)
+
+ #j=[0,0,0,0,0]
+ j=[0]
+
+ for i in data:
+ i=np.array([i])
+ j=np.vstack((j,i))
+ #print(j.shape)
+
+ j=j[1:past_history+1]
+ #print(j.shape)
+ j=[j]
+
+
+
+
+
+
+ #data.reshape((240,1))
+ #print(data.shape)
+ data=tf.convert_to_tensor(j)
+ #print(data)
+
+
+
+ x=np.array(model.predict(data))
+
+ #trend=x[0][0]-MV[-1-k]-MV[-2-k] #预测正负
+
+ #trend_real=MV[-k]-MV[-1-k]-MV[-2-k] #实际正负
+
+ #P0=np.array(df['C'])[-7-k]
+ #P1=np.array(df['C'])[-6-k]
+ #P2=np.array(df['C'])[-5-k]
+ #P5=np.array(df['C'])[-2-k]
+ #P6=np.array(df['C'])[-1-k]
+
+ trend=x[0][0]-MV[-1-k] #预测正负
+
+ trend_real=MV[-k]-MV[-1-k] #实际正负
+
+
+ P0=np.array(df['C'])[-6-k]
+ P1=np.array(df['C'])[-5-k]
+ P5=np.array(df['C'])[-1-k]
+
+
+
+
+
+ #line=P6+P5+P2-P1-P0
+
+ line=P5+P1-P0
+
+ L=np.array(df['L'])[-k]
+ H=np.array(df['H'])[-k]
+
+ if trend<0:
+ if P5-d2>line and (np.array(df['O'])[-k])-d2>line:
+ r1=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ maximum_markdown=((np.array(df['L'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+
+
+
+
+ else:
+ if L<line<H:
+ r1=((np.array(df['C'])[-k])-(line))/(line) #r1必须小于0
+ maximum_markdown=((np.array(df['L'])[-k])-(line))/(line)
+
+
+ else:
+ r1=0
+ maximum_markdown=0
+
+
+ if r1!=0:
+ m+=1
+ R1=np.vstack((R1,r1))
+
+
+ if r1<0:
+ n+=1
+ r=r+r1
+ R=R-r1
+
+ else:
+ r1=0
+ r=r
+ R=R
+ maximum_markdown=0
+
+
+ if trend>0:
+
+
+ if P5<line-d and (np.array(df['O'])[-k])<line-d:
+ r2=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ maximum_markdown=((np.array(df['L'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+
+
+ else:
+ if L<line<H:
+ r2=((np.array(df['C'])[-k])-(line))/(line) #r2必须大于0
+ maximum_markdown=((np.array(df['L'])[-k])-(line))/(line)
+
+ else:
+ r2=0
+ maximum_markdown=0
+ profit2=0
+
+
+
+ if r2!=0:
+ m+=1
+ s+=1
+
+ R2=np.vstack((R2,r2))
+
+ if r2>0:
+ p+=1
+ R=R+r2
+ else:
+ R=R
+ r2=0
+ maximum_markdown=0
+ profit2=0
+
+
+
+
+ i=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ I=I+i
+
+ b=((np.array(df['C'])[-k])-(np.array(df['C'])[-k-1]))/(np.array(df['C'])[-k-1])
+ B=B+b
+
+
+ BB=np.vstack((BB,B))
+ RR=np.vstack((RR,R))
+ II=np.vstack((II,I))
+
+ maximum_markdown_final=np.vstack((maximum_markdown_final,maximum_markdown))
+ rr=np.vstack((rr,r2))
+
+ if r1!=0:
+ CR=CR*(1+r1)
+ else:
+ CR=CR
+ if r2!=0:
+ CR=CR*(1+r1)
+ else:
+ CR=CR
+
+ CRR=np.vstack((CRR,(CR-1)*10))
+ k=k-1
+
+
+
+
+
+
+RR=RR[1:]
+II=II[1:]
+tt=tt[1:]
+
+R1=R1[1:]
+R2=R2[1:]
+
+
+plt.plot(tt,RR)
+#plt.plot(tt,II)
+BB=BB[1:]
+plt.plot(tt,BB)
+
+
+
+maximum_markdown_final=maximum_markdown_final[1:]
+#plt.plot(tt,maximum_markdown_final)
+final_maximum=min(maximum_markdown_final)
+rr=rr[1:]
+maximum_loss=min(rr)
+
+
+print("trading dates=",m)
+print("success to sell=",n)
+print("success to buy=",p)
+print('return from short=',-r)
+print('total return of long and short=',R)
+print('Time of long',s)
+print('maximum markdown=',final_maximum)
+print('maximum loss=',maximum_loss)
+
+print('compound interest=',CR-1)
+
+import numpy as np, scipy.stats as st
+CI_R1=st.t.interval(0.9, len(R1)-1, loc=np.mean(R1), scale=st.sem(R1)) #90% Confidence interval of short
+CI_R2=st.t.interval(0.9, len(R2)-1, loc=np.mean(R2), scale=st.sem(R2)) #90% Confidence interval of long
+
+print("confidence interval of short=",CI_R1)
+print("confidence interval of long=",CI_R2)
+#
+
+TC=100000 #总资金10万
+
+C=1 #每次交易1手
+
+
+leverage=1
+
+cost_rate=0.0005
+
+
+price_per_point=1 #1点300元
+
+last_day=len(df['TIME'])
+
+
+m=0
+n=0
+p=0
+r=0
+R=0
+s=0
+
+I=0
+B=100000
+RR=[0]
+II=[0]
+BB=[0]
+tt=0
+t=0
+d=0
+d2=0
+
+rr=[0]
+maximum_markdown_final=[0]
+CR=1
+CRR=[0]
+TCC=[0]
+
+day=500
+k=day
+#for k in range(3000):
+while k<=day and k>0:
+
+ if k>0:
+ data=np.array(features[-past_history-k:-1-k])
+ data2=np.array(features.iloc[-1-k])
+ data=np.vstack((data,data2))
+
+
+ t+=1
+
+ tt=np.vstack((tt,t))
+
+
+ #print(data2)
+
+ #j=[0,0,0,0,0]
+ j=[0]
+
+ for i in data:
+ i=np.array([i])
+ j=np.vstack((j,i))
+ #print(j.shape)
+
+ j=j[1:past_history+1]
+ #print(j.shape)
+ j=[j]
+
+
+
+
+
+
+ #data.reshape((240,1))
+ #print(data.shape)
+ data=tf.convert_to_tensor(j)
+ #print(data)
+
+
+
+ x=np.array(model.predict(data))
+
+ #trend=x[0][0]-MV[-1-k]-MV[-2-k] #预测正负
+
+ #trend_real=MV[-k]-MV[-1-k]-MV[-2-k] #实际正负
+
+ #P0=np.array(df['C'])[-7-k]
+ #P1=np.array(df['C'])[-6-k]
+ #P2=np.array(df['C'])[-5-k]
+ #P5=np.array(df['C'])[-2-k]
+ #P6=np.array(df['C'])[-1-k]
+
+ trend=x[0][0]-MV[-1-k] #预测正负
+
+ trend_real=MV[-k]-MV[-1-k] #实际正负
+
+
+ P0=np.array(df['C'])[-6-k]
+ P1=np.array(df['C'])[-5-k]
+ P5=np.array(df['C'])[-1-k]
+
+
+
+
+
+ #line=P6+P5+P2-P1-P0
+
+ line=P5+P1-P0
+
+ L=np.array(df['L'])[-k]
+ H=np.array(df['H'])[-k]
+
+ if trend<0:
+ if P5-d2>line and (np.array(df['O'])[-k])-d2>line:
+ r1=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ maximum_markdown=((np.array(df['L'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+
+ profit1=C*(((np.array(df['O'])[-k])*price_per_point-np.array(df['C'])[-k])*price_per_point-(np.array(df['C'])[-k])*price_per_point*cost_rate-(np.array(df['O'])[-k])*price_per_point*cost_rate)
+
+ print('date:',df['TIME'][last_day-k],'buy price=',(np.array(df['O'])[-k]),"sell price=",(np.array(df['C'])[-k]),'short profit=',profit1)
+
+
+ else:
+ if L<line<H:
+ r1=((np.array(df['C'])[-k])-(line))/(line) #r1必须小于0
+ maximum_markdown=((np.array(df['L'])[-k])-(line))/(line)
+
+ profit1=C*(((line)*price_per_point-np.array(df['C'])[-k])*price_per_point-(np.array(df['C'])[-k])*price_per_point*cost_rate-(line)*price_per_point*cost_rate)
+ print('date:',df['TIME'][last_day-k],'buy price=',(line),"sell price=",(np.array(df['C'])[-k]),'short profit=',profit1)
+
+ else:
+ r1=0
+ maximum_markdown=0
+ profit1=0
+
+ print('date:',df['TIME'][last_day-k],'No transaction','profit=',0)
+
+
+ if r1!=0:
+ m+=1
+
+
+ if r1<0:
+ n+=1
+ r=r+r1
+ R=R-r1
+
+ else:
+ r1=0
+ r=r
+ R=R
+ maximum_markdown=0
+ profit1=0
+
+
+ if trend>0:
+
+
+ if P5<line-d and (np.array(df['O'])[-k])<line-d:
+ r2=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ maximum_markdown=((np.array(df['L'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+
+ profit2=C*((np.array(df['C'])[-k])*price_per_point-(np.array(df['O'])[-k])*price_per_point-(np.array(df['C'])[-k])*price_per_point*cost_rate-(np.array(df['O'])[-k])*price_per_point*cost_rate)
+
+ print('date:',df['TIME'][last_day-k],'buy price=',(np.array(df['O'])[-k]),"sell price=",(np.array(df['C'])[-k]),'long profit=',profit2)
+
+
+ else:
+ if L<line<H:
+ r2=((np.array(df['C'])[-k])-(line))/(line) #r2必须大于0
+ maximum_markdown=((np.array(df['L'])[-k])-(line))/(line)
+
+ profit2=(np.array(df['C'])[-k])*price_per_point-(line)*price_per_point-(np.array(df['C'])[-k])*price_per_point*cost_rate-(line)*price_per_point*cost_rate
+
+ print('date:',df['TIME'][last_day-k],'buy price=',(line),"sell price=",(np.array(df['C'])[-k]),'long profit=',profit2)
+
+ else:
+ r2=0
+ maximum_markdown=0
+ profit2=0
+
+ print('date:',df['TIME'][last_day-k],'No transaction','profit=',0)
+
+
+ if r2!=0:
+ m+=1
+ s+=1
+
+ if r2>0:
+ p+=1
+ R=R+r2
+ else:
+ R=R
+ r2=0
+ maximum_markdown=0
+ profit2=0
+
+
+ TC=TC+profit1+profit2
+ TCC=np.vstack((TCC,TC))
+
+ i=((np.array(df['C'])[-k])-(np.array(df['O'])[-k]))/(np.array(df['O'])[-k])
+ I=I+i
+
+ b=((np.array(df['C'])[-k])-(np.array(df['C'])[-k-1]))
+ B=B+b
+
+
+ BB=np.vstack((BB,B))
+ RR=np.vstack((RR,R))
+ II=np.vstack((II,I))
+
+ maximum_markdown_final=np.vstack((maximum_markdown_final,maximum_markdown))
+ rr=np.vstack((rr,r2))
+
+ if r1!=0:
+ CR=CR*(1+r1)
+ else:
+ CR=CR
+ if r2!=0:
+ CR=CR*(1+r1)
+ else:
+ CR=CR
+
+ CRR=np.vstack((CRR,(CR-1)*10))
+ k=k-1
+
+
+
+
+
+
+RR=RR[1:]
+II=II[1:]
+tt=tt[1:]
+
+TCC=TCC[1:]
+
+
+
+#plt.plot(tt,RR)
+#plt.plot(tt,II)
+plt.plot(tt,TCC)
+BB=BB[1:]
+plt.plot(tt,BB)
+
+
+
+maximum_markdown_final=maximum_markdown_final[1:]
+#plt.plot(tt,maximum_markdown_final)
+final_maximum=min(maximum_markdown_final)
+rr=rr[1:]
+maximum_loss=min(rr)
+
+
+
+
+print("Final total capital=",TC)
+past_history = 35
+
+
+
+last_day=len(df['TIME'])
+
+
+
+
+day=500
+k=day
+#for k in range(3000):
+
+h=0
+y=0
+z=0
+u=0
+
+
+
+while k<=day and k>0:
+
+ if k>0:
+ data=np.array(features[-past_history-k:-1-k])
+ data2=np.array(features.iloc[-1-k])
+ data=np.vstack((data,data2))
+
+
+ t+=1
+
+ tt=np.vstack((tt,t))
+
+
+
+ j=[0]
+
+ for i in data:
+ i=np.array([i])
+ j=np.vstack((j,i))
+ #print(j.shape)
+
+ j=j[1:past_history+1]
+ #print(j.shape)
+ j=[j]
+
+
+
+
+
+
+ #data.reshape((240,1))
+ #print(data.shape)
+ data=tf.convert_to_tensor(j)
+ #print(data)
+
+
+
+ x=np.array(model.predict(data))
+
+
+
+ trend=x[0][0]-MV[-1-k] #预测正负
+
+ trend_real=MV[-k]-MV[-1-k] #实际正负
+
+
+ if trend>0 and trend_real>0:
+ h+=1
+ y+=1
+
+ if trend>0 and trend_real<0:
+ h=h
+ y+=1
+ if trend<0 and trend_real<0:
+ z+=1
+ u+=1
+ if trend<0 and trend_real>0:
+ z=z
+ u+=1
+
+
+
+
+
+
+ k=k-1
+
+print('accuracy of buy=',h/y)
+
+print('accuracy of sell=',z/u)
+past_history = 35
+
+
+
+last_day=len(df['TIME'])
+
+
+
+
+day=500
+k=day
+#for k in range(3000):
+
+h=0
+y=0
+z=0
+u=0
+
+H=[0]
+Z=[0]
+
+a=0
+
+while k<=day and k>0:
+
+ if k>0:
+ data=np.array(features[-past_history-k:-1-k])
+ data2=np.array(features.iloc[-1-k])
+ data=np.vstack((data,data2))
+
+
+ t+=1
+
+ tt=np.vstack((tt,t))
+
+
+
+ j=[0]
+
+ for i in data:
+ i=np.array([i])
+ j=np.vstack((j,i))
+ #print(j.shape)
+
+ j=j[1:past_history+1]
+ #print(j.shape)
+ j=[j]
+
+
+
+
+
+
+ #data.reshape((240,1))
+ #print(data.shape)
+ data=tf.convert_to_tensor(j)
+ #print(data)
+
+
+
+ x=np.array(model.predict(data))
+
+
+
+ trend=x[0][0]-MV[-1-k] #预测正负
+
+ trend_real=MV[-k]-MV[-1-k] #实际正负
+
+
+ if trend>0 and trend_real>0:
+ h+=1
+ y+=1
+
+ if trend>0 and trend_real<0:
+ h=h
+ y+=1
+ if trend<0 and trend_real<0:
+ z+=1
+ u+=1
+ if trend<0 and trend_real>0:
+ z=z
+ u+=1
+
+ a+=1
+
+ if a%15==0:
+ #print("counter=",a)
+
+ H=np.vstack((H,h/y))
+ h=0
+ y=0
+ Z=np.vstack((Z,z/u))
+ z=0
+ u=0
+
+
+ k=k-1
+
+H=H[1:]
+Z=Z[1:]
+
+print('success rate of buying in every 15 units',H)
+print('success rate of selling in every 15 units',Z)
+from scipy import stats
+stats.ttest_1samp(H,0.5) #t test of buying success rate
+stats.ttest_1samp(Z,0.5) #t test of selling success rate
+import numpy as np, scipy.stats as st
+st.t.interval(0.95, len(H)-1, loc=np.mean(H), scale=st.sem(H)) #95% Confidence interval of buying success rate
+import numpy as np, scipy.stats as st
+st.t.interval(0.95, len(Z)-1, loc=np.mean(Z), scale=st.sem(Z)) #95% Confidence interval of selling success rate
+
+
+
+