"""
Captcha recognition model.

Created by Ian on 2018-04-13 16:57:10.
"""
import math
import os

import h5py
import matplotlib.pyplot as plt
import numpy as np
import scipy
import tensorflow as tf
from PIL import Image
from scipy import ndimage
from tensorflow.python import debug as tf_debug
from tensorflow.python import debug as tfdbg
from tensorflow.python.framework import ops

from cnn_utils import *
from save_image_into_h5py import TransImg
18+
class CaptchaRecognition(object):
    """CNN-based recogniser for fixed-length text captchas (TensorFlow 1.x).

    A captcha of ``text_len`` characters drawn from ``char_set`` is encoded
    as ``text_len`` concatenated one-hot groups, each ``len(char_set)`` wide.
    The network outputs one logit per slot of that vector.
    """

    # Directory that receives the TensorBoard summaries.
    SUMMARY_DIR = r'D:\code\python\DeepLearning\tmp'
    # Directory and file prefix used for checkpoints.
    CHECKPOINT_DIR = './model'
    CHECKPOINT_PATH = './model/model.ckpt'

    def __init__(self, text_len, char_set):
        """Store the captcha geometry; datasets are loaded separately.

        Args:
            text_len: number of characters in one captcha.
            char_set: indexable sequence of the characters that may appear.
        """
        super(CaptchaRecognition, self).__init__()
        self.train_path = None
        self.test_path = None
        self.X_train = None
        self.Y_train = None
        self.X_test = None
        self.Y_test = None
        self.text_len = text_len
        self.char_set = char_set
        self.char_set_len = len(char_set)

    def _char2pos(self, c):
        """Return the index of ``c`` in ``char_set``; raise ValueError if absent."""
        for pos, candidate in enumerate(self.char_set):
            if c == candidate:
                return pos
        raise ValueError('No Map')

    def text2vec(self, text):
        """Encode ``text`` as a flat vector of concatenated one-hot groups.

        Args:
            text: iterable of characters, at most ``text_len`` long.

        Returns:
            1-D float array of length ``text_len * char_set_len`` in which
            slot ``i * char_set_len + index_of(text[i])`` is 1.

        Raises:
            ValueError: if a character is not in ``char_set``.
        """
        vector = np.zeros(self.text_len * self.char_set_len)
        for i, c in enumerate(text):
            vector[i * self.char_set_len + self._char2pos(c)] = 1
        return vector

    def vec2text(self, vec):
        """Decode a vector produced by :meth:`text2vec` back into a string.

        Args:
            vec: 1-D numpy array; every non-zero slot marks one character.

        Returns:
            The characters of the non-zero slots, in slot order.
        """
        # A flat position encodes (character slot, character index); only the
        # remainder modulo the alphabet size selects the character.  Indexing
        # char_set with the raw position is wrong past the first character.
        return "".join(
            self.char_set[pos % self.char_set_len] for pos in vec.nonzero()[0]
        )

    def _read_split(self, path, images_key, labels_key):
        """Read one HDF5 split and return ``(scaled_images, label_vectors)``."""
        # The context manager closes the file even if a key is missing.
        with h5py.File(path, 'r') as dataset:
            images = np.array(dataset[images_key][:])
            labels = np.array(dataset[labels_key][:])

        images = images / 255.  # scale pixel values by 1/255
        labels = labels.T
        # NOTE(review): each entry of ``labels`` is assumed to iterate as
        # characters comparable to the entries of char_set -- confirm against
        # the script that wrote the HDF5 files.
        targets = np.zeros((len(labels), self.text_len * self.char_set_len))
        for row, text in enumerate(labels):
            targets[row] = self.text2vec(text)
        return images, targets

    def load_datasets(self, train_path, test_path):
        """Load the training and test sets and preprocess them.

        Reads ``train_set`` / ``train_setlabels`` from ``train_path`` and
        ``test_set`` / ``test_setlabels`` from ``test_path``, divides the
        images by 255 and converts each label with :meth:`text2vec`.  The
        results are stored on ``X_train``, ``Y_train``, ``X_test``, ``Y_test``.

        Args:
            train_path: path of the HDF5 file holding the training split.
            test_path: path of the HDF5 file holding the test split.
        """
        self.train_path = train_path
        self.test_path = test_path

        X_train, Y_train = self._read_split(train_path, "train_set", "train_setlabels")
        X_test, Y_test = self._read_split(test_path, "test_set", "test_setlabels")
        self.X_train, self.Y_train = X_train, Y_train
        self.X_test, self.Y_test = X_test, Y_test

        print("X_train shape: " + str(self.X_train.shape))
        print("Y_train shape: " + str(self.Y_train.shape))
        print("X_test shape: " + str(self.X_test.shape))
        print("Y_test shape: " + str(self.Y_test.shape))

    def create_placeholders(self, n_H0, n_W0, n_C0, n_y):
        """Create the float32 placeholders that feed images and labels.

        Args:
            n_H0: image height.
            n_W0: image width.
            n_C0: number of image channels.
            n_y: width of one label vector.

        Returns:
            ``(X, Y)`` with shapes ``(None, n_H0, n_W0, n_C0)`` and
            ``(None, n_y)``.
        """
        X = tf.placeholder(tf.float32, name='X', shape=(None, n_H0, n_W0, n_C0))
        Y = tf.placeholder(tf.float32, name='Y', shape=(None, n_y))
        return X, Y

    def init_parameters(self):
        """Create the weights and biases of the three convolution layers.

        Returns:
            dict with keys ``W1``, ``b1``, ``W2``, ``b2``, ``W3``, ``b3``.
            Weights use Xavier initialisation (seed 1); biases start at zero.
        """
        kernel_shapes = (
            ("1", [11, 11, 3, 48]),
            ("2", [5, 5, 48, 96]),
            ("3", [3, 3, 96, 128]),
        )
        parameters = {}
        for suffix, shape in kernel_shapes:
            parameters["W" + suffix] = tf.get_variable(
                "W" + suffix, shape,
                initializer=tf.contrib.layers.xavier_initializer(seed=1))
            parameters["b" + suffix] = tf.get_variable(
                "b" + suffix, [shape[-1]],
                initializer=tf.zeros_initializer())
        return parameters

    @staticmethod
    def _flat_width(flat, fallback):
        """Return the static feature width of a flattened tensor.

        ``fallback`` is used only when the width is not statically known.
        """
        width = flat.get_shape().as_list()[1]
        return fallback if width is None else width

    def forward_propagation(self, X, parameters):
        """Build the three-convolution network and return the output logits.

        Layers: 11x11 conv (stride 4, VALID) + 3x3 max-pool (stride 1);
        5x5 conv (SAME) + 3x3 max-pool (stride 2); 3x3 conv (SAME) + 3x3
        max-pool (stride 2); flatten; one linear output layer.

        Args:
            X: image placeholder.
            parameters: dict returned by :meth:`init_parameters`.

        Returns:
            Logit tensor of shape ``(None, text_len * char_set_len)``.
        """
        W1, b1 = parameters['W1'], parameters['b1']
        W2, b2 = parameters['W2'], parameters['b2']
        W3, b3 = parameters['W3'], parameters['b3']

        with tf.name_scope("conv"):
            conv1 = tf.nn.conv2d(X, W1, strides=[1, 4, 4, 1], padding='VALID')
            conv1 = tf.nn.relu(tf.nn.bias_add(conv1, b1))
            conv1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1], padding='SAME')

            conv2 = tf.nn.conv2d(conv1, W2, strides=[1, 1, 1, 1], padding='SAME')
            conv2 = tf.nn.relu(tf.nn.bias_add(conv2, b2))
            conv2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

            conv3 = tf.nn.conv2d(conv2, W3, strides=[1, 1, 1, 1], padding='SAME')
            conv3 = tf.nn.relu(tf.nn.bias_add(conv3, b3))
            conv3 = tf.nn.max_pool(conv3, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
            flat = tf.contrib.layers.flatten(conv3)

        with tf.name_scope("output"):
            # Size the output layer from the real flattened width so the
            # network follows the input image size; the former hard-coded
            # value is kept only as a fallback for unknown static shapes.
            flat_dim = self._flat_width(flat, 3 * 3 * 128 * self.text_len)
            n_out = self.text_len * self.char_set_len
            # Explicit names keep the checkpoint keys of the output layer
            # stable between the training and the prediction graph.
            w_out = tf.Variable(0.01 * tf.random_normal([flat_dim, n_out]), name="w_out")
            b_out = tf.Variable(0.1 * tf.random_normal([n_out]), name="b_out")
            out = tf.add(tf.matmul(flat, w_out), b_out)

        tf.summary.histogram("W1", W1)
        tf.summary.histogram("b1", b1)
        tf.summary.histogram("W2", W2)
        tf.summary.histogram("b2", b2)
        tf.summary.histogram("W3", W3)
        tf.summary.histogram("b3", b3)
        tf.summary.histogram("w_out", w_out)
        tf.summary.histogram("b_out", b_out)
        return out

    def crack_captcha_cnn(self, X, w_alpha=0.01, b_alpha=0.1):
        """Alternative network: three 3x3 conv layers and one dense layer.

        Each convolution (stride 1, SAME) is followed by ReLU and a 2x2
        max-pool with stride 2; a 1024-unit ReLU dense layer feeds the
        linear output layer.

        Args:
            X: image placeholder with 3 channels.
            w_alpha: scale applied to the random-normal weight initialiser.
            b_alpha: scale applied to the random-normal bias initialiser.

        Returns:
            Logit tensor of shape ``(None, text_len * char_set_len)``.
        """
        def conv_relu_pool(inputs, in_channels, out_channels):
            # Weights are created before biases so variable order is stable.
            kernel = tf.Variable(w_alpha * tf.random_normal([3, 3, in_channels, out_channels]))
            bias = tf.Variable(b_alpha * tf.random_normal([out_channels]))
            conv = tf.nn.conv2d(inputs, kernel, strides=[1, 1, 1, 1], padding='SAME')
            activated = tf.nn.relu(tf.nn.bias_add(conv, bias))
            return tf.nn.max_pool(activated, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        with tf.name_scope("conv"):
            conv1 = conv_relu_pool(X, 3, 32)
            conv2 = conv_relu_pool(conv1, 32, 64)
            conv3 = conv_relu_pool(conv2, 64, 64)

        with tf.name_scope("Fully_connected"):
            dense = tf.contrib.layers.flatten(conv3)
            # Derived from the tensor; 8 * 18 * 64 is only the fallback.
            flat_dim = self._flat_width(dense, 8 * 18 * 64)
            w_d = tf.Variable(w_alpha * tf.random_normal([flat_dim, 1024]))
            b_d = tf.Variable(b_alpha * tf.random_normal([1024]))
            dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))

        with tf.name_scope("output"):
            n_out = self.text_len * self.char_set_len
            w_out = tf.Variable(w_alpha * tf.random_normal([1024, n_out]))
            b_out = tf.Variable(b_alpha * tf.random_normal([n_out]))
            out = tf.add(tf.matmul(dense, w_out), b_out)
            print(out)
            tf.summary.histogram("w_out", w_out)
            tf.summary.histogram("b_out", b_out)

        return out

    def compute_cost(self, Y_hat, Y):
        """Return the mean sigmoid cross-entropy between logits and labels.

        Args:
            Y_hat: logit tensor produced by the network.
            Y: label tensor of the same shape.

        Returns:
            Scalar cost tensor; it is also recorded as the ``cost`` summary.
        """
        with tf.name_scope("cost"):
            cost = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=Y_hat, labels=Y))
            tf.summary.scalar("cost", cost)
        return cost

    def _char_accuracy(self, Y_hat, Y):
        """Return the fraction of character slots predicted correctly.

        Both tensors are reshaped to ``(batch, text_len, char_set_len)`` and
        compared through the arg-max of every character group.
        """
        grouped = [-1, self.text_len, self.char_set_len]
        predicted = tf.argmax(tf.reshape(Y_hat, grouped), axis=2)
        expected = tf.argmax(tf.reshape(Y, grouped), axis=2)
        # reduce_mean needs a numeric dtype, so cast the matches to float32.
        return tf.reduce_mean(tf.cast(tf.equal(predicted, expected), tf.float32))

    def tensorflow_model(self, learning_rate=0.009, num_epochs=4, minibatch_size=128, print_cost=True):
        """Build the graph, train with Adam and report accuracy.

        Requires :meth:`load_datasets` to have been called.  Summaries are
        written to ``SUMMARY_DIR`` and checkpoints to ``CHECKPOINT_PATH``
        every 50 epochs and once more after the last epoch.

        Args:
            learning_rate: Adam learning rate.
            num_epochs: number of passes over the training set.
            minibatch_size: number of samples per minibatch.
            print_cost: when true, record and print the cost of every epoch.

        Returns:
            dict mapping ``W1`` ... ``b3`` to the trained numpy arrays.
        """
        ops.reset_default_graph()
        (m, n_H0, n_W0, n_C0) = self.X_train.shape
        n_y = self.Y_train.shape[1]
        seed = 3  # fixed seed handed to random_mini_batches
        costs = []

        with tf.device('/gpu:0'):
            X, Y = self.create_placeholders(n_H0, n_W0, n_C0, n_y)
            parameters = self.init_parameters()
            Y_hat = self.forward_propagation(X, parameters)
            cost = self.compute_cost(Y_hat, Y)
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

        # Every op is created before the session starts, so the graph does
        # not grow while training runs.
        accuracy = self._char_accuracy(Y_hat, Y)
        merged_summary = tf.summary.merge_all()
        init = tf.global_variables_initializer()
        # Save every variable: a saver limited to ``parameters`` would leave
        # the output layer out of the checkpoint and make it unusable.
        saver = tf.train.Saver()
        # Make sure the checkpoint directory exists before the first save.
        os.makedirs(self.CHECKPOINT_DIR, exist_ok=True)

        # Soft placement lets ops without a GPU kernel fall back to the CPU.
        session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        with tf.Session(config=session_config) as sess:
            writer = tf.summary.FileWriter(self.SUMMARY_DIR, tf.get_default_graph())
            try:
                sess.run(init)
                # Guard against a zero divisor when m < minibatch_size.
                num_minibatches = max(m // minibatch_size, 1)
                global_step = 0
                last_saved_epoch = None

                for epoch in range(num_epochs):
                    minibatch_cost = 0
                    minibatches = random_mini_batches(self.X_train, self.Y_train, minibatch_size, seed)

                    for (minibatch_X, minibatch_Y) in minibatches:
                        _, temp_cost, summary, Y_hat_result = sess.run(
                            [optimizer, cost, merged_summary, Y_hat],
                            feed_dict={X: minibatch_X, Y: minibatch_Y})
                        # One TensorBoard point per optimisation step.
                        writer.add_summary(summary, global_step)
                        global_step += 1
                        minibatch_cost += temp_cost / num_minibatches
                        print(Y_hat_result)

                    if print_cost:
                        costs.append(minibatch_cost)
                        print("Cost after epoch %i: %f" % (epoch, minibatch_cost))
                    if epoch % 50 == 0:
                        saver.save(sess, self.CHECKPOINT_PATH, global_step=epoch)
                        last_saved_epoch = epoch

                # Persist the final weights when the last epoch did not fall
                # on the periodic save.
                if num_epochs > 0 and last_saved_epoch != num_epochs - 1:
                    saver.save(sess, self.CHECKPOINT_PATH, global_step=num_epochs - 1)
            finally:
                writer.close()

            plt.plot(np.squeeze(costs))
            plt.ylabel('cost')
            plt.xlabel('iterations (per tens)')
            plt.title("Learning rate =" + str(learning_rate))
            plt.show()

            parameters = sess.run(parameters)
            print("Parameters have been trained!")

            train_accuracy = sess.run(
                accuracy, feed_dict={X: self.X_train[:300], Y: self.Y_train[:300]})
            test_accuracy = sess.run(
                accuracy, feed_dict={X: self.X_test, Y: self.Y_test})
            print("Train Accuracy:", train_accuracy)
            print("Test Accuracy:", test_accuracy)
            return parameters

    def predict(self):
        """Restore the latest checkpoint and run the network on the test set.

        Requires :meth:`load_datasets` to have been called and a checkpoint
        written by :meth:`tensorflow_model` in ``CHECKPOINT_DIR``.

        Returns:
            numpy array with the output logits for ``X_test``.

        Raises:
            ValueError: if no checkpoint is found in ``CHECKPOINT_DIR``.
        """
        n_y = self.Y_test.shape[1]
        (m, n_H0, n_W0, n_C0) = self.X_test.shape
        print(self.X_test.shape)

        # Reset first, then build: the tensors that are run must belong to
        # the same graph as the session that restores the variables.
        tf.reset_default_graph()
        X, Y = self.create_placeholders(n_H0, n_W0, n_C0, n_y)
        parameters = self.init_parameters()
        Y_hat = self.forward_propagation(X, parameters)
        saver = tf.train.Saver()

        checkpoint = tf.train.latest_checkpoint(self.CHECKPOINT_DIR)
        if checkpoint is None:
            raise ValueError("no checkpoint found in " + self.CHECKPOINT_DIR)

        with tf.Session() as sess:
            saver.restore(sess, checkpoint)
            text_list = sess.run(Y_hat, feed_dict={X: self.X_test})

        print(type(text_list))
        print(text_list)
        print(text_list.shape)
        return text_list
311+
312+
313+
314+
315+
316+
if __name__ == '__main__':
    # Dataset locations, relative to the working directory.
    train_path = "./datasets/train_vcode.h5"
    test_path = "./datasets/test_vcode.h5"
    # Alphabet of the captchas: digits followed by lower-case letters.
    char_set = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
                'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
                'u', 'v', 'w', 'x', 'y', 'z']
    c = CaptchaRecognition(4, char_set)
    c.load_datasets(train_path, test_path)
    # Uncomment to train before predicting:
    # c.tensorflow_model(learning_rate=0.001, num_epochs=50, minibatch_size=128)
    c.predict()