本文共 14881 字,大约阅读时间需要 49 分钟。
运行环境:
Windows 10,TensorFlow 1.10(GPU 版本),Python 3.6,开发环境(IDE)为 PyCharm
文件说明
(1)data文件夹下包含test和train两个子文件夹,分别用于存放测试数据和训练数据,从官网上下载的数据直接解压到相应的文件夹下即可。 (2)venv文件夹用于存放Anaconda虚拟环境。 (3)input_data.py负责读取数据并生成批次(batch)。 (4)model.py负责实现我们的神经网络模型。 (5)training.py负责实现模型的训练以及评估。 (6)log文件夹用来保存训练结果和参数。 (7)test-1.py负责从test文件夹中随机抽取一张图片进行测试。
代码:
input_data.py完整代码与注释:
import tensorflow as tf
import numpy as np
import os


def get_files(file_dir):
    """Collect the image paths under ``file_dir`` and label them, shuffled.

    A directory entry whose name starts with ``cat.`` (e.g. ``cat.1.jpg``)
    is labelled 0. Every other entry is labelled 1 (dog), so the directory
    is expected to contain only cat/dog images.

    Args:
        file_dir: directory that holds the images. A trailing path
            separator is optional.

    Returns:
        (image_list, label_list): a list of image paths and the matching
        list of int labels (0 = cat, 1 = dog), shuffled with one shared
        random permutation so that paths and labels stay aligned.
    """
    cats = []
    dogs = []
    for file_name in os.listdir(file_dir):
        # Training files are named like cat.1.jpg / dog.1.jpg, so the text
        # before the first '.' identifies the class.
        path = os.path.join(file_dir, file_name)
        if file_name.split('.')[0] == 'cat':
            cats.append(path)
        else:
            dogs.append(path)
    print('There are %d cats\nThere are %d dogs' % (len(cats), len(dogs)))

    paths = cats + dogs
    labels = [0] * len(cats) + [1] * len(dogs)

    # Shuffle indices instead of stacking paths and labels into one string
    # array: labels stay ints and never need to be parsed back from text.
    order = np.random.permutation(len(paths))
    image_list = [paths[i] for i in order]
    label_list = [labels[i] for i in order]
    return image_list, label_list


def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Build the queue-based input pipeline that yields training batches.

    Args:
        image: list of image file paths.
        label: list of integer labels aligned with ``image``.
        image_W: width every image is resized to.
        image_H: height every image is resized to.
        batch_size: number of images per batch.
        capacity: maximum number of elements held by the batching queue.

    Returns:
        image_batch: 4D tensor [batch_size, image_H, image_W, 3],
            dtype=tf.float32.
        label_batch: 1D tensor [batch_size], dtype=tf.int32.
    """
    # Convert the Python lists into tensors TensorFlow can slice.
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # The producer queue hands out one (path, label) pair at a time; queue
    # runner threads keep it filled while the training loop consumes it.
    input_queue = tf.train.slice_input_producer([image, label])

    # Images have to be read from disk and decoded; labels are used as-is.
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)
    label = input_queue[1]

    # Bring every image to the same size. resize_images takes
    # [new_height, new_width].
    image = tf.image.resize_images(
        image, [image_H, image_W],
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    image = tf.cast(image, tf.float32)
    # Resizing and casting do not normalise pixel values, so this
    # standardisation step is what actually rescales the input.
    image = tf.image.per_image_standardization(image)

    # Group single examples into batches using multiple reader threads.
    image_batch, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        num_threads=64,
        capacity=capacity)
    return image_batch, label_batch
model.py完整代码和注释:
import tensorflow as tf


def _weights_and_biases(weights_shape, stddev):
    """Create the 'weights' and 'biases' variables of the current scope.

    The bias vector has one entry per output unit, i.e. the last dimension
    of ``weights_shape``.
    """
    weights = tf.get_variable(
        'weights',
        shape=weights_shape,
        dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(stddev=stddev,
                                                    dtype=tf.float32))
    biases = tf.get_variable(
        'biases',
        shape=[weights_shape[-1]],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.1))
    return weights, biases


def cnn_inference(images, batch_size, n_classes):
    """Build the CNN: two conv/pool/LRN stages, two dense layers, logits.

    Args:
        images: 4D image tensor with 3 channels.
        batch_size: number of images in ``images``; used to flatten the
            convolutional features.
        n_classes: number of output classes.

    Returns:
        softmax_linear: logits tensor [batch_size, n_classes]. Softmax is
        NOT applied here; ``losses`` fuses it with the cross entropy.
    """
    # conv1: 16 kernels of size 3x3.
    # Kernel shape = [kernel size, kernel size, in channels, out channels].
    with tf.variable_scope('conv1') as scope:
        weights, biases = _weights_and_biases([3, 3, 3, 16], stddev=0.1)
        conv = tf.nn.conv2d(images, weights, strides=[1, 1, 1, 1],
                            padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)

    # pool1 followed by local response normalisation (LRN).
    with tf.variable_scope('pooling1_lrn') as scope:
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME', name='pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0,
                          alpha=0.001 / 9.0, beta=0.75, name='norm1')

    # conv2: lives in its own variable scope, so its variables can reuse
    # the names 'weights' / 'biases'. Only the third kernel dimension has
    # to match the channel count of the previous layer.
    with tf.variable_scope('conv2') as scope:
        weights, biases = _weights_and_biases([3, 3, 16, 16], stddev=0.1)
        conv = tf.nn.conv2d(norm1, weights, strides=[1, 1, 1, 1],
                            padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name='conv2')

    # Second stage normalises first and pools afterwards.
    with tf.variable_scope('pooling2_lrn') as scope:
        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0,
                          alpha=0.001 / 9.0, beta=0.75, name='norm2')
        # NOTE(review): stride 1 with SAME padding means this pool does not
        # downsample. Kept as is because changing it would change the shape
        # of local3/weights and invalidate existing checkpoints - confirm
        # whether stride 2 was intended.
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 2, 2, 1],
                               strides=[1, 1, 1, 1],
                               padding='SAME', name='pooling2')

    # local3: first fully connected layer (256 units).
    with tf.variable_scope('local3') as scope:
        # Flatten the feature maps into one row per example.
        reshape = tf.reshape(pool2, shape=[batch_size, -1])
        dim = reshape.get_shape()[1].value  # size inferred for the -1 axis
        weights, biases = _weights_and_biases([dim, 256], stddev=0.005)
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases,
                            name=scope.name)

    # local4: second fully connected layer (512 units).
    with tf.variable_scope('local4') as scope:
        weights, biases = _weights_and_biases([256, 512], stddev=0.005)
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases,
                            name='local4')

    # Output layer: one linear unit per class.
    with tf.variable_scope('softmax_linear') as scope:
        weights, biases = _weights_and_biases([512, n_classes],
                                              stddev=0.005)
        softmax_linear = tf.add(tf.matmul(local4, weights), biases,
                                name='softmax_linear')
    return softmax_linear


def losses(logits, labels):
    """Compute the mean softmax cross-entropy loss.

    Args:
        logits: unscaled class scores produced by ``cnn_inference``.
        labels: integer class ids, one per example.

    Returns:
        loss: scalar tensor, the cross entropy averaged over the batch.
    """
    with tf.variable_scope('loss') as scope:
        # The sparse variant takes integer class ids directly and applies
        # the softmax internally.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name='loss_per_eg')
        loss = tf.reduce_mean(cross_entropy, name='loss')
    return loss


def training(loss, learning_rate):
    """Create the Adam training op for ``loss``.

    Args:
        loss: scalar loss tensor to minimise.
        learning_rate: learning rate passed to the Adam optimizer.

    Returns:
        train_op: op that applies one optimisation step and increments
        ``global_step``.
    """
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        # trainable=False keeps the step counter out of
        # GraphKeys.TRAINABLE_VARIABLES, so the optimizer never tries to
        # update it through gradients.
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op


def evaluation(logits, labels):
    """Compute the fraction of examples whose top-1 class is correct.

    Args:
        logits: class scores produced by ``cnn_inference``
            ([batch_size, n_classes]).
        labels: integer class ids, one per example.

    Returns:
        accuracy: scalar float32 tensor in [0, 1].
    """
    with tf.variable_scope('accuracy') as scope:
        # Softmax is monotonic, so the arg-max of the raw logits equals the
        # arg-max of the probabilities; no softmax is needed here.
        correct = tf.nn.in_top_k(logits, labels, 1)
        # Average in float32: float16 loses precision once the number of
        # examples being averaged gets large.
        correct = tf.cast(correct, tf.float32)
        accuracy = tf.reduce_mean(correct)
    return accuracy
training.py完整代码与注释:
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import input_data
import model

N_CLASSES = 2  # cat and dog
IMG_W = 208  # images are resized; larger sizes make training slower
IMG_H = 208
BATCH_SIZE = 16
CAPACITY = 2000
MAX_STEP = 10000
learning_rate = 0.0001

train_dir = 'D:/python/deep-learning/CatVsDog/Project/data/train/'
# Training logs and model checkpoints are written here.
logs_train_dir = 'D:/python/deep-learning/CatVsDog/Project/log/'

# Build the graph: input pipeline -> network -> loss / train op / accuracy.
train, train_label = input_data.get_files(train_dir)
train_batch, train_label_batch = input_data.get_batch(
    train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
train_logits = model.cnn_inference(train_batch, BATCH_SIZE, N_CLASSES)
train_loss = model.losses(train_logits, train_label_batch)
train_op = model.training(train_loss, learning_rate)
train_acc = model.evaluation(train_logits, train_label_batch)

# Merges every summary registered in the graph; None if there are none.
summary_op = tf.summary.merge_all()

# Curve data, sampled every 100 steps. The step of each sample is recorded
# so the x axis always matches the collected values, whatever MAX_STEP is
# and even if training stops early.
step_list = []
cnn_list1 = []  # accuracy samples
cnn_list2 = []  # loss samples

fig = plt.figure()
# Two side-by-side plots: accuracy on the left, loss on the right.
ax = fig.add_subplot(1, 2, 1)
ax.yaxis.grid(True)
ax.set_title('cnn_accuracy ', fontsize=14, y=1.02)
ax.set_xlabel('step')
ax.set_ylabel('accuracy')
bx = fig.add_subplot(1, 2, 2)
bx.yaxis.grid(True)
bx.set_title('cnn_loss ', fontsize=14, y=1.02)
bx.set_xlabel('step')
bx.set_ylabel('loss')

with tf.Session() as sess:
    # Variables must be initialised before anything else runs.
    sess.run(tf.global_variables_initializer())
    # Writer for the log files and saver for the model checkpoints.
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    saver = tf.train.Saver()

    # The input pipeline is queue based, so its runner threads have to be
    # started and supervised by a coordinator.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        # Run MAX_STEP training steps, one batch per step.
        for step in range(MAX_STEP):
            if coord.should_stop():
                break

            # The op result is bound to `_` so that the name train_op is
            # not overwritten with None after the first iteration.
            _, tra_loss, tra_acc = sess.run(
                [train_op, train_loss, train_acc])

            # Every 50 steps: print loss / accuracy and write the summaries.
            if step % 50 == 0:
                print('Step %d, train loss = %.2f, train accuracy = %.2f%%'
                      % (step, tra_loss, tra_acc * 100.0))
                if summary_op is not None:
                    summary_str = sess.run(summary_op)
                    train_writer.add_summary(summary_str, step)

            # Every 100 steps: record one point for the curves.
            if step % 100 == 0:
                step_list.append(step)
                cnn_list1.append(tra_acc)
                cnn_list2.append(tra_loss)

            # Every 5000 steps and at the very last step: save a checkpoint.
            if step % 5000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        # Stop the queue threads and wait for them before the session
        # closes, then flush and close the log writer.
        coord.request_stop()
        coord.join(threads)
        train_writer.close()

ax.plot(step_list, cnn_list1)
bx.plot(step_list, cnn_list2)
plt.show()
test-1.py完整代码与注释:
from PIL import Image
import matplotlib.pyplot as plt
import input_data
import model
import os
import numpy as np
import tensorflow as tf


def get_one_image(train):
    """Randomly pick one image from ``train`` and load it as an array.

    Args:
        train: non-empty list of image file paths.

    Returns:
        ndarray of shape [208, 208, 3] holding the resized RGB image.

    Raises:
        ValueError: if ``train`` is empty.
    """
    n = len(train)
    if n == 0:
        raise ValueError('no images to choose from')
    ind = np.random.randint(0, n)
    img_dir = train[ind]

    # Force three channels so greyscale / RGBA files still match the
    # [208, 208, 3] input the network is built for.
    image = Image.open(img_dir).convert('RGB')
    image = image.resize([208, 208])
    return np.array(image)


def evaluate_one_image():
    """Classify one random test image with the latest saved checkpoint.

    Prints the predicted class with its softmax score and displays the
    image. If no checkpoint is found, prints a message and returns without
    predicting.
    """
    test_dir = 'D:/python/deep-learning/CatVsDog/Project/data/test/'
    # Only the paths are needed; the labels get_files assigns are unused.
    test_images, _ = input_data.get_files(test_dir)
    image_array = get_one_image(test_images)

    with tf.Graph().as_default():
        BATCH_SIZE = 1
        N_CLASSES = 2

        # The image enters the graph through this placeholder, so the
        # feed_dict below is what the network actually evaluates.
        x = tf.placeholder(tf.float32, shape=[208, 208, 3])
        image = tf.image.per_image_standardization(x)
        image = tf.reshape(image, [1, 208, 208, 3])
        logit = model.cnn_inference(image, BATCH_SIZE, N_CLASSES)
        logit = tf.nn.softmax(logit)

        # you need to change the directories to yours.
        logs_train_dir = 'D:/python/deep-learning/CatVsDog/Project/log/'
        saver = tf.train.Saver()

        with tf.Session() as sess:
            print("Reading checkpoints...")
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                # Without restored weights the variables are uninitialised,
                # so running the network would only raise an error.
                print('No checkpoint file found')
                return

            # Checkpoint files are named like model.ckpt-<step>.
            checkpoint_name = os.path.basename(ckpt.model_checkpoint_path)
            global_step = checkpoint_name.split('-')[-1]
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Loading success, global_step is %s' % global_step)

            prediction = sess.run(logit, feed_dict={x: image_array})
            max_index = np.argmax(prediction)
            # prediction has shape [1, N_CLASSES]; index scalars explicitly
            # instead of formatting a one-element array.
            if max_index == 0:
                print('This is a cat with possibility %.6f'
                      % prediction[0, 0])
            else:
                print('This is a dog with possibility %.6f'
                      % prediction[0, 1])

    plt.imshow(image_array)
    plt.show()


if __name__ == '__main__':
    evaluate_one_image()
转载地址:http://qhkqf.baihongyu.com/