TensorFlow猫狗大战完整代码实现和详细注释-白红宇

TensorFlow猫狗大战完整代码实现和详细注释

阅读量：2080 次

发布时间：2019-04-29

本文共 14881 字，大约阅读时间需要 49 分钟。

运行环境：

Windows10，TensorFlow1.10（GPU版本），python3.6，编译器pycharm

文件说明

（1）data文件夹下包含test和train两个子文件夹，分别用于存放测试数据和训练数据，从官网上下载的数据直接解压到相应的文件夹下即可。

（2）venv文件夹用于存放加载anaconda环境。

（3）input_data.py负责实现读取数据，生成批次（batch）。

（4）model.py负责实现我们的神经网络模型。

（5）training.py负责实现模型的训练以及评估。

（6）log文件用来保存训练结果和参数

（7）test-1.py 从test文件中随机测试一张图片

代码：

input_data.py完整代码与注释：

import tensorflow as tfimport numpy as npimport osdef get_files(file_dir):    """    输入： 存放训练照片的文件地址    返回:  图像列表， 标签列表    """    # 建立空列表    cats = []    label_cats = []    dogs = []    label_dogs = []    # 读取标记好的图像和加入标签    for file in os.listdir(file_dir):   # file就是要读取的照片        name = file.split(sep='.')      # 因为照片的格式是cat.1.jpg/cat.2.jpg        if name[0] == 'cat':            # 所以只用读取 . 前面这个字符串            cats.append(file_dir + file)            label_cats.append(0)        # 把图像和标签加入列表        else:            dogs.append(file_dir + file)            label_dogs.append(1)    print('There are %d cats\nThere are %d dogs' % (len(cats), len(dogs)))    image_list = np.hstack((cats, dogs))  # 在水平方向平铺合成一个行向量    label_list = np.hstack((label_cats, label_dogs))    temp = np.array([image_list, label_list])  # 生成一个两行数组列表，大小是2 X 25000    temp = temp.transpose()   # 转置向量，大小变成25000 X 2    np.random.shuffle(temp)   # 乱序，打乱这25000个例子的顺序    image_list = list(temp[:, 0])  # 所有行，列=0    label_list = list(temp[:, 1])  # 所有行，列=1    label_list = [int(float(i)) for i in label_list]  # 把标签列表转化为int类型    return image_list, label_listdef get_batch(image, label, image_W, image_H, batch_size, capacity):    """    输入：    image,label ：要生成batch的图像和标签    image_W，image_H: 图像的宽度和高度    batch_size: 每个batch（小批次）有多少张图片数据    capacity: 队列的最大容量    返回：    image_batch: 4D tensor [batch_size, width, height, 3], dtype=tf.float32    label_batch: 1D tensor [batch_size], dtype=tf.int32    """    # 将列表转换成tf能够识别的格式    image = tf.cast(image, tf.string)    label = tf.cast(label, tf.int32)    # 生成队列(牵扯到线程概念，便于batch训练）    """    队列的理解：每次训练时，从队列中取一个batch送到网络进行训练，               然后又有新的图片从训练库中注入队列，这样循环往复。               队列相当于起到了训练库到网络模型间数据管道的作用，               训练数据通过队列送入网络。    """    input_queue = tf.train.slice_input_producer([image, label])    # 图像的读取需要tf.read_file()，标签则可以直接赋值    image_contents = tf.read_file(input_queue[0])    image = tf.image.decode_jpeg(image_contents, channels=3)  # 解码彩色的.jpg图像    label = input_queue[1]    # 统一图片大小    image = tf.image.resize_images(image, [image_H, image_W], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)    image = tf.cast(image, tf.float32)    image = tf.image.per_image_standardization(image)  # 标准化图片，因为前两行代码已经处理过了，所以可要可不要    # 打包batch的大小    image_batch, label_batch = tf.train.batch([image, label],                                              batch_size=batch_size,                                              num_threads=64,  # 涉及到线程，配合队列                                              capacity=capacity)    # 下面两行代码应该也多余了，放在这里确保一下格式不会出问题    image_batch = tf.cast(image_batch, tf.float32)    label_batch = tf.cast(label_batch, tf.int32)    return image_batch, label_batch

model.py完整代码和注释：

import tensorflow as tfdef cnn_inference(images, batch_size, n_classes):    """    输入    images      输入的图像    batch_size  每个批次的大小    n_classes   n分类    返回    softmax_linear 还差一个softmax    """    # 第一层的卷积层conv1，卷积核为3X3，有16个    with tf.variable_scope('conv1') as scope:        # 建立weights和biases的共享变量        # conv1, shape = [kernel size, kernel size, channels, kernel numbers]        weights = tf.get_variable('weights',                                  shape=[3, 3, 3, 16],                                  dtype=tf.float32,                                  initializer=tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32))  # stddev标准差        biases = tf.get_variable('biases',                                 shape=[16],                                 dtype=tf.float32,                                 initializer=tf.constant_initializer(0.1))        # 卷积层 strides = [1, x_movement, y_movement, 1], padding填充周围有valid和same可选择        conv = tf.nn.conv2d(images, weights, strides=[1, 1, 1, 1], padding='SAME')        pre_activation = tf.nn.bias_add(conv, biases)         # 加入偏差        conv1 = tf.nn.relu(pre_activation, name=scope.name)  # 加上激活函数非线性化处理，且是在conv1的命名空间    # 第一层的池化层pool1和规范化norm1(特征缩放）    with tf.variable_scope('pooling1_lrn') as scope:        pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1],strides=[1, 2, 2, 1],                               padding='SAME', name='pooling1')        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001/9.0,                          beta=0.75,name='norm1')        # ksize是池化窗口的大小=[1,height,width,1]，一般height=width=池化窗口的步长        # 池化窗口的步长一般是比卷积核多移动一位        # tf.nn.lrn是Local Response Normalization，（局部响应归一化）    # 第二层的卷积层cov2，这里的命名空间和第一层不一样，所以可以和第一层取同名    with tf.variable_scope('conv2') as scope:        weights = tf.get_variable('weights',                                  shape=[3, 3, 16, 16],  # 这里只有第三位数字16需要等于上一层的tensor维度                                  dtype=tf.float32,                                  initializer=tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32))        biases = tf.get_variable('biases',                                 shape=[16],                                 dtype=tf.float32,                                 initializer=tf.constant_initializer(0.1))        conv = tf.nn.conv2d(norm1, weights, strides=[1, 1, 1, 1],padding='SAME')        pre_activation = tf.nn.bias_add(conv, biases)        conv2 = tf.nn.relu(pre_activation, name='conv2')    # 第二层的池化层pool2和规范化norm2    with tf.variable_scope('pooling2_lrn') as scope:        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001/9.0,                          beta=0.75,name='norm2')        pool2 = tf.nn.max_pool(norm2, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1],                               padding='SAME',name='pooling2')        # 这里选择了先规范化再池化    # 第三层为全连接层local3    with tf.variable_scope('local3') as scope:        # flatten-把卷积过的多维tensor拉平成二维张量（矩阵）        reshape = tf.reshape(pool2, shape=[batch_size, -1])  # batch_size表明了有多少个样本        dim = reshape.get_shape()[1].value  # 知道-1(代表任意)这里具体是多少个        weights = tf.get_variable('weights',                                  shape=[dim, 256],  # 连接256个神经元                                  dtype=tf.float32,                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))        biases = tf.get_variable('biases',                                 shape=[256],                                 dtype=tf.float32,                                 initializer=tf.constant_initializer(0.1))        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)  # 矩阵相乘加上bias    # 第四层为全连接层local4    with tf.variable_scope('local4') as scope:        weights = tf.get_variable('weights',                                  shape=[256, 512], # 再连接512个神经元                                  dtype=tf.float32,                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))        biases = tf.get_variable('biases',                                 shape=[512],                                 dtype=tf.float32,                                 initializer=tf.constant_initializer(0.1))        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name='local4')    # 第五层为输出层softmax_linear    with tf.variable_scope('softmax_linear') as scope:        weights = tf.get_variable('weights',                                  shape=[512, n_classes],                                  dtype=tf.float32,                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))        biases = tf.get_variable('biases',                                 shape=[n_classes],                                 dtype=tf.float32,                                 initializer=tf.constant_initializer(0.1))        softmax_linear = tf.add(tf.matmul(local4, weights), biases, name='softmax_linear')        # 这里只是命名为softmax_linear，真正的softmax函数放在下面的losses函数里面和交叉熵结合在一起了，这样可以提高运算速度。        # softmax_linear的行数=local4的行数，列数=weights的列数=bias的行数=需要分类的个数        # 经过softmax函数用于分类过程中，它将多个神经元的输出，映射到（0,1）区间内，可以看成概率来理解    return softmax_lineardef losses(logits, labels):    """    输入    logits: 经过cnn_inference处理过的tensor    labels: 对应的标签    返回    loss： 损失函数（交叉熵）    """    with tf.variable_scope('loss') as scope:        # 下面把交叉熵和softmax合到一起写是为了通过spares提高计算速度        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels, name='loss_per_eg')        loss = tf.reduce_mean(cross_entropy, name='loss')  # 求所有样本的平均loss    return lossdef training(loss, learning_rate):    """    输入    loss: 损失函数（交叉熵）    learning_rate： 学习率    返回    train_op: 训练的最优值    """    with tf.name_scope('optimizer'):        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)        # global_step不是共享变量，初始值为0，设定trainable=False 可以防止该变量被数据流图的 GraphKeys.TRAINABLE_VARIABLES 收集,        # 这样我们就不会在训练的时候尝试更新它的值。        global_step = tf.Variable(0, name='global_step', trainable=False)        train_op = optimizer.minimize(loss, global_step= global_step)    return train_opdef evaluation(logits, labels):    """     输入    logits: 经过cnn_inference处理过的tensor    labels:    返回    accuracy：正确率    """    with tf.variable_scope('accuracy') as scope:        prediction = tf.nn.softmax(logits)  # 这个logits有n_classes列        # prediction每行的最大元素（1）的索引和label的值相同则为1 否则为0。        correct = tf.nn.in_top_k(prediction, labels, 1)        # correct = tf.nn.in_top_k(logits, labels, 1)   也可以不需要prediction过渡，因为最大值的索引没变，这里这样写是为了更好理解        correct = tf.cast(correct, tf.float16)  # 记得要转换格式        accuracy = tf.reduce_mean(correct)    return accuracy

training.py完整代码与注释

import osimport numpy as npimport tensorflow as tfimport matplotlib.pyplot as pltimport input_dataimport modelN_CLASSES = 2  # 猫和狗IMG_W = 208  # resize图像，太大的话训练时间久IMG_H = 208BATCH_SIZE = 16CAPACITY = 2000MAX_STEP = 10000  # 一般5K~10klearning_rate = 0.0001  # 一般小于0.0001train_dir = 'D:/python/deep-learning/CatVsDog/Project/data/train/'logs_train_dir = 'D:/python/deep-learning/CatVsDog/Project/log/'  # 记录训练过程与保存模型train, train_label = input_data.get_files(train_dir)train_batch, train_label_batch = input_data.get_batch(train,                                                      train_label,                                                      IMG_W,                                                      IMG_H,                                                      BATCH_SIZE,                                                      CAPACITY)train_logits = model.cnn_inference(train_batch, BATCH_SIZE, N_CLASSES)train_loss = model.losses(train_logits, train_label_batch)train_op = model.training(train_loss, learning_rate)train__acc = model.evaluation(train_logits, train_label_batch)summary_op = tf.summary.merge_all()  # 这个是log汇总记录# 可视化为了画折线图step_list = list(range(100))  # 因为后来的cnn_list加了200个cnn_list1 = []cnn_list2 = []fig = plt.figure()  # 建立可视化图像框ax = fig.add_subplot(1, 1, 1)  # 子图总行数、列数，位置ax.yaxis.grid(True)ax.set_title('cnn_accuracy ', fontsize=14, y=1.02)ax.set_xlabel('step')ax.set_ylabel('accuracy')bx = fig.add_subplot(1, 2, 2)bx.yaxis.grid(True)bx.set_title('cnn_loss ', fontsize=14, y=1.02)bx.set_xlabel('step')bx.set_ylabel('loss')# 初始化，如果存在变量则是必不可少的操作with tf.Session() as sess:    sess.run(tf.global_variables_initializer())    # 产生一个writer来写log文件    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)    # 产生一个saver来存储训练好的模型    saver = tf.train.Saver()    # 队列监控    # batch训练法用到了队列，不想用队列也可以用placeholder    coord = tf.train.Coordinator()    threads = tf.train.start_queue_runners(sess=sess, coord=coord)    try:        # 执行MAX_STEP步的训练，一步一个batch        for step in np.arange(MAX_STEP):            if coord.should_stop():                break            # 启动以下操作节点，这里不能用train_op，因为它在第二次迭代是None，会导致session出错，改为_            _op, tra_loss, tra_acc = sess.run([train_op, train_loss, train__acc])            # 每隔50步打印一次当前的loss以及acc，同时记录log，写入writer            if step % 50 == 0:                print('Step %d, train loss = %.2f, train accuracy = %.2f%%' % (step, tra_loss, tra_acc * 100.0))                summary_str = sess.run(summary_op)                train_writer.add_summary(summary_str, step)            # 每隔100步画个图            if step % 100 ==0:                cnn_list1.append(tra_acc)                cnn_list2.append(tra_loss)            # 每隔5000步，保存一次训练好的模型            if step % 5000 == 0 or (step + 1) == MAX_STEP:                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')                saver.save(sess, checkpoint_path, global_step=step)        ax.plot(step_list, cnn_list1)        bx.plot(step_list, cnn_list2)        plt.show()    except tf.errors.OutOfRangeError:        print('Done training -- epoch limit reached')    finally:        coord.request_stop()

test-1.py完整代码与注释

from PIL import Imageimport matplotlib.pyplot as pltimport input_dataimport modelimport osimport numpy as npimport tensorflow as tfdef get_one_image(train):    '''Randomly pick one image from training data    Return: ndarray    '''    n = len(train)    ind = np.random.randint(0, n)    img_dir = train[ind]    image = Image.open(img_dir)    plt.imshow(image)    image = image.resize([208, 208])    image = np.array(image)    return imagedef evaluate_one_image():    train_dir = 'D:/python/deep-learning/CatVsDog/Project/data/test/'    train, train_label = input_data.get_files(train_dir)    image_array = get_one_image(train)    with tf.Graph().as_default():        BATCH_SIZE = 1        N_CLASSES = 2        image = tf.cast(image_array, tf.float32)        image = tf.image.per_image_standardization(image)        image = tf.reshape(image, [1, 208, 208, 3])        logit = model.cnn_inference(image, BATCH_SIZE, N_CLASSES)        logit = tf.nn.softmax(logit)        x = tf.placeholder(tf.float32, shape=[208, 208, 3])        # you need to change the directories to yours.        logs_train_dir = 'D:/python/deep-learning/CatVsDog/Project/log/'        saver = tf.train.Saver()        with tf.Session() as sess:            print("Reading checkpoints...")            ckpt = tf.train.get_checkpoint_state(logs_train_dir)            if ckpt and ckpt.model_checkpoint_path:                global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]                saver.restore(sess, ckpt.model_checkpoint_path)                print('Loading success, global_step is %s' % global_step)            else:                print('No checkpoint file found')            prediction = sess.run(logit, feed_dict={x: image_array})            max_index = np.argmax(prediction)            if max_index == 0:                print('This is a cat with possibility %.6f' % prediction[:, 0])            else:                print('This is a dog with possibility %.6f' % prediction[:, 1])    plt.imshow(image_array)    plt.show()evaluate_one_image()

转载地址：http://qhkqf.baihongyu.com/

你可能感兴趣的文章

Oracle PL/SQL语言初级教程之操作和控制语言

查看>>

Oracle PL/SQL语言初级教程之过程和函数

查看>>

Oracle PL/SQL语言初级教程之表和视图

查看>>

Oracle PL/SQL语言初级教程之完整性约束

查看>>

PL/SQL学习笔记

查看>>