Friday, 14 July 2017

Making predictions from images with models trained with the TensorFlow LeNet tutorial

Suppose we have trained a model with the TensorFlow LeNet tutorial, as outlined in this post. The following code lets you read an image from disk and make predictions with the trained LeNet model:
import tensorflow as tf
from tensorflow.contrib.layers import flatten
import cv2
import numpy as np

def LeNet(x, num_classes=147, mu=0, sigma=0.1):
    """Build the LeNet-style inference graph and return the class logits.

    Two 5x5 VALID convolutions (each followed by ReLU and 2x2 max pooling)
    feed three fully connected layers.

    NOTE: the variables are created without explicit names, in the order
    conv1, conv2, fc1, fc2, fc3 (weights before biases). TensorFlow names
    unnamed variables by creation order, and a default ``tf.train.Saver``
    matches checkpoint entries by name, so do not reorder the variable
    creation below if existing checkpoints must remain loadable.

    Args:
        x: Batch of input images. The shape comments below assume
            32x32 single-channel inputs, i.e. ``(batch, 32, 32, 1)``.
        num_classes: Number of output logits. Defaults to 147, the value
            that was previously hard-coded; it must match the checkpoint
            being restored.
        mu: Mean of the truncated-normal weight initializer.
        sigma: Standard deviation of the truncated-normal weight initializer.

    Returns:
        The un-normalized logits tensor with ``num_classes`` columns.
    """
    # Layer 1: Convolutional. Input = 32x32x1. Output = 28x28x6.
    conv1_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 1, 6), mean=mu, stddev=sigma))
    conv1_b = tf.Variable(tf.zeros(6))
    conv1 = tf.nn.conv2d(x, conv1_W, strides=[1, 1, 1, 1], padding='VALID') + conv1_b

    # Activation.
    conv1 = tf.nn.relu(conv1)

    # Pooling. Input = 28x28x6. Output = 14x14x6.
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

    # Layer 2: Convolutional. Input = 14x14x6. Output = 10x10x16.
    conv2_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 6, 16), mean=mu, stddev=sigma))
    conv2_b = tf.Variable(tf.zeros(16))
    conv2 = tf.nn.conv2d(conv1, conv2_W, strides=[1, 1, 1, 1], padding='VALID') + conv2_b

    # Activation.
    conv2 = tf.nn.relu(conv2)

    # Pooling. Input = 10x10x16. Output = 5x5x16.
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

    # Flatten. Input = 5x5x16. Output = 400.
    fc0 = flatten(conv2)

    # Layer 3: Fully Connected. Input = 400. Output = 200.
    fc1_W = tf.Variable(tf.truncated_normal(shape=(400, 200), mean=mu, stddev=sigma))
    fc1_b = tf.Variable(tf.zeros(200))
    fc1 = tf.matmul(fc0, fc1_W) + fc1_b

    # Activation.
    fc1 = tf.nn.relu(fc1)

    # Layer 4: Fully Connected. Input = 200. Output = 200.
    fc2_W = tf.Variable(tf.truncated_normal(shape=(200, 200), mean=mu, stddev=sigma))
    fc2_b = tf.Variable(tf.zeros(200))
    fc2 = tf.matmul(fc1, fc2_W) + fc2_b

    # Activation.
    fc2 = tf.nn.relu(fc2)

    # Layer 5: Fully Connected. Input = 200. Output = num_classes.
    fc3_W = tf.Variable(tf.truncated_normal(shape=(200, num_classes), mean=mu, stddev=sigma))
    fc3_b = tf.Variable(tf.zeros(num_classes))
    logits = tf.matmul(fc2, fc3_W) + fc3_b

    return logits

# Build the inference graph.
# x is a placeholder for a batch of 32x32 single-channel input images.
x = tf.placeholder(tf.float32, (None, 32, 32, 1))
logits = LeNet(x)

# Created after LeNet(x) so that the model variables already exist.
saver = tf.train.Saver()

# Load the latest checkpoint and make a prediction.

with tf.Session() as sess:

    # Read one input image from disk. cv2.imread does not raise on a missing
    # or unreadable file; it returns None, so fail early with a clear message.
    image_path = 'test3.jpg'
    im = cv2.imread(image_path)
    if im is None:
        raise IOError('Could not read image file: %s' % image_path)

    # cv2.imread yields channels in BGR order, so convert with BGR2GRAY
    # (RGB2GRAY would swap the red and blue luminance weights).
    im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    im = cv2.resize(im, (32, 32), interpolation=cv2.INTER_NEAREST)
    # In the decoded image white is 255 and black is 0; invert it so that
    # white becomes 0 and black becomes 255.
    im = 255 - im
    # Scale pixel values to the [0, 1] range as float32.
    im = np.divide(im.astype(np.float32), 255)

    # Add the channel and batch dimensions: (32, 32) -> (1, 32, 32, 1).
    im = np.expand_dims(im, axis=-1)
    im = np.expand_dims(im, axis=0)

    # Restore the trained weights. latest_checkpoint returns None when no
    # checkpoint is found, which would otherwise fail obscurely in restore().
    checkpoint_path = tf.train.latest_checkpoint('.')
    if checkpoint_path is None:
        raise IOError('No TensorFlow checkpoint found in the current directory')
    saver.restore(sess, checkpoint_path)

    prediction = sess.run(logits, feed_dict={x: im})

    # Class indices sorted from highest to lowest logit.
    sorted_index = np.argsort(-prediction)
    # Parenthesized form prints identically on Python 2 and is valid on Python 3.
    print(sorted_index)