import tensorflow as tf
from tensorflow.contrib.layers import flatten
import cv2
import numpy as np
def LeNet(x, n_classes=147, mu=0, sigma=0.1):
    """Build the LeNet-style inference graph and return the class logits.

    Architecture (batch dimension omitted):
        32x32x1 input
        -> 5x5 conv, 6 filters  -> ReLU -> 2x2 max-pool   (14x14x6)
        -> 5x5 conv, 16 filters -> ReLU -> 2x2 max-pool   (5x5x16)
        -> flatten (400)
        -> dense 200 -> ReLU -> dense 200 -> ReLU -> dense n_classes

    Args:
        x: float tensor holding a batch of 32x32 single-channel images.
        n_classes: width of the output layer. The default (147) matches
            the original hard-coded value.
        mu: mean of the truncated normal used to initialise the weights.
        sigma: standard deviation of that truncated normal.

    Returns:
        A tensor of unnormalised logits with ``n_classes`` columns and one
        row per input image.
    """
    # NOTE: the variables below are unnamed, so TensorFlow auto-names them in
    # creation order (weights, then bias, layer by layer). A checkpoint is
    # restored by those names, so keep the creation order unchanged.

    def _weights(shape):
        # Weights are drawn from a truncated normal distribution.
        return tf.Variable(tf.truncated_normal(shape=shape, mean=mu, stddev=sigma))

    unit_strides = [1, 1, 1, 1]
    pool_window = [1, 2, 2, 1]

    # Layer 1: Convolutional. Input = 32x32x1. Output = 28x28x6.
    conv1_W = _weights((5, 5, 1, 6))
    conv1_b = tf.Variable(tf.zeros(6))
    conv1 = tf.nn.conv2d(x, conv1_W, strides=unit_strides, padding='VALID') + conv1_b
    conv1 = tf.nn.relu(conv1)
    # Pooling. Input = 28x28x6. Output = 14x14x6.
    conv1 = tf.nn.max_pool(conv1, ksize=pool_window, strides=pool_window, padding='VALID')

    # Layer 2: Convolutional. Input = 14x14x6. Output = 10x10x16.
    conv2_W = _weights((5, 5, 6, 16))
    conv2_b = tf.Variable(tf.zeros(16))
    conv2 = tf.nn.conv2d(conv1, conv2_W, strides=unit_strides, padding='VALID') + conv2_b
    conv2 = tf.nn.relu(conv2)
    # Pooling. Input = 10x10x16. Output = 5x5x16.
    conv2 = tf.nn.max_pool(conv2, ksize=pool_window, strides=pool_window, padding='VALID')

    # Flatten. Input = 5x5x16. Output = 400.
    fc0 = flatten(conv2)

    # Layer 3: Fully Connected. Input = 400. Output = 200.
    fc1_W = _weights((400, 200))
    fc1_b = tf.Variable(tf.zeros(200))
    fc1 = tf.nn.relu(tf.matmul(fc0, fc1_W) + fc1_b)

    # Layer 4: Fully Connected. Input = 200. Output = 200.
    fc2_W = _weights((200, 200))
    fc2_b = tf.Variable(tf.zeros(200))
    fc2 = tf.nn.relu(tf.matmul(fc1, fc2_W) + fc2_b)

    # Layer 5: Fully Connected. Input = 200. Output = n_classes.
    # No activation here: the function returns raw logits.
    fc3_W = _weights((200, n_classes))
    fc3_b = tf.Variable(tf.zeros(n_classes))
    logits = tf.matmul(fc2, fc3_W) + fc3_b
    return logits
# Build the inference graph.
# x is a placeholder for a batch of 32x32 single-channel input images.
x = tf.placeholder(tf.float32, (None, 32, 32, 1))
logits = LeNet(x)
saver = tf.train.Saver()


def _load_image(path):
    """Read the image at ``path`` and return it as a (1, 32, 32, 1) float32 array."""
    im = cv2.imread(path)
    if im is None:
        # cv2.imread signals a missing/unreadable file by returning None
        # instead of raising, so fail here with a clear message.
        raise IOError("could not read image: %s" % path)
    # cv2.imread decodes colour images in BGR channel order.
    # NOTE(review): if the training pipeline used a different grayscale
    # conversion, keep the two consistent.
    im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    im = cv2.resize(im, (32, 32), interpolation=cv2.INTER_NEAREST)
    # In the file white is 255 and black is 0; invert so that white is 0
    # and black is 255.
    im = 255 - im
    # Scale pixel values to [0, 1].
    im = np.divide(im.astype(np.float32), 255)
    # Add the channel axis (last) and the batch axis (first).
    im = np.expand_dims(im, axis=-1)
    im = np.expand_dims(im, axis=0)
    return im


# Read one input image from disk.
im = _load_image('test3.jpg')

# Load the latest checkpoint from the current directory and make a prediction.
with tf.Session() as sess:
    checkpoint = tf.train.latest_checkpoint('.')
    if checkpoint is None:
        raise IOError("no TensorFlow checkpoint found in the current directory")
    saver.restore(sess, checkpoint)
    prediction = sess.run(logits, feed_dict={x: im})

# Class indices ordered from highest to lowest logit.
sorted_index = np.argsort(-prediction)
print(sorted_index)
Friday, 14 July 2017
Making predictions from images with models trained with the TensorFlow LeNet tutorial
Suppose we have trained a model with the TensorFlow LeNet tutorial, as outlined in this post. The following code lets you read an image from disk and make predictions with the trained LeNet model:
Subscribe to:
Comments (Atom)