kacey@ieee.org

What do CNNs see?

Neural Network Visualizations and Grad-CAM using AlexNet and VGG-16

  • Part 1: Demonstrate visual patterns learned in specific layers of the network
  • Part 2: Implement Grad-CAM, which visualizes the attended heat map (where the CNN looks at while predicting the labels of the images)
In [1]:
# A bit of setups
import tensorflow as tf

from tensorflow.python.framework import ops
from tensorflow.python.ops import gen_nn_ops
from lib.datasets import *

from matplotlib import pyplot as plt
from matplotlib.pyplot import imshow
import matplotlib.image as mpimg
os.environ['KMP_DUPLICATE_LIB_OK']='True' #req. to run on my system
%matplotlib inline
%reload_ext autoreload
%autoreload 2

Download the Model

  • Download the vgg16 pretrained model from the ftp://mi.eng.cam.ac.uk/pub/mttt2/models/vgg16.npy:
    • !wget ftp://mi.eng.cam.ac.uk/pub/mttt2/models/vgg16.npy -P lib/tf_models/
  • Download the AlexNet pretrained model from the link BVLC_ALEXNET:
    • !wget http://www.cs.toronto.edu/~guerzhoy/tf_alexnet/bvlc_alexnet.npy -P lib/tf_models/
  • Save the two models (.npy files) to lib/tf_models/
In [2]:
import numpy as np
from lib.tf_models import vgg16
import os
import math

Visualizing the learned convolutional filters.

In [3]:
# Define the vgg network for visualizations
vgg_viz = vgg16.Vgg16()
vgg_viz.load()
vgg_viz.setup()
In [4]:
# Useful function to arrange the images to be shown as a squared grid
def viz_grid(Xs, ubound=255.0, padding=1):
    N, H, W, C = Xs.shape
    grid_size = int(math.ceil(math.sqrt(N)))
    grid_height = H * grid_size + padding * (grid_size - 1)
    grid_width = W * grid_size + padding * (grid_size - 1)
    grid = np.zeros((grid_height, grid_width, C))
    next_idx = 0
    y0, y1 = 0, H
    for y in range(grid_size):
        x0, x1 = 0, W
        for x in range(grid_size):
            if next_idx < N:
                img = Xs[next_idx]
                grid[y0:y1, x0:x1] = img
                next_idx += 1
            x0 += W + padding
            x1 += W + padding
        y0 += H + padding
        y1 += H + padding
    return grid
In [5]:
model_paths = {
    "Vgg-16": os.path.join("lib", "tf_models", "vgg16.npy"),
    "AlexNet": os.path.join("lib", "tf_models", "bvlc_alexnet.npy")
}

for net in sorted(model_paths):
    model_path = model_paths[net]
    print("Model from {}".format(model_path))
    pretrained = np.load(model_path, encoding='latin1', allow_pickle=True).item()
    print("Pretrained {} successfully loaded!".format(net))

    first_conv = "conv1_1" if net == "Vgg-16" else "conv1"
    
    # Extract the weight and bias from conv1                              
    conv1 = pretrained[first_conv]
    w=conv1[0]
    b=conv1[1]

    # Scale the kernel weights, so that they lie in [0,1]
    # In order to get the relative information across filters, you should scale 
    # across all the filters, and not individually.                
    w_01 = (w-np.min(w))/(np.max(w)-np.min(w))
    

    # Transpose the scaled kernel weights so that the number of filters comes 
    # first in the dimension as (n, H, W, C)
    rgb_w = np.transpose(w_01, (3,0,1,2))
    


    # Define a figure
    fig = plt.figure(figsize=(8,8))   
    ax1 = plt.subplot(111)
    
    # Grid the rgb_w
    grid = viz_grid(rgb_w)

    ax1.imshow(grid[...,::-1])
    ax1.set_title('{} Learned First Conv Filters'.format(net), fontsize=16)
Model from lib/tf_models/bvlc_alexnet.npy
Pretrained AlexNet successfully loaded!
<class 'list'> 2 <class 'numpy.ndarray'> (11, 11, 3, 96) (96,)
W: 11, H: 11, C: 3, N: 96
rgb_w shape:  (96, 11, 11, 3)
(11, 3)
Model from lib/tf_models/vgg16.npy
Pretrained Vgg-16 successfully loaded!
<class 'list'> 2 <class 'numpy.ndarray'> (3, 3, 3, 64) (64,)
W: 3, H: 3, C: 3, N: 64
rgb_w shape:  (64, 3, 3, 3)
(3, 3)

Load CIFAR-10 Data

In [6]:
data_train, labels_train, data_test, labels_test = CIFAR10(os.path.join('data', 'cifar-10-batches-py'))

Run the following lines of code, and whenever you encounter problems with tf graph, rerun them again

In [7]:
tf.reset_default_graph()
sess = tf.Session()

Copy layers from Problem 2

Copy and paste your implementation for all of your layers in problem 2 to below

In [8]:
#############################################################################
# Define conv2d, max_pool, flatten, fc, norm functions
#############################################################################

def conv2d(input, kernel_size, stride, num_filter):
    stride_shape = [1, stride, stride, 1]
    filter_shape = [kernel_size, kernel_size, input.get_shape()[3], num_filter]

    W = tf.get_variable('w', filter_shape, tf.float32, tf.random_normal_initializer(0.0, 0.02))
    b = tf.get_variable('b', [1, 1, 1, num_filter], initializer=tf.constant_initializer(0.0))
    return tf.nn.conv2d(input, W, stride_shape, padding='SAME') + b

def max_pool(input, kernel_size, stride):
    ksize = [1, kernel_size, kernel_size, 1]
    strides = [1, stride, stride, 1]
    return tf.nn.max_pool(input, ksize=ksize, strides=strides, padding='SAME')

def flatten(input):
    """
        - input: input tensors
        
    """
    
    return tf.layers.flatten(input)

def fc(input, num_output):
    """
        - input: input tensors
        - num_output: int, the output dimension
    """
    
    return tf.layers.dense(input, num_output)

Copy convolutional neural network from Problem 2 - Base Model

Copy and paste your implementation for the class BaseModel in problem 2 to below:

In [9]:
#############################################################################
# Define BaseModel
#############################################################################


class BaseModel(object):
    def __init__(self):
        self.num_epoch = 5
        self.batch_size = 64
        self.log_step = 50
        self._build_model()

    def _model(self):
        print('-' * 5 + '  Sample model  ' + '-' * 5)

        print('intput layer: ' + str(self.X.get_shape()))

        with tf.variable_scope('conv1'):
            self.conv1 = conv2d(self.X, 7, 1, 32)
            self.relu1 = tf.nn.relu(self.conv1)
            self.pool1 = max_pool(self.relu1, 3, 2)            
            print('conv1 layer: ' + str(self.pool1.get_shape()))

        with tf.variable_scope('conv2'):
            self.conv2 = conv2d(self.pool1, 5, 1, 64)
            self.relu2 = tf.nn.relu(self.conv2)
            self.pool2 = max_pool(self.relu2, 3, 2)            
            print('conv2 layer: ' + str(self.pool2.get_shape()))

        with tf.variable_scope('flat'):
            self.flat= flatten(self.pool2)
            print('flat layer: ' + str(self.flat.get_shape()))

        with tf.variable_scope('fc3'):
            self.fc3=fc(self.flat, 384)
            self.relu3=tf.nn.relu(self.fc3)
            print('fc3 layer: ' + str(self.relu3.get_shape()))
            
        with tf.variable_scope('fc4'):
            self.fc4=fc(self.relu3, 10)
            print('fc4 layer: ' + str(self.fc4.get_shape()))
            
        # Return the last layer
        return self.fc4

    def _input_ops(self):
        # Placeholders
        self.X = tf.placeholder(tf.float32, [None, 32, 32, 3])
        self.Y = tf.placeholder(tf.int64, [None])

    def _build_optimizer(self):
        # **Learning rate:**
        #- Set start learning rate as 5e-4 and apply exponential decay every 500 steps with a base of 0.96
        # - Use 'tf.train.exponential_decay' and 'tf.train.AdamOptimizer'
        initial_lr=.0005
        decay_every=500
        base=0.96
        global_step = tf.Variable(0, trainable=False)
        #learning rate:
        lr=tf.train.exponential_decay(initial_lr, global_step, decay_every, base, staircase=True)
        #learning step:
        self.train_op = tf.train.AdamOptimizer(lr).minimize(self.loss_op,global_step=global_step)
       
    def _loss(self, labels, logits):
        loss_tensor = tf.nn.softmax_cross_entropy_with_logits_v2(
    labels,
    logits)
        self.loss_op =tf.reduce_mean(loss_tensor)

    def _build_model(self):
        # Define input variables
        self._input_ops()

        # Convert Y to one-hot vector
        labels = tf.one_hot(self.Y, 10)

        # Build a model and get logits
        logits = self._model()

        # Compute loss
        self._loss(labels, logits)
        
        # Build optimizer
        self._build_optimizer()

        # Compute accuracy
        predict = tf.argmax(logits, 1)
        correct = tf.equal(predict, self.Y)
        self.accuracy_op = tf.reduce_mean(tf.cast(correct, tf.float32))
        
    def train(self, sess, X_train, Y_train, X_val, Y_val):
        sess.run(tf.global_variables_initializer())
        #self.is_train=True
        start_time = time.time()
        step = 0
        losses = []
        accuracies = []
        print('-' * 5 + '  Start training  ' + '-' * 5)
        cnt=0
        for epoch in range(self.num_epoch):
            print('train for epoch %d' % epoch)
            for i in range(num_training // self.batch_size):
                X_ = X_train[i * self.batch_size:(i + 1) * self.batch_size][:]
                Y_ = Y_train[i * self.batch_size:(i + 1) * self.batch_size]


                feed_dict = {self.X:X_, self.Y:Y_}
                fetches = [self.train_op, self.loss_op, self.accuracy_op]

                _, loss, accuracy = sess.run(fetches, feed_dict=feed_dict)
                losses.append(loss)
                accuracies.append(accuracy)

                if step % self.log_step == 0:
                    #print('iteration (%d): loss = %.3f, accuracy = %.3f' %(step, loss, accuracy))
                    print('iteration (%d)(%.3f s): loss = %.3f, accuracy = %.3f' %(step,(time.time() - start_time), loss, accuracy))
                step += 1

            # Print validation results
            print('validation for epoch %d' % epoch)
            val_accuracy = self.evaluate(sess, X_val, Y_val)
            print('-  epoch %d: validation accuracy = %.3f' % (epoch, val_accuracy))
            
        # Plot training curve  
        # Graph 1. X: iteration (training step), Y: training loss
        # Graph 2. X: iteration (training step), Y: training accuracy
        iterations=np.arange(step)
        
        # Plot the learning curves
        plt.subplot(2, 1, 1)
        plt.tight_layout()
        plt.title('Training loss')

        plt.plot(iterations,losses, '-o')
        plt.xlabel('Iteration')

        plt.subplot(2, 1, 2)
        plt.title('Training Accuracy')
        plt.plot(iterations,accuracies, '-o') 
        plt.xlabel('Iteration')
        #plt.gcf().set_size_inches(15, 12)
        plt.show()

    def evaluate(self, sess, X_eval, Y_eval):
        
        eval_accuracy = 0.0
        eval_iter = 0
        for i in range(X_eval.shape[0] // self.batch_size):
            X_ = X_eval[i * self.batch_size:(i + 1) * self.batch_size][:]
            Y_ = Y_eval[i * self.batch_size:(i + 1) * self.batch_size]


            feed_dict = {self.X:X_, self.Y:Y_}
            accuracy = sess.run(self.accuracy_op, feed_dict=feed_dict)
            eval_accuracy += accuracy
            eval_iter += 1
        return eval_accuracy / eval_iter

Create a unique CNN model class using previously defined layers

In [10]:
class YourModel(BaseModel):
    def __init__(self):
        super(YourModel, self).__init__()
        self.num_epoch = 15

    def _model(self):
        print('-' * 5 + '  Your model  ' + '-' * 5)
        with tf.variable_scope('conv1'):
            self.conv1 = conv2d(self.X, 3, 1, 32)
            self.relu1 = tf.nn.relu(self.conv1)
            self.batch_norm1=tf.compat.v1.keras.layers.BatchNormalization(axis=1)(self.relu1)
            print('conv1 layer: ' + str(self.batch_norm1.get_shape()))

        with tf.variable_scope('conv2'):
            self.conv2 = conv2d(self.batch_norm1, 3, 1,32)
            self.relu2 = tf.nn.relu(self.conv2)
            self.batch_norm2=tf.keras.layers.BatchNormalization(axis=1)(self.relu2)
            self.pool2 = max_pool(self.batch_norm2, 2, 2)     
            self.drop2= tf.nn.dropout(self.pool2,rate=.2)
            print('conv2 layer: ' + str(self.drop2.get_shape()))

        with tf.variable_scope('conv3'):
            self.conv3 = conv2d(self.drop2, 3, 1,64)
            self.relu3 = tf.nn.relu(self.conv3)
            self.batch_norm3=tf.keras.layers.BatchNormalization(axis=1)(self.relu3)
            self.pool3 = max_pool(self.batch_norm3, 2, 2)     
            self.drop3= tf.nn.dropout(self.pool2,rate=.3)
            print('conv3 layer: ' + str(self.drop3.get_shape()))
            
        with tf.variable_scope('conv4'):
            self.conv4 = conv2d(self.drop3, 3, 1, 128)
            self.relu4 = tf.nn.relu(self.conv4)
            self.batch_norm4=tf.keras.layers.BatchNormalization(axis=1)(self.relu4)
            print('conv4 layer: ' + str(self.batch_norm4.get_shape()))

        with tf.variable_scope('conv5'):
            self.conv5 = conv2d(self.batch_norm4, 3, 1,128)
            self.relu5 = tf.nn.relu(self.conv5)
            self.batch_norm5=tf.keras.layers.BatchNormalization(axis=1)(self.relu5)
            self.pool5 = max_pool(self.batch_norm5, 2, 2)     
            self.drop5= tf.nn.dropout(self.pool5,rate=.4)
            print('conv5 layer: ' + str(self.drop5.get_shape()))

        with tf.variable_scope('flat'):
            self.flat= flatten(self.drop5)
            print('flat layer: ' + str(self.flat.get_shape()))

        with tf.variable_scope('fc3'):
            self.fc3=fc(self.flat, 2048)
            self.relu3=tf.nn.relu(self.fc3)
            print('fc3 layer: ' + str(self.relu3.get_shape()))
            
        with tf.variable_scope('fc4'):
            self.fc4=fc(self.relu3, 10)
            print('fc4 layer: ' + str(self.fc4.get_shape()))
            
        # Return the last layer
        return self.fc4
    
    def _input_ops(self):
        # Placeholders
        self.X = tf.placeholder(tf.float32, [None, 32, 32, 3])
        self.Y = tf.placeholder(tf.int64, [None])

Visualize the learned filters on your trained CIFAR-10 network!

In [11]:
def restore_model(saved_file):
    # Restore the model using parameters dict
    variables = tf.global_variables()
    param_dict = {}
    for var in variables:
        var_name = var.name[:-2]
        print('Loading {} from checkpoint. Name: {}'.format(var.name, var_name))
        param_dict[var_name] = var
    saver = tf.train.Saver()
    saver.restore(sess, os.path.join('lib', 'tf_models', 'problem2', saved_file))
In [12]:
# TODO:
def viz_model(model_name):
    with sess.as_default():
        with tf.variable_scope("conv1", reuse=True):
            conv1 = tf.get_variable("w")
            b1    = tf.get_variable("b")
            print (conv1.shape, b1.shape)

            # Extract the weight and bias from conv1                              
            w=conv1.eval()
            b=b1.eval()

            # Scale the kernel weights to [0,1]
            w_01 = (w-np.min(w))/(np.max(w)-np.min(w))

            # Transpose the scaled kernel weights so that 
            # the number of filters comes first in the dimension (n, H, W, C)   
            rgb_w = np.transpose(w_01, (3,0,1,2)) 
            
            # Define a figure
            fig = plt.figure(figsize=(8,8))   
            ax1 = plt.subplot(111)

            grid = viz_grid(rgb_w)

            ax1.imshow(grid[...,::-1])
            ax1.set_title('{} Learned First Conv Filters'.format(model_name), fontsize=16)

Visualize Base Model [5pt]

In [13]:
# Reset TF Graph
tf.reset_default_graph()
sess = tf.Session()

# Load BaseModel
model = BaseModel()
restore_model('csci-599_sample.ckpt')
viz_model('Base Model')
#viz_model(model)
-----  Sample model  -----
intput layer: (?, 32, 32, 3)
conv1 layer: (?, 16, 16, 32)
conv2 layer: (?, 8, 8, 64)
flat layer: (?, 4096)
fc3 layer: (?, 384)
fc4 layer: (?, 10)
Loading conv1/w:0 from checkpoint. Name: conv1/w
Loading conv1/b:0 from checkpoint. Name: conv1/b
Loading conv2/w:0 from checkpoint. Name: conv2/w
Loading conv2/b:0 from checkpoint. Name: conv2/b
Loading fc3/dense/kernel:0 from checkpoint. Name: fc3/dense/kernel
Loading fc3/dense/bias:0 from checkpoint. Name: fc3/dense/bias
Loading fc4/dense/kernel:0 from checkpoint. Name: fc4/dense/kernel
Loading fc4/dense/bias:0 from checkpoint. Name: fc4/dense/bias
Loading Variable:0 from checkpoint. Name: Variable
Loading beta1_power:0 from checkpoint. Name: beta1_power
Loading beta2_power:0 from checkpoint. Name: beta2_power
Loading conv1/w/Adam:0 from checkpoint. Name: conv1/w/Adam
Loading conv1/w/Adam_1:0 from checkpoint. Name: conv1/w/Adam_1
Loading conv1/b/Adam:0 from checkpoint. Name: conv1/b/Adam
Loading conv1/b/Adam_1:0 from checkpoint. Name: conv1/b/Adam_1
Loading conv2/w/Adam:0 from checkpoint. Name: conv2/w/Adam
Loading conv2/w/Adam_1:0 from checkpoint. Name: conv2/w/Adam_1
Loading conv2/b/Adam:0 from checkpoint. Name: conv2/b/Adam
Loading conv2/b/Adam_1:0 from checkpoint. Name: conv2/b/Adam_1
Loading fc3/dense/kernel/Adam:0 from checkpoint. Name: fc3/dense/kernel/Adam
Loading fc3/dense/kernel/Adam_1:0 from checkpoint. Name: fc3/dense/kernel/Adam_1
Loading fc3/dense/bias/Adam:0 from checkpoint. Name: fc3/dense/bias/Adam
Loading fc3/dense/bias/Adam_1:0 from checkpoint. Name: fc3/dense/bias/Adam_1
Loading fc4/dense/kernel/Adam:0 from checkpoint. Name: fc4/dense/kernel/Adam
Loading fc4/dense/kernel/Adam_1:0 from checkpoint. Name: fc4/dense/kernel/Adam_1
Loading fc4/dense/bias/Adam:0 from checkpoint. Name: fc4/dense/bias/Adam
Loading fc4/dense/bias/Adam_1:0 from checkpoint. Name: fc4/dense/bias/Adam_1
(7, 7, 3, 32) (1, 1, 1, 32)

Visualize Your Model [5pt]

In [14]:
# Reset TF Graph
tf.reset_default_graph()
sess = tf.Session()

# Load BaseModel
model = YourModel()
restore_model('csci-599_mine.ckpt')
viz_model('Your Model')
-----  Your model  -----
conv1 layer: (?, 32, 32, 32)
conv2 layer: (?, 16, 16, 32)
conv3 layer: (?, 16, 16, 32)
conv4 layer: (?, 16, 16, 128)
conv5 layer: (?, 8, 8, 128)
flat layer: (?, 8192)
fc3 layer: (?, 2048)
fc4 layer: (?, 10)
Loading conv1/w:0 from checkpoint. Name: conv1/w
Loading conv1/b:0 from checkpoint. Name: conv1/b
Loading conv1/batch_normalization/gamma:0 from checkpoint. Name: conv1/batch_normalization/gamma
Loading conv1/batch_normalization/beta:0 from checkpoint. Name: conv1/batch_normalization/beta
Loading conv1/batch_normalization/moving_mean:0 from checkpoint. Name: conv1/batch_normalization/moving_mean
Loading conv1/batch_normalization/moving_variance:0 from checkpoint. Name: conv1/batch_normalization/moving_variance
Loading conv2/w:0 from checkpoint. Name: conv2/w
Loading conv2/b:0 from checkpoint. Name: conv2/b
Loading conv2/batch_normalization_1/gamma:0 from checkpoint. Name: conv2/batch_normalization_1/gamma
Loading conv2/batch_normalization_1/beta:0 from checkpoint. Name: conv2/batch_normalization_1/beta
Loading conv2/batch_normalization_1/moving_mean:0 from checkpoint. Name: conv2/batch_normalization_1/moving_mean
Loading conv2/batch_normalization_1/moving_variance:0 from checkpoint. Name: conv2/batch_normalization_1/moving_variance
Loading conv3/w:0 from checkpoint. Name: conv3/w
Loading conv3/b:0 from checkpoint. Name: conv3/b
Loading conv3/batch_normalization_2/gamma:0 from checkpoint. Name: conv3/batch_normalization_2/gamma
Loading conv3/batch_normalization_2/beta:0 from checkpoint. Name: conv3/batch_normalization_2/beta
Loading conv3/batch_normalization_2/moving_mean:0 from checkpoint. Name: conv3/batch_normalization_2/moving_mean
Loading conv3/batch_normalization_2/moving_variance:0 from checkpoint. Name: conv3/batch_normalization_2/moving_variance
Loading conv4/w:0 from checkpoint. Name: conv4/w
Loading conv4/b:0 from checkpoint. Name: conv4/b
Loading conv4/batch_normalization_3/gamma:0 from checkpoint. Name: conv4/batch_normalization_3/gamma
Loading conv4/batch_normalization_3/beta:0 from checkpoint. Name: conv4/batch_normalization_3/beta
Loading conv4/batch_normalization_3/moving_mean:0 from checkpoint. Name: conv4/batch_normalization_3/moving_mean
Loading conv4/batch_normalization_3/moving_variance:0 from checkpoint. Name: conv4/batch_normalization_3/moving_variance
Loading conv5/w:0 from checkpoint. Name: conv5/w
Loading conv5/b:0 from checkpoint. Name: conv5/b
Loading conv5/batch_normalization_4/gamma:0 from checkpoint. Name: conv5/batch_normalization_4/gamma
Loading conv5/batch_normalization_4/beta:0 from checkpoint. Name: conv5/batch_normalization_4/beta
Loading conv5/batch_normalization_4/moving_mean:0 from checkpoint. Name: conv5/batch_normalization_4/moving_mean
Loading conv5/batch_normalization_4/moving_variance:0 from checkpoint. Name: conv5/batch_normalization_4/moving_variance
Loading fc3/dense/kernel:0 from checkpoint. Name: fc3/dense/kernel
Loading fc3/dense/bias:0 from checkpoint. Name: fc3/dense/bias
Loading fc4/dense/kernel:0 from checkpoint. Name: fc4/dense/kernel
Loading fc4/dense/bias:0 from checkpoint. Name: fc4/dense/bias
Loading Variable:0 from checkpoint. Name: Variable
Loading beta1_power:0 from checkpoint. Name: beta1_power
Loading beta2_power:0 from checkpoint. Name: beta2_power
Loading conv1/w/Adam:0 from checkpoint. Name: conv1/w/Adam
Loading conv1/w/Adam_1:0 from checkpoint. Name: conv1/w/Adam_1
Loading conv1/b/Adam:0 from checkpoint. Name: conv1/b/Adam
Loading conv1/b/Adam_1:0 from checkpoint. Name: conv1/b/Adam_1
Loading conv1/batch_normalization/gamma/Adam:0 from checkpoint. Name: conv1/batch_normalization/gamma/Adam
Loading conv1/batch_normalization/gamma/Adam_1:0 from checkpoint. Name: conv1/batch_normalization/gamma/Adam_1
Loading conv1/batch_normalization/beta/Adam:0 from checkpoint. Name: conv1/batch_normalization/beta/Adam
Loading conv1/batch_normalization/beta/Adam_1:0 from checkpoint. Name: conv1/batch_normalization/beta/Adam_1
Loading conv2/w/Adam:0 from checkpoint. Name: conv2/w/Adam
Loading conv2/w/Adam_1:0 from checkpoint. Name: conv2/w/Adam_1
Loading conv2/b/Adam:0 from checkpoint. Name: conv2/b/Adam
Loading conv2/b/Adam_1:0 from checkpoint. Name: conv2/b/Adam_1
Loading conv2/batch_normalization_1/gamma/Adam:0 from checkpoint. Name: conv2/batch_normalization_1/gamma/Adam
Loading conv2/batch_normalization_1/gamma/Adam_1:0 from checkpoint. Name: conv2/batch_normalization_1/gamma/Adam_1
Loading conv2/batch_normalization_1/beta/Adam:0 from checkpoint. Name: conv2/batch_normalization_1/beta/Adam
Loading conv2/batch_normalization_1/beta/Adam_1:0 from checkpoint. Name: conv2/batch_normalization_1/beta/Adam_1
Loading conv4/w/Adam:0 from checkpoint. Name: conv4/w/Adam
Loading conv4/w/Adam_1:0 from checkpoint. Name: conv4/w/Adam_1
Loading conv4/b/Adam:0 from checkpoint. Name: conv4/b/Adam
Loading conv4/b/Adam_1:0 from checkpoint. Name: conv4/b/Adam_1
Loading conv4/batch_normalization_3/gamma/Adam:0 from checkpoint. Name: conv4/batch_normalization_3/gamma/Adam
Loading conv4/batch_normalization_3/gamma/Adam_1:0 from checkpoint. Name: conv4/batch_normalization_3/gamma/Adam_1
Loading conv4/batch_normalization_3/beta/Adam:0 from checkpoint. Name: conv4/batch_normalization_3/beta/Adam
Loading conv4/batch_normalization_3/beta/Adam_1:0 from checkpoint. Name: conv4/batch_normalization_3/beta/Adam_1
Loading conv5/w/Adam:0 from checkpoint. Name: conv5/w/Adam
Loading conv5/w/Adam_1:0 from checkpoint. Name: conv5/w/Adam_1
Loading conv5/b/Adam:0 from checkpoint. Name: conv5/b/Adam
Loading conv5/b/Adam_1:0 from checkpoint. Name: conv5/b/Adam_1
Loading conv5/batch_normalization_4/gamma/Adam:0 from checkpoint. Name: conv5/batch_normalization_4/gamma/Adam
Loading conv5/batch_normalization_4/gamma/Adam_1:0 from checkpoint. Name: conv5/batch_normalization_4/gamma/Adam_1
Loading conv5/batch_normalization_4/beta/Adam:0 from checkpoint. Name: conv5/batch_normalization_4/beta/Adam
Loading conv5/batch_normalization_4/beta/Adam_1:0 from checkpoint. Name: conv5/batch_normalization_4/beta/Adam_1
Loading fc3/dense/kernel/Adam:0 from checkpoint. Name: fc3/dense/kernel/Adam
Loading fc3/dense/kernel/Adam_1:0 from checkpoint. Name: fc3/dense/kernel/Adam_1
Loading fc3/dense/bias/Adam:0 from checkpoint. Name: fc3/dense/bias/Adam
Loading fc3/dense/bias/Adam_1:0 from checkpoint. Name: fc3/dense/bias/Adam_1
Loading fc4/dense/kernel/Adam:0 from checkpoint. Name: fc4/dense/kernel/Adam
Loading fc4/dense/kernel/Adam_1:0 from checkpoint. Name: fc4/dense/kernel/Adam_1
Loading fc4/dense/bias/Adam:0 from checkpoint. Name: fc4/dense/bias/Adam
Loading fc4/dense/bias/Adam_1:0 from checkpoint. Name: fc4/dense/bias/Adam_1
(3, 3, 3, 32) (1, 1, 1, 32)
In [15]:
print(model)
<__main__.YourModel object at 0x7fa7b0325da0>

Observations: Difference in the visualizations between my model and the pre-trained models

The visualizations are completely different. Just considering the resolutions of hidden units to pixels, the base model needs to learn ~5.5x more parameters compared to my model for the conv1 layer. Moreover, the patterns do not correspond (a redish filter area of the base model does not correspond to a redish area on my model). Although the conv1 layer of both models output 32 filters, the filter sizes are different (7x7 vs 3x3), which leads to different learned weight values and patterns. These weights are unique from each other because of the pooling and backpropagation in neural networks facilitates nonlinear transformations of the feature space to propagate to all layers of the model (one of the main advantages of using neural networks).

Visualizing Activations

Now that we have seen the learned filters, and observed that they have some patterns, we will go one step further to visualize the activation maps produced by different convolutional filters. You will see that as we go deeper through the layers of a network the activation maps gradually represent higher and higher levels of abstraction in the images. Now, let's get some warm-up by running the following visualization code blocks for a simple model trained on MNIST dataset.

In [16]:
import tensorflow.contrib.slim as slim
from tensorflow.examples.tutorials.mnist import input_data
In [17]:
mnist = input_data.read_data_sets(os.path.join('data', 'MNIST_data'), one_hot=True)
Extracting data/MNIST_data/train-images-idx3-ubyte.gz
Extracting data/MNIST_data/train-labels-idx1-ubyte.gz
Extracting data/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting data/MNIST_data/t10k-labels-idx1-ubyte.gz
In [18]:
tf.reset_default_graph()

x = tf.placeholder(tf.float32, [None, 784],name="x-in")
y = tf.placeholder(tf.float32, [None, 10],name="y-in")
keep_prob = tf.placeholder("float")

x_reshaped = tf.reshape(x,[-1,28,28,1])
x_tiled = tf.tile(x_reshaped, [1,1,1,3])
sconv_1 = slim.conv2d(x_tiled,5,[5,5], activation_fn=None)
srelu_1 = tf.nn.relu(sconv_1)
spool_1 = slim.max_pool2d(srelu_1,[2,2])

sconv_2 = slim.conv2d(spool_1,5,[5,5], activation_fn=None)
srelu_2 = tf.nn.relu(sconv_2)
spool_2 = slim.max_pool2d(srelu_2,[2,2])

sconv_3 = slim.conv2d(spool_2,20,[5,5], activation_fn=None)
srelu_3 = tf.nn.relu(srelu_2)
s_dropout3 = slim.dropout(srelu_3, keep_prob)
output = slim.fully_connected(slim.flatten(s_dropout3), 10, activation_fn=tf.nn.softmax)

cross_entropy = -tf.reduce_sum(y * tf.log(output))
correct_prediction = tf.equal(tf.argmax(output,1 ), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
In [19]:
batchSize = 50
dropout_p = 0.5

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# Train the network
for i in range(2001):
    batch = mnist.train.next_batch(batchSize)
    sess.run(train_step, feed_dict={x:batch[0], y:batch[1], keep_prob:dropout_p})
    if i % 100 == 0 and i != 0:
        trainAccuracy = sess.run(accuracy, feed_dict={x:batch[0], y:batch[1], keep_prob:1.0})
        print("step %d, training accuracy %g"%(i, trainAccuracy))
step 100, training accuracy 0.46
step 200, training accuracy 0.78
step 300, training accuracy 0.84
step 400, training accuracy 0.82
step 500, training accuracy 0.78
step 600, training accuracy 0.78
step 700, training accuracy 0.86
step 800, training accuracy 0.88
step 900, training accuracy 0.9
step 1000, training accuracy 0.94
step 1100, training accuracy 0.88
step 1200, training accuracy 0.92
step 1300, training accuracy 0.86
step 1400, training accuracy 0.84
step 1500, training accuracy 0.94
step 1600, training accuracy 0.88
step 1700, training accuracy 0.92
step 1800, training accuracy 0.9
step 1900, training accuracy 0.92
step 2000, training accuracy 0.92
In [20]:
testAccuracy = sess.run(accuracy, feed_dict={x:mnist.test.images,y:mnist.test.labels, keep_prob:1.0})
print("test accuracy {}".format(testAccuracy))
test accuracy 0.9225000143051147
In [21]:
# function for visualizing the activations
def getActivations_mnist(layer, features):
    #print(features.shape)
    outs = sess.run(layer, feed_dict={x:np.reshape(features,[1,784],order='F'), keep_prob:1.0})
    outs = np.transpose(outs, [3, 1, 2, 0])
    fig = plt.figure(figsize=(4,4))   
    ax1 = plt.subplot(111)
    grid = viz_grid(outs)
    ax1.imshow(grid[...,0])
    ax1.set_title('{} Activations'.format(layer.name), fontsize=16)
In [22]:
imageToUse = mnist.test.images[0]
imageToShow = np.expand_dims(np.reshape(imageToUse,[28,28]), axis=-1)
imageToShow = np.tile(imageToShow, (1,1,3))
plt.imshow(imageToShow, interpolation="nearest", cmap="gray")
print ("The Image for activation visualizations:")
The Image for activation visualizations:
In [23]:
# Visualize the first 3 activation maps after convolution layers (without ReLU)
print(imageToUse.shape)
print(type(sconv_1))
getActivations_mnist(sconv_1, imageToUse)
getActivations_mnist(sconv_2, imageToUse)
getActivations_mnist(sconv_3, imageToUse)
(784,)
<class 'tensorflow.python.framework.ops.Tensor'>

Visualize the activations on my model

In [24]:
def getActivations_cifar10(layer, input_batch):
    units = sess.run(layer, feed_dict={model.X:input_batch})
    units = np.transpose(units, [3, 1, 2, 0])
    
    # Define the figure
    fig = plt.figure(figsize=(8,8))   
    ax1 = plt.subplot(111)
    grid = viz_grid(units)
    ax1.imshow(grid[...,0])
    ax1.set_title('{} Activations'.format(layer.name), fontsize=16)
In [25]:
print ("There are total {} images in test set".format(len(data_test)))
query_idx = 998
# Try out some images from dataset by uncommenting the line below
# query_idx = np.random.randint(0,999)

# Process the indicated issue
query_idx = min(max(query_idx, 0), 999)
cifar10ToUse = data_test[query_idx].astype(np.uint8)
cifar10ToUse= cifar10ToUse[...,::-1]
plt.imshow(cifar10ToUse)
print ("Image {} in test set".format(query_idx))
There are total 10000 images in test set
Image 998 in test set
In [26]:
tf.reset_default_graph()
sess = tf.Session()

# Restore the model using parameters dict
model = BaseModel()
restore_model('csci-599_sample.ckpt')
-----  Sample model  -----
intput layer: (?, 32, 32, 3)
conv1 layer: (?, 16, 16, 32)
conv2 layer: (?, 8, 8, 64)
flat layer: (?, 4096)
fc3 layer: (?, 384)
fc4 layer: (?, 10)
Loading conv1/w:0 from checkpoint. Name: conv1/w
Loading conv1/b:0 from checkpoint. Name: conv1/b
Loading conv2/w:0 from checkpoint. Name: conv2/w
Loading conv2/b:0 from checkpoint. Name: conv2/b
Loading fc3/dense/kernel:0 from checkpoint. Name: fc3/dense/kernel
Loading fc3/dense/bias:0 from checkpoint. Name: fc3/dense/bias
Loading fc4/dense/kernel:0 from checkpoint. Name: fc4/dense/kernel
Loading fc4/dense/bias:0 from checkpoint. Name: fc4/dense/bias
Loading Variable:0 from checkpoint. Name: Variable
Loading beta1_power:0 from checkpoint. Name: beta1_power
Loading beta2_power:0 from checkpoint. Name: beta2_power
Loading conv1/w/Adam:0 from checkpoint. Name: conv1/w/Adam
Loading conv1/w/Adam_1:0 from checkpoint. Name: conv1/w/Adam_1
Loading conv1/b/Adam:0 from checkpoint. Name: conv1/b/Adam
Loading conv1/b/Adam_1:0 from checkpoint. Name: conv1/b/Adam_1
Loading conv2/w/Adam:0 from checkpoint. Name: conv2/w/Adam
Loading conv2/w/Adam_1:0 from checkpoint. Name: conv2/w/Adam_1
Loading conv2/b/Adam:0 from checkpoint. Name: conv2/b/Adam
Loading conv2/b/Adam_1:0 from checkpoint. Name: conv2/b/Adam_1
Loading fc3/dense/kernel/Adam:0 from checkpoint. Name: fc3/dense/kernel/Adam
Loading fc3/dense/kernel/Adam_1:0 from checkpoint. Name: fc3/dense/kernel/Adam_1
Loading fc3/dense/bias/Adam:0 from checkpoint. Name: fc3/dense/bias/Adam
Loading fc3/dense/bias/Adam_1:0 from checkpoint. Name: fc3/dense/bias/Adam_1
Loading fc4/dense/kernel/Adam:0 from checkpoint. Name: fc4/dense/kernel/Adam
Loading fc4/dense/kernel/Adam_1:0 from checkpoint. Name: fc4/dense/kernel/Adam_1
Loading fc4/dense/bias/Adam:0 from checkpoint. Name: fc4/dense/bias/Adam
Loading fc4/dense/bias/Adam_1:0 from checkpoint. Name: fc4/dense/bias/Adam_1
In [27]:
cifar10ToUse_with_batch = np.expand_dims(cifar10ToUse, axis=0)
# Visualize the activations of each conv layer in your model #

convs=[c for c in dir(model) if c.startswith('conv')]

getActivations_cifar10(model.conv1, cifar10ToUse_with_batch)
getActivations_cifar10(model.conv2, cifar10ToUse_with_batch)
['X', 'Y', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_build_model', '_build_optimizer', '_input_ops', '_loss', '_model', 'accuracy_op', 'batch_size', 'conv1', 'conv2', 'evaluate', 'fc3', 'fc4', 'flat', 'log_step', 'loss_op', 'num_epoch', 'pool1', 'pool2', 'relu1', 'relu2', 'relu3', 'train', 'train_op']
['OVERLOADABLE_OPERATORS', '__abs__', '__add__', '__and__', '__array_priority__', '__bool__', '__class__', '__copy__', '__delattr__', '__dict__', '__dir__', '__div__', '__doc__', '__eq__', '__floordiv__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__invert__', '__iter__', '__le__', '__lt__', '__matmul__', '__mod__', '__module__', '__mul__', '__ne__', '__neg__', '__new__', '__nonzero__', '__or__', '__pow__', '__radd__', '__rand__', '__rdiv__', '__reduce__', '__reduce_ex__', '__repr__', '__rfloordiv__', '__rmatmul__', '__rmod__', '__rmul__', '__ror__', '__rpow__', '__rsub__', '__rtruediv__', '__rxor__', '__setattr__', '__sizeof__', '__str__', '__sub__', '__subclasshook__', '__truediv__', '__weakref__', '__xor__', '_as_node_def_input', '_as_tf_output', '_c_api_shape', '_consumers', '_dtype', '_get_input_ops_without_shapes', '_id', '_name', '_op', '_override_operator', '_rank', '_shape', '_shape_as_list', '_shape_tuple', '_shape_val', '_tf_api_names', '_tf_api_names_v1', '_tf_output', '_value_index', 'consumers', 'device', 'dtype', 'eval', 'get_shape', 'graph', 'name', 'op', 'set_shape', 'shape', 'value_index']
['_InputList', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_add_control_input', '_add_control_inputs', '_add_outputs', '_add_while_inputs', '_c_op', '_clear_attr', '_colocation_code_locations', '_colocation_dict', '_control_flow_context', '_control_flow_post_processing', '_control_inputs', '_control_outputs', '_device_assignments', '_device_code_locations', '_get_control_flow_context', '_graph', '_id', '_id_value', '_input_types', '_inputs', '_inputs_val', '_node_def', '_op_def', '_original_op', '_output_types', '_outputs', '_reconstruct_sequence_inputs', '_remove_all_control_inputs', '_set_attr', '_set_control_flow_context', '_set_device', '_set_device_from_string', '_set_func_attr', '_set_func_list_attr', '_set_shape_list_attr', '_set_type_list_attr', '_tf_api_names', '_tf_api_names_v1', '_tf_input', '_tf_output', '_traceback', '_update_input', 'colocation_groups', 'control_inputs', 'device', 'get_attr', 'graph', 'inputs', 'name', 'node_def', 'op_def', 'outputs', 'run', 'traceback', 'traceback_with_start_lines', 'type', 'values']
['conv1', 'conv2']

Observations: How the activation changes across layers as we traverse through the network

As we move through the network and the number of hidden units/parameters increases, the activation pattern for each filter becomes less detailed. The model learns to break down the input image and separate/isolate specific features of the image so it can hopefully use them for classification. In other words, each filter becomes more attuned to a distinct detail/feature rather than processing the entire image whole. This makes it possible for the model to classify unseen images

Gradient-weighted Class Activation Mapping (Grad-CAM)

Grad-CAM is a technique for "visually interpreting" the predictions of a Convolutional Neural Network (CNN)-based model. This technique essentially uses the gradients of any target concept (a predicted class such as "cat"), flowing into the final convolutional layer to produce a coarse localization map, attending regions in the image that are important for prediction of the concept. Please read the original paper Grad-CAM for more details.

Register the guided backpropagation ReLU (Run this function only once)

In [28]:
# Replace vanila relu to guided relu to get guided backpropagation.
try:
    @ops.RegisterGradient("GuidedRelu")
    def _GuidedReluGrad(op, grad):
        return tf.where(0. < grad, gen_nn_ops.relu_grad(grad, op.outputs[0]), tf.zeros(grad.get_shape()))
except:
    print("GuidedRelu has already been registered!")
    pass

Some useful functions

In [29]:
import cv2

def imgread(path):
    print ("Image:", path.split("/")[-1])
    # Read in the image using python opencv
    img = cv2.imread(path)
    img = img / 255.0
    print ("Raw Image Shape: ", img.shape)
    
    # Center crop the image
    short_edge = min(img.shape[:2])
    W, H, C = img.shape
    to_crop = min(W, H)
    cent_w = int((img.shape[1] - short_edge) / 2)
    cent_h = int((img.shape[0] - short_edge) / 2)
    img_cropped = img[cent_h:cent_h+to_crop, cent_w:cent_w+to_crop]
    print ("Cropped Image Shape: ", img_cropped.shape)
    
    # Resize the cropped image to 224 by 224 for VGG16 network
    img_resized = cv2.resize(img_cropped, (224, 224), interpolation=cv2.INTER_LINEAR)
    print ("Resized Image Shape: ", img_resized.shape)
    return img_resized

def predicted_labels(score, synset_path):
    fi = open(synset_path, "rb")
    synset = []
    for line in fi:
        synset.append(line.rstrip().lstrip())
    
    # The predictions, reverse ordered
    pred = np.argsort(score)[::-1]

    # Top 1 and Top 5
    top1 = synset[pred[0]]
    print ("\nTop1, Label: {}, score: {}".format(top1, score[pred[0]]))
    top5 = [(synset[pred[i]], score[pred[i]]) for i in range(5)]
    for i in range(1,5):
        print ("Top{}, Label: {} score: {}".format(i+1, top5[i][0], top5[i][1]))
    return top1, top5

Visualization Function

In [30]:
def visualize(image, output, grads, gb_grads):
    # Reverse the BGR channel to RGB
    gb_grads = gb_grads[...,::-1]

    # Initialzie CAM weights
    CAM = np.ones(output.shape[0 : 2], dtype = np.float32)  

    # Taking a weighted average
    cam_w = np.mean(grads, axis = (0, 1))
    for i, w in enumerate(cam_w):
        CAM += w * output[:, :, i]

    # Passing through ReLU
    CAM = np.maximum(CAM, 0)
    # scale CAM to [0,1]
    CAM /= np.max(CAM)
    # Resize the CAM to 224 by 224
    CAM = cv2.resize(CAM, (224, 224), interpolation=cv2.INTER_LINEAR)
    
    # scale guided backprop gradients to [0,1]
    gb_grads -= np.min(gb_grads)
    gb_grads /= np.max(gb_grads)
    
    # scale the original to [0,1]
    img_toshow = image.astype(float)    
    img_toshow -= np.min(img_toshow)
    img_toshow /= img_toshow.max()

    # Render the CAM heatmap
    heatmap = cv2.applyColorMap(255 - np.uint8(CAM*255.0), cv2.COLORMAP_JET)

    # Grad-CAM
    CAM_gb = CAM.copy()
    CAM_gb = np.expand_dims(np.squeeze(CAM_gb), axis=-1)
    gd_gb = img_toshow[...,::-1] * np.tile(CAM_gb, (1,1,3))
    
    # Draw the results figures
    fig = plt.figure(figsize=(10,10))   
    ax1 = plt.subplot(221)
    ax2 = plt.subplot(222)
    ax3 = plt.subplot(223)
    ax4 = plt.subplot(224)
    
    ax1.imshow(img_toshow[...,::-1])
    ax1.set_title('Input Image')
    ax2.imshow(heatmap)
    ax2.set_title('Grad-CAM')
    ax3.imshow(gb_grads)
    ax3.set_title('guided backpropagation')
    ax4.imshow(gd_gb)
    ax4.set_title('guided Grad-CAM')

    # Show the resulting image
    plt.show()

Grad-CAM Main Implementation over a pretrained VGG network.

You can extract layers or labels from VGG by accessing with the name. (e.g. vgg.conv2_2 extracts second convolution filter in conv2 family)

In [31]:
num_classes = 1000

# Read in the image
img1 = imgread(os.path.join("images", "corgi.jpg"))
img2 = imgread(os.path.join("images", "cat_and_dog.jpg"))
img3 = imgread(os.path.join("images", "cat_and_dog.jpg"))
print('img shape: ',img1.shape)
# Expand one dimension to take on the batch dimension
img1 = np.expand_dims(img1, axis=0)
img2 = np.expand_dims(img2, axis=0)
img3 = np.expand_dims(img3, axis=0)
print('expanded img shape: ',img1.shape)



# Define a all zero gradients of the shape 1000
zero_grads = np.array([0. for i in range(num_classes)])

# The indices of the classes are provided for you
class_num1 = 263  # Pembroke, Pembroke Welsh corgi
class_num2 = 254  # Pug, pug-dog
class_num3 = 282  # Tiger cat

# Define a one-hot gradient vector where the only activated gradient
# is of the corresponding indices from above 
one_hot_grad1 = zero_grads.copy()
one_hot_grad2 = zero_grads.copy()
one_hot_grad3 = zero_grads.copy()
one_hot_grad1[class_num1] = 1.0
one_hot_grad2[class_num2] = 1.0
one_hot_grad3[class_num3] = 1.0
one_hot_grad1 = np.expand_dims(one_hot_grad1, axis=0)
one_hot_grad2 = np.expand_dims(one_hot_grad2, axis=0)
one_hot_grad3 = np.expand_dims(one_hot_grad3, axis=0)
print("one_hot_grad3: ",one_hot_grad3.shape)

# Construct a minibatch of data and labels 
minibatch = np.concatenate((img1,img2,img3), axis=0)
labels =np.concatenate((one_hot_grad1,one_hot_grad2,one_hot_grad3), axis=0)
print('batch, labels', minibatch.shape,labels.shape)
print(labels)

# Define the batch size
batch_size = 3

# Create tensorflow graph for evaluation
graph = tf.Graph()
with graph.as_default():
    with graph.gradient_override_map({'Relu': 'GuidedRelu'}):
        # Define the VGG16 network and setup
        vgg = vgg16.Vgg16()
        vgg.load()
        vgg.setup()
        print(dir(vgg))
      
        
        # Implement the signal and the loss
        # Signal: Element-wise multiplication of final fully connected layer (fc8)  
        # with the one-hot vector of labels.                                        
        # Loss: Average of the Signal                 

        signal = tf.multiply(vgg.fc8,vgg.labels)
        loss = tf.reduce_mean(signal, axis=1) 

        # Compute the gradient of loss with respect to pool5 layer
        pool5_grad = tf.gradients(loss, vgg.pool5)[0] 
        
        # Compute the gradient of loss with respect to input layer
        # Note: This is the guided backpropagated gradient 

        inputs_grad = tf.gradients(loss, vgg.inputs)[0] 


        eps = tf.constant(1e-5)
        # Normalize the gradients, and add a small number epsilon to it
        pool5_grad_normed = pool5_grad/tf.norm(pool5_grad)+eps
        
        # Initializer for the tf variables
        init = tf.global_variables_initializer()
        

# Run tensorflow 
with tf.Session(graph=graph) as sess:    
    sess.run(init)
    # Run the session to get 
    # (1) guided backpropagated gradients to the input                          #
    # (2) activation of pool5 (from vgg)                                        #
    # (3) normalized pool5 gradients                                            #
    # (4) output probabilities (from vgg)                                       #
    # Feed as input: batch of images and labels                                 #
    
    prob = sess.run(vgg.prob, feed_dict={vgg.inputs: minibatch, vgg.labels:labels})

    gb_grads, pool5_act, pool5_grads, prob = sess.run([inputs_grad, vgg.pool5, pool5_grad_normed,vgg.prob], feed_dict={vgg.inputs: minibatch, vgg.labels: labels})
    
    # Visualize the Grad-CAM
    for i in range(batch_size):
        top1, top5 = predicted_labels(prob[i], os.path.join("lib", "synset.txt"))
        visualize(minibatch[i], pool5_act[i], pool5_grads[i], gb_grads[i])