In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [None]:
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Quick sanity check: print the first training label, then draw the matching image.
print(train_labels[0])
plt.imshow(train_images[0], cmap="Greys_r")


def _as_dataset(images, labels):
    """Wrap image/label arrays as a tf.data.Dataset of (image, label) pairs.

    Images are reshaped to [-1, 28, 28, 1] instead of being flattened to 784;
    the trailing 1 is the single color channel the convolutions operate on.
    They are then cast to float32 and divided by 255. Labels are cast to int32.
    """
    pixels = images.reshape([-1, 28, 28, 1]).astype(np.float32) / 255
    targets = labels.astype(np.int32)
    return tf.data.Dataset.from_tensor_slices((pixels, targets))


# Training stream: shuffle with a 60000-element buffer (presumably the full
# training-set size), batch by 128, and repeat indefinitely.
data = _as_dataset(train_images, train_labels)
data = data.shuffle(buffer_size=60000).batch(128).repeat()

# Test stream: batches of up to 10000 examples, no shuffling.
test_data = _as_dataset(test_images, test_labels).batch(10000)

In [None]:
train_steps = 1000  # number of optimizer updates; might need more steps

# Convolutional kernels/biases.
# Kernel shape is [height, width, in_channels, out_channels]:
# 5x5 filters mapping 1 -> 16 channels, then 16 -> 32 channels.
W = tf.Variable(tf.random.uniform([5, 5, 1, 16], -0.1, 0.1))
b = tf.Variable(tf.zeros([16]))
W1 = tf.Variable(tf.random.uniform([5, 5, 16, 32], -0.1, 0.1))
b1 = tf.Variable(tf.zeros([32]))

# Fully connected layer at the end: 7*7*32 flattened features -> 10 class logits.
# The bounds are given explicitly because tf.random.uniform defaults to [0, 1),
# which would make every dense weight positive and far larger in scale than
# the conv kernels above.
W_d = tf.Variable(tf.random.uniform([7*7*32, 10], -0.1, 0.1))
b_d = tf.Variable(tf.zeros([10]))

# All trainable variables, in the order gradients are requested and applied.
varis = [W, b, W1, b1, W_d, b_d]


# 2 conv layers, each followed by 2x2 max pool
# you should look up the parameters in the API!
def model(inp):
    """Run the CNN forward pass and return unnormalized class scores.

    Architecture: conv(W, b) -> ReLU -> 2x2 max pool -> conv(W1, b1) -> ReLU
    -> 2x2 max pool -> flatten -> dense(W_d, b_d). Both convolutions use
    stride 1 and "SAME" padding; both pools use window 2, stride 2 and
    "SAME" padding.

    Args:
        inp: batch of images. The flatten step hard-codes 7*7*32 features,
            which matches 28x28 inputs (28 -> 14 -> 7 after the two pools).

    Returns:
        Logits produced by the final matmul with W_d plus b_d (no softmax).
    """
    # First conv stage.
    stage1 = tf.nn.conv2d(inp, filters=W, strides=1, padding="SAME") + b
    stage1 = tf.nn.relu(stage1)
    stage1 = tf.nn.max_pool2d(stage1, ksize=2, strides=2, padding="SAME")

    # Second conv stage.
    stage2 = tf.nn.conv2d(stage1, filters=W1, strides=1, padding="SAME") + b1
    stage2 = tf.nn.relu(stage2)
    stage2 = tf.nn.max_pool2d(stage2, ksize=2, strides=2, padding="SAME")

    # "Flatten" the feature maps so they can feed the dense layer.
    flat = tf.reshape(stage2, [-1, 7*7*32])

    return tf.matmul(flat, W_d) + b_d


# Adam (with its default settings) makes training much smoother.
opt = tf.keras.optimizers.Adam()
# The model returns raw logits, so the loss must be told: from_logits=True.
# Labels are integer class ids, hence the "sparse" variant.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [None]:
# Training loop: performs exactly `train_steps` optimizer updates and reports
# loss/accuracy every 100 steps as well as on the final step.
for step, (img_batch, lbl_batch) in enumerate(data):
    # `data` repeats forever, so the loop must be stopped explicitly.
    # `step` is 0-based, hence `>=` (a `>` check would run one extra update).
    if step >= train_steps:
        break

    # Record the forward pass so the loss can be differentiated w.r.t. `varis`.
    with tf.GradientTape() as tape:
        logits = model(img_batch)
        xent = loss_fn(lbl_batch, logits)

    grads = tape.gradient(xent, varis)
    opt.apply_gradients(zip(grads, varis))

    if step % 100 == 0 or step == train_steps - 1:
        # Metrics are for the current training batch, using the logits that
        # were computed before this step's update was applied.
        preds = tf.argmax(logits, axis=1, output_type=tf.int32)
        acc = tf.reduce_mean(tf.cast(tf.equal(preds, lbl_batch),
                             tf.float32))
        print("Loss: {} Accuracy: {}".format(xent, acc))
    

In [None]:
# Evaluation: `test_data` is batched with size 10000, so pulling a single
# element from its iterator yields one big (images, labels) batch.
# note: this should work on colab, but might crash on your machine depending on RAM
big_test_batch = next(iter(test_data))
eval_images, eval_labels = big_test_batch

# Predicted class = index of the largest logit for each example.
eval_logits = model(eval_images)
test_preds = tf.argmax(eval_logits, axis=1, output_type=tf.int32)

# Accuracy = mean of the 0/1 "prediction equals label" indicators.
is_correct = tf.cast(tf.equal(test_preds, eval_labels), tf.float32)
acc = tf.reduce_mean(is_correct)
print(acc)