{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Use shift-enter to execute a code block and move to the next one."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 2.1 Import libraries.\n",
    "import math\n",
    "import os\n",
    "from six.moves import xrange  # pylint: disable=redefined-builtin\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets\n",
    "\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 2.2 Define some constants.\n",
    "# The MNIST dataset has 10 classes, representing the digits 0 through 9.\n",
    "NUM_CLASSES = 10\n",
    "\n",
    "# The MNIST images are always 28x28 pixels.\n",
    "IMAGE_SIZE = 28\n",
    "IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE\n",
    "\n",
    "# Batch size. Must be evenly dividable by dataset sizes.\n",
    "BATCH_SIZE = 100\n",
    "EVAL_BATCH_SIZE = 1\n",
    "\n",
    "# Number of units in hidden layers.\n",
    "HIDDEN1_UNITS = 128\n",
    "HIDDEN2_UNITS = 32\n",
    "\n",
    "# Maximum number of training steps.\n",
    "MAX_STEPS = 2000\n",
    "\n",
    "# Directory to put the training data.\n",
    "TRAIN_DIR=\"/tmp/mnist\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 2.3 Get input data: get the sets of images and labels for training, validation, and\n",
    "# test on MNIST.\n",
    "data_sets = read_data_sets(TRAIN_DIR, False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 2.4 Build inference graph.\n",
    "def mnist_inference(images, hidden1_units, hidden2_units):\n",
    "    \"\"\"Build the MNIST model up to where it may be used for inference.\n",
    "    Args:\n",
    "        images: Images placeholder.\n",
    "        hidden1_units: Size of the first hidden layer.\n",
    "        hidden2_units: Size of the second hidden layer.\n",
    "    Returns:\n",
    "        logits: Output tensor with the computed logits.\n",
    "    \"\"\"\n",
    "    # Hidden 1\n",
    "    with tf.name_scope('hidden1'):\n",
    "        weights = tf.Variable(\n",
    "            tf.truncated_normal([IMAGE_PIXELS, hidden1_units],\n",
    "                                stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),\n",
    "            name='weights')\n",
    "        biases = tf.Variable(tf.zeros([hidden1_units]),\n",
    "                             name='biases')\n",
    "        hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)\n",
    "    # Hidden 2\n",
    "    with tf.name_scope('hidden2'):\n",
    "        weights = tf.Variable(\n",
    "            tf.truncated_normal([hidden1_units, hidden2_units],\n",
    "                                stddev=1.0 / math.sqrt(float(hidden1_units))),\n",
    "            name='weights')\n",
    "        biases = tf.Variable(tf.zeros([hidden2_units]),\n",
    "                             name='biases')\n",
    "        hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)\n",
    "    # Linear\n",
    "    with tf.name_scope('softmax_linear'):\n",
    "        weights = tf.Variable(\n",
    "            tf.truncated_normal([hidden2_units, NUM_CLASSES],\n",
    "                                stddev=1.0 / math.sqrt(float(hidden2_units))),\n",
    "            name='weights')\n",
    "        biases = tf.Variable(tf.zeros([NUM_CLASSES]),\n",
    "                             name='biases')\n",
    "        logits = tf.matmul(hidden2, weights) + biases\n",
    "\n",
    "    # Uncomment the following line to see what we have constructed.\n",
    "    # tf.train.write_graph(tf.get_default_graph().as_graph_def(),\n",
    "    #                      \"/tmp\", \"inference.pbtxt\", as_text=True)\n",
    "    return logits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 2.5 Build training graph.\n",
    "def mnist_training(logits, labels, learning_rate):\n",
    "    \"\"\"Build the training graph.\n",
    "\n",
    "    Args:\n",
    "        logits: Logits tensor, float - [BATCH_SIZE, NUM_CLASSES].\n",
    "        labels: Labels tensor, int32 - [BATCH_SIZE], with values in the\n",
    "          range [0, NUM_CLASSES).\n",
    "        learning_rate: The learning rate to use for gradient descent.\n",
    "    Returns:\n",
    "        train_op: The Op for training.\n",
    "        loss: The Op for calculating loss.\n",
    "    \"\"\"\n",
    "    # Create an operation that calculates loss.\n",
    "    labels = tf.to_int64(labels)\n",
    "    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(\n",
    "        logits, labels, name='xentropy')\n",
    "    loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')\n",
    "    # Create the gradient descent optimizer with the given learning rate.\n",
    "    optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n",
    "    # Create a variable to track the global step.\n",
    "    global_step = tf.Variable(0, name='global_step', trainable=False)\n",
    "    # Use the optimizer to apply the gradients that minimize the loss\n",
    "    # (and also increment the global step counter) as a single training step.\n",
    "    train_op = optimizer.minimize(loss, global_step=global_step)\n",
    "\n",
    "    # Uncomment the following line to see what we have constructed.\n",
    "    # tf.train.write_graph(tf.get_default_graph().as_graph_def(),\n",
    "    #                      \"/tmp\", \"train.pbtxt\", as_text=True)\n",
    "\n",
    "    return train_op, loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 2.6 Build the complete graph for feeding inputs, training, and saving checkpoints.\n",
    "mnist_graph = tf.Graph()\n",
    "with mnist_graph.as_default():\n",
    "    # Generate placeholders for the images and labels.\n",
    "    images_placeholder = tf.placeholder(tf.float32)                                       \n",
    "    labels_placeholder = tf.placeholder(tf.int32)\n",
    "    tf.add_to_collection(\"images\", images_placeholder)  # Remember this Op.\n",
    "    tf.add_to_collection(\"labels\", labels_placeholder)  # Remember this Op.\n",
    "\n",
    "    # Build a Graph that computes predictions from the inference model.\n",
    "    logits = mnist_inference(images_placeholder,\n",
    "                             HIDDEN1_UNITS,\n",
    "                             HIDDEN2_UNITS)\n",
    "    tf.add_to_collection(\"logits\", logits)  # Remember this Op.\n",
    "\n",
    "    # Add to the Graph the Ops that calculate and apply gradients.\n",
    "    train_op, loss = mnist_training(logits, labels_placeholder, 0.01)\n",
    "\n",
    "    # Add the variable initializer Op.\n",
    "    init = tf.initialize_all_variables()\n",
    "\n",
    "    # Create a saver for writing training checkpoints.\n",
    "    saver = tf.train.Saver()\n",
    "    \n",
    "    # Uncomment the following line to see what we have constructed.\n",
    "    # tf.train.write_graph(tf.get_default_graph().as_graph_def(),\n",
    "    #                      \"/tmp\", \"complete.pbtxt\", as_text=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 2.7 Run training for MAX_STEPS and save checkpoint at the end.\n",
    "with tf.Session(graph=mnist_graph) as sess:\n",
    "    # Run the Op to initialize the variables.\n",
    "    sess.run(init)\n",
    "\n",
    "    # Start the training loop.\n",
    "    for step in xrange(MAX_STEPS):\n",
    "        # Read a batch of images and labels.\n",
    "        images_feed, labels_feed = data_sets.train.next_batch(BATCH_SIZE)\n",
    "\n",
    "        # Run one step of the model.  The return values are the activations\n",
    "        # from the `train_op` (which is discarded) and the `loss` Op.  To\n",
    "        # inspect the values of your Ops or variables, you may include them\n",
    "        # in the list passed to sess.run() and the value tensors will be\n",
    "        # returned in the tuple from the call.\n",
    "        _, loss_value = sess.run([train_op, loss],\n",
    "                                 feed_dict={images_placeholder: images_feed,\n",
    "                                            labels_placeholder: labels_feed})\n",
    "\n",
    "        # Print out loss value.\n",
    "        if step % 1000 == 0:\n",
    "            print('Step %d: loss = %.2f' % (step, loss_value))\n",
    "\n",
    "    # Write a checkpoint.\n",
    "    checkpoint_file = os.path.join(TRAIN_DIR, 'checkpoint')\n",
    "    saver.save(sess, checkpoint_file, global_step=step)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 2.8 Run evaluation based on the saved checkpoint.\n",
    "with tf.Session(graph=tf.Graph()) as sess:\n",
    "    saver = tf.train.import_meta_graph(\n",
    "        os.path.join(TRAIN_DIR, \"checkpoint-1999.meta\"))\n",
    "    saver.restore(\n",
    "        sess, os.path.join(TRAIN_DIR, \"checkpoint-1999\"))\n",
    "\n",
    "    # Retrieve the Ops we 'remembered'.\n",
    "    logits = tf.get_collection(\"logits\")[0]\n",
    "    images_placeholder = tf.get_collection(\"images\")[0]\n",
    "    labels_placeholder = tf.get_collection(\"labels\")[0]\n",
    "    \n",
    "    # Add an Op that chooses the top k predictions.\n",
    "    eval_op = tf.nn.top_k(logits)\n",
    "    \n",
    "    # Run evaluation.\n",
    "    images_feed, labels_feed = data_sets.validation.next_batch(EVAL_BATCH_SIZE)\n",
    "    imgplot = plt.imshow(np.reshape(images_feed, (28, 28)))\n",
    "    prediction = sess.run(eval_op,\n",
    "                          feed_dict={images_placeholder: images_feed,\n",
    "                                     labels_placeholder: labels_feed})\n",
    "    print(\"Ground truth: %d\\nPrediction: %d\" % (labels_feed, prediction.indices[0][0]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "<p>Back to [1_linear_regression_model.ipynb](1_linear_regression_model.ipynb).</p>\n",
    "<p>Next to [Bonus Lab](extras/extras_0_deepdream.ipynb).</p>"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}