diff --git a/README.md b/README.md
index 7be7d91..b6537ee 100644
--- a/README.md
+++ b/README.md
@@ -8,11 +8,13 @@ Pretty Tensor provides a set of objects that behave likes Tensors, but also
 support a chainable object syntax to quickly define neural networks
 and other layered architectures in TensorFlow.
 
-    result = (pretty_tensor.wrap(input_data, m)
-              .flatten()
-              .fully_connected(200, activation_fn=tf.nn.relu)
-              .fully_connected(10, activation_fn=None)
-              .softmax(labels, name=softmax_name))
+```py
+result = (pretty_tensor.wrap(input_data, m)
+          .flatten()
+          .fully_connected(200, activation_fn=tf.nn.relu)
+          .fully_connected(10, activation_fn=None)
+          .softmax(labels, name=softmax_name))
+```
 
 Please look here for full documentation of the PrettyTensor object for all
 available operations:
@@ -36,35 +38,43 @@ The easiest installation is just to use pip:
 ## Quick start
 
 ### Imports
-    import prettytensor as pt
-    import tensorflow as tf
+```py
+import prettytensor as pt
+import tensorflow as tf
+```
 
 ### Setup your input
-    my_inputs = # numpy array of shape (BATCHES, BATCH_SIZE, DATA_SIZE)
-    my_labels = # numpy array of shape (BATCHES, BATCH_SIZE, CLASSES)
-    input_tensor = tf.placeholder(np.float32, shape=(BATCH_SIZE, DATA_SIZE))
-    label_tensor = tf.placeholder(np.float32, shape=(BATCH_SIZE, CLASSES))
-    pretty_input = pt.wrap(input_tensor)
+```py
+my_inputs = # numpy array of shape (BATCHES, BATCH_SIZE, DATA_SIZE)
+my_labels = # numpy array of shape (BATCHES, BATCH_SIZE, CLASSES)
+input_tensor = tf.placeholder(np.float32, shape=(BATCH_SIZE, DATA_SIZE))
+label_tensor = tf.placeholder(np.float32, shape=(BATCH_SIZE, CLASSES))
+pretty_input = pt.wrap(input_tensor)
+```
 
 ### Define your model
-    softmax, loss = (pretty_input.
-                     fully_connected(100).
-                     softmax_classifier(CLASSES, labels=label_tensor))
+```py
+softmax, loss = (pretty_input.
+                 fully_connected(100).
+                 softmax_classifier(CLASSES, labels=label_tensor))
+```
 
 ### Train and evaluate
-    accuracy = softmax.evaluate_classifier(label_tensor)
+```py
+accuracy = softmax.evaluate_classifier(label_tensor)
 
-    optimizer = tf.train.GradientDescentOptimizer(0.1)  # learning rate
-    train_op = pt.apply_optimizer(optimizer, losses=[loss])
+optimizer = tf.train.GradientDescentOptimizer(0.1)  # learning rate
+train_op = pt.apply_optimizer(optimizer, losses=[loss])
 
-    init_op = tf.initialize_all_variables()
+init_op = tf.initialize_all_variables()
 
-    with tf.Session() as sess:
-        sess.run(init_op)
-        for inp, label in zip(my_inputs, my_labels):
-            unused_loss_value, accuracy_value = sess.run([loss, accuracy],
-                                     {input_tensor: inp, label_tensor: label})
-            print 'Accuracy: %g' % accuracy_value
+with tf.Session() as sess:
+    sess.run(init_op)
+    for inp, label in zip(my_inputs, my_labels):
+        unused_loss_value, accuracy_value = sess.run([loss, accuracy],
+                                 {input_tensor: inp, label_tensor: label})
+        print 'Accuracy: %g' % accuracy_value
+```
 
 ## Features
 
@@ -96,10 +106,12 @@ that it takes care of handling the input shape.  One other feature worth noting
 are defaults.  Using defaults you can specify reused values in a single place
 without having to repeat yourself.
 
-    with pt.defaults_scope(activation_fn=tf.nn.relu):
-      hidden_output2 = (pretty_images.flatten()
-                       .fully_connected(100)
-                       .fully_connected(100))
+```py
+with pt.defaults_scope(activation_fn=tf.nn.relu):
+  hidden_output2 = (pretty_images.flatten()
+                   .fully_connected(100)
+                   .fully_connected(100))
+```
 
 Check out the documentation to see
 [all supported defaults](docs/pretty_tensor_top_level.md#defaults_scope).
@@ -110,32 +122,37 @@ Sequential mode lets you break model construction across lines and provides
 the subdivide syntactic sugar that makes it easy to define and understand
 complex structures like an [inception module](http://arxiv.org/abs/1409.4842):
 
-
-    with pretty_tensor.defaults_scope(activation_fn=tf.nn.relu):
-      seq = pretty_input.sequential()
-      with seq.subdivide(4) as towers:
-        towers[0].conv2d(1, 64)
-        towers[1].conv2d(1, 112).conv2d(3, 224)
-        towers[2].conv2d(1, 32).conv2d(5, 64)
-        towers[3].max_pool(2, 3).conv2d(1, 32)
+```py
+with pretty_tensor.defaults_scope(activation_fn=tf.nn.relu):
+  seq = pretty_input.sequential()
+  with seq.subdivide(4) as towers:
+    towers[0].conv2d(1, 64)
+    towers[1].conv2d(1, 112).conv2d(3, 224)
+    towers[2].conv2d(1, 32).conv2d(5, 64)
+    towers[3].max_pool(2, 3).conv2d(1, 32)
+```
 
 ![Inception module showing branch and rejoin](inception_module.png)
 
 Templates provide guaranteed parameter reuse and make unrolling recurrent
 networks easy:
 
-    output = [], s = tf.zeros([BATCH, 256 * 2])
+```py
+output, s = [], tf.zeros([BATCH, 256 * 2])
 
-    A = (pretty_tensor.template('x')
-         .lstm_cell(num_units=256, state=UnboundVariable('state'))
+A = (pretty_tensor.template('x')
+     .lstm_cell(num_units=256, state=UnboundVariable('state')))
 
-    for x in pretty_input_array:
-      h, s = A.construct(x=x, state=s)
-      output.append(h)
+for x in pretty_input_array:
+  h, s = A.construct(x=x, state=s)
+  output.append(h)
+```
 
 There are also some convenient shorthands for LSTMs and GRUs:
 
-    pretty_input_array.sequence_lstm(num_units=256)
+```py
+pretty_input_array.sequence_lstm(num_units=256)
+```
 
 ![Unrolled RNN](unrolled_lstm.png)
 
@@ -144,7 +161,9 @@ There are also some convenient shorthands for LSTMs and GRUs:
 You can call any existing operation by using `apply` and it will simply
 subsitute the current tensor for the first argument.
 
-    pretty_input.apply(tf.mul, 5)
+```py
+pretty_input.apply(tf.mul, 5)
+```
 
 You can also create a new operation  There are two supported registration
 mechanisms to add your own functions. `@Register()` allows you to create a
@@ -153,9 +172,11 @@ a new value. Name scoping and variable scoping are handled by the framework.
 
 The following method adds the leaky_relu method to every Pretty Tensor:
 
-    @pt.Register
-    def leaky_relu(input_pt):
-      return tf.select(tf.greater(input_pt, 0.0), input_pt, 0.01 * input_pt)
+```py
+@pt.Register
+def leaky_relu(input_pt):
+  return tf.select(tf.greater(input_pt, 0.0), input_pt, 0.01 * input_pt)
+```
 
 
 `@RegisterCompoundOp()` is like adding a macro, it is designed to group together