From 64c768da254b38b1edd9956649363c9bad29c885 Mon Sep 17 00:00:00 2001
From: Rui Wang Napieralski <wru@amazon.com>
Date: Thu, 24 Sep 2020 22:48:44 -0700
Subject: [PATCH 1/4] feature: upgrade Neo MXNet to 1.7

---
 src/sagemaker/image_uri_config/neo-mxnet.json | 26 +++++++-------
 tests/data/mxnet_mnist/mnist_neo.py           | 36 ++++++++++---------
 tests/integ/test_neo_mxnet.py                 | 12 ++-----
 3 files changed, 35 insertions(+), 39 deletions(-)

diff --git a/src/sagemaker/image_uri_config/neo-mxnet.json b/src/sagemaker/image_uri_config/neo-mxnet.json
index 9c623eb9da..d353c5830a 100644
--- a/src/sagemaker/image_uri_config/neo-mxnet.json
+++ b/src/sagemaker/image_uri_config/neo-mxnet.json
@@ -2,20 +2,20 @@
     "processors": ["cpu", "gpu"],
     "scope": ["inference"],
     "version_aliases": {
-        "0.12.1": "1.5",
-        "1.0.0": "1.5",
-        "1.1.0": "1.5",
-        "1.2": "1.5",
-        "1.2.0": "1.5",
-        "1.2.1": "1.5",
-        "1.3": "1.5",
-        "1.3.0": "1.5",
-        "1.4": "1.5",
-        "1.4.0": "1.5",
-        "1.4.1": "1.5"
+        "0.12.1": "1.7",
+        "1.0.0": "1.7",
+        "1.1.0": "1.7",
+        "1.2": "1.7",
+        "1.2.0": "1.7",
+        "1.2.1": "1.7",
+        "1.3": "1.7",
+        "1.3.0": "1.7",
+        "1.4": "1.7",
+        "1.4.0": "1.7",
+        "1.4.1": "1.7"
     },
     "versions": {
-        "1.5": {
+        "1.7": {
             "py_versions": ["py3"],
             "registries": {
                 "af-south-1": "774647643957",
@@ -42,7 +42,7 @@
                 "us-west-1": "710691900526",
                 "us-west-2": "301217895009"
             },
-            "repository": "sagemaker-neo-mxnet"
+            "repository": "sagemaker-inference-mxnet"
         }
     }
 }
diff --git a/tests/data/mxnet_mnist/mnist_neo.py b/tests/data/mxnet_mnist/mnist_neo.py
index 1b900895c9..70e8060f2e 100644
--- a/tests/data/mxnet_mnist/mnist_neo.py
+++ b/tests/data/mxnet_mnist/mnist_neo.py
@@ -19,6 +19,7 @@
 
 import mxnet as mx
 import numpy as np
+import neomxnet  # noqa: F401
 
 
 def load_data(path):
@@ -104,27 +105,28 @@ def train(
         save(model_dir, mlp_model)
 
 
-def neo_preprocess(payload, content_type):
-    logging.info("Invoking user-defined pre-processing function")
-
-    if content_type != "application/vnd+python.numpy+binary":
-        raise RuntimeError("Content type must be application/vnd+python.numpy+binary")
-
-    return np.asarray(json.loads(payload.decode("utf-8")))
+def model_fn(path_to_model_files):
+    ctx = mx.cpu()
+    sym, arg_params, aux_params = mx.model.load_checkpoint(
+        os.path.join(path_to_model_files, "compiled"), 0
+    )
+    mod = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
+    mod.bind(
+        for_training=False, data_shapes=[("data", (1, 1, 28, 28))], label_shapes=mod._label_shapes
+    )
+    mod.set_params(arg_params, aux_params, allow_missing=True)
+    return mod
 
 
-# NOTE: this function cannot use MXNet
-def neo_postprocess(result):
-    logging.info("Invoking user-defined post-processing function")
+def transform_fn(mod, payload, input_content_type, requested_output_content_type):
+    if input_content_type != "application/vnd+python.numpy+binary":
+        raise RuntimeError("Input content type must be application/vnd+python.numpy+binary")
 
-    # Softmax (assumes batch size 1)
+    inference_payload = np.asarray(json.loads(payload.decode("utf-8")))
+    result = mod.predict(inference_payload)
     result = np.squeeze(result)
-    result_exp = np.exp(result - np.max(result))
-    result = result_exp / np.sum(result_exp)
-
-    response_body = json.dumps(result.tolist())
+    response_body = json.dumps(result.asnumpy().tolist())
     content_type = "application/json"
-
     return response_body, content_type
 
 
@@ -135,7 +137,7 @@ def neo_postprocess(result):
     parser = argparse.ArgumentParser()
 
     parser.add_argument("--batch-size", type=int, default=100)
-    parser.add_argument("--epochs", type=int, default=10)
+    parser.add_argument("--epochs", type=int, default=1)
     parser.add_argument("--learning-rate", type=float, default=0.1)
 
     parser.add_argument("--model-dir", type=str, default=os.environ["SM_MODEL_DIR"])
diff --git a/tests/integ/test_neo_mxnet.py b/tests/integ/test_neo_mxnet.py
index c9747b0d88..b20710e84e 100644
--- a/tests/integ/test_neo_mxnet.py
+++ b/tests/integ/test_neo_mxnet.py
@@ -58,9 +58,6 @@ def mxnet_training_job(
 
 
 @pytest.mark.canary_quick
-@pytest.mark.skip(
-    reason="This test is failing because the image uri and the training script format has changed."
-)
 def test_attach_deploy(
     mxnet_training_job, sagemaker_session, cpu_instance_type, cpu_instance_family
 ):
@@ -71,7 +68,7 @@ def test_attach_deploy(
 
         estimator.compile_model(
             target_instance_family=cpu_instance_family,
-            input_shape={"data": [1, 1, 28, 28]},
+            input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]},
             output_path=estimator.output_path,
         )
 
@@ -89,9 +86,6 @@ def test_attach_deploy(
         predictor.predict(data)
 
 
-@pytest.mark.skip(
-    reason="This test is failing because the image uri and the training script format has changed."
-)
 def test_deploy_model(
     mxnet_training_job,
     sagemaker_session,
@@ -123,7 +117,7 @@ def test_deploy_model(
 
         model.compile(
             target_instance_family=cpu_instance_family,
-            input_shape={"data": [1, 1, 28, 28]},
+            input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]},
             role=role,
             job_name=unique_name_from_base("test-deploy-model-compilation-job"),
             output_path="/".join(model_data.split("/")[:-1]),
@@ -165,7 +159,7 @@ def test_inferentia_deploy_model(
 
         model.compile(
             target_instance_family=inf_instance_family,
-            input_shape={"data": [1, 1, 28, 28]},
+            input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]},
             role=role,
             job_name=unique_name_from_base("test-deploy-model-compilation-job"),
             output_path="/".join(model_data.split("/")[:-1]),

From e1fe7f4bd75da25204d8e35db51da3e2ca0342e2 Mon Sep 17 00:00:00 2001
From: Rui Wang Napieralski <wru@amazon.com>
Date: Mon, 28 Sep 2020 21:57:28 -0700
Subject: [PATCH 2/4] fix Neo image URI unit tests and use sagemaker-inference-pytorch repository

---
 .../image_uri_config/neo-pytorch.json         |  2 +-
 tests/unit/sagemaker/image_uris/test_neo.py   | 19 +++++++++++++------
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/src/sagemaker/image_uri_config/neo-pytorch.json b/src/sagemaker/image_uri_config/neo-pytorch.json
index 2da5cd9846..f46b1d3055 100644
--- a/src/sagemaker/image_uri_config/neo-pytorch.json
+++ b/src/sagemaker/image_uri_config/neo-pytorch.json
@@ -36,7 +36,7 @@
                 "us-west-1": "710691900526",
                 "us-west-2": "301217895009"
             },
-            "repository": "sagemaker-neo-pytorch"
+            "repository": "sagemaker-inference-pytorch"
         }
     }
 }
diff --git a/tests/unit/sagemaker/image_uris/test_neo.py b/tests/unit/sagemaker/image_uris/test_neo.py
index 474e996086..45dc054a3a 100644
--- a/tests/unit/sagemaker/image_uris/test_neo.py
+++ b/tests/unit/sagemaker/image_uris/test_neo.py
@@ -62,19 +62,26 @@ def test_algo_uris(algo):
 
 
 def _test_neo_framework_uris(framework, version):
-    framework = "neo-{}".format(framework)
+    framework_in_config = f"neo-{framework}"
+    framework_in_uri = f"neo-{framework}" if framework == "tensorflow" else f"inference-{framework}"
 
     for region in regions.regions():
         if region in ACCOUNTS:
-            uri = image_uris.retrieve(framework, region, instance_type="ml_c5", version=version)
-            assert _expected_framework_uri(framework, version, region=region) == uri
+            uri = image_uris.retrieve(
+                framework_in_config, region, instance_type="ml_c5", version=version
+            )
+            assert _expected_framework_uri(framework_in_uri, version, region=region) == uri
         else:
             with pytest.raises(ValueError) as e:
-                image_uris.retrieve(framework, region, instance_type="ml_c5", version=version)
+                image_uris.retrieve(
+                    framework_in_config, region, instance_type="ml_c5", version=version
+                )
             assert "Unsupported region: {}.".format(region) in str(e.value)
 
-    uri = image_uris.retrieve(framework, "us-west-2", instance_type="ml_p2", version=version)
-    assert _expected_framework_uri(framework, version, processor="gpu") == uri
+    uri = image_uris.retrieve(
+        framework_in_config, "us-west-2", instance_type="ml_p2", version=version
+    )
+    assert _expected_framework_uri(framework_in_uri, version, processor="gpu") == uri
 
 
 def test_neo_mxnet(neo_mxnet_version):

From aae58db04e07f3440f047f3044ff9d730f18c54a Mon Sep 17 00:00:00 2001
From: Rui Wang Napieralski <wru@amazon.com>
Date: Mon, 28 Sep 2020 22:11:21 -0700
Subject: [PATCH 3/4] fix MXNet estimator unit tests for the new Neo inference image URI

---
 tests/unit/test_mxnet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py
index 2b10fb7ef5..d3a178f233 100644
--- a/tests/unit/test_mxnet.py
+++ b/tests/unit/test_mxnet.py
@@ -175,7 +175,7 @@ def _create_compilation_job(input_shape, output_location):
 
 
 def _neo_inference_image(mxnet_version):
-    return "301217895009.dkr.ecr.us-west-2.amazonaws.com/sagemaker-neo-{}:{}-cpu-py3".format(
+    return "301217895009.dkr.ecr.us-west-2.amazonaws.com/sagemaker-inference-{}:{}-cpu-py3".format(
         FRAMEWORK.lower(), mxnet_version
     )
 

From 624c7826c212c2cae2b034136153f8abb0fa9083 Mon Sep 17 00:00:00 2001
From: Rui Wang Napieralski <wru@amazon.com>
Date: Tue, 29 Sep 2020 09:06:14 -0700
Subject: [PATCH 4/4] move import of neomxnet into inference functions

---
 tests/data/mxnet_mnist/mnist_neo.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/data/mxnet_mnist/mnist_neo.py b/tests/data/mxnet_mnist/mnist_neo.py
index 70e8060f2e..1698a49154 100644
--- a/tests/data/mxnet_mnist/mnist_neo.py
+++ b/tests/data/mxnet_mnist/mnist_neo.py
@@ -19,7 +19,6 @@
 
 import mxnet as mx
 import numpy as np
-import neomxnet  # noqa: F401
 
 
 def load_data(path):
@@ -106,6 +105,8 @@ def train(
 
 
 def model_fn(path_to_model_files):
+    import neomxnet  # noqa: F401
+
     ctx = mx.cpu()
     sym, arg_params, aux_params = mx.model.load_checkpoint(
         os.path.join(path_to_model_files, "compiled"), 0
@@ -119,6 +120,8 @@ def model_fn(path_to_model_files):
 
 
 def transform_fn(mod, payload, input_content_type, requested_output_content_type):
+    import neomxnet  # noqa: F401
+
     if input_content_type != "application/vnd+python.numpy+binary":
         raise RuntimeError("Input content type must be application/vnd+python.numpy+binary")