From 64c768da254b38b1edd9956649363c9bad29c885 Mon Sep 17 00:00:00 2001 From: Rui Wang Napieralski Date: Thu, 24 Sep 2020 22:48:44 -0700 Subject: [PATCH 1/4] feature: upgrade Neo MxNet to 1.7 --- src/sagemaker/image_uri_config/neo-mxnet.json | 26 +++++++------- tests/data/mxnet_mnist/mnist_neo.py | 36 ++++++++++--------- tests/integ/test_neo_mxnet.py | 12 ++----- 3 files changed, 35 insertions(+), 39 deletions(-) diff --git a/src/sagemaker/image_uri_config/neo-mxnet.json b/src/sagemaker/image_uri_config/neo-mxnet.json index 9c623eb9da..d353c5830a 100644 --- a/src/sagemaker/image_uri_config/neo-mxnet.json +++ b/src/sagemaker/image_uri_config/neo-mxnet.json @@ -2,20 +2,20 @@ "processors": ["cpu", "gpu"], "scope": ["inference"], "version_aliases": { - "0.12.1": "1.5", - "1.0.0": "1.5", - "1.1.0": "1.5", - "1.2": "1.5", - "1.2.0": "1.5", - "1.2.1": "1.5", - "1.3": "1.5", - "1.3.0": "1.5", - "1.4": "1.5", - "1.4.0": "1.5", - "1.4.1": "1.5" + "0.12.1": "1.7", + "1.0.0": "1.7", + "1.1.0": "1.7", + "1.2": "1.7", + "1.2.0": "1.7", + "1.2.1": "1.7", + "1.3": "1.7", + "1.3.0": "1.7", + "1.4": "1.7", + "1.4.0": "1.7", + "1.4.1": "1.7" }, "versions": { - "1.5": { + "1.7": { "py_versions": ["py3"], "registries": { "af-south-1": "774647643957", @@ -42,7 +42,7 @@ "us-west-1": "710691900526", "us-west-2": "301217895009" }, - "repository": "sagemaker-neo-mxnet" + "repository": "sagemaker-inference-mxnet" } } } diff --git a/tests/data/mxnet_mnist/mnist_neo.py b/tests/data/mxnet_mnist/mnist_neo.py index 1b900895c9..70e8060f2e 100644 --- a/tests/data/mxnet_mnist/mnist_neo.py +++ b/tests/data/mxnet_mnist/mnist_neo.py @@ -19,6 +19,7 @@ import mxnet as mx import numpy as np +import neomxnet # noqa: F401 def load_data(path): @@ -104,27 +105,28 @@ def train( save(model_dir, mlp_model) -def neo_preprocess(payload, content_type): - logging.info("Invoking user-defined pre-processing function") - - if content_type != "application/vnd+python.numpy+binary": - raise RuntimeError("Content type must be application/vnd+python.numpy+binary") - - return np.asarray(json.loads(payload.decode("utf-8"))) +def model_fn(path_to_model_files): + ctx = mx.cpu() + sym, arg_params, aux_params = mx.model.load_checkpoint( + os.path.join(path_to_model_files, "compiled"), 0 + ) + mod = mx.mod.Module(symbol=sym, context=ctx, label_names=None) + mod.bind( + for_training=False, data_shapes=[("data", (1, 1, 28, 28))], label_shapes=mod._label_shapes + ) + mod.set_params(arg_params, aux_params, allow_missing=True) + return mod -# NOTE: this function cannot use MXNet -def neo_postprocess(result): - logging.info("Invoking user-defined post-processing function") +def transform_fn(mod, payload, input_content_type, requested_output_content_type): + if input_content_type != "application/vnd+python.numpy+binary": + raise RuntimeError("Input content type must be application/vnd+python.numpy+binary") - # Softmax (assumes batch size 1) + inference_payload = np.asarray(json.loads(payload.decode("utf-8"))) + result = mod.predict(inference_payload) result = np.squeeze(result) - result_exp = np.exp(result - np.max(result)) - result = result_exp / np.sum(result_exp) - - response_body = json.dumps(result.tolist()) + response_body = json.dumps(result.asnumpy().tolist()) content_type = "application/json" - return response_body, content_type @@ -135,7 +137,7 @@ def neo_postprocess(result): parser = argparse.ArgumentParser() parser.add_argument("--batch-size", type=int, default=100) - parser.add_argument("--epochs", type=int, default=10) + parser.add_argument("--epochs", type=int, default=1) parser.add_argument("--learning-rate", type=float, default=0.1) parser.add_argument("--model-dir", type=str, default=os.environ["SM_MODEL_DIR"]) diff --git a/tests/integ/test_neo_mxnet.py b/tests/integ/test_neo_mxnet.py index c9747b0d88..b20710e84e 100644 --- a/tests/integ/test_neo_mxnet.py +++ b/tests/integ/test_neo_mxnet.py @@ -58,9 +58,6 @@ def mxnet_training_job( @pytest.mark.canary_quick -@pytest.mark.skip( - reason="This test is failing because the image uri and the training script format has changed." -) def test_attach_deploy( mxnet_training_job, sagemaker_session, cpu_instance_type, cpu_instance_family ): @@ -71,7 +68,7 @@ def test_attach_deploy( estimator.compile_model( target_instance_family=cpu_instance_family, - input_shape={"data": [1, 1, 28, 28]}, + input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]}, output_path=estimator.output_path, ) @@ -89,9 +86,6 @@ def test_attach_deploy( predictor.predict(data) -@pytest.mark.skip( - reason="This test is failing because the image uri and the training script format has changed." -) def test_deploy_model( mxnet_training_job, sagemaker_session, @@ -123,7 +117,7 @@ def test_deploy_model( model.compile( target_instance_family=cpu_instance_family, - input_shape={"data": [1, 1, 28, 28]}, + input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]}, role=role, job_name=unique_name_from_base("test-deploy-model-compilation-job"), output_path="/".join(model_data.split("/")[:-1]), @@ -165,7 +159,7 @@ def test_inferentia_deploy_model( model.compile( target_instance_family=inf_instance_family, - input_shape={"data": [1, 1, 28, 28]}, + input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]}, role=role, job_name=unique_name_from_base("test-deploy-model-compilation-job"), output_path="/".join(model_data.split("/")[:-1]), From e1fe7f4bd75da25204d8e35db51da3e2ca0342e2 Mon Sep 17 00:00:00 2001 From: Rui Wang Napieralski Date: Mon, 28 Sep 2020 21:57:28 -0700 Subject: [PATCH 2/4] fix unit tests --- .../image_uri_config/neo-pytorch.json | 2 +- tests/unit/sagemaker/image_uris/test_neo.py | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/sagemaker/image_uri_config/neo-pytorch.json b/src/sagemaker/image_uri_config/neo-pytorch.json index 2da5cd9846..f46b1d3055 100644 --- a/src/sagemaker/image_uri_config/neo-pytorch.json +++ b/src/sagemaker/image_uri_config/neo-pytorch.json @@ -36,7 +36,7 @@ "us-west-1": "710691900526", "us-west-2": "301217895009" }, - "repository": "sagemaker-neo-pytorch" + "repository": "sagemaker-inference-pytorch" } } } diff --git a/tests/unit/sagemaker/image_uris/test_neo.py b/tests/unit/sagemaker/image_uris/test_neo.py index 474e996086..45dc054a3a 100644 --- a/tests/unit/sagemaker/image_uris/test_neo.py +++ b/tests/unit/sagemaker/image_uris/test_neo.py @@ -62,19 +62,26 @@ def test_algo_uris(algo): def _test_neo_framework_uris(framework, version): - framework = "neo-{}".format(framework) + framework_in_config = f"neo-{framework}" + framework_in_uri = f"neo-{framework}" if framework == "tensorflow" else f"inference-{framework}" for region in regions.regions(): if region in ACCOUNTS: - uri = image_uris.retrieve(framework, region, instance_type="ml_c5", version=version) - assert _expected_framework_uri(framework, version, region=region) == uri + uri = image_uris.retrieve( + framework_in_config, region, instance_type="ml_c5", version=version + ) + assert _expected_framework_uri(framework_in_uri, version, region=region) == uri else: with pytest.raises(ValueError) as e: - image_uris.retrieve(framework, region, instance_type="ml_c5", version=version) + image_uris.retrieve( + framework_in_config, region, instance_type="ml_c5", version=version + ) assert "Unsupported region: {}.".format(region) in str(e.value) - uri = image_uris.retrieve(framework, "us-west-2", instance_type="ml_p2", version=version) - assert _expected_framework_uri(framework, version, processor="gpu") == uri + uri = image_uris.retrieve( + framework_in_config, "us-west-2", instance_type="ml_p2", version=version + ) + assert _expected_framework_uri(framework_in_uri, version, processor="gpu") == uri def test_neo_mxnet(neo_mxnet_version): From aae58db04e07f3440f047f3044ff9d730f18c54a Mon Sep 17 00:00:00 2001 From: Rui Wang Napieralski Date: Mon, 28 Sep 2020 22:11:21 -0700 Subject: [PATCH 3/4] fix mxnet estimator unit tests --- tests/unit/test_mxnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py index 2b10fb7ef5..d3a178f233 100644 --- a/tests/unit/test_mxnet.py +++ b/tests/unit/test_mxnet.py @@ -175,7 +175,7 @@ def _create_compilation_job(input_shape, output_location): def _neo_inference_image(mxnet_version): - return "301217895009.dkr.ecr.us-west-2.amazonaws.com/sagemaker-neo-{}:{}-cpu-py3".format( + return "301217895009.dkr.ecr.us-west-2.amazonaws.com/sagemaker-inference-{}:{}-cpu-py3".format( FRAMEWORK.lower(), mxnet_version ) From 624c7826c212c2cae2b034136153f8abb0fa9083 Mon Sep 17 00:00:00 2001 From: Rui Wang Napieralski Date: Tue, 29 Sep 2020 09:06:14 -0700 Subject: [PATCH 4/4] move import of neomxnet into inference functions --- tests/data/mxnet_mnist/mnist_neo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/data/mxnet_mnist/mnist_neo.py b/tests/data/mxnet_mnist/mnist_neo.py index 70e8060f2e..1698a49154 100644 --- a/tests/data/mxnet_mnist/mnist_neo.py +++ b/tests/data/mxnet_mnist/mnist_neo.py @@ -19,7 +19,6 @@ import mxnet as mx import numpy as np -import neomxnet # noqa: F401 def load_data(path): @@ -106,6 +105,8 @@ def train( def model_fn(path_to_model_files): + import neomxnet # noqa: F401 + ctx = mx.cpu() sym, arg_params, aux_params = mx.model.load_checkpoint( os.path.join(path_to_model_files, "compiled"), 0 @@ -119,6 +120,8 @@ def model_fn(path_to_model_files): def transform_fn(mod, payload, input_content_type, requested_output_content_type): + import neomxnet # noqa: F401 + if input_content_type != "application/vnd+python.numpy+binary": raise RuntimeError("Input content type must be application/vnd+python.numpy+binary")