From 0126a6079dc49306b0702a3b3606d912f913dfc5 Mon Sep 17 00:00:00 2001 From: Rui Wang Napieralski Date: Tue, 29 Sep 2020 11:46:38 -0700 Subject: [PATCH] fix: revert "feature: upgrade Neo MxNet to 1.7 (#1928)" This reverts commit f800e0ca3b664dc80316605ca95a3ded03880cd7. --- src/sagemaker/image_uri_config/neo-mxnet.json | 26 +++++++------- .../image_uri_config/neo-pytorch.json | 2 +- tests/data/mxnet_mnist/mnist_neo.py | 35 ++++++++----------- tests/integ/test_neo_mxnet.py | 12 +++++-- tests/unit/sagemaker/image_uris/test_neo.py | 19 ++++------ tests/unit/test_mxnet.py | 2 +- 6 files changed, 45 insertions(+), 51 deletions(-) diff --git a/src/sagemaker/image_uri_config/neo-mxnet.json b/src/sagemaker/image_uri_config/neo-mxnet.json index d353c5830a..9c623eb9da 100644 --- a/src/sagemaker/image_uri_config/neo-mxnet.json +++ b/src/sagemaker/image_uri_config/neo-mxnet.json @@ -2,20 +2,20 @@ "processors": ["cpu", "gpu"], "scope": ["inference"], "version_aliases": { - "0.12.1": "1.7", - "1.0.0": "1.7", - "1.1.0": "1.7", - "1.2": "1.7", - "1.2.0": "1.7", - "1.2.1": "1.7", - "1.3": "1.7", - "1.3.0": "1.7", - "1.4": "1.7", - "1.4.0": "1.7", - "1.4.1": "1.7" + "0.12.1": "1.5", + "1.0.0": "1.5", + "1.1.0": "1.5", + "1.2": "1.5", + "1.2.0": "1.5", + "1.2.1": "1.5", + "1.3": "1.5", + "1.3.0": "1.5", + "1.4": "1.5", + "1.4.0": "1.5", + "1.4.1": "1.5" }, "versions": { - "1.7": { + "1.5": { "py_versions": ["py3"], "registries": { "af-south-1": "774647643957", @@ -42,7 +42,7 @@ "us-west-1": "710691900526", "us-west-2": "301217895009" }, - "repository": "sagemaker-inference-mxnet" + "repository": "sagemaker-neo-mxnet" } } } diff --git a/src/sagemaker/image_uri_config/neo-pytorch.json b/src/sagemaker/image_uri_config/neo-pytorch.json index f46b1d3055..2da5cd9846 100644 --- a/src/sagemaker/image_uri_config/neo-pytorch.json +++ b/src/sagemaker/image_uri_config/neo-pytorch.json @@ -36,7 +36,7 @@ "us-west-1": "710691900526", "us-west-2": "301217895009" }, - "repository": "sagemaker-inference-pytorch" + "repository": "sagemaker-neo-pytorch" } } } diff --git a/tests/data/mxnet_mnist/mnist_neo.py b/tests/data/mxnet_mnist/mnist_neo.py index 1698a49154..1b900895c9 100644 --- a/tests/data/mxnet_mnist/mnist_neo.py +++ b/tests/data/mxnet_mnist/mnist_neo.py @@ -104,32 +104,27 @@ def train( save(model_dir, mlp_model) -def model_fn(path_to_model_files): - import neomxnet # noqa: F401 +def neo_preprocess(payload, content_type): + logging.info("Invoking user-defined pre-processing function") - ctx = mx.cpu() - sym, arg_params, aux_params = mx.model.load_checkpoint( - os.path.join(path_to_model_files, "compiled"), 0 - ) - mod = mx.mod.Module(symbol=sym, context=ctx, label_names=None) - mod.bind( - for_training=False, data_shapes=[("data", (1, 1, 28, 28))], label_shapes=mod._label_shapes - ) - mod.set_params(arg_params, aux_params, allow_missing=True) - return mod + if content_type != "application/vnd+python.numpy+binary": + raise RuntimeError("Content type must be application/vnd+python.numpy+binary") + return np.asarray(json.loads(payload.decode("utf-8"))) -def transform_fn(mod, payload, input_content_type, requested_output_content_type): - import neomxnet # noqa: F401 - if input_content_type != "application/vnd+python.numpy+binary": - raise RuntimeError("Input content type must be application/vnd+python.numpy+binary") +# NOTE: this function cannot use MXNet +def neo_postprocess(result): + logging.info("Invoking user-defined post-processing function") - inference_payload = np.asarray(json.loads(payload.decode("utf-8"))) - result = mod.predict(inference_payload) + # Softmax (assumes batch size 1) result = np.squeeze(result) - response_body = json.dumps(result.asnumpy().tolist()) + result_exp = np.exp(result - np.max(result)) + result = result_exp / np.sum(result_exp) + + response_body = json.dumps(result.tolist()) content_type = "application/json" + return response_body, content_type @@ -140,7 +135,7 @@ def transform_fn(mod, payload, input_content_type, requested_output_content_type parser = argparse.ArgumentParser() parser.add_argument("--batch-size", type=int, default=100) - parser.add_argument("--epochs", type=int, default=1) + parser.add_argument("--epochs", type=int, default=10) parser.add_argument("--learning-rate", type=float, default=0.1) parser.add_argument("--model-dir", type=str, default=os.environ["SM_MODEL_DIR"]) diff --git a/tests/integ/test_neo_mxnet.py b/tests/integ/test_neo_mxnet.py index b20710e84e..c9747b0d88 100644 --- a/tests/integ/test_neo_mxnet.py +++ b/tests/integ/test_neo_mxnet.py @@ -58,6 +58,9 @@ def mxnet_training_job( @pytest.mark.canary_quick +@pytest.mark.skip( + reason="This test is failing because the image uri and the training script format has changed." +) def test_attach_deploy( mxnet_training_job, sagemaker_session, cpu_instance_type, cpu_instance_family ): @@ -68,7 +71,7 @@ def test_attach_deploy( estimator.compile_model( target_instance_family=cpu_instance_family, - input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]}, + input_shape={"data": [1, 1, 28, 28]}, output_path=estimator.output_path, ) @@ -86,6 +89,9 @@ def test_attach_deploy( predictor.predict(data) +@pytest.mark.skip( + reason="This test is failing because the image uri and the training script format has changed." +) def test_deploy_model( mxnet_training_job, sagemaker_session, @@ -117,7 +123,7 @@ def test_deploy_model( model.compile( target_instance_family=cpu_instance_family, - input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]}, + input_shape={"data": [1, 1, 28, 28]}, role=role, job_name=unique_name_from_base("test-deploy-model-compilation-job"), output_path="/".join(model_data.split("/")[:-1]), @@ -159,7 +165,7 @@ def test_inferentia_deploy_model( model.compile( target_instance_family=inf_instance_family, - input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]}, + input_shape={"data": [1, 1, 28, 28]}, role=role, job_name=unique_name_from_base("test-deploy-model-compilation-job"), output_path="/".join(model_data.split("/")[:-1]), diff --git a/tests/unit/sagemaker/image_uris/test_neo.py b/tests/unit/sagemaker/image_uris/test_neo.py index 45dc054a3a..474e996086 100644 --- a/tests/unit/sagemaker/image_uris/test_neo.py +++ b/tests/unit/sagemaker/image_uris/test_neo.py @@ -62,26 +62,19 @@ def test_algo_uris(algo): def _test_neo_framework_uris(framework, version): - framework_in_config = f"neo-{framework}" - framework_in_uri = f"neo-{framework}" if framework == "tensorflow" else f"inference-{framework}" + framework = "neo-{}".format(framework) for region in regions.regions(): if region in ACCOUNTS: - uri = image_uris.retrieve( - framework_in_config, region, instance_type="ml_c5", version=version - ) - assert _expected_framework_uri(framework_in_uri, version, region=region) == uri + uri = image_uris.retrieve(framework, region, instance_type="ml_c5", version=version) + assert _expected_framework_uri(framework, version, region=region) == uri else: with pytest.raises(ValueError) as e: - image_uris.retrieve( - framework_in_config, region, instance_type="ml_c5", version=version - ) + image_uris.retrieve(framework, region, instance_type="ml_c5", version=version) assert "Unsupported region: {}.".format(region) in str(e.value) - uri = image_uris.retrieve( - framework_in_config, "us-west-2", instance_type="ml_p2", version=version - ) - assert _expected_framework_uri(framework_in_uri, version, processor="gpu") == uri + uri = image_uris.retrieve(framework, "us-west-2", instance_type="ml_p2", version=version) + assert _expected_framework_uri(framework, version, processor="gpu") == uri def test_neo_mxnet(neo_mxnet_version): diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py index d3a178f233..2b10fb7ef5 100644 --- a/tests/unit/test_mxnet.py +++ b/tests/unit/test_mxnet.py @@ -175,7 +175,7 @@ def _create_compilation_job(input_shape, output_location): def _neo_inference_image(mxnet_version): - return "301217895009.dkr.ecr.us-west-2.amazonaws.com/sagemaker-inference-{}:{}-cpu-py3".format( + return "301217895009.dkr.ecr.us-west-2.amazonaws.com/sagemaker-neo-{}:{}-cpu-py3".format( FRAMEWORK.lower(), mxnet_version )