diff --git a/python/tvm/contrib/ethosu/cascader/device_config.py b/python/tvm/contrib/ethosu/cascader/device_config.py index 27aa8b8c78c5..f654a2598ba4 100644 --- a/python/tvm/contrib/ethosu/cascader/device_config.py +++ b/python/tvm/contrib/ethosu/cascader/device_config.py @@ -48,9 +48,24 @@ def __init__(self, shape: List[int], layout="NHWC"): self.width = int(shape[3]) self.depth = int(shape[2]) * int(shape[4]) else: - self.height = int(shape[1]) - self.width = int(shape[2]) - self.depth = int(shape[3]) + # identity layout is NHWC but the shape is not always 4 + length = len(shape) + if length == 4: + self.height = int(shape[1]) + self.width = int(shape[2]) + self.depth = int(shape[3]) + elif length == 3: + self.height = int(shape[0]) + self.width = int(shape[1]) + self.depth = int(shape[2]) + elif length == 2: + self.height = int(shape[0]) + self.width = int(shape[1]) + self.depth = 1 + elif length == 1: + self.height = int(shape[0]) + self.width = 1 + self.depth = 1 def round_up(self, other: "_Shape"): self.height = _round_up(self.height, other.height) @@ -627,18 +642,19 @@ def _get_subkernel_propagator( stride_w = int(op_attrs.get("stride_w", 1)) transform = ifm_propagator.transform - if input_layout == "NHCWB16": - transform[1][-1] = min(transform[1][-1], self._subkernel_limits[0] - stride_h) - transform[3][-1] = min(transform[3][-1], self._subkernel_limits[1] - stride_w) - else: - transform[1][-1] = min(transform[1][-1], self._subkernel_limits[0] - stride_h) - transform[2][-1] = min(transform[2][-1], self._subkernel_limits[1] - stride_w) - - if op_type in ("ethosu_pooling", "ethosu_depthwise_conv2d"): - if output_layout == "NHCWB16" and input_layout == "NHWC": - transform[3][-1] = depth - elif output_layout == "NHCWB16" and input_layout == "NHCWB16": - transform[2][-1] = 1 + ((depth - 1) // 16) + if op_type != "ethosu_identity": + if input_layout == "NHCWB16": + transform[1][-1] = min(transform[1][-1], self._subkernel_limits[0] - stride_h) + transform[3][-1] = min(transform[3][-1], self._subkernel_limits[1] - stride_w) + else: + transform[1][-1] = min(transform[1][-1], self._subkernel_limits[0] - stride_h) + transform[2][-1] = min(transform[2][-1], self._subkernel_limits[1] - stride_w) + + if op_type in ("ethosu_pooling", "ethosu_depthwise_conv2d"): + if output_layout == "NHCWB16" and input_layout == "NHWC": + transform[3][-1] = depth + elif output_layout == "NHCWB16" and input_layout == "NHCWB16": + transform[2][-1] = 1 + ((depth - 1) // 16) return Propagator(transform, ifm_propagator.offset) diff --git a/python/tvm/relay/backend/contrib/ethosu/te/identity.py b/python/tvm/relay/backend/contrib/ethosu/te/identity.py index 271ca1542fc5..0b61e0c28b88 100644 --- a/python/tvm/relay/backend/contrib/ethosu/te/identity.py +++ b/python/tvm/relay/backend/contrib/ethosu/te/identity.py @@ -16,7 +16,10 @@ # under the License. # pylint: disable=invalid-name,unused-argument """Tensor Expression for identity""" +import numpy as np from tvm import te +from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher + from .dma import read_compute, write_compute @@ -56,7 +59,6 @@ def identity_compute( ------- te.Tensor The Output Feature Map tensor. - """ dmaed_ifm = read_compute(ifm, ifm_zero_point, ifm_scale) id_attrs = {"op": "ethosu_identity", "activation": activation} @@ -76,7 +78,86 @@ def identity_compute( name="ethosu_identity", attrs=id_attrs, ) + length = len(ifm.shape) + ifm_matrix = np.identity(length + 1) + offset = np.zeros(length, dtype="int64") + ifm_propagator = Propagator( + ifm_matrix, + offset.tolist(), + ) + propagator_attrs = { + "ifm_propagator": ifm_propagator, + } + return write_compute(identity, ofm_zero_point, ofm_scale, attrs=propagator_attrs) + + +@register_matcher +def match_ethosu_identity(output_tensor, device_config): + """Match a Tensor Expression corresponding to an NPU identity. - dmaed_ofm = write_compute(identity, ofm_zero_point, ofm_scale) + If the Tensor Expression matches, an EthosuPart will be created that models the + matched Tensor Expression. Otherwise, None will be returned. - return dmaed_ofm + Parameters + ---------- + output_tensor : tvm.te.Tensor + The tensor to attempt to match with. + device_config : EthosuDeviceConfig + Target device configuration + + Returns + ------- + Union[None, EthosuPart] + The created EthosuPart if there was a match, otherwise None. + """ + write = output_tensor + if write.op.name != "ethosu_write": + return None + identity = write.op.input_tensors[0] + if identity.op.name != "ethosu_identity": + return None + read = identity.op.input_tensors[0] + if read.op.name != "ethosu_read": + return None + + input_tensors = [ + read.op.input_tensors[0], + ] + subgraph = TESubgraph(input_tensors, output_tensor) + propagators = [ + write.op.attrs["ifm_propagator"], + ] + ifm_dtype = input_tensors[0].dtype + ofm_dtype = output_tensor.dtype + + input_tensors_shape = input_tensors[0].shape + length = len(input_tensors_shape) + assert length <= 4 + channels = int(input_tensors_shape[length - 1]) if length >= 3 else 1 + + subkernels = len(device_config.get_kernel_steps(identity.op.name, 1, 1, ifm_dtype)) + + input_layout = output_layout = "NHWC" + output_quantum = device_config.get_output_quantum(output_layout) + + valid_block_configs = device_config.get_valid_block_configs( + propagators[0], + identity.op.attrs, + output_tensor.shape, + channels, + channels, + output_layout, + input_layout, + ifm_dtype, + ofm_dtype, + 1, + 1, + ) + + return EthosuPart( + subgraph, + propagators, + output_quantum, + subkernels, + valid_block_configs, + ) diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_identity_matcher.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_identity_matcher.py new file mode 100644 index 000000000000..4609a5bc3779 --- /dev/null +++ b/tests/python/contrib/test_ethosu/cascader/test_ethosu_identity_matcher.py @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import pytest + +pytest.importorskip("ethosu.vela") + +import numpy as np + +from tvm import te +import tvm.contrib.ethosu.cascader as cs +from tvm.relay.backend.contrib.ethosu.te.identity import match_ethosu_identity, identity_compute +from .infra import make_matrices + + +def test_ethosu_identity_matcher(): + ofm_channels = 21 + ifm_shape = (1, 12, 15, ofm_channels) + ifm = te.placeholder(ifm_shape, dtype="int8") + lut = te.placeholder((), dtype="uint8") + out = identity_compute( + ifm=ifm, + lut=lut, + ifm_scale=1, + ifm_zero_point=0, + ofm_scale=1, + ofm_zero_point=0, + activation="NONE", + ) + + length = len(ifm.shape) + ifm_transform = np.identity(length + 1).tolist() + ifm_offset = np.zeros(length, dtype="int64").tolist() + + device_config = cs.EthosuDeviceConfig("ethos-u55-256") + part = match_ethosu_identity(out, device_config) + + assert isinstance(part, cs.EthosuPart) + assert len(part.propagators) == 1 + assert part.propagators[0].transform == ifm_transform + assert part.propagators[0].offset == ifm_offset + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/tests/python/contrib/test_ethosu/test_codegen.py b/tests/python/contrib/test_ethosu/test_codegen.py index ce617d14fac2..b6b78c335760 100644 --- a/tests/python/contrib/test_ethosu/test_codegen.py +++ b/tests/python/contrib/test_ethosu/test_codegen.py @@ -37,6 +37,10 @@ ACCEL_TYPES = ["ethos-u55-256", "ethos-u55-128", "ethos-u55-64", "ethos-u55-32", "ethos-u65-256"] +def is_u55_accel_type(accel_type): + return "u55" in accel_type + + @pytest.mark.parametrize("accel_type", ACCEL_TYPES + ["ethos-u65-512"]) @pytest.mark.parametrize("ifm_shape", [(1, 299, 299, 2), (1, 55, 55, 3)]) @pytest.mark.parametrize("kernel_shape", [(3, 2), (1, 3)]) @@ -270,9 +274,7 @@ def binary_elementwise(lhs, rhs): shapes=[ifm_shape, ifm2_shape], ranges=[(0, 1), (0, 2)], accel_type=accel_type, - # non 4D ops legalize into identity op that is not currently supported in the cascader - enable_cascader=(len(ifm_shape) == 4 and len(ifm2_shape) == 4) - and ("u65" not in accel_type), + enable_cascader=is_u55_accel_type(accel_type), ) @@ -301,8 +303,7 @@ def binary_elementwise(lhs, rhs): shapes=[ifm_shape, ifm2_shape], ranges=[(0, 1), (0, 2)], accel_type=accel_type, - # non 4D ops legalize into identity op that is not currently supported in the cascader - enable_cascader=False, + enable_cascader=is_u55_accel_type(accel_type), ) @@ -567,13 +568,12 @@ def generate_output_data(input_data): ethosu_mod = infra.create_ethosu_partition(cpu_mod) infra.compare_ethosu_with_reference( - # identity op is not supported in cascader ethosu_mod, input_data, output_data, accel_type, output_tolerance=1, - enable_cascader=False, + enable_cascader=is_u55_accel_type(accel_type), ) @@ -603,9 +603,12 @@ def create_model(): output_data = generate_ref_data(cpu_mod, input_data) ethosu_mod = infra.create_ethosu_partition(cpu_mod) - # reshape ops legalize into identity op that is not currently supported in the cascader infra.compare_ethosu_with_reference( - ethosu_mod, input_data, output_data, accel_type, enable_cascader=False + ethosu_mod, + input_data, + output_data, + accel_type, + enable_cascader=is_u55_accel_type(accel_type), ) @@ -626,8 +629,9 @@ def test_tflite_slice(accel_type, ifm_shape, begin, size): def slice_func(x): return tf.slice(x, begin, size) - # Ops that get legalized to identity is currently not supported by the cascader - infra.compare_tvm_with_tflite(slice_func, [ifm_shape], accel_type, enable_cascader=False) + infra.compare_tvm_with_tflite( + slice_func, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) + ) @pytest.mark.parametrize("accel_type", ACCEL_TYPES) @@ -642,9 +646,8 @@ def test_tflite_strided_slice(accel_type, ifm_shape, begin, end): def strided_slice_func(x): return tf.strided_slice(x, begin, end) - # Ops that get legalized to identity are currently not supported by the cascader infra.compare_tvm_with_tflite( - strided_slice_func, [ifm_shape], accel_type, enable_cascader=False + strided_slice_func, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) ) @@ -667,12 +670,11 @@ def abs_func(x): op = tf.math.abs(x) return op - # non-4D tensors are legalized to identity which are not supported by the cascader infra.compare_tvm_with_tflite( abs_func, [ifm_shape], accel_type, - enable_cascader=(len(ifm_shape) == 4) and ("u65" not in accel_type), + enable_cascader=is_u55_accel_type(accel_type), ) @@ -752,8 +754,9 @@ def tanh_func(x): op = tf.nn.tanh(x) return op - # Ops that get legalized to identity are currently not supported by the cascader - infra.compare_tvm_with_tflite(tanh_func, [ifm_shape], accel_type, enable_cascader=False) + infra.compare_tvm_with_tflite( + tanh_func, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) + ) @pytest.mark.parametrize("accel_type", ACCEL_TYPES) @@ -774,7 +777,6 @@ def concat_func(*inputs): op = tf.concat(list(inputs), axis) return op - # Ops that get legalized to identity are currently not supported by the cascader infra.compare_tvm_with_tflite(concat_func, shapes, accel_type, enable_cascader=False) @@ -788,8 +790,9 @@ def sigmoid_function(x): op = tf.nn.sigmoid(x) return op - # Ops that get legalized to identity are currently not supported by the cascader - infra.compare_tvm_with_tflite(sigmoid_function, [ifm_shape], accel_type, enable_cascader=False) + infra.compare_tvm_with_tflite( + sigmoid_function, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) + ) # This codegen test checks both, split and split_v @@ -813,7 +816,6 @@ def split_func(x): op = tf.split(x, num_or_size_splits, axis=axis) return op - # Ops that get legalized to identity are currently not supported by the cascader infra.compare_tvm_with_tflite(split_func, [ifm_shape], accel_type, enable_cascader=False) @@ -845,9 +847,12 @@ def create_model(): output_data = generate_ref_data(cpu_mod, input_data) ethosu_mod = partition_for_ethosu(cpu_mod) - # Ops that get legalized to identity are currently not supported by the cascader infra.compare_ethosu_with_reference( - ethosu_mod, input_data, output_data, accel_type, enable_cascader=False + ethosu_mod, + input_data, + output_data, + accel_type, + enable_cascader=is_u55_accel_type(accel_type), ) @@ -860,8 +865,9 @@ def test_tflite_expand_dims(accel_type, ifm_shape, axis): def expand_dims_func(x): return tf.expand_dims(x, axis=axis) - # Ops that get legalized to identity are currently not supported by the cascader - infra.compare_tvm_with_tflite(expand_dims_func, [ifm_shape], accel_type, enable_cascader=False) + infra.compare_tvm_with_tflite( + expand_dims_func, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) + ) @pytest.mark.parametrize("accel_type", ACCEL_TYPES) @@ -875,8 +881,9 @@ def test_tflite_squeeze(accel_type, ifm_shape, axis): def squeeze_func(x): return tf.squeeze(x, axis=axis) - # Ops that get legalized to identity are currently not supported by the cascader - infra.compare_tvm_with_tflite(squeeze_func, [ifm_shape], accel_type, enable_cascader=False) + infra.compare_tvm_with_tflite( + squeeze_func, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) + ) @pytest.mark.parametrize("accel_type", ACCEL_TYPES) @@ -894,8 +901,9 @@ def resize_model(x): x, size, align_corners=align_corners, half_pixel_centers=False ) - # Ops that get legalized to identity are currently not supported by the cascader - infra.compare_tvm_with_tflite(resize_model, [ifm_shape], accel_type, enable_cascader=False) + infra.compare_tvm_with_tflite( + resize_model, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) + ) @pytest.mark.parametrize("accel_type", ACCEL_TYPES) @@ -918,8 +926,9 @@ def resize_model(x): x, size, align_corners=align_corners, half_pixel_centers=False ) - # Ops that get legalized to identity are currently not supported by the cascader - infra.compare_tvm_with_tflite(resize_model, [ifm_shape], accel_type, enable_cascader=False) + infra.compare_tvm_with_tflite( + resize_model, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) + ) @pytest.mark.parametrize("accel_type", ACCEL_TYPES) @@ -959,9 +968,11 @@ def conv2d_transpose(x): op = tf.nn.bias_add(op, bias) return op - # Ops that get legalized to identity are currently not supported by the cascader infra.compare_tvm_with_tflite( - conv2d_transpose, [ifm_shape], accel_type=accel_type, enable_cascader=False + conv2d_transpose, + [ifm_shape], + accel_type=accel_type, + enable_cascader=is_u55_accel_type(accel_type), ) @@ -982,7 +993,6 @@ def test_tflite_pack(accel_type, ifm_shapes, axis): def pack_func(*inputs): return tf.stack(inputs, axis=axis) - # Ops that get legalized to identity are currently not supported by the cascader infra.compare_tvm_with_tflite(pack_func, ifm_shapes, accel_type, enable_cascader=False) @@ -998,7 +1008,6 @@ def test_tflite_unpack(accel_type, ifm_shape, axis): def unpack_func(x): return tf.unstack(x, axis=axis) - # Ops that get legalized to identity are currently not supported by the cascader infra.compare_tvm_with_tflite(unpack_func, [ifm_shape], accel_type, enable_cascader=False) @@ -1012,8 +1021,9 @@ def test_tflite_leaky_relu(accel_type, ifm_shape, alpha): def leaky_relu_func(x): return tf.nn.leaky_relu(x, alpha=alpha) - # Ops that get legalized to identity are currently not supported by the cascader - infra.compare_tvm_with_tflite(leaky_relu_func, [ifm_shape], accel_type, enable_cascader=False) + infra.compare_tvm_with_tflite( + leaky_relu_func, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) + ) @pytest.mark.parametrize("accel_type", ACCEL_TYPES) @@ -1045,8 +1055,9 @@ def fully_connected(x): x = tf.nn.relu(x) return x - # Ops that get legalized to identity are currently not supported by the cascader - infra.compare_tvm_with_tflite(fully_connected, [ifm_shape], accel_type, enable_cascader=False) + infra.compare_tvm_with_tflite( + fully_connected, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) + ) if __name__ == "__main__":