From e9a4489a1639dfd26766c05ac46d49c978396138 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Fri, 21 Mar 2025 17:29:56 -0700 Subject: [PATCH 1/4] Update [ghstack-poisoned] --- kernels/aten/functions.yaml | 2 + kernels/portable/cpu/op_elu.cpp | 62 ++++++++++++ kernels/portable/functions.yaml | 5 + kernels/test/CMakeLists.txt | 1 + kernels/test/op_elu_test.cpp | 95 +++++++++++++++++++ kernels/test/targets.bzl | 1 + .../kernels/portable/op_registration_util.bzl | 7 ++ 7 files changed, 173 insertions(+) create mode 100644 kernels/portable/cpu/op_elu.cpp create mode 100644 kernels/test/op_elu_test.cpp diff --git a/kernels/aten/functions.yaml b/kernels/aten/functions.yaml index 7069f9140ab..a8fa6611478 100644 --- a/kernels/aten/functions.yaml +++ b/kernels/aten/functions.yaml @@ -141,6 +141,8 @@ - op: div.out_mode +- op: elu.out + - op: embedding.out - op: empty.out diff --git a/kernels/portable/cpu/op_elu.cpp b/kernels/portable/cpu/op_elu.cpp new file mode 100644 index 00000000000..d4846fb1bfb --- /dev/null +++ b/kernels/portable/cpu/op_elu.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include + +#include +#include +#include + +namespace torch::executor::native { +// elu.out: per element x, out = x <= 0 ? expm1(x * input_scale) * alpha * scale : x * scale. +Tensor& elu_out( + KernelRuntimeContext& ctx, + const Tensor& in, + const Scalar& alpha, + const Scalar& scale, + const Scalar& input_scale, + Tensor& out) { + ET_KERNEL_CHECK(ctx, tensors_have_same_dtype(in, out), InvalidArgument, out); + ET_KERNEL_CHECK( + ctx, resize_tensor(out, in.sizes()) == Error::Ok, InvalidArgument, out); + + ET_KERNEL_CHECK( + ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out); + + ET_KERNEL_CHECK(ctx, tensor_is_floating_type(in), InvalidArgument, out); + + // in/out dtype equality is already enforced by the first check above. + + static constexpr const char op_name[] = "elu.out"; + ET_SWITCH_FLOATHBF16_TYPES(in.scalar_type(), ctx, op_name, CTYPE, [&]() { + using MathT = std:: + conditional_t, float, CTYPE>; + MathT math_alpha = 0; + MathT math_scale = 0; + MathT math_input_scale = 0; + ET_EXTRACT_SCALAR(alpha, math_alpha); + ET_EXTRACT_SCALAR(scale, math_scale); + ET_EXTRACT_SCALAR(input_scale, math_input_scale); + const auto negcoef = math_alpha * math_scale; // factor for the x <= 0 branch + utils::apply_unitensor_elementwise_fn( + [negcoef, math_scale, math_input_scale](auto x) { + return MathT(x) <= MathT(0) + ? 
std::expm1(MathT(x) * math_input_scale) * negcoef + : MathT(x) * math_scale; + }, + ctx, + in, + utils::SupportedTensorDtypes::FLOATHBF16, + out, + utils::SupportedTensorDtypes::SAME_AS_COMMON); + }); + return out; +} + +} // namespace torch::executor::native diff --git a/kernels/portable/functions.yaml b/kernels/portable/functions.yaml index 29dfe8b1a0c..5e45a210a70 100644 --- a/kernels/portable/functions.yaml +++ b/kernels/portable/functions.yaml @@ -329,6 +329,11 @@ - arg_meta: null kernel_name: torch::executor::eq_tensor_out +- op: elu.out + kernels: + - arg_meta: null + kernel_name: torch::executor::elu_out + - op: erf.out kernels: - arg_meta: null diff --git a/kernels/test/CMakeLists.txt b/kernels/test/CMakeLists.txt index b9f48f0c9a1..42578acbedd 100644 --- a/kernels/test/CMakeLists.txt +++ b/kernels/test/CMakeLists.txt @@ -135,6 +135,7 @@ set(all_test_sources "op_detach_copy_test.cpp" "op_diagonal_copy_test.cpp" "op_div_test.cpp" + "op_elu_test.cpp" "op_embedding_test.cpp" "op_empty_test.cpp" "op_eq_test.cpp" diff --git a/kernels/test/op_elu_test.cpp b/kernels/test/op_elu_test.cpp new file mode 100644 index 00000000000..73ee8ac31a7 --- /dev/null +++ b/kernels/test/op_elu_test.cpp @@ -0,0 +1,95 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include // Declares the operator +#include +#include +#include +#include +#include + +#include + +using executorch::aten::Scalar; +using executorch::aten::ScalarType; +using executorch::aten::string_view; +using executorch::aten::Tensor; +using torch::executor::testing::TensorFactory; + +class OpEluTest : public OperatorTest { + protected: + Tensor& op_elu_out( + const Tensor& self, + const Scalar& alpha, + const Scalar& scale, + const Scalar& input_scale, + Tensor& out) { + return torch::executor::aten::elu_outf( + context_, self, alpha, scale, input_scale, out); + } + + template + void test_elu_execution() { + TensorFactory tf; + + const std::vector sizes = {3, 2}; + + Tensor in = tf.make(sizes, /*data=*/{-0.125, -0.25, -1, 0, 1.25, 100}); + + Tensor out = tf.zeros(sizes); + + // Run elu with alpha = 1.25, scale = 1, input_scale = 1. + op_elu_out(in, 1.25, 1, 1, out); + + // Check that it matches the expected output (1.25 * expm1(x) for x <= 0, x otherwise). + EXPECT_TENSOR_CLOSE( + out, + tf.make( + sizes, + /*data=*/ + {-0.146879, -0.276499, -0.790151, 0, 1.25, 100})); + } + + template + void test_integer_elu_dies() { + TensorFactory tf; + + Tensor in = tf.ones({1}); + Tensor out = tf.ones({1}); + ET_EXPECT_KERNEL_FAILURE(context_, op_elu_out(in, 1, 1, 1, out)); + } +}; + +TEST_F(OpEluTest, Basic) { +#define TEST_ENTRY(ctype, dtype) test_elu_execution(); + ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY); +#undef TEST_ENTRY +} + +TEST_F(OpEluTest, UnhandledDtypeDies) { +#define TEST_ENTRY(ctype, dtype) test_integer_elu_dies(); + ET_FORALL_INT_TYPES(TEST_ENTRY); +#undef TEST_ENTRY +} + +TEST_F(OpEluTest, MismatchedOutputDtypeDies) { + // Two different dtypes: the op must fail when the output ScalarType differs + // from the input's. (Judging by the factory names these are float and + // double, whose element sizes differ as well - TODO confirm.) + TensorFactory tf_float; + TensorFactory tf_double; + + const std::vector sizes = {2, 2}; + + Tensor a = tf_float.ones(sizes); + + // Destination with a dtype different from the input. 
+ Tensor out = tf_double.zeros(sizes); + + ET_EXPECT_KERNEL_FAILURE(context_, op_elu_out(a, 1, 1, 1, out)); +} diff --git a/kernels/test/targets.bzl b/kernels/test/targets.bzl index 18ab0ac2e28..3824551a46b 100644 --- a/kernels/test/targets.bzl +++ b/kernels/test/targets.bzl @@ -215,6 +215,7 @@ def define_common_targets(): _common_op_test("op_detach_copy_test", ["aten", "portable"]) _common_op_test("op_diagonal_copy_test", ["aten", "portable"]) _common_op_test("op_div_test", ["aten", "portable", "optimized"]) + _common_op_test("op_elu_test", ["aten", "portable"]) _common_op_test("op_embedding_test", ["aten", "portable"]) _common_op_test("op_empty_test", ["aten", "portable"]) _common_op_test("op_eq_test", ["aten", "portable"]) diff --git a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl index b56413b92f4..a1ffdc1eed3 100644 --- a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl +++ b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl @@ -482,6 +482,13 @@ ATEN_OPS = ( ":scalar_utils", ], ), + op_target( + name = "op_elu", + deps = [ + ":scalar_utils", + "//executorch/kernels/portable/cpu/util:elementwise_util", + ], + ), op_target( name = "op_embedding", deps = [ From e055ac0e5f392c64b748f6ba854855c60225d2fd Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Fri, 21 Mar 2025 17:30:00 -0700 Subject: [PATCH 2/4] Update [ghstack-poisoned] --- kernels/optimized/cpu/op_elu.cpp | 102 ++++++++++++++++++++++++++++++ kernels/optimized/cpu/targets.bzl | 8 +++ kernels/optimized/optimized.yaml | 5 ++ kernels/test/CMakeLists.txt | 1 + kernels/test/targets.bzl | 2 +- 5 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 kernels/optimized/cpu/op_elu.cpp diff --git a/kernels/optimized/cpu/op_elu.cpp b/kernels/optimized/cpu/op_elu.cpp new file mode 100644 index 00000000000..c7cd7aec653 --- /dev/null +++ b/kernels/optimized/cpu/op_elu.cpp @@ -0,0 
+1,102 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include +#include +#include + +namespace torch::executor::native { + +namespace { +template +void elu( + KernelRuntimeContext& context, + const Tensor& input, + const Scalar& alpha, + const Scalar& scale, + const Scalar& input_scale, + Tensor& out) { + const CTYPE* in_data = input.const_data_ptr(); + CTYPE* out_data = out.mutable_data_ptr(); + using MathT = + std::conditional_t, float, CTYPE>; + MathT math_alpha = 0; + MathT math_scale = 0; + MathT math_input_scale = 0; + ET_EXTRACT_SCALAR(alpha, math_alpha); + ET_EXTRACT_SCALAR(scale, math_scale); + ET_EXTRACT_SCALAR(input_scale, math_input_scale); + const auto scalar_func = + at::native::get_scalar_elu_elementwise_func( + math_alpha, math_scale, math_input_scale); + const auto vec_func = at::native::get_vectorized_elu_elementwise_func( + math_alpha, math_scale, math_input_scale); + + ::executorch::extension::parallel_for( + 0, + out.numel(), + ::executorch::extension::internal::GRAIN_SIZE, + [&](const auto begin, const auto end) { + using Vec = at::vec::Vectorized; + const auto vectorized_begin = + begin + (Vec::size() - begin % Vec::size()) % Vec::size(); + const auto vectorized_end = end - (end % Vec::size()); + // Scalar prologue. NOTE(review): not clamped to `end`; overruns the chunk if vectorized_begin > end. + for (const auto idx : c10::irange(begin, vectorized_begin)) { + out_data[idx] = scalar_func(in_data[idx]); + } + + // Main vectorized loop. + for (auto idx = vectorized_begin; idx < vectorized_end; + idx += Vec::size()) { + auto result_vec = vec_func(Vec::loadu(&in_data[idx])); + result_vec.store(&out_data[idx]); + } + + // Scalar epilogue. 
+ for (const auto idx : c10::irange(vectorized_end, end)) { + out_data[idx] = scalar_func(in_data[idx]); + } + }); +} +} // namespace +// Optimized elu.out: validates the arguments, then dispatches on the input dtype to elu(). +Tensor& opt_elu_out( + KernelRuntimeContext& context, + const Tensor& input, + const Scalar& alpha, + const Scalar& scale, + const Scalar& input_scale, + Tensor& out) { + ET_KERNEL_CHECK(context, tensors_have_same_dtype(input, out), InvalidArgument, out); + ET_KERNEL_CHECK( + context, + resize_tensor(out, input.sizes()) == Error::Ok, + InvalidArgument, + out); + + ET_KERNEL_CHECK( + context, tensors_have_same_dim_order(input, out), InvalidArgument, out); + + ET_KERNEL_CHECK( + context, tensor_is_floating_type(input), InvalidArgument, out); + + // Input/output dtype equality was already verified by the first check, so + // the CTYPE selected below describes both tensors. + + ET_SWITCH_FLOATHBF16_TYPES( + input.scalar_type(), context, "elu.out", CTYPE, [&]() { + elu(context, input, alpha, scale, input_scale, out); + }); + return out; +} + +} // namespace torch::executor::native diff --git a/kernels/optimized/cpu/targets.bzl b/kernels/optimized/cpu/targets.bzl index b868a5901fd..5fd7b74d33e 100644 --- a/kernels/optimized/cpu/targets.bzl +++ b/kernels/optimized/cpu/targets.bzl @@ -25,6 +25,14 @@ _OPTIMIZED_ATEN_OPS = ( "//executorch/kernels/portable/cpu/util:broadcast_util", ], ), + op_target( + name = "op_elu", + deps = [ + "//executorch/extension/threadpool:threadpool", + "//executorch/kernels/portable/cpu:scalar_utils", + "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch", + ], + ), op_target(name = "op_exp"), op_target( name = "op_fft_r2c", diff --git a/kernels/optimized/optimized.yaml b/kernels/optimized/optimized.yaml index 4f90059aa93..864c3ed5780 100644 --- a/kernels/optimized/optimized.yaml +++ b/kernels/optimized/optimized.yaml @@ -37,6 +37,11 @@ - arg_meta: null kernel_name: torch::executor::opt_div_scalar_out +- op: elu.out + kernels: + - arg_meta: null + kernel_name: torch::executor::opt_elu_out + - op: exp.out kernels: - arg_meta: null diff --git 
a/kernels/test/CMakeLists.txt b/kernels/test/CMakeLists.txt index 42578acbedd..2d497dfc124 100644 --- a/kernels/test/CMakeLists.txt +++ b/kernels/test/CMakeLists.txt @@ -274,6 +274,7 @@ set(_optimized_kernels_test_sources "op_add_test.cpp" "op_bmm_test.cpp" "op_div_test.cpp" + "op_elu_test.cpp" "op_exp_test.cpp" "op_fft_r2c_test.cpp" "op_gelu_test.cpp" diff --git a/kernels/test/targets.bzl b/kernels/test/targets.bzl index 3824551a46b..05e678c6229 100644 --- a/kernels/test/targets.bzl +++ b/kernels/test/targets.bzl @@ -215,7 +215,7 @@ def define_common_targets(): _common_op_test("op_detach_copy_test", ["aten", "portable"]) _common_op_test("op_diagonal_copy_test", ["aten", "portable"]) _common_op_test("op_div_test", ["aten", "portable", "optimized"]) - _common_op_test("op_elu_test", ["aten", "portable"]) + _common_op_test("op_elu_test", ["aten", "portable", "optimized"]) _common_op_test("op_embedding_test", ["aten", "portable"]) _common_op_test("op_empty_test", ["aten", "portable"]) _common_op_test("op_eq_test", ["aten", "portable"]) From 84778c7083dd0940341c0c9894be5936abe73540 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 25 Mar 2025 08:56:58 -0700 Subject: [PATCH 3/4] Update [ghstack-poisoned] --- exir/dialects/edge/op/sample_input.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/exir/dialects/edge/op/sample_input.py b/exir/dialects/edge/op/sample_input.py index 23d87053c9e..449f7476be5 100644 --- a/exir/dialects/edge/op/sample_input.py +++ b/exir/dialects/edge/op/sample_input.py @@ -424,6 +424,15 @@ ], "returns": [Return(ArgType.Tensor)], }, + "elu.default": { # (Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor 
+ "args": [ + InArg(ArgType.Tensor), + InArg(ArgType.Scalar), + InArg(ArgType.Scalar), + InArg(ArgType.Scalar), + ], + "returns": [Return(ArgType.Tensor)], + }, "embedding.default": { # (Tensor weight, Tensor indices, SymInt padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> Tensor "args": [ InArg(ArgType.Tensor), From 58b6a5b9280cf5996c44067b79e4bdb7c885eb67 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 25 Mar 2025 08:57:03 -0700 Subject: [PATCH 4/4] Update [ghstack-poisoned] --- .ci/docker/ci_commit_pins/pytorch.txt | 2 +- install_requirements.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt index b17dd3f8f95..ee800549518 100644 --- a/.ci/docker/ci_commit_pins/pytorch.txt +++ b/.ci/docker/ci_commit_pins/pytorch.txt @@ -1 +1 @@ -295f2ed4d103017f7e19a7b8263ece606cd629db +59d5cf083b4f860dea76fe8936076177f9367f10 diff --git a/install_requirements.py b/install_requirements.py index 0331f76522a..ba76106502f 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -71,7 +71,7 @@ def python_is_compatible(): # # NOTE: If you're changing, make the corresponding change in .ci/docker/ci_commit_pins/pytorch.txt # by picking the hash from the same date in https://hud.pytorch.org/hud/pytorch/pytorch/nightly/ -NIGHTLY_VERSION = "dev20250311" +NIGHTLY_VERSION = "dev20250325" def install_requirements(use_pytorch_nightly): @@ -80,7 +80,7 @@ def install_requirements(use_pytorch_nightly): # Setting use_pytorch_nightly to false to test the pinned PyTorch commit. Note # that we don't need to set any version number there because they have already # been installed on CI before this step, so pip won't reinstall them - f"torch==2.7.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torch", + f"torch==2.8.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torch", ( f"torchvision==0.22.0.{NIGHTLY_VERSION}" if use_pytorch_nightly