Skip to content

Commit 7fa7fa0

Browse files
committed
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
2 parents a3df05d + 45d2e18 commit 7fa7fa0

31 files changed

+896
-115
lines changed

CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,6 @@ OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add
323323
OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" OFF IF CV_GCC )
324324
OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CV_GCC )
325325
OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CV_GCC AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
326-
OCV_OPTION(ENABLE_VSX "Enable POWER8 and above VSX (64-bit little-endian)" ON IF ((CV_GCC OR CV_CLANG) AND PPC64LE) )
327326
OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CV_GCC AND (X86 OR X86_64)) )
328327
if(NOT IOS AND (NOT ANDROID OR OPENCV_ANDROID_USE_LEGACY_FLAGS)) # Use CPU_BASELINE instead
329328
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" (NEON OR ANDROID_ARM_NEON OR AARCH64) IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) )

cmake/OpenCVCompilerOptimizations.cmake

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
# AVX / AVX2 / AVX_512F
66
# FMA3
77

8+
# ppc64le arch:
9+
# VSX (always available on Power8)
10+
# VSX3 (always available on Power9)
11+
812
# CPU_{opt}_SUPPORTED=ON/OFF - compiler support (possibly with additional flag)
913
# CPU_{opt}_IMPLIES=<list>
1014
# CPU_{opt}_FORCE=<list> - subset of "implies" list
@@ -29,7 +33,7 @@
2933

3034
set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX_512F;AVX512_SKX")
3135
list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16)
32-
list(APPEND CPU_ALL_OPTIMIZATIONS VSX)
36+
list(APPEND CPU_ALL_OPTIMIZATIONS VSX VSX3)
3337
list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS)
3438

3539
ocv_update(CPU_VFPV3_FEATURE_ALIAS "")
@@ -81,7 +85,7 @@ ocv_optimization_process_obsolete_option(ENABLE_FMA3 FMA3 ON)
8185
ocv_optimization_process_obsolete_option(ENABLE_VFPV3 VFPV3 OFF)
8286
ocv_optimization_process_obsolete_option(ENABLE_NEON NEON OFF)
8387

84-
ocv_optimization_process_obsolete_option(ENABLE_VSX VSX OFF)
88+
ocv_optimization_process_obsolete_option(ENABLE_VSX VSX ON)
8589

8690
macro(ocv_is_optimization_in_list resultvar check_opt)
8791
set(__checked "")
@@ -289,14 +293,24 @@ elseif(ARM OR AARCH64)
289293
set(CPU_BASELINE "NEON;FP16" CACHE STRING "${HELP_CPU_BASELINE}")
290294
endif()
291295
elseif(PPC64LE)
292-
ocv_update(CPU_KNOWN_OPTIMIZATIONS "VSX")
296+
ocv_update(CPU_KNOWN_OPTIMIZATIONS "VSX;VSX3")
293297
ocv_update(CPU_VSX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx.cpp")
298+
ocv_update(CPU_VSX3_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx3.cpp")
299+
300+
if(NOT OPENCV_CPU_OPT_IMPLIES_IGNORE)  # skip the "implies" declaration when OPENCV_CPU_OPT_IMPLIES_IGNORE is set
301+
ocv_update(CPU_VSX3_IMPLIES "VSX")  # CPU_{opt}_IMPLIES=<list>: VSX is listed as implied by VSX3
302+
endif()
294303

295304
if(CV_CLANG AND (NOT CMAKE_CXX_COMPILER MATCHES "xlc"))  # Clang whose compiler path does not contain "xlc"; if() dereferences the variable name itself, so no unquoted ${...} expansion is needed
296305
ocv_update(CPU_VSX_FLAGS_ON "-mvsx -maltivec")  # flags that switch VSX on for this compiler
306+
ocv_update(CPU_VSX3_FLAGS_ON "-mpower9-vector")  # flags that switch VSX3 on for this compiler
297307
else()  # every other case, including a Clang whose compiler path contains "xlc"
298308
ocv_update(CPU_VSX_FLAGS_ON "-mcpu=power8")  # VSX is selected through the target CPU
309+
ocv_update(CPU_VSX3_FLAGS_ON "-mcpu=power9 -mtune=power9")  # VSX3 is selected through the target CPU
299310
endif()
311+
312+
set(CPU_DISPATCH "VSX3" CACHE STRING "${HELP_CPU_DISPATCH}")  # default for the CPU_DISPATCH cache entry; set(CACHE) without FORCE keeps a value that is already cached
313+
set(CPU_BASELINE "VSX" CACHE STRING "${HELP_CPU_BASELINE}")  # default for the CPU_BASELINE cache entry; an already cached value is likewise kept
300314
endif()
301315

302316
# Helper values for cmake-gui

cmake/checks/cpu_vsx.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1-
# if defined(__VSX__)
2-
# include <altivec.h>
3-
# else
4-
# error "VSX is not supported"
5-
# endif
1+
#if defined(__VSX__)  // outer guard: the compiler must define __VSX__
2+
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__)  // inner guard: 64-bit PowerPC in little-endian mode
3+
#include <altivec.h>
4+
#else  // __VSX__ is defined, but the target is not 64-bit little-endian
5+
#error "OpenCV only supports little-endian mode"
6+
#endif
7+
#else  // __VSX__ is not defined
8+
#error "VSX is not supported"
9+
#endif
610

711
int main()
812
{

cmake/checks/cpu_vsx3.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#if defined(__VSX__)  // outer guard: the compiler must define __VSX__
2+
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__)  // inner guard: 64-bit PowerPC in little-endian mode
3+
#include <altivec.h>
4+
#else  // __VSX__ is defined, but the target is not 64-bit little-endian
5+
#error "OpenCV only supports little-endian mode"
6+
#endif
7+
#else  // __VSX__ is not defined
8+
#error "VSX3 is not supported"
9+
#endif
10+
11+
int main()  // test program for VSX3; set as CPU_VSX3_TEST_FILE in cmake/OpenCVCompilerOptimizations.cmake
12+
{
13+
__vector unsigned char a = vec_splats((unsigned char)1);
14+
__vector unsigned char b = vec_splats((unsigned char)2);
15+
__vector unsigned char r = vec_absd(a, b);  // NOTE(review): presumably vec_absd only compiles with Power9 vector support, which is what makes this a VSX3 test - confirm
16+
return 0;  // r is never read; the program computes nothing that is used afterwards
17+
}

modules/core/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ set(the_description "The Core Functionality")
22

33
ocv_add_dispatched_file(mathfuncs_core SSE2 AVX AVX2)
44
ocv_add_dispatched_file(stat SSE4_2 AVX2)
5-
ocv_add_dispatched_file(arithm SSE2 SSE4_1 AVX2)
5+
ocv_add_dispatched_file(arithm SSE2 SSE4_1 AVX2 VSX3)
66

77
# dispatching for accuracy tests
88
ocv_add_dispatched_file_force_all(test_intrin128 TEST SSE2 SSE3 SSSE3 SSE4_1 SSE4_2 AVX FP16 AVX2)

modules/core/include/opencv2/core/cv_cpu_dispatch.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,14 +107,18 @@
107107
# include <arm_neon.h>
108108
#endif
109109

110-
#if defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
110+
#ifdef CV_CPU_COMPILE_VSX  // VSX support is keyed on the CV_CPU_COMPILE_VSX macro
111111
# include <altivec.h>
112112
# undef vector  // NOTE(review): presumably altivec.h defines vector/pixel/bool as macros that would clash with ordinary C++ identifiers - confirm
113113
# undef pixel
114114
# undef bool
115115
# define CV_VSX 1  // marks VSX intrinsics as available to the rest of the code
116116
#endif
117117

118+
#ifdef CV_CPU_COMPILE_VSX3  // VSX3 support is keyed on the CV_CPU_COMPILE_VSX3 macro
119+
# define CV_VSX3 1  // marks VSX3 as available; no extra header is included here
120+
#endif
121+
118122
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
119123

120124
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
@@ -237,3 +241,7 @@ struct VZeroUpperGuard {
237241
#ifndef CV_VSX
238242
# define CV_VSX 0
239243
#endif
244+
245+
#ifndef CV_VSX3  // fallback: make sure CV_VSX3 is always defined
246+
# define CV_VSX3 0  // 0 when nothing earlier defined it, so "#if CV_VSX3" is always well-formed
247+
#endif

modules/core/include/opencv2/core/cv_cpu_helper.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,5 +315,26 @@
315315
#endif
316316
#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...) CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
317317

318+
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX3  // case 1: CV_CPU_COMPILE_VSX3 is defined - no runtime check is generated
319+
# define CV_TRY_VSX3 1
320+
# define CV_CPU_FORCE_VSX3 1
321+
# define CV_CPU_HAS_SUPPORT_VSX3 1
322+
# define CV_CPU_CALL_VSX3(fn, args) return (cpu_baseline::fn args) /* unconditional return through the cpu_baseline namespace */
323+
# define CV_CPU_CALL_VSX3_(fn, args) return (opt_VSX3::fn args) /* unconditional return through the opt_VSX3 namespace */
324+
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX3  // case 2: only CV_CPU_DISPATCH_COMPILE_VSX3 is defined - the call is guarded at run time
325+
# define CV_TRY_VSX3 1
326+
# define CV_CPU_FORCE_VSX3 0
327+
# define CV_CPU_HAS_SUPPORT_VSX3 (cv::checkHardwareSupport(CV_CPU_VSX3)) /* evaluated at each use of the macro */
328+
# define CV_CPU_CALL_VSX3(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args) /* falls through to the next statement when the check fails */
329+
# define CV_CPU_CALL_VSX3_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
330+
#else  // case 3: optimizations disabled, intrinsics not enabled, or VSX3 not compiled at all
331+
# define CV_TRY_VSX3 0
332+
# define CV_CPU_FORCE_VSX3 0
333+
# define CV_CPU_HAS_SUPPORT_VSX3 0
334+
# define CV_CPU_CALL_VSX3(fn, args) /* expands to nothing */
335+
# define CV_CPU_CALL_VSX3_(fn, args) /* expands to nothing */
336+
#endif
337+
#define __CV_CPU_DISPATCH_CHAIN_VSX3(fn, args, mode, ...) CV_CPU_CALL_VSX3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) /* emits the VSX3 call, then continues with the chain macro selected by mode */
338+
318339
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
319340
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */

modules/core/include/opencv2/core/cvdef.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,9 +226,10 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
226226
#define CV_CPU_AVX_512VBMI 20
227227
#define CV_CPU_AVX_512VL 21
228228

229-
#define CV_CPU_NEON 100
229+
#define CV_CPU_NEON 100
230230

231-
#define CV_CPU_VSX 200
231+
#define CV_CPU_VSX 200
232+
#define CV_CPU_VSX3 201
232233

233234
// CPU features groups
234235
#define CV_CPU_AVX512_SKX 256
@@ -266,6 +267,7 @@ enum CpuFeatures {
266267
CPU_NEON = 100,
267268

268269
CPU_VSX = 200,
270+
CPU_VSX3 = 201,
269271

270272
CPU_AVX512_SKX = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL
271273

modules/core/include/opencv2/core/hal/intrin_avx.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -905,6 +905,11 @@ OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(v_int64x4)
905905
OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(v_float32x8, ps)
906906
OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(v_float64x4, pd)
907907

908+
inline v_float32x8 v_not_nan(const v_float32x8& a)  // per-lane "is not NaN" mask for 8 packed floats
909+
{ return v_float32x8(_mm256_cmp_ps(a.val, a.val, _CMP_ORD_Q)); }  // ordered compare of a with itself: a lane is set only where the value is not NaN
910+
inline v_float64x4 v_not_nan(const v_float64x4& a)  // per-lane "is not NaN" mask for 4 packed doubles
911+
{ return v_float64x4(_mm256_cmp_pd(a.val, a.val, _CMP_ORD_Q)); }  // ordered compare of a with itself: a lane is set only where the value is not NaN
912+
908913
/** min/max **/
909914
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_uint8x32, _mm256_min_epu8)
910915
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_uint8x32, _mm256_max_epu8)

modules/core/include/opencv2/core/hal/intrin_cpp.hpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,25 @@ OPENCV_HAL_IMPL_CMP_OP(==)
683683
For all types except 64-bit integer values. */
684684
OPENCV_HAL_IMPL_CMP_OP(!=)
685685

686+
template<int n>
687+
inline v_reg<float, n> v_not_nan(const v_reg<float, n>& a)  // per-lane "is not NaN" mask for an n-lane float register
688+
{
689+
typedef typename V_TypeTraits<float>::int_type itype;
690+
v_reg<float, n> c;
691+
for (int i = 0; i < n; i++)
692+
c.s[i] = V_TypeTraits<float>::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i]));  // x == x is false only for NaN; -(int)true is -1 (all bits set), -(int)false is 0; reinterpret_from_int presumably keeps the bit pattern - confirm
693+
return c;
694+
}
695+
template<int n>
696+
inline v_reg<double, n> v_not_nan(const v_reg<double, n>& a)  // per-lane "is not NaN" mask for an n-lane double register
697+
{
698+
typedef typename V_TypeTraits<double>::int_type itype;
699+
v_reg<double, n> c;
700+
for (int i = 0; i < n; i++)
701+
c.s[i] = V_TypeTraits<double>::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i]));  // x == x is false only for NaN; the -1 / 0 int is cast to itype before reinterpret_from_int (presumably a bit-pattern reinterpretation - confirm)
702+
return c;
703+
}
704+
686705
//! @brief Helper macro
687706
//! @ingroup core_hal_intrin_impl
688707
#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \

0 commit comments

Comments
 (0)