Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Testing cascade lake alone.
  • Loading branch information
anijain2305 committed Oct 17, 2019
commit b74a696aa9924145145760182dfcd08058ddd1d8
10 changes: 10 additions & 0 deletions python/tvm/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,16 @@ def model(self):
return opt.value[7:]
return 'unknown'

@property
def mcpu(self):
    """Return the value of the ``-mcpu=<name>`` option of this target.

    Returns
    -------
    mcpu : str
        The text following ``-mcpu=`` in the last such entry of
        ``self.options``, or an empty string when ``self.options`` is
        None or contains no such entry.
    """
    prefix = '-mcpu='
    mcpu = ''
    if self.options is not None:
        for opt in self.options:
            # Match the exact option prefix rather than any entry that merely
            # contains the substring 'mcpu'; slicing (instead of
            # split('=')[1]) cannot raise on an entry without '=' and keeps
            # any '=' that is part of the value.
            if opt.startswith(prefix):
                mcpu = opt[len(prefix):]
    return mcpu

def __enter__(self):
_api_internal._EnterTargetScope(self)
return self
Expand Down
29 changes: 12 additions & 17 deletions tests/python/relay/test_op_level2.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,8 +576,7 @@ def _compile(ic, oc, target, data_layout, kernel_layout, dtypes):
assembly = lib.get_source("asm")
return assembly

def has_fast_int8_instruction(asm, target):
intel_device_type = None
def _has_fast_int8_instructions(asm, target):
if 'skylake-avx512' in target:
return "pmaddubs" in asm
elif 'cascadelake' in target:
Expand All @@ -586,28 +585,24 @@ def has_fast_int8_instruction(asm, target):
assert False, "Target should be Skylake or Cascadelake"

# compile conv2d for x86 (skylake, cascadelake) and test assembly contains *pmadd* instructions
# targets = ["llvm -mcpu=skylake-avx512", "llvm -mcpu=cascadelake"]
targets = ["llvm -mcpu=skylake-avx512"]
name_skylake = "llvm.x86.avx512.pmaddubs.w.512"
name_cascadelake = 'llvm.x86.avx512.vpdpbusd.512'
llvm_id_skylake = tvm.codegen.llvm_lookup_intrinsic_id(name_skylake)
llvm_id_cascadelake = tvm.codegen.llvm_lookup_intrinsic_id(name_cascadelake)
targets = ["llvm -mcpu=skylake-avx512", "llvm -mcpu=cascadelake"]
llvm_version = tvm.codegen.llvm_version_major()
for target in targets:
if llvm_id_skylake != 0 and llvm_id_cascadelake != 0:
if llvm_version >= 8:
fast_int8_dtypes = ('uint8', 'int8', 'int32')
# Sweep the input channels to check int8 robustness
# Input channels should be a multiple of 4 internally.
for ic in [1, 4, 6]:
asm = _compile(ic=ic, oc=32, target=target, data_layout="NCHW",
kernel_layout='OIHW',
dtypes=fast_int8_dtypes)
assert has_fast_int8_instruction(asm, target)
assert _has_fast_int8_instructions(asm, target)

for ic in [1, 4, 6]:
asm = _compile(ic=ic, oc=32, target=target, data_layout="NHWC",
kernel_layout='HWIO',
dtypes=fast_int8_dtypes)
assert has_fast_int8_instruction(asm, target)
assert _has_fast_int8_instructions(asm, target)


# Sweep the output channels to check int8 robustness
Expand All @@ -616,36 +611,36 @@ def has_fast_int8_instruction(asm, target):
asm = _compile(ic=16, oc=oc, target=target, data_layout="NCHW",
kernel_layout='OIHW',
dtypes=fast_int8_dtypes)
assert has_fast_int8_instruction(asm, target)
assert _has_fast_int8_instructions(asm, target)

for oc in [4, 16, 20]:
asm = _compile(ic=16, oc=oc, target=target, data_layout="NHWC",
kernel_layout='HWIO',
dtypes=fast_int8_dtypes)
assert has_fast_int8_instruction(asm, target)
assert _has_fast_int8_instructions(asm, target)

# Check that both non-divisible oc and ic work
asm = _compile(ic=17, oc=29, target=target, data_layout="NCHW", kernel_layout='OIHW',
dtypes=fast_int8_dtypes)
assert has_fast_int8_instruction(asm, target)
assert _has_fast_int8_instructions(asm, target)

asm = _compile(ic=17, oc=29, target=target, data_layout="NHWC", kernel_layout='HWIO',
dtypes=fast_int8_dtypes)
assert has_fast_int8_instruction(asm, target)
assert _has_fast_int8_instructions(asm, target)

# Ensure that code is generated when datatypes are not HW supported.
dtypes = ('int8', 'int8', 'int32')
asm = _compile(ic=16, oc=32, target=target, data_layout="NHWC", kernel_layout='HWIO',
dtypes=dtypes)
# Check that intrinsic is not present in the assembly.
assert not has_fast_int8_instruction(asm, target)
assert not _has_fast_int8_instructions(asm, target)

# Ensure that code is generated when datatypes are not HW supported.
dtypes = ('uint8', 'uint8', 'int32')
asm = _compile(ic=16, oc=32, target=target, data_layout="NHWC", kernel_layout='HWIO',
dtypes=dtypes)
# Check that intrinsic is not present in the assembly.
assert not has_fast_int8_instruction(asm, target)
assert not _has_fast_int8_instructions(asm, target)

# Check that a vectorized instruction is generated for older Intel
# generations, because we default to NCHWc layout.
Expand Down
14 changes: 5 additions & 9 deletions topi/python/topi/x86/conv2d_int8.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,18 +57,14 @@ def _is_int8_hw_support(data_dtype, kernel_dtype):
is_dtype_support = data_dtype == 'uint8' and kernel_dtype == 'int8'

# 2) Check LLVM support
llvm_intrin_fast_int8_skylake = "llvm.x86.avx512.pmaddubs.w.512"
llvm_intrin_fast_int8_cascadelake = "llvm.x86.avx512.vpdpbusd.512"
llvm_id_skylake = tvm.codegen.llvm_lookup_intrinsic_id(llvm_intrin_fast_int8_skylake)
llvm_id_cascadelake = tvm.codegen.llvm_lookup_intrinsic_id(llvm_intrin_fast_int8_cascadelake)
is_llvm_support = llvm_id_skylake != 0 and llvm_id_cascadelake != 0
llvm_version = tvm.codegen.llvm_version_major()
is_llvm_support = llvm_version >= 8

# 3) Check target
target = tvm.target.current_target()
mcpu = tvm.target.current_target().mcpu
is_target_support = False
for opt in target.options:
if opt == '-mcpu=skylake-avx512' or opt == '-mcpu=cascadelake':
is_target_support = True
if mcpu == 'skylake-avx512' or mcpu == 'cascadelake':
is_target_support = True

return is_dtype_support and is_llvm_support and is_target_support

Expand Down
17 changes: 5 additions & 12 deletions topi/python/topi/x86/tensor_intrin.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,13 @@

def dot_16x1x16_uint8_int8_int32():
    """Dispatch the most optimized intrin depending on the target.

    Reads the mcpu of the current target and returns the result of
    ``dot_16x1x16_uint8_int8_int32_skylake()`` for 'skylake-avx512' or
    ``dot_16x1x16_uint8_int8_int32_cascadelake()`` for 'cascadelake'.

    Raises
    ------
    AssertionError
        If the current target's mcpu is neither 'skylake-avx512' nor
        'cascadelake' (including when no target is set).
    """
    target = tvm.target.current_target()
    # Treat a missing target like an unsupported CPU so the assertion below
    # reports it, instead of failing on attribute access of None.
    mcpu = target.mcpu if target is not None else ''

    assert mcpu in ("skylake-avx512", "cascadelake"), \
        "An old Intel machine that does not have fast Int8 support."
    if mcpu == "skylake-avx512":
        return dot_16x1x16_uint8_int8_int32_skylake()
    # cascadelake
    return dot_16x1x16_uint8_int8_int32_cascadelake()


Expand Down
8 changes: 3 additions & 5 deletions topi/python/topi/x86/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,8 @@
import tvm

def get_fp32_len():
    """Return the fp32 vector length to use for the current target.

    Returns
    -------
    fp32_vec_len : int
        16 when the current target's mcpu is 'skylake-avx512' or
        'cascadelake'; 8 otherwise, including when no target is set.
    """
    target = tvm.target.current_target()
    # The superseded option-scanning code checked `target is not None`
    # before reading it; keep that guard so a missing target falls back to
    # the default length rather than failing on `.mcpu`.
    mcpu = target.mcpu if target is not None else ''
    fp32_vec_len = 8
    if mcpu in ('skylake-avx512', 'cascadelake'):
        fp32_vec_len = 16
    return fp32_vec_len