
Commit b10d437

zhanghaohit authored and echuraev committed

[VTA][OpenCL] intelfocl (apache#6126)

* intelfocl support
* disable tsim test
* bugfix to vta autotvm
* disable tsim test in task_python_vta_tsim.sh
* fix integration test
* update vta submodule and re-enable tsim tests
* remove unnecessary comments

1 parent ecf3f5e commit b10d437

Showing 23 changed files with 676 additions and 52 deletions.

cmake/modules/VTA.cmake

Lines changed: 8 additions & 0 deletions
@@ -104,6 +104,10 @@ elseif(PYTHON)
   find_library(__cma_lib NAMES cma PATH /usr/lib)
 elseif(${VTA_TARGET} STREQUAL "de10nano") # DE10-Nano rules
   file(GLOB FPGA_RUNTIME_SRCS ${VTA_HW_PATH}/src/de10nano/*.cc ${VTA_HW_PATH}/src/*.cc)
+elseif(${VTA_TARGET} STREQUAL "intelfocl") # Intel OpenCL for FPGA rules
+  file(GLOB FOCL_SRC ${VTA_HW_PATH}/src/oclfpga/*.cc)
+  list(APPEND FPGA_RUNTIME_SRCS ${FOCL_SRC})
+  list(APPEND FPGA_RUNTIME_SRCS ${VTA_HW_PATH}/src/vmem/virtual_memory.cc ${VTA_HW_PATH}/src/vmem/virtual_memory.h)
 endif()
 # Target lib: vta
 add_library(vta SHARED ${FPGA_RUNTIME_SRCS})
@@ -123,6 +127,10 @@ elseif(PYTHON)
   target_include_directories(vta SYSTEM PUBLIC 3rdparty)
   target_include_directories(vta SYSTEM PUBLIC
     "/usr/local/intelFPGA_lite/18.1/embedded/ds-5/sw/gcc/arm-linux-gnueabihf/include")
+elseif(${VTA_TARGET} STREQUAL "intelfocl") # Intel OpenCL for FPGA rules
+  target_include_directories(vta PUBLIC 3rdparty)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
+  target_link_libraries(vta -lOpenCL)
 endif()
 endif()
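These CMake rules key off the VTA target name, so switching an existing checkout to the new backend is a config change plus a rebuild. A minimal sketch, assuming the standard vta-hw submodule layout (the config path and key are conventions of that layout, not part of this diff):

    # Sketch: retarget the VTA runtime build at "intelfocl" (assumed config path).
    import json
    from pathlib import Path

    cfg_path = Path("3rdparty/vta-hw/config/vta_config.json")  # assumed location
    cfg = json.loads(cfg_path.read_text())
    cfg["TARGET"] = "intelfocl"  # selects the new elseif branches above
    cfg_path.write_text(json.dumps(cfg, indent=2))
    # Re-run cmake and make afterwards so VTA.cmake picks up the new target.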

python/tvm/autotvm/task/topi_integration.py

Lines changed: 7 additions & 4 deletions
@@ -227,7 +227,7 @@ def _decorate(topi_schedule):
     @_register_task_schedule(task_name)
     def wrapper(outs, *args, **kwargs):
         """wrapper function for topi schedule"""
-        workload = get_workload(outs)
+        workload = get_workload(outs, task_name)
         if workload is None:
             raise RuntimeError("Cannot find workload in attribute of this schedule")
         tgt = Target.current()
@@ -241,18 +241,21 @@ def wrapper(outs, *args, **kwargs):
     return _decorate


-def get_workload(outs):
+def get_workload(outs, task_name=None):
     """Retrieve the workload from outputs"""

     def traverse(tensors):
         """traverse all ops to find attached workload"""
         for t in tensors:
             op = t.op
-            if "workload" in op.attrs:
-                return args_to_workload(op.attrs["workload"])
             wkl = traverse(op.input_tensors)
             if wkl:
                 return wkl
+
+            if "workload" in op.attrs:
+                ret = args_to_workload(op.attrs["workload"])
+                if task_name is None or ret[0] == task_name:
+                    return ret
         return None

     outs = [outs] if isinstance(outs, tensor.Tensor) else outs
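The reordering is the substance of this change: traverse now recurses into producers first and only accepts a workload whose task name matches the schedule being applied, so a fused graph carrying several workload-tagged ops resolves to the right task instead of whichever op is visited first. A toy model of the new lookup rule (plain Python, not the TVM API):

    # Toy model: deepest matching workload wins; mismatched task names are skipped.
    def find_workload(op, task_name=None):
        for producer in op.get("inputs", []):
            wkl = find_workload(producer, task_name)
            if wkl:
                return wkl
        wkl = op.get("workload")
        if wkl and (task_name is None or wkl[0] == task_name):
            return wkl
        return None

    inner = {"workload": ("conv2d_packed.vta", "...")}
    outer = {"workload": ("dense.x86", "..."), "inputs": [inner]}
    assert find_workload(outer, "conv2d_packed.vta")[0] == "conv2d_packed.vta"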

python/tvm/relay/op/strategy/generic.py

Lines changed: 9 additions & 0 deletions
@@ -53,6 +53,15 @@ def wrapper(attrs, outs, target):
     return wrapper


+def wrap_topi_compute(topi_compute):
+    """Wrap TOPI compute which doesn't use attrs"""
+
+    def wrapper(attrs, inputs, out_type):
+        return [topi_compute(*inputs)]
+
+    return wrapper
+
+
 def get_conv2d_in_channels(data_shape, data_layout):
     """Get conv2d input channels"""
     data_shape = get_const_tuple(data_shape)
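wrap_topi_compute complements the existing wrap_compute_* helpers: it adapts a TOPI compute that ignores operator attrs to the (attrs, inputs, out_type) signature a relay strategy expects. A hedged usage sketch (the strategy function and op choice are illustrative, not from this commit):

    # Sketch: registering an attrs-free TOPI compute in a relay op strategy.
    from tvm import topi
    from tvm.relay.op import op as _op
    from tvm.relay.op.strategy.generic import wrap_topi_compute, wrap_topi_schedule

    def example_strategy(attrs, inputs, out_type, target):  # illustrative name
        strategy = _op.OpStrategy()
        strategy.add_implementation(
            wrap_topi_compute(topi.add),  # topi.add takes no attrs
            wrap_topi_schedule(topi.generic.schedule_injective),
            name="add.generic",
        )
        return strategy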

python/tvm/relay/testing/tf.py

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@

 try:
     tf_compat_v1 = tf.compat.v1
-except ImportError:
+except (ImportError, AttributeError):
     tf_compat_v1 = tf

 ######################################################################
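Catching ImportError alone was the bug: a missing attribute on a module raises AttributeError, so on TensorFlow builds where tf.compat exists but has no v1, the old clause let the exception escape. A self-contained demonstration of the failure mode:

    # Why AttributeError must be caught: module attribute lookups don't
    # raise ImportError.
    import types

    fake_tf = types.ModuleType("tensorflow")                 # stand-in for an old TF
    fake_tf.compat = types.ModuleType("tensorflow.compat")   # has no .v1

    try:
        v1 = fake_tf.compat.v1
    except (ImportError, AttributeError):
        v1 = fake_tf                                         # falls back cleanly
    assert v1 is fake_tf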

python/tvm/topi/x86/bitserial_dense.py

Lines changed: 1 addition & 1 deletion
@@ -122,7 +122,7 @@ def bitserial_dense(
     return matmul


-@autotvm.register_topi_schedule("biserial_dense.x86")
+@autotvm.register_topi_schedule("bitserial_dense.x86")
 def schedule_bitserial_dense(cfg, outs):
     """Schedule for bitserial_dense.

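The typo was not cosmetic: autotvm pairs computes and schedules by task name, so a schedule registered under "biserial_dense.x86" is never found for tasks created by the compute registered under "bitserial_dense.x86". A sketch of the convention (signatures abbreviated; not meant to be re-run against a live registry where the name already exists):

    # Sketch: compute and schedule must register the same task name.
    from tvm import autotvm

    @autotvm.register_topi_compute("bitserial_dense.x86")
    def bitserial_dense(cfg, data, weight, *args):  # abbreviated signature
        ...

    @autotvm.register_topi_schedule("bitserial_dense.x86")  # must match exactly
    def schedule_bitserial_dense(cfg, outs):
        ...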

src/relay/backend/compile_engine.cc

Lines changed: 2 additions & 2 deletions
@@ -251,7 +251,7 @@ class ScheduleGetter : public backend::MemoizedExprTranslator<Array<te::Tensor>>
           << "Cannot apply TOPI schedule to a primitive function with two complicated ops"
           << " anchor=" << anchor_op_ << " current=" << op;
     }
-    if (op_pattern >= anchor_op_pattern_) {
+    if (op_pattern > anchor_op_pattern_) {
       anchor_op_ = op;
       anchor_attrs_ = call_node->attrs;
       anchor_op_pattern_ = op_pattern;
@@ -309,7 +309,7 @@ class ScheduleGetter : public backend::MemoizedExprTranslator<Array<te::Tensor>>
   tvm::Target target_;
   Op anchor_op_;
   Attrs anchor_attrs_;
-  int anchor_op_pattern_{0};
+  int anchor_op_pattern_{-1};
   OpImplementation anchor_implementation_;
   std::ostringstream readable_name_stream_;
   Array<te::Operation> scalars_;
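Together these two tweaks change anchor selection: seeding anchor_op_pattern_ with -1 ensures even an all-elemwise function (pattern 0) picks an anchor, while the strict > keeps the first op of a given complexity as the anchor instead of letting a later op of equal pattern displace it. A toy model of the rule:

    # Toy anchor selection: higher op pattern wins, first op wins ties.
    # Pattern values as in TVM's OpPatternKind: kElemWise=0 ... kOutEWiseFusable=4.
    def pick_anchor(ops):
        anchor, anchor_pattern = None, -1  # was 0, with >=
        for name, pattern in ops:
            if pattern > anchor_pattern:   # was >=
                anchor, anchor_pattern = name, pattern
        return anchor

    assert pick_anchor([("conv2d_a", 4), ("conv2d_b", 4)]) == "conv2d_a"  # tie: first wins
    assert pick_anchor([("add", 0)]) == "add"  # elemwise-only still anchors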

src/runtime/workspace_pool.cc

Lines changed: 0 additions & 1 deletion
@@ -115,7 +115,6 @@ class WorkspacePool::Pool {
   }
   // Release all resources
   void Release(Device dev, DeviceAPI* device) {
-    ICHECK_EQ(allocated_.size(), 1);
     for (size_t i = 1; i < free_list_.size(); ++i) {
       device->FreeDataSpace(dev, free_list_[i].data);
     }

src/tir/transforms/lower_tvm_builtin.cc

Lines changed: 0 additions & 10 deletions
@@ -109,16 +109,6 @@ class BuiltinLower : public StmtExprMutator {
     op = stmt.as<AllocateNode>();
     // Get constant allocation bound.
     int64_t nbytes = GetVectorBytes(op->dtype);
-    if (device_type_.defined()) {
-      if (const auto* dev_type = device_type_.as<IntImmNode>()) {
-        if (dev_type->value == kDLCPU) {
-          int32_t constant_size = op->constant_allocation_size();
-          if (constant_size > 0 && constant_size * nbytes < runtime::kMaxStackAlloca) {
-            return stmt;
-          }
-        }
-      }
-    }
     PrimExpr total_bytes = make_const(op->extents[0].dtype(), nbytes);
     for (size_t i = 0; i < op->extents.size(); ++i) {
       total_bytes = total_bytes * op->extents[i];
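The deleted branch was a fast path that kept small constant CPU allocations on the stack, bypassing the workspace allocator entirely; removing it routes every allocation through TVMBackendAllocWorkspace, which a device with a managed memory pool (such as the intelfocl virtual-memory runtime) can intercept. A toy model of the decision that was removed:

    # Toy model of the old shortcut (now gone): tiny constant CPU buffers
    # used to become stack allocations instead of workspace allocations.
    K_MAX_STACK_ALLOCA = 1024  # illustrative bound, standing in for runtime::kMaxStackAlloca

    def lower_alloc(is_cpu, constant_size, nbytes):
        if is_cpu and 0 < constant_size * nbytes < K_MAX_STACK_ALLOCA:
            return "stack alloca"              # old behavior, removed by this patch
        return "TVMBackendAllocWorkspace"      # the only path after this patch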

vta/python/vta/autotvm.py

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@ def reprogram_fpga(remote, _build_result):
         _build_result : tvm.autotvm.measure.measure_methods.BuildResult
             Artifact from the build phase, unused here.
         """
-        rpc_client.program_bitstream(remote, bitstream)
+        rpc_client.program_fpga(remote, bitstream)
         rpc_client.reconfig_runtime(remote)

     return default_module_loader(reprogram_fpga)
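The fix matters because vta.rpc_client exposes program_fpga rather than a program_bitstream function, so autotvm runs that tried to reflash the FPGA between measurements crashed on the old name. A hedged sketch of where this module loader plugs in (tracker address and counts are placeholders):

    # Sketch: VTA autotvm measurement with FPGA reprogramming over RPC.
    from tvm import autotvm
    import vta

    env = vta.get_env()
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.RPCRunner(
            env.TARGET,
            host="0.0.0.0", port=9190,              # placeholder tracker
            number=5, timeout=60,
            module_loader=vta.module_loader(),      # wraps reprogram_fpga above
        ),
    )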

vta/python/vta/environment.py

Lines changed: 3 additions & 1 deletion
@@ -66,11 +66,13 @@ class DevContext(object):
     MEM_ID_INP = 2
     MEM_ID_ACC = 3
     MEM_ID_OUT = 4
+    MEM_ID_ACC_8BIT = 5
     # VTA ALU Opcodes
     ALU_OPCODE_MIN = 0
     ALU_OPCODE_MAX = 1
     ALU_OPCODE_ADD = 2
     ALU_OPCODE_SHR = 3
+    ALU_OPCODE_MUL = 4
     # Task queue id (pipeline stage)
     QID_LOAD_INP = 1
     QID_LOAD_WGT = 1
@@ -232,7 +234,7 @@ def target_host(self):
             return "llvm -mtriple=armv7-none-linux-gnueabihf"
         if self.TARGET == "ultra96":
             return "llvm -mtriple=aarch64-linux-gnu"
-        if self.TARGET in ["sim", "tsim"]:
+        if self.TARGET in ["sim", "tsim", "intelfocl"]:
            return "llvm"
        raise ValueError("Unknown target %s" % self.TARGET)
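With these entries the intelfocl target compiles its host code for plain llvm and gains an 8-bit accumulator memory id plus a MUL ALU opcode. A quick sketch of how the environment reflects this, assuming vta_config.json sets TARGET to "intelfocl":

    # Sketch: inspecting the VTA environment for the new target.
    import vta
    from vta.environment import DevContext

    env = vta.get_env()
    print(env.TARGET)                 # "intelfocl" (from vta_config.json)
    print(env.target_host)            # "llvm", via the branch added above
    print(DevContext.ALU_OPCODE_MUL)  # 4, the new multiply opcode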
