Skip to content

Commit 7c32cb3

Browse files
committed
Implemented PE I/O local initialization with emit hacks
1 parent 27caea2 commit 7c32cb3

File tree

2 files changed

+119
-92
lines changed

2 files changed

+119
-92
lines changed

lib/mlir/Transforms/EmitHLS.cc

Lines changed: 63 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,7 @@ static std::string pragmaGen(const std::string& pragma) {
151151
}
152152

153153
static std::string pragmaGenIfAttrExists(Operation *op, const std::string& attr_name) {
154-
if (!op->hasAttr(attr_name)) {
155-
return "";
156-
}
154+
if (!op->hasAttr(attr_name)) return "";
157155
return pragmaGen(getStringFromAttribute(op->getAttr(attr_name)));
158156
}
159157

@@ -329,14 +327,12 @@ static void initArgTypeMap(mlir::FuncOp funcOp, std::string hlsParam) {
329327
// Utils
330328
// ------------------------------------------------------
331329

330+
// localName is used for local initialization in systolic arrays
331+
static std::string localName = "";
332+
332333
static std::string getValueName(Value value) {
333334
if (valueMap.count(value) == 0) {
334-
// TODO how to best implement it
335-
// if (value.hasAttr("phism.hls_pragma.inline_off"))
336-
if (false)
337-
valueMap[value] = "v" + std::to_string(valueID++);
338-
else
339-
valueMap[value] = "v" + std::to_string(valueID++);
335+
valueMap[value] = "v" + std::to_string(valueID++);
340336
}
341337

342338
return valueMap[value];
@@ -713,10 +709,33 @@ static std::string emitOp(AffineForOp affineForOp) {
713709
upperBound += ", " + upperBoundPrinter.getAffineExpr(expr) + ")";
714710
}
715711

716-
return indent() + "for (" + getTypeName(iter) + " " + iterName + " = " +
717-
lowerBound + "; " + iterName + " < " + upperBound + "; " + iterName +
718-
" += " + std::to_string(step) + ") {\n" +
719-
emitBlock(*affineForOp.getBody()) + indent() + "}\n";
712+
std::string affineForOpBuff = indent() + "for (" + getTypeName(iter) + " " + iterName + " = " + lowerBound + "; ";
713+
affineForOpBuff += iterName + " < " + upperBound + "; " + iterName + " += " + std::to_string(step) + ") {\n";
714+
715+
indent.add();
716+
affineForOpBuff += pragmaGenIfAttrExists(affineForOp, "phism.hls_pragma");
717+
indent.sub();
718+
719+
if (affineForOp->hasAttr("phism.include_union_hack")) {
720+
indent.add();
721+
722+
affineForOpBuff += indent() + "union {unsigned int ui; float ut;} u;\n";
723+
affineForOpBuff += indent() + "u.ui = (unsigned int) " + localName + "(31, 0);\n";
724+
affineForOpBuff += indent() + "local_A[0][n] = u.ut;\n"; // TODO this line doesnt make sense yet, because it uses local_A[0][n]
725+
affineForOpBuff += indent() + localName + " = " + localName + " >> 32;\n";
726+
727+
// TODO should everything be handled by the hack, or should some things still be emitted?
728+
// affineForOpBuff += emitBlock(*affineForOp.getBody());
729+
730+
indent.sub();
731+
} else {
732+
affineForOpBuff += emitBlock(*affineForOp.getBody());
733+
}
734+
735+
affineForOpBuff += indent() + "}\n";
736+
737+
738+
return affineForOpBuff;
720739
}
721740

722741
static std::string emitOp(AffineIfOp affineIfOp) {
@@ -781,30 +800,40 @@ static std::string emitOp(mlir::CallOp callOp) {
781800
std::string callOpBuff = "";
782801

783802
// Emit the function call
784-
callOpBuff += indent() + callOp.getCallee().str() + "(";
803+
if (callOp->hasAttr("phism.hls_stream_read")) {
804+
assert(callOp.getOperands().size() == 1 && "HLS stream read() must have one operand");
805+
assert(callOp.getResults().size() == 1 && "HLS stream read() must have one result");
785806

786-
// Emit input arguments
787-
for (auto arg : callOp.getOperands()) {
788-
auto argName = getValueName(arg);
789-
callOpBuff += argName + ", ";
790-
}
807+
localName = getValueName(callOp.getResults()[0]);
808+
callOpBuff += indent() + localName + " = ";
809+
callOpBuff += getValueName(callOp.getOperands()[0]) + ".read();\n";
791810

792-
// Emit output arguments
793-
for (auto result : callOp.getResults()) {
794-
// Pass address for scalar result arguments
795-
if (!result.getType().isa<ShapedType>())
796-
callOpBuff += "&";
811+
} else {
812+
callOpBuff += indent() + callOp.getCallee().str() + "(";
797813

798-
callOpBuff += getValueName(result) + ", ";
799-
}
814+
// Emit input arguments
815+
for (auto arg : callOp.getOperands()) {
816+
auto argName = getValueName(arg);
817+
callOpBuff += argName + ", ";
818+
}
800819

801-
// Get rid of the last comma and space unless the there were no arguments
802-
if (callOp.getResults().size() > 0) {
803-
callOpBuff.pop_back();
804-
callOpBuff.pop_back();
805-
}
820+
// Emit output arguments
821+
for (auto result : callOp.getResults()) {
822+
// Pass address for scalar result arguments
823+
if (!result.getType().isa<ShapedType>())
824+
callOpBuff += "&";
825+
826+
callOpBuff += getValueName(result) + ", ";
827+
}
806828

807-
callOpBuff += ");\n";
829+
// Get rid of the last comma and space unless there were no arguments
830+
if (callOp.getOperands().size() + callOp.getResults().size() > 0) {
831+
callOpBuff.pop_back();
832+
callOpBuff.pop_back();
833+
}
834+
835+
callOpBuff += ");\n";
836+
}
808837

809838
return callOpBuff;
810839
}
@@ -1266,6 +1295,8 @@ static std::string emitOp(mlir::FuncOp funcOp) {
12661295
funcOpBuff += "\n";
12671296
indent.sub();
12681297

1298+
// TODO to actually emit a std.return value from the FuncOp, we would have to iterate over getResults()...
1299+
12691300
// Emit function body.
12701301
funcOpBuff += emitBlock(funcOp.front());
12711302
funcOpBuff += "}\n";

lib/mlir/Transforms/SystolicArrayTimeLoop.cc

Lines changed: 56 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -237,15 +237,18 @@ static void printRegionInfo(Region &region, const std::string& info = "") {
237237
LLVM_DEBUG({dbgs() << "\n";});
238238
}
239239

240-
static mlir::FuncOp createDummyFuncOp(mlir::FuncOp current_func_op, std::string suffix, ConversionPatternRewriter& b){
240+
static unsigned dummy_func_op_ID = 0;
241+
242+
static mlir::FuncOp createDummyFuncOp(mlir::FuncOp current_func_op, Type arg_type, Type res_type, std::string suffix, ConversionPatternRewriter& b){
241243
SmallVector<Type> func_result_types;
244+
func_result_types.push_back(res_type);
242245
SmallVector<Type> func_arg_types;
243-
// func_arg_types.push_back(IndexType::get(current_func_op.getContext())); // TODO use correct type
246+
func_arg_types.push_back(arg_type);
244247

245248
b.setInsertionPointAfter(current_func_op); // Insertion point right before the current Func Op
246249
FuncOp func_op = b.create<FuncOp>(
247250
current_func_op.getLoc(),
248-
std::string(current_func_op.getName()) + "_" + suffix,
251+
std::string(current_func_op.getName()) + "_" + suffix + "_" + std::to_string(dummy_func_op_ID++),
249252
b.getFunctionType(func_arg_types, func_result_types)
250253
);
251254
func_op->setAttr("phism.no_emit", b.getUnitAttr()); // TODO this could be added to constructor create
@@ -255,13 +258,49 @@ static mlir::FuncOp createDummyFuncOp(mlir::FuncOp current_func_op, std::string
255258

256259
Block *entry = func_op.addEntryBlock();
257260
b.setInsertionPointToStart(entry);
258-
b.create<mlir::ReturnOp>(func_op.getLoc());
261+
b.create<mlir::ReturnOp>(func_op.getLoc(), func_op.getArguments());
259262

260263
LLVM_DEBUG({dbgs() << "Dummy func op after adding block with return op:\n"; func_op.dump();});
261264

262265
return func_op;
263266
}
264267

268+
static void initializeInputVariable(ConversionPatternRewriter &b, MLIRContext *context, Value v_in, FuncOp &callee, Block *innermost_block) {
269+
// Add a dummy func op that won't be emitted directly, but allows for custom .read() call
270+
memref::AllocaOp v_local = b.create<memref::AllocaOp>(callee.getLoc(), v_in.getType().cast<MemRefType>());
271+
FuncOp hls_stream_read_func_op = createDummyFuncOp(callee, v_in.getType(), v_local.getType(), "hls_stream_read", b);
272+
273+
b.setInsertionPointToStart(innermost_block);
274+
275+
SmallVector<Value> operands;
276+
operands.push_back(v_in);
277+
278+
// Use a custom call op for reading from hls stream
279+
CallOp hls_stream_read = b.create<CallOp>(
280+
hls_stream_read_func_op.getLoc(),
281+
hls_stream_read_func_op,
282+
operands
283+
);
284+
hls_stream_read->setAttr("phism.hls_stream_read", b.getUnitAttr()); // TODO this could be added to constructor create
285+
LLVM_DEBUG({dbgs() << "hls_stream_read:\n"; hls_stream_read.dump();});
286+
287+
// Add inner loop
288+
AffineForOp inner_loop = b.create<AffineForOp>(callee.getLoc(), 0, 2, 1);
289+
inner_loop->setAttr("phism.hls_pragma", StringAttr::get(context, "UNROLL")); // TODO this could be added to constructor create
290+
inner_loop->setAttr("phism.include_union_hack", b.getUnitAttr());// TODO this could be added to constructor create
291+
// Block *inner_block = b.createBlock(&inner_loop.getBody());
292+
// inner_loop.getBody();
293+
294+
// local_A[0][n] = u.ut; -> memref::store op from u to local_A + attributed with ".ut"
295+
// store op takes a vector of indices, here: 0, n
296+
297+
// TODO maybe use actual MLIR for union hack by having a custom callop that gets emitted as the two lines of u.ui -> u.ut instead of emit hacks?
298+
299+
// TODO maybe use actual MLIR shift right instead of emit hacks?
300+
// auto thirty_two = b.create<arith::ConstantOp>(callee.getLoc(), b.getI32IntegerAttr(32));
301+
// arith::ShRUIOp shift_right_op = b.create<arith::ShRUIOp>(callee.getLoc(), hls_stream_read.getResults()[0], thirty_two);
302+
}
303+
265304
static void handleCalleePE(mlir::FuncOp PE_func_op) {
266305
std::string IO_type = "IO_t";
267306
std::string local_type = "local_t";
@@ -308,8 +347,7 @@ static void handleCalleePE(mlir::FuncOp PE_func_op) {
308347
b.setInsertionPointToStart(loop0.getBody());
309348
b.inlineRegionBefore(PE_func_op.getBody(), loop0.region(), loop0.region().end());
310349
// printRegionInfo(loop0.region(), "After inlineRegionBefore");
311-
printOperation(loop0, "After inlineRegionBefore");
312-
resetIndent();
350+
printOperation(loop0, "After inlineRegionBefore"); resetIndent();
313351
LLVM_DEBUG({dbgs() << "Callee:\n"; newCallee.dump();});
314352

315353
// Move affine yield (created by default during create<AffineForOp>) after the inlined region,
@@ -322,38 +360,24 @@ static void handleCalleePE(mlir::FuncOp PE_func_op) {
322360
// return op -> moveafter + erase block | yield op -> moveafter ===> this would avoid saving and using oldRet
323361
}
324362
// printRegionInfo(loop0.region(), "After moving yield op and erasing block 0");
325-
printOperation(loop0, "After moving yield op and erasing block 0");
326-
resetIndent();
363+
printOperation(loop0, "After moving yield op and erasing block 0"); resetIndent();
327364
LLVM_DEBUG({dbgs() << "Callee:\n"; newCallee.dump();});
328365

329366
// Move old return op to the end of function call
330367
oldRet->moveAfter(loop0);
331-
// printRegionInfo(loop0.region(), "After moving old return to the end of function");
332-
printOperation(loop0, "After moving old return to the end of function");
333-
resetIndent();
368+
printOperation(loop0, "After moving old return to the end of function"); resetIndent();
334369
LLVM_DEBUG({dbgs() << "Callee:\n"; newCallee.dump();});
335370

336371
// Replace original value uses with new values
337372
auto i = 0;
338373
for (auto arg : loop0.region().getArguments()) {
339374
arg.replaceAllUsesWith(newCallee.getArgument(i++));
340375
}
341-
// printRegionInfo(loop0.region(), "After replaceAllUsesWith");
342-
printOperation(loop0, "After replaceAllUsesWith");
343-
resetIndent();
376+
printOperation(loop0, "After replaceAllUsesWith"); resetIndent();
344377
LLVM_DEBUG({dbgs() << "Callee:\n"; newCallee.dump();});
345378

346379
// Change outer affine for body block argument types to a single index + find innermost affine for op
347-
// AffineForOp* innermost_affine_for_op;
348380
for (Block &block : loop0.region().getBlocks()) {
349-
// unsigned i = 0;
350-
// block.walk([&](AffineForOp affine_for_op) {
351-
// innermost_affine_for_op = &affine_for_op;
352-
// i++;
353-
// LLVM_DEBUG({dbgs() << "Walking block, i: " << i << "\n";});
354-
// });
355-
// assert(i == 1 && "Found more than one AffineForOp in the PE.");
356-
357381
// Erase all existing block argument types (that came from the original FuncOP) using BitVector of all 1s
358382
llvm::BitVector eraseIndices(block.getNumArguments(), true);
359383
block.eraseArguments(eraseIndices);
@@ -362,56 +386,28 @@ static void handleCalleePE(mlir::FuncOp PE_func_op) {
362386
block.addArgument(IndexType::get(context), newCallee.getLoc());
363387
}
364388
LLVM_DEBUG({dbgs() << "Callee:\n"; newCallee.dump();});
365-
// LLVM_DEBUG({dbgs() << "innermost_affine_for_op: " << innermost_affine_for_op->getName() << "\n";});
366-
// printRegionInfo(innermost_affine_for_op->region(), "Found AffineForOp");
367389
SmallVector<AffineForOp> affine_for_ops;
368390
loop0.walk([&](AffineForOp op) {
369391
affine_for_ops.push_back(op);
370392
});
371393
LLVM_DEBUG({dbgs() << "Found " << affine_for_ops.size() << " affineForOps\n";});
372394
AffineForOp innermost_affine_for_op = affine_for_ops[0];
373-
printOperation(innermost_affine_for_op, "Found AffineForOp");
374-
resetIndent();
375-
376-
// Add systolic array specific I/O
377-
// Add a dummy func op that won't be emitted directly, but allows for custom .read() call
378-
mlir::FuncOp hls_stream_read_func_op = createDummyFuncOp(newCallee, "hls_stream_read", b);
395+
printOperation(innermost_affine_for_op, "Found AffineForOp"); resetIndent();
379396

380397
// Find innermost block
381-
Block* innermost_block;
382-
for (Region &region : innermost_affine_for_op->getRegions()) { // TODO how to do this efficiently?
383-
for (Block &block : region.getBlocks()) {
384-
innermost_block = &block;
385-
break;
386-
}
387-
break;
388-
}
389-
printBlock(*innermost_block, "Found innermost_block");
390-
resetIndent();
398+
Block* innermost_block = &(innermost_affine_for_op->getRegions().front().getBlocks().front());
399+
printBlock(*innermost_block, "Found innermost_block"); resetIndent();
391400
LLVM_DEBUG({dbgs() << "Found innermost_block: \n";});
392401

402+
// Add systolic array specific I/O
393403
// 1. Load A_in to A_local
394-
auto A_in = newCallee.getArguments()[3];
395-
b.setInsertionPointToStart(innermost_block);
396-
memref::AllocaOp A_local = b.create<memref::AllocaOp>(newCallee.getLoc(), A_in.getType().cast<MemRefType>());
397-
// use a custom call op for reading from hls stream
398-
SmallVector<Value> operands;
399-
CallOp hls_stream_read = b.create<CallOp>(
400-
hls_stream_read_func_op.getLoc(),
401-
hls_stream_read_func_op,
402-
operands
403-
);
404-
404+
Value A_in = newCallee.getArguments()[3];
405+
initializeInputVariable(b, context, A_in, newCallee, innermost_block);
405406

406-
407-
408-
// attr per op -> but better to infer from func ops
409-
// A_local.setAttr("requires .read()")
410-
411-
// local_A[0][n] = u.ut; -> memref::store op from u to local_A + attrbitured with ".ut"
412-
// store op takies vector of indexes, here: 0, n
413-
414407
// 2. Load B_in to B_local
408+
Value B_in = newCallee.getArguments()[4];
409+
initializeInputVariable(b, context, B_in, newCallee, innermost_block);
410+
415411
// 3. C_local := op(A_local, B_local) or 0
416412
// 4. C_local drain to C_out
417413
// 5. Drain B_local to B_out

0 commit comments

Comments
 (0)