@@ -237,15 +237,18 @@ static void printRegionInfo(Region &region, const std::string& info = "") {
237237 LLVM_DEBUG ({dbgs () << " \n " ;});
238238}
239239
240- static mlir::FuncOp createDummyFuncOp (mlir::FuncOp current_func_op, std::string suffix, ConversionPatternRewriter& b){
240+ static unsigned dummy_func_op_ID = 0 ;
241+
242+ static mlir::FuncOp createDummyFuncOp (mlir::FuncOp current_func_op, Type arg_type, Type res_type, std::string suffix, ConversionPatternRewriter& b){
241243 SmallVector<Type> func_result_types;
244+ func_result_types.push_back (res_type);
242245 SmallVector<Type> func_arg_types;
243- // func_arg_types.push_back(IndexType::get(current_func_op.getContext())); // TODO use correct type
246+ func_arg_types.push_back (arg_type);
244247
245248 b.setInsertionPointAfter (current_func_op); // Insertion point right before the current Func Op
246249 FuncOp func_op = b.create <FuncOp>(
247250 current_func_op.getLoc (),
248- std::string (current_func_op.getName ()) + " _" + suffix,
251+ std::string (current_func_op.getName ()) + " _" + suffix + " _ " + std::to_string (dummy_func_op_ID++) ,
249252 b.getFunctionType (func_arg_types, func_result_types)
250253 );
251254 func_op->setAttr (" phism.no_emit" , b.getUnitAttr ()); // TODO this could be added to constructor create
@@ -255,13 +258,49 @@ static mlir::FuncOp createDummyFuncOp(mlir::FuncOp current_func_op, std::string
255258
256259 Block *entry = func_op.addEntryBlock ();
257260 b.setInsertionPointToStart (entry);
258- b.create <mlir::ReturnOp>(func_op.getLoc ());
261+ b.create <mlir::ReturnOp>(func_op.getLoc (), func_op. getArguments () );
259262
260263 LLVM_DEBUG ({dbgs () << " Dummy func op after adding block with return op:\n " ; func_op.dump ();});
261264
262265 return func_op;
263266}
264267
268+ static void initializeInputVariable (ConversionPatternRewriter &b, MLIRContext *context, Value v_in, FuncOp &callee, Block *innermost_block) {
269+ // Add a dummy func op that won't be emitted directly, but allows for custom .read() call
270+ memref::AllocaOp v_local = b.create <memref::AllocaOp>(callee.getLoc (), v_in.getType ().cast <MemRefType>());
271+ FuncOp hls_stream_read_func_op = createDummyFuncOp (callee, v_in.getType (), v_local.getType (), " hls_stream_read" , b);
272+
273+ b.setInsertionPointToStart (innermost_block);
274+
275+ SmallVector<Value> operands;
276+ operands.push_back (v_in);
277+
278+ // Use a custom call op for reading from hls stream
279+ CallOp hls_stream_read = b.create <CallOp>(
280+ hls_stream_read_func_op.getLoc (),
281+ hls_stream_read_func_op,
282+ operands
283+ );
284+ hls_stream_read->setAttr (" phism.hls_stream_read" , b.getUnitAttr ()); // TODO this could be added to constructor create
285+ LLVM_DEBUG ({dbgs () << " hls_stream_read:\n " ; hls_stream_read.dump ();});
286+
287+ // Add inner loop
288+ AffineForOp inner_loop = b.create <AffineForOp>(callee.getLoc (), 0 , 2 , 1 );
289+ inner_loop->setAttr (" phism.hls_pragma" , StringAttr::get (context, " UNROLL" )); // TODO this could be added to constructor create
290+ inner_loop->setAttr (" phism.include_union_hack" , b.getUnitAttr ());// TODO this could be added to constructor create
291+ // Block *inner_block = b.createBlock(&inner_loop.getBody());
292+ // inner_loop.getBody();
293+
294+ // local_A[0][n] = u.ut; -> memref::store op from u to local_A + attrbitured with ".ut"
295+ // store op takies vector of indexes, here: 0, n
296+
297+ // TODO maybe use actual MLIR for union hack by having a custom callop that gets emitted as the two lines of u.ui -> u.ut instead of emit hacks?
298+
299+ // TODO maybe use actual MLIR shift right instead of emit hacks?
300+ // auto thirty_two = b.create<arith::ConstantOp>(callee.getLoc(), b.getI32IntegerAttr(32));
301+ // arith::ShRUIOp shift_right_op = b.create<arith::ShRUIOp>(callee.getLoc(), hls_stream_read.getResults()[0], thirty_two);
302+ }
303+
265304static void handleCalleePE (mlir::FuncOp PE_func_op) {
266305 std::string IO_type = " IO_t" ;
267306 std::string local_type = " local_t" ;
@@ -308,8 +347,7 @@ static void handleCalleePE(mlir::FuncOp PE_func_op) {
308347 b.setInsertionPointToStart (loop0.getBody ());
309348 b.inlineRegionBefore (PE_func_op.getBody (), loop0.region (), loop0.region ().end ());
310349 // printRegionInfo(loop0.region(), "After inlineRegionBefore");
311- printOperation (loop0, " After inlineRegionBefore" );
312- resetIndent ();
350+ printOperation (loop0, " After inlineRegionBefore" ); resetIndent ();
313351 LLVM_DEBUG ({dbgs () << " Callee:\n " ; newCallee.dump ();});
314352
315353 // Move affine yield (created by default during create<AffineForOp>) after the inlined region,
@@ -322,38 +360,24 @@ static void handleCalleePE(mlir::FuncOp PE_func_op) {
322360 // return op -> moveafter + erase block | yield op -> moveafter ===> this would avoid saving and using oldRet
323361 }
324362 // printRegionInfo(loop0.region(), "After moving yield op and erasing block 0");
325- printOperation (loop0, " After moving yield op and erasing block 0" );
326- resetIndent ();
363+ printOperation (loop0, " After moving yield op and erasing block 0" ); resetIndent ();
327364 LLVM_DEBUG ({dbgs () << " Callee:\n " ; newCallee.dump ();});
328365
329366 // Move old return op to the end of function call
330367 oldRet->moveAfter (loop0);
331- // printRegionInfo(loop0.region(), "After moving old return to the end of function");
332- printOperation (loop0, " After moving old return to the end of function" );
333- resetIndent ();
368+ printOperation (loop0, " After moving old return to the end of function" ); resetIndent ();
334369 LLVM_DEBUG ({dbgs () << " Callee:\n " ; newCallee.dump ();});
335370
336371 // Replace original value uses with new values
337372 auto i = 0 ;
338373 for (auto arg : loop0.region ().getArguments ()) {
339374 arg.replaceAllUsesWith (newCallee.getArgument (i++));
340375 }
341- // printRegionInfo(loop0.region(), "After replaceAllUsesWith");
342- printOperation (loop0, " After replaceAllUsesWith" );
343- resetIndent ();
376+ printOperation (loop0, " After replaceAllUsesWith" ); resetIndent ();
344377 LLVM_DEBUG ({dbgs () << " Callee:\n " ; newCallee.dump ();});
345378
346379 // Change outer affine for body block argument types to a single index + find innermost affine for op
347- // AffineForOp* innermost_affine_for_op;
348380 for (Block &block : loop0.region ().getBlocks ()) {
349- // unsigned i = 0;
350- // block.walk([&](AffineForOp affine_for_op) {
351- // innermost_affine_for_op = &affine_for_op;
352- // i++;
353- // LLVM_DEBUG({dbgs() << "Walking block, i: " << i << "\n";});
354- // });
355- // assert(i == 1 && "Found more than one AffineForOp in the PE.");
356-
357381 // Erase all existing block argument types (that came from the original FuncOP) using BitVector of all 1s
358382 llvm::BitVector eraseIndices (block.getNumArguments (), true );
359383 block.eraseArguments (eraseIndices);
@@ -362,56 +386,28 @@ static void handleCalleePE(mlir::FuncOp PE_func_op) {
362386 block.addArgument (IndexType::get (context), newCallee.getLoc ());
363387 }
364388 LLVM_DEBUG ({dbgs () << " Callee:\n " ; newCallee.dump ();});
365- // LLVM_DEBUG({dbgs() << "innermost_affine_for_op: " << innermost_affine_for_op->getName() << "\n";});
366- // printRegionInfo(innermost_affine_for_op->region(), "Found AffineForOp");
367389 SmallVector<AffineForOp> affine_for_ops;
368390 loop0.walk ([&](AffineForOp op) {
369391 affine_for_ops.push_back (op);
370392 });
371393 LLVM_DEBUG ({dbgs () << " Found " << affine_for_ops.size () << " affineForOps\n " ;});
372394 AffineForOp innermost_affine_for_op = affine_for_ops[0 ];
373- printOperation (innermost_affine_for_op, " Found AffineForOp" );
374- resetIndent ();
375-
376- // Add systolic array specific I/O
377- // Add a dummy func op that won't be emitted directly, but allows for custom .read() call
378- mlir::FuncOp hls_stream_read_func_op = createDummyFuncOp (newCallee, " hls_stream_read" , b);
395+ printOperation (innermost_affine_for_op, " Found AffineForOp" ); resetIndent ();
379396
380397 // Find innermost block
381- Block* innermost_block;
382- for (Region ®ion : innermost_affine_for_op->getRegions ()) { // TODO how to do this efficiently?
383- for (Block &block : region.getBlocks ()) {
384- innermost_block = █
385- break ;
386- }
387- break ;
388- }
389- printBlock (*innermost_block, " Found innermost_block" );
390- resetIndent ();
398+ Block* innermost_block = &(innermost_affine_for_op->getRegions ().front ().getBlocks ().front ());
399+ printBlock (*innermost_block, " Found innermost_block" ); resetIndent ();
391400 LLVM_DEBUG ({dbgs () << " Found innermost_block: \n " ;});
392401
402+ // Add systolic array specific I/O
393403 // 1. Load A_in to A_local
394- auto A_in = newCallee.getArguments ()[3 ];
395- b.setInsertionPointToStart (innermost_block);
396- memref::AllocaOp A_local = b.create <memref::AllocaOp>(newCallee.getLoc (), A_in.getType ().cast <MemRefType>());
397- // use a custom call op for reading from hls stream
398- SmallVector<Value> operands;
399- CallOp hls_stream_read = b.create <CallOp>(
400- hls_stream_read_func_op.getLoc (),
401- hls_stream_read_func_op,
402- operands
403- );
404-
404+ Value A_in = newCallee.getArguments ()[3 ];
405+ initializeInputVariable (b, context, A_in, newCallee, innermost_block);
405406
406-
407-
408- // attr per op -> but better to infer from func ops
409- // A_local.setAttr("requires .read()")
410-
411- // local_A[0][n] = u.ut; -> memref::store op from u to local_A + attrbitured with ".ut"
412- // store op takies vector of indexes, here: 0, n
413-
414407 // 2. Load B_in to B_local
408+ Value B_in = newCallee.getArguments ()[4 ];
409+ initializeInputVariable (b, context, B_in, newCallee, innermost_block);
410+
415411 // 3. C_local := op(A_local, B_local) or 0
416412 // 4. C_local drain to C_out
417413 // 5. Drain B_local to B_out
0 commit comments