Skip to content

Commit f9b82a1

Browse files
committed
add remap_id to pintool
1 parent 47cb250 commit f9b82a1

File tree

2 files changed

+129
-2
lines changed

2 files changed

+129
-2
lines changed

src/sst/elements/ariel/api/arielapi.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@
1717
#include <stdio.h>
1818
#include <stdlib.h>
1919
#include <inttypes.h>
20+
/*
2021
#ifdef ENABLE_ARIEL_MPI
2122
#include <mpi.h>
2223
#endif
24+
*/
2325

2426
/* These definitions are replaced during simulation */
2527

@@ -36,6 +38,7 @@ void ariel_fence() {
3638
}
3739

3840
/* Stand-in for the cycle-count query: announces on stdout that the
 * API-side version was reached and reports a cycle count of zero. */
uint64_t ariel_cycles() {
    const uint64_t placeholder_cycles = 0;
    fputs("ARIEL: ariel_cycles called in Ariel API.\n", stdout);
    return placeholder_cycles;
}
4144

@@ -47,6 +50,7 @@ void ariel_malloc_flag(int64_t id, int count, int level) {
4750
printf("ARIEL: flagging next %d mallocs at id %" PRId64 "\n", count, id);
4851
}
4952

53+
/*
5054
// To ensure that the Pintool (fesimple.cc) numbers our application's OpenMP threads
5155
// from 0..N-1, we need to run an OpenMP parallel region before calling MPI Init.
5256
// Otherwise, some MPI threads which aren't used for our application will be
@@ -103,3 +107,4 @@ int MPI_Init_thread(int *argc, char ***argv, int required, int *provided) {
103107
exit(1);
104108
#endif
105109
}
110+
*/

src/sst/elements/ariel/frontend/pin3/fesimple.cc

Lines changed: 124 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
#include "atomic.hpp"
2323
#include <time.h>
2424
#include <inttypes.h>
25+
#include <sys/syscall.h>
26+
#include <unistd.h>
27+
#include <unordered_map>
2528

2629
#include <string>
2730
#include <map>
@@ -180,6 +183,96 @@ class StackRecord {
180183

181184
std::vector<std::vector<StackRecord> > arielStack; // Per-thread stacks
182185

186+
static const UINT32 MAX_FRAMES = 64;
187+
/*
 * Reports whether any frame of the backtrace taken from `ctxt` lies inside an
 * image whose name contains "libmpi.so".
 *
 * ctxt: context handed to PIN_Backtrace to capture the return addresses.
 * Returns true on the first matching frame, false when no frame matches.
 */
bool is_mpi_thread(CONTEXT* ctxt) {
    void *frames[256];

    // PIN_Backtrace stores return addresses in `frames`; its result is used
    // as the number of entries to inspect.
    const UINT32 depth = PIN_Backtrace(ctxt, frames, sizeof(frames) / sizeof(frames[0]));

    UINT32 idx = 0;
    while (idx < depth) {
        // Look up the image (module) containing this return address.
        const IMG owner = IMG_FindByAddress(reinterpret_cast<ADDRINT>(frames[idx]));
        ++idx;

        // Addresses that do not map to a valid image are skipped.
        if (!IMG_Valid(owner)) {
            continue;
        }

        const std::string moduleName = IMG_Name(owner);
        if (moduleName.find("libmpi.so") != std::string::npos) {
            return true;
        }
    }

    return false;
}
213+
214+
UINT32 num_threads;
215+
216+
// Maps a thread index (handed out from num_threads) to its remapped id; SyscallEntry updates it while holding mainLock
217+
std::unordered_map<THREADID, THREADID> remap_id;
218+
219+
TLS_KEY syscall_id;
220+
TLS_KEY clone_is_mpi;
221+
TLS_KEY _os_tid;
222+
VOID ThreadStart(THREADID tid, CONTEXT *ctxt, INT32 flags, VOID* v) {
223+
// Initialize data for storing syscall number as it is only availble on Entry, not Exit
224+
PIN_SetThreadData(syscall_id, new ADDRINT(0), tid);
225+
PIN_SetThreadData(clone_is_mpi, new bool(false), tid);
226+
227+
//PIN_SetThreadData(_os_tid, new ADDRINT(0), tid);
228+
}
229+
/*
230+
VOID ThreadFini(THREADID tid, CONTEXT *ctxt, INT32 flags, VOID* v) {
231+
// Delete thread local storage
232+
ADDRINT *syscallNum = static_cast<ADDRINT *>(PIN_GetThreadData(tlsKey, threadId));
233+
delete syscallNum;
234+
}
235+
*/
236+
UINT32 next_app_thread;
237+
UINT32 next_mpi_thread;
238+
239+
VOID SyscallEntry(THREADID threadid, CONTEXT *ctxt, SYSCALL_STANDARD std, VOID *v)
240+
{
241+
// Retrieve the system call number.
242+
PIN_LockClient();
243+
ADDRINT scNo = PIN_GetSyscallNumber(ctxt, std);
244+
ADDRINT *syscall_id_ptr = static_cast<ADDRINT *>(PIN_GetThreadData(syscall_id, threadid));
245+
*syscall_id_ptr = scNo;
246+
247+
248+
bool is_mpi = false;
249+
if (scNo == SYS_clone || scNo == __NR_clone)
250+
{
251+
PIN_GetLock(&mainLock, threadid);
252+
UINT32 next_thread = num_threads;
253+
num_threads++;
254+
255+
bool is_mpi = is_mpi_thread(ctxt);
256+
std::cout << "next_thread is " << next_thread << std::endl;
257+
if (is_mpi) {
258+
remap_id[next_thread] = next_mpi_thread++;
259+
} else {
260+
remap_id[next_thread] = next_app_thread++;
261+
}
262+
PIN_ReleaseLock(&mainLock);
263+
264+
std::cout << "--------------\n";
265+
for (const auto& pair : remap_id) {
266+
std::cout << " --> " << pair.first << " -- " << pair.second << std::endl;
267+
}
268+
std::cout << "--------------\n";
269+
270+
}
271+
bool *clone_is_mpi_ptr = static_cast<bool *>(PIN_GetThreadData(clone_is_mpi, threadid));
272+
*clone_is_mpi_ptr = is_mpi;
273+
PIN_UnlockClient();
274+
}
275+
183276
/* Instrumentation function to be called on function calls */
184277
VOID ariel_stack_call(THREADID thr, ADDRINT stackPtr, ADDRINT target, ADDRINT ip)
185278
{
@@ -833,17 +926,21 @@ void mapped_ariel_fence(void *virtualAddress)
833926
WriteFenceInstructionMarker(thr, ip);
834927
}
835928

929+
/*
836930
void mapped_api_mpi_init() {
837931
api_mpi_init_used = 1;
838932
}
933+
*/
839934

935+
/*
840936
int check_for_api_mpi_init() {
841937
if (!api_mpi_init_used && !getenv("ARIEL_DISABLE_MPI_INIT_CHECK")) {
842938
fprintf(stderr, "Error: fesimple.cc: The Ariel API verion of MPI_Init_{thread} was not used, which can result in errors when used in conjunction with OpenMP. Please link against the Ariel API (included in this distribution at src/sst/elements/ariel/api) or disable this message by setting the environment variable `ARIEL_DISABLE_MPI_INIT_CHECK`\n");
843939
exit(1);
844940
}
845941
return 0;
846942
}
943+
*/
847944

848945
int ariel_mlm_memcpy(void* dest, void* source, size_t size) {
849946
#ifdef ARIEL_DEBUG
@@ -1240,6 +1337,7 @@ VOID InstrumentRoutine(RTN rtn, VOID* args)
12401337
RTN_Replace(rtn, (AFUNPTR) mapped_ariel_cycles);
12411338
fprintf(stderr, "Replacement complete\n");
12421339
return;
1340+
/*
12431341
} else if (RTN_Name(rtn) == "MPI_Init" || RTN_Name(rtn) == "_MPI_Init") {
12441342
fprintf(stderr, "Identified routine: MPI_Init. Instrumenting.\n");
12451343
RTN_Open(rtn);
@@ -1257,7 +1355,7 @@ VOID InstrumentRoutine(RTN rtn, VOID* args)
12571355
RTN_Replace(rtn, (AFUNPTR) mapped_api_mpi_init);
12581356
fprintf(stderr, "Replacement complete\n");
12591357
return;
1260-
return;
1358+
*/
12611359
#if ! defined(__APPLE__)
12621360
} else if (RTN_Name(rtn) == "clock_gettime" || RTN_Name(rtn) == "_clock_gettime" ||
12631361
RTN_Name(rtn) == "__clock_gettime") {
@@ -1276,7 +1374,6 @@ VOID InstrumentRoutine(RTN rtn, VOID* args)
12761374
RTN_Replace(rtn, (AFUNPTR) ariel_update_RTL_signals);
12771375
fprintf(stderr,"Replacement complete.\n");
12781376
return;
1279-
12801377
} else if ((InterceptMemAllocations.Value() > 0) && RTN_Name(rtn) == "mlm_malloc") {
12811378
// This means we want a special malloc to be used (needs a TLB map inside the virtual core)
12821379
fprintf(stderr,"Identified routine: mlm_malloc, replacing with Ariel equivalent...\n");
@@ -1399,10 +1496,30 @@ int main(int argc, char *argv[])
13991496
{
14001497
if (PIN_Init(argc, argv)) return Usage();
14011498

1499+
syscall_id = PIN_CreateThreadDataKey(nullptr);
1500+
if (syscall_id == INVALID_TLS_KEY) {
1501+
std::cerr << "Failed to create thread-local storage key!" << std::endl;
1502+
return 1;
1503+
}
1504+
clone_is_mpi = PIN_CreateThreadDataKey(nullptr);
1505+
if (clone_is_mpi == INVALID_TLS_KEY) {
1506+
std::cerr << "Failed to create thread-local storage key!" << std::endl;
1507+
return 1;
1508+
}
1509+
_os_tid = PIN_CreateThreadDataKey(nullptr);
1510+
if (_os_tid == INVALID_TLS_KEY) {
1511+
std::cerr << "Failed to create thread-local storage key!" << std::endl;
1512+
return 1;
1513+
}
1514+
num_threads = 1;
1515+
1516+
14021517
// Load the symbols ready for us to mangle functions.
14031518
//PIN_InitSymbolsAlt(IFUNC_SYMBOLS);
14041519
PIN_InitSymbols();
14051520
PIN_AddFiniFunction(Fini, 0);
1521+
PIN_AddThreadStartFunction(ThreadStart, nullptr);
1522+
PIN_AddSyscallEntryFunction(SyscallEntry, 0);
14061523

14071524
PIN_InitLock(&mainLock);
14081525
PIN_InitLock(&mallocIndexLock);
@@ -1442,6 +1559,11 @@ int main(int argc, char *argv[])
14421559
}
14431560

14441561
core_count = MaxCoreCount.Value();
1562+
1563+
remap_id[0] = 0;
1564+
next_app_thread = 1;
1565+
next_mpi_thread = core_count;
1566+
14451567
instrument_instructions = InstrumentInstructions.Value();
14461568

14471569
// Pin version specific tunnel attach

0 commit comments

Comments
 (0)