2222#include " atomic.hpp"
2323#include < time.h>
2424#include < inttypes.h>
25+ #include < sys/syscall.h>
26+ #include < unistd.h>
27+ #include < unordered_map>
2528
2629#include < string>
2730#include < map>
@@ -180,6 +183,96 @@ class StackRecord {
180183
181184std::vector<std::vector<StackRecord> > arielStack; // Per-thread stacks
182185
186+ static const UINT32 MAX_FRAMES = 64 ;
187+ bool is_mpi_thread (CONTEXT* ctxt) {
188+ void *buf[256 ];
189+
190+ // PIN_Backtrace fills the trace array with return addresses.
191+ // numFrames holds the number of captured frames.
192+ UINT32 numFrames = PIN_Backtrace (ctxt, buf, sizeof (buf) / sizeof (buf[0 ]));
193+ // std::cout << "numFrames: " << numFrames << std::endl;
194+
195+ for (UINT32 i = 0 ; i < numFrames; i++)
196+ {
197+ // Get the image (module) that contains the current address.
198+ IMG img = IMG_FindByAddress ((ADDRINT)buf[i]);
199+ if (IMG_Valid (img))
200+ {
201+ // Retrieve the image name as a std::string.
202+ std::string imgName = IMG_Name (img);
203+ // std::cout << "imgName: " << imgName << std::endl;
204+ // Check if "libmpi.so" appears in the module name.
205+ if (imgName.find (" libmpi.so" ) != std::string::npos)
206+ {
207+ return true ;
208+ }
209+ }
210+ }
211+ return false ;
212+ }
213+
214+ UINT32 num_threads;
215+
// Maps each thread's creation-order index to its remapped thread id (filled in by SyscallEntry while holding mainLock)
217+ std::unordered_map<THREADID, THREADID> remap_id;
218+
219+ TLS_KEY syscall_id;
220+ TLS_KEY clone_is_mpi;
221+ TLS_KEY _os_tid;
222+ VOID ThreadStart (THREADID tid, CONTEXT *ctxt, INT32 flags, VOID* v) {
223+ // Initialize data for storing syscall number as it is only availble on Entry, not Exit
224+ PIN_SetThreadData (syscall_id, new ADDRINT (0 ), tid);
225+ PIN_SetThreadData (clone_is_mpi, new bool (false ), tid);
226+
227+ // PIN_SetThreadData(_os_tid, new ADDRINT(0), tid);
228+ }
229+ /*
230+ VOID ThreadFini(THREADID tid, CONTEXT *ctxt, INT32 flags, VOID* v) {
231+ // Delete thread local storage
232+ ADDRINT *syscallNum = static_cast<ADDRINT *>(PIN_GetThreadData(tlsKey, threadId));
233+ delete syscallNum;
234+ }
235+ */
236+ UINT32 next_app_thread;
237+ UINT32 next_mpi_thread;
238+
239+ VOID SyscallEntry (THREADID threadid, CONTEXT *ctxt, SYSCALL_STANDARD std, VOID *v)
240+ {
241+ // Retrieve the system call number.
242+ PIN_LockClient ();
243+ ADDRINT scNo = PIN_GetSyscallNumber (ctxt, std);
244+ ADDRINT *syscall_id_ptr = static_cast <ADDRINT *>(PIN_GetThreadData (syscall_id, threadid));
245+ *syscall_id_ptr = scNo;
246+
247+
248+ bool is_mpi = false ;
249+ if (scNo == SYS_clone || scNo == __NR_clone)
250+ {
251+ PIN_GetLock (&mainLock, threadid);
252+ UINT32 next_thread = num_threads;
253+ num_threads++;
254+
255+ bool is_mpi = is_mpi_thread (ctxt);
256+ std::cout << " next_thread is " << next_thread << std::endl;
257+ if (is_mpi) {
258+ remap_id[next_thread] = next_mpi_thread++;
259+ } else {
260+ remap_id[next_thread] = next_app_thread++;
261+ }
262+ PIN_ReleaseLock (&mainLock);
263+
264+ std::cout << " --------------\n " ;
265+ for (const auto & pair : remap_id) {
266+ std::cout << " --> " << pair.first << " -- " << pair.second << std::endl;
267+ }
268+ std::cout << " --------------\n " ;
269+
270+ }
271+ bool *clone_is_mpi_ptr = static_cast <bool *>(PIN_GetThreadData (clone_is_mpi, threadid));
272+ *clone_is_mpi_ptr = is_mpi;
273+ PIN_UnlockClient ();
274+ }
275+
183276/* Instrumentation function to be called on function calls */
184277VOID ariel_stack_call (THREADID thr, ADDRINT stackPtr, ADDRINT target, ADDRINT ip)
185278{
@@ -833,17 +926,21 @@ void mapped_ariel_fence(void *virtualAddress)
833926 WriteFenceInstructionMarker (thr, ip);
834927}
835928
929+ /*
836930void mapped_api_mpi_init() {
837931 api_mpi_init_used = 1;
838932}
933+ */
839934
935+ /*
840936int check_for_api_mpi_init() {
841937 if (!api_mpi_init_used && !getenv("ARIEL_DISABLE_MPI_INIT_CHECK")) {
842938 fprintf(stderr, "Error: fesimple.cc: The Ariel API verion of MPI_Init_{thread} was not used, which can result in errors when used in conjunction with OpenMP. Please link against the Ariel API (included in this distribution at src/sst/elements/ariel/api) or disable this message by setting the environment variable `ARIEL_DISABLE_MPI_INIT_CHECK`\n");
843939 exit(1);
844940 }
845941 return 0;
846942}
943+ */
847944
848945int ariel_mlm_memcpy (void * dest, void * source, size_t size) {
849946#ifdef ARIEL_DEBUG
@@ -1240,6 +1337,7 @@ VOID InstrumentRoutine(RTN rtn, VOID* args)
12401337 RTN_Replace (rtn, (AFUNPTR) mapped_ariel_cycles);
12411338 fprintf (stderr, " Replacement complete\n " );
12421339 return ;
1340+ /*
12431341 } else if (RTN_Name(rtn) == "MPI_Init" || RTN_Name(rtn) == "_MPI_Init") {
12441342 fprintf(stderr, "Identified routine: MPI_Init. Instrumenting.\n");
12451343 RTN_Open(rtn);
@@ -1257,7 +1355,7 @@ VOID InstrumentRoutine(RTN rtn, VOID* args)
12571355 RTN_Replace(rtn, (AFUNPTR) mapped_api_mpi_init);
12581356 fprintf(stderr, "Replacement complete\n");
12591357 return;
1260- return ;
1358+ */
12611359#if ! defined(__APPLE__)
12621360 } else if (RTN_Name (rtn) == " clock_gettime" || RTN_Name (rtn) == " _clock_gettime" ||
12631361 RTN_Name (rtn) == " __clock_gettime" ) {
@@ -1276,7 +1374,6 @@ VOID InstrumentRoutine(RTN rtn, VOID* args)
12761374 RTN_Replace (rtn, (AFUNPTR) ariel_update_RTL_signals);
12771375 fprintf (stderr," Replacement complete.\n " );
12781376 return ;
1279-
12801377 } else if ((InterceptMemAllocations.Value () > 0 ) && RTN_Name (rtn) == " mlm_malloc" ) {
12811378 // This means we want a special malloc to be used (needs a TLB map inside the virtual core)
12821379 fprintf (stderr," Identified routine: mlm_malloc, replacing with Ariel equivalent...\n " );
@@ -1399,10 +1496,30 @@ int main(int argc, char *argv[])
13991496{
14001497 if (PIN_Init (argc, argv)) return Usage ();
14011498
1499+ syscall_id = PIN_CreateThreadDataKey (nullptr );
1500+ if (syscall_id == INVALID_TLS_KEY) {
1501+ std::cerr << " Failed to create thread-local storage key!" << std::endl;
1502+ return 1 ;
1503+ }
1504+ clone_is_mpi = PIN_CreateThreadDataKey (nullptr );
1505+ if (clone_is_mpi == INVALID_TLS_KEY) {
1506+ std::cerr << " Failed to create thread-local storage key!" << std::endl;
1507+ return 1 ;
1508+ }
1509+ _os_tid = PIN_CreateThreadDataKey (nullptr );
1510+ if (_os_tid == INVALID_TLS_KEY) {
1511+ std::cerr << " Failed to create thread-local storage key!" << std::endl;
1512+ return 1 ;
1513+ }
1514+ num_threads = 1 ;
1515+
1516+
14021517 // Load the symbols ready for us to mangle functions.
14031518 // PIN_InitSymbolsAlt(IFUNC_SYMBOLS);
14041519 PIN_InitSymbols ();
14051520 PIN_AddFiniFunction (Fini, 0 );
1521+ PIN_AddThreadStartFunction (ThreadStart, nullptr );
1522+ PIN_AddSyscallEntryFunction (SyscallEntry, 0 );
14061523
14071524 PIN_InitLock (&mainLock);
14081525 PIN_InitLock (&mallocIndexLock);
@@ -1442,6 +1559,11 @@ int main(int argc, char *argv[])
14421559 }
14431560
14441561 core_count = MaxCoreCount.Value ();
1562+
1563+ remap_id[0 ] = 0 ;
1564+ next_app_thread = 1 ;
1565+ next_mpi_thread = core_count;
1566+
14451567 instrument_instructions = InstrumentInstructions.Value ();
14461568
14471569// Pin version specific tunnel attach
0 commit comments