diff --git a/Core/BGDriver.cpp b/Core/BGDriver.cpp index 83be51d3..27a44f22 100644 --- a/Core/BGDriver.cpp +++ b/Core/BGDriver.cpp @@ -216,10 +216,22 @@ bool createAllModelClassInstances(TiXmlDocument* simDoc, SimulationInfo *simInfo // create clusters int numClusterNeurons = simInfo->totalNeurons / g_numClusters; // number of neurons in cluster - for (int iCluster = 0; iCluster < g_numClusters; iCluster++) { + //https://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine + unsigned int nCores = std::thread::hardware_concurrency(); + int jumpPerThread = nCores / g_numClusters; + int coreAssigner = 0; + + + for (int iCluster = 0; iCluster < g_numClusters; iCluster++) { // This is looping from 0 to num of threads. // create a cluster information ClusterInfo *clusterInfo = new ClusterInfo(); clusterInfo->clusterID = iCluster; + if(iCluster > nCores) { + coreAssigner = 0; + } + clusterInfo->assignedCore = (jumpPerThread * coreAssigner); + coreAssigner++; + clusterInfo->clusterNeuronsBegin = numClusterNeurons * iCluster; if (iCluster == g_numClusters - 1) { clusterInfo->totalClusterNeurons = simInfo->totalNeurons - numClusterNeurons * (g_numClusters - 1); diff --git a/Core/Cluster.cpp b/Core/Cluster.cpp index 6fdf59cc..6d56529a 100644 --- a/Core/Cluster.cpp +++ b/Core/Cluster.cpp @@ -1,7 +1,17 @@ #include "Cluster.h" #include "ISInput.h" +#include +#include +#include +#include +#include +#include +#include -// Initialize the Barrier Synchnonize object for advanceThreads. + + + +// Initialize the Barrier Synchronize object for advanceThreads. Barrier *Cluster::m_barrierAdvance = NULL; // Initialize the flag for advanceThreads. true if terminating advanceThreads. @@ -10,6 +20,20 @@ bool Cluster::m_isAdvanceExit = false; // Initialize the synaptic transmission delay, descretized into time steps. int Cluster::m_nSynapticTransDelay = 0; +unsigned long threadID = 0; + +std::thread* threadReference = nullptr; + +pid_t mypidt; + +mutex m; + +condition_variable cv; + +bool ready = false; +bool done = false; + + /* * Constructor */ @@ -130,12 +154,58 @@ void Cluster::createAdvanceThread(const SimulationInfo *sim_info, ClusterInfo *c } // Create an advanceThread - std::thread thAdvance(&Cluster::advanceThread, this, sim_info, clr_info); + int lockedCore = clr_info->assignedCore; + cpu_set_t my_set; //http://man7.org/linux/man-pages/man3/pthread_setaffinity_np.3.html + CPU_ZERO(&my_set); //https://stackoverflow.com/questions/10490756/how-to-use-sched-getaffinity2-and-sched-setaffinity2-please-give-code-samp + CPU_SET(lockedCore, &my_set); + std::thread thAdvance(&Cluster::processAdvanceThread, this, sim_info, clr_info, my_set); //Schedule this! + + + { + std::lock_guard lk(m); + ready = true; + std::cout << "main thread signals AdvanceThread\n"; + } + cv.notify_one(); + { + std::unique_lock lk(m); + cv.wait(lk, [&]{return done;}); + } + + + cout << "mypidt is now " << mypidt << endl; + threadReference = &thAdvance; + // cout << "thread " << mypidt << " locked to core: " << lockedCore << endl; + // cout << "CONFIRMATION THREAD " << mypidt << " is running on core " << + int success = sched_getaffinity(mypidt,sizeof(cpu_set_t), &my_set); + + + for(int i = 0; i <= 16; i++) { + if(CPU_ISSET(i, &my_set) == 1) { + cout << "Core " << i << " member of this mask? " << CPU_ISSET(i, &my_set) << endl; + } + } // Leave it running thAdvance.detach(); } +void Cluster::processAdvanceThread(const SimulationInfo *sim_info, ClusterInfo *clr_info, cpu_set_t my_set) { + std::unique_lock lk(m); + cv.wait(lk, [&]{return ready;}); + + mypidt = syscall(SYS_gettid); + + done = true; + sched_setaffinity(mypidt, sizeof(cpu_set_t), &my_set); + + cout << "Set thread to core is finished for " << mypidt << " total cores " << CPU_COUNT(& my_set) << endl; + + lk.unlock(); + cv.notify_one(); + advanceThread(sim_info, clr_info); +} + /* * Thread for advance a cluster. * @@ -154,7 +224,7 @@ void Cluster::advanceThread(const SimulationInfo *sim_info, ClusterInfo *clr_inf break; } - // Advance neurons and synapses indepedently (without barrier synchronization) + // Advance neurons and synapses independently (without barrier synchronization) // within synaptic transmission delay period. for (int iStepOffset = 0; iStepOffset < m_nSynapticTransDelay; iStepOffset++) { if (sim_info->pInput != NULL) { diff --git a/Core/Cluster.h b/Core/Cluster.h index 68113647..9c1912fe 100644 --- a/Core/Cluster.h +++ b/Core/Cluster.h @@ -53,12 +53,28 @@ #include "Layout.h" #include #include "Barrier.hpp" +#include +#include +#include +#include + + class Cluster { public: Cluster(IAllNeurons *neurons, IAllSynapses *synapses); + int assignedCore; virtual ~Cluster(); + std::thread* threadReference; + mutex m; + condition_variable cv; + + bool ready; + bool done; + + + pid_t mypidt; /** * Deserializes internal state from a prior run of the simulation. @@ -158,6 +174,10 @@ class Cluster * @param sim_info SimulationInfo class to read information from. * @param clr_info ClusterInfo class to read information from. */ + + void processAdvanceThread(const SimulationInfo *sim_info, ClusterInfo *clr_info, cpu_set_t my_set); + + void advanceThread(const SimulationInfo *sim_info, ClusterInfo *clr_info); /** @@ -166,6 +186,7 @@ class Cluster * @param sim_info SimulationInfo class to read information from. * @param iStep Simulation steps to advance. */ + static void runAdvance(const SimulationInfo *sim_info, int iStep); /** diff --git a/Core/ClusterInfo.h b/Core/ClusterInfo.h index 632810f3..5a99c1b6 100644 --- a/Core/ClusterInfo.h +++ b/Core/ClusterInfo.h @@ -75,6 +75,9 @@ class ClusterInfo //! Count of neurons in the cluster int totalClusterNeurons; + //! Core to which this cluster is locked + int assignedCore; + //! List of summation points (either host or device memory) BGFLOAT* pClusterSummationMap; diff --git a/Core/IModel.h b/Core/IModel.h index 73e4fa2a..aa525aa5 100644 --- a/Core/IModel.h +++ b/Core/IModel.h @@ -108,6 +108,9 @@ class IModel { * @param currentStep - The epoch step in which the connections are being updated. * @param sim_info - parameters defining the simulation to be run with the given collection of neurons. */ + + virtual void printThreadCoreData() = 0; + virtual void updateConnections(const SimulationInfo *sim_info) = 0; /** diff --git a/Core/Model.cpp b/Core/Model.cpp index 787efd52..3c0726ab 100644 --- a/Core/Model.cpp +++ b/Core/Model.cpp @@ -5,7 +5,12 @@ #include "ParseParamError.h" #include "Util.h" #include "ConnGrowth.h" +#include #include "ISInput.h" +#include +#include +#include + #if defined(USE_GPU) #include "GPUSpikingCluster.h" #endif @@ -287,6 +292,21 @@ void Model::updateConnections(const SimulationInfo *sim_info) } } +void Model::printThreadCoreData(){ + for (unsigned i = 0; i < m_vtClr.size(); i++){ + + cpu_set_t internalSet; + CPU_ZERO(&internalSet); + + sched_getaffinity(m_vtClr[i]->mypidt, sizeof(internalSet), &internalSet); + for(int j = 0; j <= 16; j++) { + if(CPU_ISSET(j, &internalSet)) { + cout << "Cluster " << i << " is running on core " << j << endl; + } + } + } +} + #if defined(PERFORMANCE_METRICS) /* diff --git a/Core/Model.h b/Core/Model.h index a9298d08..6a31b1ab 100644 --- a/Core/Model.h +++ b/Core/Model.h @@ -51,6 +51,11 @@ #include #include +#include +#include +#include + + using namespace std; @@ -93,6 +98,7 @@ class Model : public IModel * @param sim_info - parameters defining the simulation to be run with the given collection of neurons. * @param simRecorder Pointer to the simulation recordig object. */ + virtual void setupSim(SimulationInfo *sim_info); /** @@ -107,6 +113,10 @@ class Model : public IModel * * @return Pointer to the Connections class object. */ + + virtual void printThreadCoreData(); + + virtual Connections* getConnections(); /** diff --git a/Core/Simulator.cpp b/Core/Simulator.cpp index 77a35d12..7eb98313 100644 --- a/Core/Simulator.cpp +++ b/Core/Simulator.cpp @@ -98,7 +98,8 @@ void Simulator::simulate(SimulationInfo *sim_info) // Main simulation loop - execute maxGrowthSteps for (int currentStep = 1; currentStep <= sim_info->maxSteps; currentStep++) { - + //grab cluster stuff here. + sim_info->model->printThreadCoreData(); DEBUG(cout << endl << endl;) DEBUG(cout << "Performing simulation number " << currentStep << endl;) DEBUG(cout << "Begin network state:" << endl;)