diff --git a/.github/workflows/ci-matrix.yml b/.github/workflows/ci-matrix.yml index 74bf7a83c..826307e45 100644 --- a/.github/workflows/ci-matrix.yml +++ b/.github/workflows/ci-matrix.yml @@ -88,7 +88,7 @@ jobs: wget https://apache.jfrog.io/artifactory/arrow/ubuntu/apache-arrow-apt-source-latest-noble.deb sudo apt-get install -y ./apache-arrow-apt-source-latest-noble.deb sudo apt-get update - sudo apt-get install -y clang-20 libomp-20-dev ninja-build libarrow-dev + sudo apt-get install -y clang-20 libomp-20-dev ninja-build libarrow-dev=23.0.1-1 - name: Install prerequisites (Linux) if: matrix.os == 'linux' && matrix.compiler == 'gcc-14' @@ -96,7 +96,7 @@ jobs: wget https://apache.jfrog.io/artifactory/arrow/ubuntu/apache-arrow-apt-source-latest-noble.deb sudo apt-get install -y ./apache-arrow-apt-source-latest-noble.deb sudo apt-get update - sudo apt-get install -y g++-14 ninja-build libarrow-dev + sudo apt-get install -y g++-14 ninja-build libarrow-dev=23.0.1-1 - name: Install prerequisites (macOS) if: matrix.os == 'macos' diff --git a/.gitignore b/.gitignore index 8b8495c32..ebeb86d1e 100644 --- a/.gitignore +++ b/.gitignore @@ -101,3 +101,6 @@ CTestTestfile.cmake ### MacOS .DS_Store + +### uv +uv.lock diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e9f1df23..a4b231ba0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -268,6 +268,9 @@ if(NETWORKIT_BUILD_CORE AND NETWORKIT_MONOLITH) endif() target_link_libraries(networkit PRIVATE OpenMP::OpenMP_CXX PUBLIC tlx Arrow::arrow_shared) + if(CMAKE_DL_LIBS) + target_link_libraries(networkit PRIVATE ${CMAKE_DL_LIBS}) + endif() set_target_properties(networkit PROPERTIES CXX_STANDARD ${NETWORKIT_CXX_STANDARD} diff --git a/examples/parallel_leiden_scoring_extension_demo.py b/examples/parallel_leiden_scoring_extension_demo.py new file mode 100644 index 000000000..06381df91 --- /dev/null +++ b/examples/parallel_leiden_scoring_extension_demo.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 + +import argparse +import os +from pathlib import Path + +import networkit as nk + + +def build_demo_graph(): + graph = nk.Graph(6, weighted=False, directed=False) + for u, v in [(0, 1), (1, 2), (0, 2), (3, 4), (4, 5), (3, 5), (2, 3)]: + graph.addEdge(u, v) + return graph + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Demo ParallelLeidenView's shared-library move scoring extension mechanism." + ) + parser.add_argument( + "--plugin", + type=Path, + help=( + "Path to a scorer shared library. " + "Example: build/networkit/cpp/community/libnetworkit_parallel_leiden_modularity_extension.dylib" + ), + ) + parser.add_argument( + "--use-env", + action="store_true", + help="Load the plugin through NETWORKIT_LEIDEN_MOVE_SCORING_LIB instead of the Python API.", + ) + parser.add_argument("--iterations", type=int, default=2, help="Number of Leiden iterations.") + parser.add_argument("--gamma", type=float, default=1.0, help="Resolution parameter.") + parser.add_argument( + "--randomize", + action="store_true", + help="Randomize node order. Disabled by default to keep output stable.", + ) + return parser.parse_args() + + +def main(): + args = parse_args() + graph = build_demo_graph() + plugin_loaded_via = None + + if args.use_env: + if args.plugin is None: + raise SystemExit("--use-env requires --plugin") + os.environ["NETWORKIT_LEIDEN_MOVE_SCORING_LIB"] = str(args.plugin.resolve()) + plugin_loaded_via = "env" + + leiden = nk.community.ParallelLeidenView( + graph, iterations=args.iterations, randomize=args.randomize, gamma=args.gamma + ) + + if args.plugin is not None and not args.use_env: + if hasattr(leiden, "loadMoveScoringExtension"): + leiden.loadMoveScoringExtension(str(args.plugin.resolve())) + plugin_loaded_via = "api" + else: + # Fallback for older Python extension builds where the C++ env hook exists + # but the wrapper method has not been rebuilt yet. + os.environ["NETWORKIT_LEIDEN_MOVE_SCORING_LIB"] = str(args.plugin.resolve()) + leiden = nk.community.ParallelLeidenView( + graph, iterations=args.iterations, randomize=args.randomize, gamma=args.gamma + ) + plugin_loaded_via = "env-fallback" + + leiden.run() + partition = leiden.getPartition() + + print("communities:", partition.numberOfSubsets()) + print("assignment:", [partition[i] for i in range(graph.numberOfNodes())]) + if args.plugin is None: + print("scorer: built-in modularity") + elif plugin_loaded_via == "api": + print("scorer: plugin loaded via ParallelLeidenView.loadMoveScoringExtension()") + elif plugin_loaded_via == "env": + print("scorer: plugin loaded from NETWORKIT_LEIDEN_MOVE_SCORING_LIB") + elif plugin_loaded_via == "env-fallback": + print("scorer: plugin loaded from NETWORKIT_LEIDEN_MOVE_SCORING_LIB") + print("note: Python wrapper method not available in this build, used env fallback") + else: + print("scorer: plugin requested, but no loading path was used") + + +if __name__ == "__main__": + main() diff --git a/include/networkit/community/ParallelLeidenScoringExtension.hpp b/include/networkit/community/ParallelLeidenScoringExtension.hpp new file mode 100644 index 000000000..5c07db948 --- /dev/null +++ b/include/networkit/community/ParallelLeidenScoringExtension.hpp @@ -0,0 +1,68 @@ +/* + * ParallelLeidenScoringExtension.hpp + * + * Shared-library ABI for custom ParallelLeidenView move scoring metrics. + */ + +#ifndef NETWORKIT_COMMUNITY_PARALLEL_LEIDEN_SCORING_EXTENSION_HPP_ +#define NETWORKIT_COMMUNITY_PARALLEL_LEIDEN_SCORING_EXTENSION_HPP_ + +#include + +namespace NetworKit { + +using ParallelLeidenCommunityScoreFunction = + double (*)(double cutWeight, double degree, double communityVolume, count subsetSize, + count communitySize, double gamma, double inverseGraphVolume); + +using ParallelLeidenRefineSetConditionFunction = + bool (*)(double cutWeight, double subsetVolume, count subsetSize, double targetVolume, + count targetSize, double sourceVolume, count sourceSize, double gamma, + double inverseGraphVolume); + +} // namespace NetworKit + +extern "C" { + +/** + * Required: score a candidate community during the move phase. + */ +double networkitParallelLeidenCommunityScore(double cutWeight, double degree, double communityVolume, + NetworKit::count subsetSize, + NetworKit::count communitySize, double gamma, + double inverseGraphVolume); + +/** + * Optional: override the current-community stay threshold used to accept or reject the best move. + * When omitted, ParallelLeidenView falls back to the built-in modularity threshold. + */ +double networkitParallelLeidenCurrentCommunityThreshold(double cutWeight, double degree, + double communityVolume, + NetworKit::count subsetSize, + NetworKit::count communitySize, + double gamma, + double inverseGraphVolume); + +/** + * Optional: override the refine-phase R-set condition. + * When omitted, ParallelLeidenView falls back to the built-in modularity condition. + */ +bool networkitParallelLeidenRefineRSetCondition(double cutWeight, double subsetVolume, + NetworKit::count subsetSize, double targetVolume, + NetworKit::count targetSize, double sourceVolume, + NetworKit::count sourceSize, double gamma, + double inverseGraphVolume); + +/** + * Optional: override the refine-phase T-set condition. + * When omitted, ParallelLeidenView falls back to the built-in modularity condition. + */ +bool networkitParallelLeidenRefineTSetCondition(double cutWeight, double subsetVolume, + NetworKit::count subsetSize, double targetVolume, + NetworKit::count targetSize, double sourceVolume, + NetworKit::count sourceSize, double gamma, + double inverseGraphVolume); + +} // extern "C" + +#endif // NETWORKIT_COMMUNITY_PARALLEL_LEIDEN_SCORING_EXTENSION_HPP_ diff --git a/include/networkit/community/ParallelLeidenView.hpp b/include/networkit/community/ParallelLeidenView.hpp index 19e9f9072..d59122780 100644 --- a/include/networkit/community/ParallelLeidenView.hpp +++ b/include/networkit/community/ParallelLeidenView.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,7 @@ #include #include #include +#include #include #include @@ -59,6 +61,17 @@ class ParallelLeidenView final : public CommunityDetectionAlgorithm { void run() override; + /** + * Load a shared library that customizes the move-phase scoring metric. + * + * The library must export `networkitParallelLeidenCommunityScore`. It may additionally export + * `networkitParallelLeidenCurrentCommunityThreshold` to replace the default modularity-based + * stay threshold as well. + */ + void loadMoveScoringExtension(const std::string &sharedLibraryPath); + + void unloadMoveScoringExtension(); + int VECTOR_OVERSIZE = 10000; private: @@ -81,14 +94,79 @@ class ParallelLeidenView final : public CommunityDetectionAlgorithm { template Partition parallelRefine(const GraphType &graph); - inline double modularityDelta(double cutD, double degreeV, double volD) const { + static count nodeSize(const Graph &graph, node u); + + static count nodeSize(const CoarsenedGraphView &graph, node u); + + static double modularityCommunityScore(double cutD, double degreeV, double volD, + count subsetSize, count sizeD, double gamma, + double inverseGraphVolume) { + tlx::unused(subsetSize); + tlx::unused(sizeD); return cutD - gamma * degreeV * volD * inverseGraphVolume; - }; + } - inline double modularityThreshold(double cutC, double volC, double degreeV) const { + static double modularityThresholdScore(double cutC, double degreeV, double volC, + count subsetSize, count sizeC, double gamma, + double inverseGraphVolume) { + tlx::unused(subsetSize); + tlx::unused(sizeC); return cutC - gamma * (volC - degreeV) * degreeV * inverseGraphVolume; } + static bool modularityRefineRSetCondition(double cutWeight, double subsetVolume, + count subsetSize, double targetVolume, + count targetSize, double sourceVolume, + count sourceSize, double gamma, + double inverseGraphVolume) { + tlx::unused(subsetSize); + tlx::unused(targetSize); + tlx::unused(sourceVolume); + tlx::unused(sourceSize); + return cutWeight >= gamma * subsetVolume * targetVolume * inverseGraphVolume; + } + + static bool modularityRefineTSetCondition(double cutWeight, double subsetVolume, + count subsetSize, double targetVolume, + count targetSize, double sourceVolume, + count sourceSize, double gamma, + double inverseGraphVolume) { + tlx::unused(subsetSize); + tlx::unused(targetSize); + tlx::unused(sourceVolume); + tlx::unused(sourceSize); + return cutWeight >= gamma * subsetVolume * targetVolume * inverseGraphVolume; + } + + inline double scoreCommunity(double cutWeight, double degree, double communityVolume, + count subsetSize, count communitySize) const { + return communityScoreFunction_(cutWeight, degree, communityVolume, subsetSize, + communitySize, gamma, inverseGraphVolume); + } + + inline double scoreCurrentCommunityThreshold(double cutWeight, double degree, + double communityVolume, count subsetSize, + count communitySize) const { + return currentCommunityThresholdFunction_(cutWeight, degree, communityVolume, subsetSize, + communitySize, gamma, inverseGraphVolume); + } + + inline bool refineRSetCondition(double cutWeight, double subsetVolume, count subsetSize, + double targetVolume, count targetSize, double sourceVolume, + count sourceSize) const { + return refineRSetConditionFunction_(cutWeight, subsetVolume, subsetSize, targetVolume, + targetSize, sourceVolume, sourceSize, gamma, + inverseGraphVolume); + } + + inline bool refineTSetCondition(double cutWeight, double subsetVolume, count subsetSize, + double targetVolume, count targetSize, double sourceVolume, + count sourceSize) const { + return refineTSetConditionFunction_(cutWeight, subsetVolume, subsetSize, targetVolume, + targetSize, sourceVolume, sourceSize, gamma, + inverseGraphVolume); + } + static inline void lockLowerFirst(index a, index b, std::vector &locks) { if (a < b) { locks[a].lock(); @@ -104,6 +182,7 @@ class ParallelLeidenView final : public CommunityDetectionAlgorithm { double inverseGraphVolume; // 1/vol(V) std::vector communityVolumes; + std::vector communitySizes; std::vector composedMapping; @@ -131,6 +210,16 @@ class ParallelLeidenView final : public CommunityDetectionAlgorithm { // Optional convergence stop: minimum relative reduction in community count per inner iter. // 0.0 disables this criterion. double minCommunityReduction = 0.0; + + void *scoringExtensionHandle_ = nullptr; + ParallelLeidenCommunityScoreFunction communityScoreFunction_ = &modularityCommunityScore; + ParallelLeidenCommunityScoreFunction currentCommunityThresholdFunction_ = + &modularityThresholdScore; + ParallelLeidenRefineSetConditionFunction refineRSetConditionFunction_ = + &modularityRefineRSetCondition; + ParallelLeidenRefineSetConditionFunction refineTSetConditionFunction_ = + &modularityRefineTSetCondition; + std::string scoringExtensionPath_; }; } // namespace NetworKit diff --git a/networkit/community.pyx b/networkit/community.pyx index 47acdc1d3..5f0fca0e1 100644 --- a/networkit/community.pyx +++ b/networkit/community.pyx @@ -710,6 +710,8 @@ cdef extern from "": cdef cppclass _ParallelLeidenView "NetworKit::ParallelLeidenView"(_CommunityDetectionAlgorithm): _ParallelLeidenView(_Graph _G) except + _ParallelLeidenView(_Graph _G, int iterations, bool_t randomize, double gamma) except + + void loadMoveScoringExtension(const string &sharedLibraryPath) except + + void unloadMoveScoringExtension() except + cdef class ParallelLeiden(CommunityDetector): """ @@ -777,6 +779,28 @@ cdef class ParallelLeidenView(CommunityDetector): self._G = G self._this = new _ParallelLeidenView(dereference(G._this),iterations,randomize,gamma) + def loadMoveScoringExtension(self, shared_library_path): + """ + loadMoveScoringExtension(shared_library_path) + + Load a shared library that overrides the move-phase community scoring used by + ParallelLeidenView. The library must export + `networkitParallelLeidenCommunityScore` and may additionally export + `networkitParallelLeidenCurrentCommunityThreshold`. + """ + (<_ParallelLeidenView*>(self._this)).loadMoveScoringExtension(stdstring(shared_library_path)) + return self + + def unloadMoveScoringExtension(self): + """ + unloadMoveScoringExtension() + + Unload a previously configured move-scoring extension and restore the + built-in modularity scorer. + """ + (<_ParallelLeidenView*>(self._this)).unloadMoveScoringExtension() + return self + cdef extern from "": cdef cppclass _LouvainMapEquation "NetworKit::LouvainMapEquation"(_CommunityDetectionAlgorithm): _LouvainMapEquation(_Graph, bool, count, string ) except + @@ -2257,4 +2281,4 @@ class SpectralPartitioner: networkit.Partition The resulting partition. Only valid if :code:`run()` was called before. """ - return self.partition \ No newline at end of file + return self.partition diff --git a/networkit/cpp/community/CMakeLists.txt b/networkit/cpp/community/CMakeLists.txt index 80fba0ea2..cff72c43f 100644 --- a/networkit/cpp/community/CMakeLists.txt +++ b/networkit/cpp/community/CMakeLists.txt @@ -43,5 +43,22 @@ networkit_add_module(community networkit_module_link_modules(community auxiliary coarsening components flow graph matching scd structures) -add_subdirectory(test) +add_library(networkit_parallel_leiden_modularity_extension SHARED + ParallelLeidenModularityScoringExtension.cpp) +target_include_directories(networkit_parallel_leiden_modularity_extension BEFORE PUBLIC + "${PROJECT_SOURCE_DIR}/include") +set_target_properties(networkit_parallel_leiden_modularity_extension PROPERTIES + OUTPUT_NAME "networkit_parallel_leiden_modularity_extension" + CXX_STANDARD ${NETWORKIT_CXX_STANDARD} + CXX_STANDARD_REQUIRED YES) + +add_library(networkit_parallel_leiden_cpm_extension SHARED + ParallelLeidenCPMScoringExtension.cpp) +target_include_directories(networkit_parallel_leiden_cpm_extension BEFORE PUBLIC + "${PROJECT_SOURCE_DIR}/include") +set_target_properties(networkit_parallel_leiden_cpm_extension PROPERTIES + OUTPUT_NAME "networkit_parallel_leiden_cpm_extension" + CXX_STANDARD ${NETWORKIT_CXX_STANDARD} + CXX_STANDARD_REQUIRED YES) +add_subdirectory(test) diff --git a/networkit/cpp/community/ParallelLeidenCPMScoringExtension.cpp b/networkit/cpp/community/ParallelLeidenCPMScoringExtension.cpp new file mode 100644 index 000000000..d434fb834 --- /dev/null +++ b/networkit/cpp/community/ParallelLeidenCPMScoringExtension.cpp @@ -0,0 +1,65 @@ +/* + * ParallelLeidenCPMScoringExtension.cpp + * + * Constant Potts Model scorer exported through the ParallelLeidenView extension ABI. + */ + +#include + +extern "C" double networkitParallelLeidenCommunityScore(double cutWeight, double degree, + double communityVolume, + NetworKit::count subsetSize, + NetworKit::count communitySize, + double gamma, + double inverseGraphVolume) { + (void)degree; + (void)communityVolume; + (void)inverseGraphVolume; + return cutWeight + - gamma * static_cast(subsetSize) * static_cast(communitySize); +} + +extern "C" double networkitParallelLeidenCurrentCommunityThreshold(double cutWeight, double degree, + double communityVolume, + NetworKit::count subsetSize, + NetworKit::count communitySize, + double gamma, + double inverseGraphVolume) { + (void)degree; + (void)communityVolume; + (void)inverseGraphVolume; + return cutWeight - gamma * static_cast(subsetSize) + * static_cast(communitySize - subsetSize); +} + +extern "C" bool networkitParallelLeidenRefineRSetCondition(double cutWeight, double subsetVolume, + NetworKit::count subsetSize, + double targetVolume, + NetworKit::count targetSize, + double sourceVolume, + NetworKit::count sourceSize, + double gamma, + double inverseGraphVolume) { + (void)subsetVolume; + (void)targetVolume; + (void)sourceVolume; + (void)sourceSize; + (void)inverseGraphVolume; + return cutWeight >= gamma * static_cast(subsetSize) * static_cast(targetSize); +} + +extern "C" bool networkitParallelLeidenRefineTSetCondition(double cutWeight, double subsetVolume, + NetworKit::count subsetSize, + double targetVolume, + NetworKit::count targetSize, + double sourceVolume, + NetworKit::count sourceSize, + double gamma, + double inverseGraphVolume) { + (void)subsetVolume; + (void)targetVolume; + (void)sourceVolume; + (void)sourceSize; + (void)inverseGraphVolume; + return cutWeight >= gamma * static_cast(subsetSize) * static_cast(targetSize); +} diff --git a/networkit/cpp/community/ParallelLeidenModularityScoringExtension.cpp b/networkit/cpp/community/ParallelLeidenModularityScoringExtension.cpp new file mode 100644 index 000000000..adf78fbb5 --- /dev/null +++ b/networkit/cpp/community/ParallelLeidenModularityScoringExtension.cpp @@ -0,0 +1,59 @@ +/* + * ParallelLeidenModularityScoringExtension.cpp + * + * Default modularity scorer exported through the ParallelLeidenView extension ABI. + */ + +#include + +extern "C" double networkitParallelLeidenCommunityScore(double cutWeight, double degree, + double communityVolume, + NetworKit::count subsetSize, + NetworKit::count communitySize, + double gamma, + double inverseGraphVolume) { + (void)subsetSize; + (void)communitySize; + return cutWeight - gamma * degree * communityVolume * inverseGraphVolume; +} + +extern "C" double networkitParallelLeidenCurrentCommunityThreshold(double cutWeight, double degree, + double communityVolume, + NetworKit::count subsetSize, + NetworKit::count communitySize, + double gamma, + double inverseGraphVolume) { + (void)subsetSize; + (void)communitySize; + return cutWeight - gamma * (communityVolume - degree) * degree * inverseGraphVolume; +} + +extern "C" bool networkitParallelLeidenRefineRSetCondition(double cutWeight, double subsetVolume, + NetworKit::count subsetSize, + double targetVolume, + NetworKit::count targetSize, + double sourceVolume, + NetworKit::count sourceSize, + double gamma, + double inverseGraphVolume) { + (void)subsetSize; + (void)targetSize; + (void)sourceVolume; + (void)sourceSize; + return cutWeight >= gamma * subsetVolume * targetVolume * inverseGraphVolume; +} + +extern "C" bool networkitParallelLeidenRefineTSetCondition(double cutWeight, double subsetVolume, + NetworKit::count subsetSize, + double targetVolume, + NetworKit::count targetSize, + double sourceVolume, + NetworKit::count sourceSize, + double gamma, + double inverseGraphVolume) { + (void)subsetSize; + (void)targetSize; + (void)sourceVolume; + (void)sourceSize; + return cutWeight >= gamma * subsetVolume * targetVolume * inverseGraphVolume; +} diff --git a/networkit/cpp/community/ParallelLeidenView.cpp b/networkit/cpp/community/ParallelLeidenView.cpp index 6769c2090..22a2f218c 100644 --- a/networkit/cpp/community/ParallelLeidenView.cpp +++ b/networkit/cpp/community/ParallelLeidenView.cpp @@ -6,9 +6,23 @@ #include #include +#ifndef _WIN32 +#include +#endif +#include namespace NetworKit { +count ParallelLeidenView::nodeSize(const Graph &graph, node u) { + tlx::unused(graph); + tlx::unused(u); + return 1; +} + +count ParallelLeidenView::nodeSize(const CoarsenedGraphView &graph, node u) { + return graph.getOriginalNodes(u).size(); +} + ParallelLeidenView::ParallelLeidenView(const Graph &graph, int iterations, bool randomize, double gamma) : CommunityDetectionAlgorithm(graph), gamma(gamma), numberOfIterations(iterations), @@ -56,13 +70,88 @@ ParallelLeidenView::ParallelLeidenView(const Graph &graph, int iterations, bool // Keep default if parsing fails. } } + if (const char *scoringLibEnv = std::getenv("NETWORKIT_LEIDEN_MOVE_SCORING_LIB")) { + loadMoveScoringExtension(scoringLibEnv); + } } ParallelLeidenView::~ParallelLeidenView() { + unloadMoveScoringExtension(); currentCoarsenedView.reset(); composedMapping.clear(); composedMapping.shrink_to_fit(); communityVolumes.clear(); + communitySizes.clear(); +} + +void ParallelLeidenView::loadMoveScoringExtension(const std::string &sharedLibraryPath) { +#ifdef _WIN32 + throw std::runtime_error( + "ParallelLeidenView shared-library scoring extensions are not supported on Windows"); +#else + void *handle = dlopen(sharedLibraryPath.c_str(), RTLD_NOW | RTLD_LOCAL); + if (handle == nullptr) { + throw std::runtime_error("Failed to load ParallelLeidenView scoring extension '" + + sharedLibraryPath + "': " + dlerror()); + } + + dlerror(); + auto *communityScore = reinterpret_cast( + dlsym(handle, "networkitParallelLeidenCommunityScore")); + const char *communityScoreError = dlerror(); + if (communityScoreError != nullptr || communityScore == nullptr) { + dlclose(handle); + throw std::runtime_error( + "ParallelLeidenView scoring extension '" + sharedLibraryPath + + "' does not export required symbol networkitParallelLeidenCommunityScore"); + } + + dlerror(); + auto *thresholdScore = reinterpret_cast( + dlsym(handle, "networkitParallelLeidenCurrentCommunityThreshold")); + const char *thresholdScoreError = dlerror(); + if (thresholdScoreError != nullptr) { + thresholdScore = &modularityThresholdScore; + } + + unloadMoveScoringExtension(); + scoringExtensionHandle_ = handle; + communityScoreFunction_ = communityScore; + currentCommunityThresholdFunction_ = thresholdScore != nullptr ? thresholdScore + : &modularityThresholdScore; + dlerror(); + auto *refineRSet = reinterpret_cast( + dlsym(handle, "networkitParallelLeidenRefineRSetCondition")); + const char *refineRSetError = dlerror(); + refineRSetConditionFunction_ = + refineRSetError == nullptr && refineRSet != nullptr ? refineRSet + : &modularityRefineRSetCondition; + + dlerror(); + auto *refineTSet = reinterpret_cast( + dlsym(handle, "networkitParallelLeidenRefineTSetCondition")); + const char *refineTSetError = dlerror(); + refineTSetConditionFunction_ = + refineTSetError == nullptr && refineTSet != nullptr ? refineTSet + : &modularityRefineTSetCondition; + scoringExtensionPath_ = sharedLibraryPath; +#endif +} + +void ParallelLeidenView::unloadMoveScoringExtension() { + communityScoreFunction_ = &modularityCommunityScore; + currentCommunityThresholdFunction_ = &modularityThresholdScore; + refineRSetConditionFunction_ = &modularityRefineRSetCondition; + refineTSetConditionFunction_ = &modularityRefineTSetCondition; + scoringExtensionPath_.clear(); +#ifndef _WIN32 + if (scoringExtensionHandle_ != nullptr) { + dlclose(scoringExtensionHandle_); + scoringExtensionHandle_ = nullptr; + } +#else + scoringExtensionHandle_ = nullptr; +#endif } void ParallelLeidenView::run() { @@ -248,15 +337,20 @@ void ParallelLeidenView::calculateVolumes(const GraphType &graph) { // thread safe reduction. Avoid atomic calculation of total graph volume for unweighted graphs. communityVolumes.clear(); + communitySizes.clear(); communityVolumes.resize(result.upperBound() + VECTOR_OVERSIZE); + communitySizes.resize(result.upperBound() + VECTOR_OVERSIZE); inverseGraphVolume = 0.0; // Reset to 0 before accumulation if (graph.isWeighted()) { std::vector threadVolumes(omp_get_max_threads()); graph.parallelForNodes([&](node a) { edgeweight ew = graph.weightedDegree(a, true); + count size = nodeSize(graph, a); #pragma omp atomic communityVolumes[result[a]] += ew; +#pragma omp atomic + communitySizes[result[a]] += size; threadVolumes[omp_get_thread_num()] += ew; }); for (const auto vol : threadVolumes) { @@ -266,8 +360,11 @@ void ParallelLeidenView::calculateVolumes(const GraphType &graph) { } else { inverseGraphVolume = 1.0 / (2 * graph.numberOfEdges()); graph.parallelForNodes([&](node a) { + count size = nodeSize(graph, a); #pragma omp atomic communityVolumes[result[a]] += graph.weightedDegree(a, true); +#pragma omp atomic + communitySizes[result[a]] += size; }); } TRACE("Calculating Volumes took " + timer.elapsedTag()); @@ -372,6 +469,7 @@ ParallelLeidenView::MoveStats ParallelLeidenView::parallelMove(const GraphType & double maxDelta = std::numeric_limits::lowest(); index bestCommunity = none; double degree = 0; + count nodeMass = nodeSize(graph, u); for (auto z : pointers) { // Reset the clearlist : Set all cutweights to 0 and clear the pointer vector cutWeights[z] = 0; @@ -394,33 +492,34 @@ ParallelLeidenView::MoveStats ParallelLeidenView::parallelMove(const GraphType & if (pointers.empty()) continue; - // Determine Modularity delta for all neighbor communities + double singletonScore = scoreCommunity(0.0, degree, 0.0, nodeMass, 0); + + // Determine move score for all neighbor communities for (auto community : pointers) { // "Moving" a node to its current community is pointless if (community != currentCommunity) { double delta; - delta = modularityDelta(cutWeights[community], degree, - communityVolumes[community]); + delta = scoreCommunity(cutWeights[community], degree, + communityVolumes[community], nodeMass, + communitySizes[community]); if (delta > maxDelta) { maxDelta = delta; bestCommunity = community; } } } - double modThreshold = modularityThreshold( - cutWeights[currentCommunity], communityVolumes[currentCommunity], degree); - - if (0 > modThreshold || maxDelta > modThreshold) { + double modThreshold = scoreCurrentCommunityThreshold( + cutWeights[currentCommunity], degree, communityVolumes[currentCommunity], + nodeMass, + communitySizes[currentCommunity]); + + bool singletonMove = singletonScore > maxDelta; + double selectedScore = singletonMove ? singletonScore : maxDelta; + if (selectedScore > modThreshold) { bool acceptedMove = false; - bool singletonMove = (0 > maxDelta); double gainMargin = 0.0; - if (singletonMove) { - gainMargin = -modThreshold; - acceptedMove = (0 > modThreshold); - } else { - gainMargin = maxDelta - modThreshold; - acceptedMove = (maxDelta > modThreshold); - } + gainMargin = selectedScore - modThreshold; + acceptedMove = true; if (acceptedMove && gainMargin <= moveGainMarginEpsilon) { marginalMovesRejected[omp_get_thread_num()]++; acceptedMove = false; @@ -444,6 +543,7 @@ ParallelLeidenView::MoveStats ParallelLeidenView::parallelMove(const GraphType & } // all other threads are yielding, so resize is fine communityVolumes.resize(vectorSize); + communitySizes.resize(vectorSize); expected = true; resize.compare_exchange_strong(expected, false); } else { @@ -464,6 +564,10 @@ ParallelLeidenView::MoveStats ParallelLeidenView::parallelMove(const GraphType & communityVolumes[bestCommunity] += degree; #pragma omp atomic communityVolumes[currentCommunity] -= degree; +#pragma omp atomic + communitySizes[bestCommunity] += nodeMass; +#pragma omp atomic + communitySizes[currentCommunity] -= nodeMass; changed = true; bool expected = true; inQueue[u].compare_exchange_strong(expected, false); @@ -572,6 +676,7 @@ Partition ParallelLeidenView::parallelRefine(const GraphType &graph) { std::vector singleton(refined.upperBound(), true); std::vector cutCtoSminusC(refined.upperBound()); std::vector refinedVolumes(refined.upperBound()); // Community Volumes P_refined + std::vector refinedSizes(refined.upperBound()); std::vector locks(refined.upperBound()); std::vector nodes(graph.upperNodeIdBound(), none); @@ -585,6 +690,7 @@ Partition ParallelLeidenView::parallelRefine(const GraphType &graph) { for (omp_index u = 0; u < static_cast(graph.upperNodeIdBound()); u++) { if (graph.hasNode(u)) { nodes[u] = u; + refinedSizes[u] = nodeSize(graph, u); graph.forNeighborsOf(u, [&](node neighbor, edgeweight ew) { if (u != neighbor) { if (result[neighbor] == result[u]) { @@ -628,6 +734,7 @@ Partition ParallelLeidenView::parallelRefine(const GraphType &graph) { // Nodes whose community ID equals their Node ID. These may be singletons that can // affect the cut which we need to update later double degree = 0; + count subsetSize = nodeSize(graph, u); graph.forNeighborsOf(u, [&](node neighbor, edgeweight ew) { // Calculate degree and cut degree += ew; @@ -647,8 +754,9 @@ Partition ParallelLeidenView::parallelRefine(const GraphType &graph) { degree += ew; } }); - if (cutCtoSminusC[u] < this->gamma * degree * (communityVolumes[S] - degree) - * inverseGraphVolume) { // R-Set Condition + if (!refineRSetCondition(cutCtoSminusC[u], degree, subsetSize, + communityVolumes[S] - degree, communitySizes[S] - subsetSize, + communityVolumes[S], communitySizes[S])) { continue; } @@ -656,6 +764,7 @@ Partition ParallelLeidenView::parallelRefine(const GraphType &graph) { continue; } + double singletonScore = scoreCommunity(0.0, degree, 0.0, subsetSize, 0); double delta; index bestC = none; double bestDelta = std::numeric_limits::lowest(); @@ -667,16 +776,20 @@ Partition ParallelLeidenView::parallelRefine(const GraphType &graph) { if (C == none) { continue; } - delta = modularityDelta(cutWeights[C], degree, refinedVolumes[C]); + delta = scoreCommunity(cutWeights[C], degree, refinedVolumes[C], subsetSize, + refinedSizes[C]); - if (delta < 0) { // modThreshold is 0, since cutw(v,C-) = 0 and volw(C-) = 0 + if (delta <= singletonScore) { continue; } auto volC = refinedVolumes[C]; + auto sizeC = refinedSizes[C]; if (delta > bestDelta - && cutCtoSminusC[C] >= this->gamma * volC * (communityVolumes[S] - volC) - * inverseGraphVolume) { // T-Set Condition + && refineTSetCondition(cutCtoSminusC[C], volC, sizeC, + communityVolumes[S] - volC, + communitySizes[S] - sizeC, communityVolumes[S], + communitySizes[S])) { bestDelta = delta; bestC = C; idx = j; @@ -746,6 +859,7 @@ Partition ParallelLeidenView::parallelRefine(const GraphType &graph) { singleton[bestC] = false; refined[u] = bestC; refinedVolumes[bestC] += degree; + refinedSizes[bestC] += subsetSize; updateCut(); cutCtoSminusC[bestC] += cutCtoSminusC[u] - 2 * cutWeights[bestC]; } diff --git a/pyproject.toml b/pyproject.toml index 2057ecba8..291874e22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "icebug" version = "12.5" description = "High performance graph analytics backed by read-only memory" readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.10" license = {file = "License.txt"} keywords =["graph algorithm", "network analysis", "social network", "apache arrow", "columnar"] authors = [ @@ -37,7 +37,7 @@ classifiers = [ dependencies = [ "scipy", "numpy", - "pyarrow" + "pyarrow>=23.0.1,<24.0" ] [project.optional-dependencies]