From 177aac26c772418407c4e413cbeed7be0528a4fd Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 15:01:12 -0400 Subject: [PATCH 01/19] bump to TF 2.12 Signed-off-by: Jinzhe Zeng --- .github/workflows/build_cc.yml | 2 +- .github/workflows/build_wheel.yml | 10 +++++----- .github/workflows/test_cc.yml | 2 +- .github/workflows/test_python.yml | 10 ++-------- setup.py | 5 +++-- source/install/build_cc.sh | 5 +---- source/install/docker_package_c.sh | 4 ++-- source/install/test_cc_local.sh | 4 ++-- 8 files changed, 17 insertions(+), 25 deletions(-) diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml index 0ebb80fa98..a324bf5903 100644 --- a/.github/workflows/build_cc.yml +++ b/.github/workflows/build_cc.yml @@ -6,7 +6,7 @@ jobs: buildcc: name: Build C++ runs-on: ubuntu-latest - container: ghcr.io/deepmodeling/deepmd-kit-test-cc:latest + container: ghcr.io/deepmodeling/deepmd-kit-test-environment:py3.11-gcc8-tf strategy: matrix: include: diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index e11ae58bbe..9729aac14e 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -14,22 +14,22 @@ jobs: include: # linux-64 - os: ubuntu-latest - python: 310 + python: 311 platform_id: manylinux_x86_64 dp_variant: cuda # macos-x86-64 - os: macos-latest - python: 310 + python: 311 platform_id: macosx_x86_64 dp_variant: cpu # win-64 - os: windows-2019 - python: 310 + python: 311 platform_id: win_amd64 dp_variant: cpu # linux-aarch64 - os: ubuntu-latest - python: 310 + python: 311 platform_id: manylinux_aarch64 dp_variant: cpu steps: @@ -61,7 +61,7 @@ jobs: - uses: actions/setup-python@v4 name: Install Python with: - python-version: '3.10' + python-version: '3.11' - run: python -m pip install build - name: Build sdist run: python -m build --sdist diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml index a6eb4727f1..559d2fdeb2 100644 --- a/.github/workflows/test_cc.yml +++ b/.github/workflows/test_cc.yml @@ -6,7 +6,7 @@ jobs: testcc: name: Test C++ runs-on: ubuntu-latest - container: ghcr.io/deepmodeling/deepmd-kit-test-cc:latest + container: ghcr.io/deepmodeling/deepmd-kit-test-environment:py3.11-gcc8-tf steps: - name: work around permission issue run: git config --global --add safe.directory /__w/deepmd-kit/deepmd-kit diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index c690e16328..3a6c500554 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -21,16 +21,10 @@ jobs: - python: 3.7 gcc: 8 tf: 1.14 - - python: 3.7 - gcc: 5 - tf: - - python: 3.7 + - python: 3.8 gcc: 8 tf: - - python: "3.10" - gcc: 5 - tf: - - python: "3.10" + - python: "3.11" gcc: 8 tf: diff --git a/setup.py b/setup.py index f96c119186..e0838e3e90 100644 --- a/setup.py +++ b/setup.py @@ -65,8 +65,7 @@ tf_install_dir, _ = find_tensorflow() tf_version = get_tf_version(tf_install_dir) -# TODO: change to "tf_version == "" or" after tensorflow 2.12 is released -if tf_version != "" and Version(tf_version) >= Version("2.12"): +if tf_version == "" or Version(tf_version) >= Version("2.12"): find_libpython_requires = [] else: find_libpython_requires = ["find_libpython"] @@ -76,6 +75,8 @@ class bdist_wheel_abi3(bdist_wheel): def get_tag(self): python, abi, plat = super().get_tag() if python.startswith("cp"): + if tf_version == "" or Version(tf_version) >= Version("2.12"): + return "py38", "none", plat return "py37", "none", plat return python, abi, plat diff --git a/source/install/build_cc.sh b/source/install/build_cc.sh index 9aaa51bab8..55f4730ac5 100755 --- a/source/install/build_cc.sh +++ b/source/install/build_cc.sh @@ -11,9 +11,6 @@ SCRIPT_PATH=$(dirname $(realpath -s $0)) if [ -z "$INSTALL_PREFIX" ]; then INSTALL_PREFIX=$(realpath -s ${SCRIPT_PATH}/../../dp) fi -if [ -z "$DOWNLOAD_TENSORFLOW" ]; then - DOWNLOAD_TENSORFLOW=TRUE -fi mkdir -p ${INSTALL_PREFIX} echo "Installing DeePMD-kit to ${INSTALL_PREFIX}" NPROC=$(nproc --all) @@ -23,7 +20,7 @@ NPROC=$(nproc --all) BUILD_TMP_DIR=${SCRIPT_PATH}/../build mkdir -p ${BUILD_TMP_DIR} cd ${BUILD_TMP_DIR} -cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DINSTALL_TENSORFLOW=${DOWNLOAD_TENSORFLOW} ${CUDA_ARGS} -DLAMMPS_VERSION=stable_23Jun2022_update3 .. +cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DUSE_TF_PYTHON_LIBS=TRUE ${CUDA_ARGS} -DLAMMPS_VERSION=stable_23Jun2022_update3 .. make -j${NPROC} make install diff --git a/source/install/docker_package_c.sh b/source/install/docker_package_c.sh index 95291dfbd5..86dd6d4620 100755 --- a/source/install/docker_package_c.sh +++ b/source/install/docker_package_c.sh @@ -3,7 +3,7 @@ set -e SCRIPT_PATH=$(dirname $(realpath -s $0)) docker run --rm -v ${SCRIPT_PATH}/../..:/root/deepmd-kit -w /root/deepmd-kit \ - ghcr.io/deepmodeling/libtensorflow_cc:2.9.2_cuda11.6_centos7_cmake \ - /bin/sh -c "source /opt/rh/devtoolset-10/enable \ + tensorflow/build:2.13-python3.11 \ + /bin/sh -c "pip install tensorflow cmake \ && cd /root/deepmd-kit/source/install \ && /bin/sh package_c.sh" diff --git a/source/install/test_cc_local.sh b/source/install/test_cc_local.sh index a48fdbd4bf..bf3e3587fa 100755 --- a/source/install/test_cc_local.sh +++ b/source/install/test_cc_local.sh @@ -7,12 +7,12 @@ NPROC=$(nproc --all) #------------------ -echo "try to find tensorflow in ${tensorflow_root}" +echo "try to find tensorflow in the Python environment" INSTALL_PREFIX=${SCRIPT_PATH}/../../dp_test BUILD_TMP_DIR=${SCRIPT_PATH}/../build_tests mkdir -p ${BUILD_TMP_DIR} cd ${BUILD_TMP_DIR} -cmake -DINSTALL_TENSORFLOW=FALSE -DTENSORFLOW_ROOT=${tensorflow_root} -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_23Jun2022_update3 .. +cmake -DINSTALL_TENSORFLOW=FALSE -DDUSE_TF_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_23Jun2022_update3 .. make -j${NPROC} make install From 99489942ae84cd6cdc1096935ef7ae804a30478b Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 15:06:45 -0400 Subject: [PATCH 02/19] use TF python library for package_c Signed-off-by: Jinzhe Zeng --- source/install/package_c.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/source/install/package_c.sh b/source/install/package_c.sh index ce188425c9..3181a03e1d 100755 --- a/source/install/package_c.sh +++ b/source/install/package_c.sh @@ -19,6 +19,7 @@ cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ -DUSE_CUDA_TOOLKIT=TRUE \ -DOP_CXX_ABI=0 \ -DPACKAGE_C=TRUE \ + -DUSE_TF_PYTHON_LIBS=TRUE \ .. make -j${NPROC} make install From 37c70f82f30b27fc543d990098e498385f263722 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 15:30:05 -0400 Subject: [PATCH 03/19] remove OP_CXX_ABI flag Signed-off-by: Jinzhe Zeng --- source/install/package_c.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/source/install/package_c.sh b/source/install/package_c.sh index 3181a03e1d..6a3371eeda 100755 --- a/source/install/package_c.sh +++ b/source/install/package_c.sh @@ -17,7 +17,6 @@ mkdir -p ${BUILD_TMP_DIR} cd ${BUILD_TMP_DIR} cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ -DUSE_CUDA_TOOLKIT=TRUE \ - -DOP_CXX_ABI=0 \ -DPACKAGE_C=TRUE \ -DUSE_TF_PYTHON_LIBS=TRUE \ .. From c6208d2db5675fdc60685e0c37ddf16447e1af97 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 15:55:02 -0400 Subject: [PATCH 04/19] set CC and CXX Signed-off-by: Jinzhe Zeng --- source/install/docker_package_c.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/install/docker_package_c.sh b/source/install/docker_package_c.sh index 86dd6d4620..379e248396 100755 --- a/source/install/docker_package_c.sh +++ b/source/install/docker_package_c.sh @@ -6,4 +6,6 @@ docker run --rm -v ${SCRIPT_PATH}/../..:/root/deepmd-kit -w /root/deepmd-kit \ tensorflow/build:2.13-python3.11 \ /bin/sh -c "pip install tensorflow cmake \ && cd /root/deepmd-kit/source/install \ + && export CC = /dt9/usr/bin/gcc \ + && export CXX = /dt9/usr/bin/g++ \ && /bin/sh package_c.sh" From e008e2582417cfb947ac402f4752653112586c35 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 15:56:27 -0400 Subject: [PATCH 05/19] fix errors Signed-off-by: Jinzhe Zeng --- .github/workflows/build_cc.yml | 2 +- source/install/test_cc_local.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml index a324bf5903..f2b0945d7e 100644 --- a/.github/workflows/build_cc.yml +++ b/.github/workflows/build_cc.yml @@ -39,7 +39,7 @@ jobs: env: DEBIAN_FRONTEND: noninteractive - run: | - apt-get update && apt-get install -y gnupg2 \ + apt-get update && apt-get install -y gnupg2 curl \ && echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/5.3/ jammy main' | tee /etc/apt/sources.list.d/rocm.list \ && printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | tee /etc/apt/preferences.d/rocm-pin-600 \ && curl -s https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - \ diff --git a/source/install/test_cc_local.sh b/source/install/test_cc_local.sh index bf3e3587fa..b4594a569c 100755 --- a/source/install/test_cc_local.sh +++ b/source/install/test_cc_local.sh @@ -12,7 +12,7 @@ INSTALL_PREFIX=${SCRIPT_PATH}/../../dp_test BUILD_TMP_DIR=${SCRIPT_PATH}/../build_tests mkdir -p ${BUILD_TMP_DIR} cd ${BUILD_TMP_DIR} -cmake -DINSTALL_TENSORFLOW=FALSE -DDUSE_TF_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_23Jun2022_update3 .. +cmake -DINSTALL_TENSORFLOW=FALSE -DUSE_TF_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_23Jun2022_update3 .. make -j${NPROC} make install From cdb85fe12e585a1c09dd2044aa0b90526b489dd3 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 16:23:59 -0400 Subject: [PATCH 06/19] bugfix Signed-off-by: Jinzhe Zeng --- .github/workflows/test_cc.yml | 2 +- source/install/docker_package_c.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml index 559d2fdeb2..45f2fe2fb5 100644 --- a/.github/workflows/test_cc.yml +++ b/.github/workflows/test_cc.yml @@ -18,7 +18,7 @@ jobs: TF_INTER_OP_PARALLELISM_THREADS: 1 tensorflow_root: /usr/local # test lammps - - run: apt-get update && apt-get install -y python3-pip python3-venv + - run: apt-get update && apt-get install -y python3-pip python3-venv curl - run: source/install/build_lammps.sh - run: | python -m pip install -U pip diff --git a/source/install/docker_package_c.sh b/source/install/docker_package_c.sh index 379e248396..ca75f97055 100755 --- a/source/install/docker_package_c.sh +++ b/source/install/docker_package_c.sh @@ -6,6 +6,6 @@ docker run --rm -v ${SCRIPT_PATH}/../..:/root/deepmd-kit -w /root/deepmd-kit \ tensorflow/build:2.13-python3.11 \ /bin/sh -c "pip install tensorflow cmake \ && cd /root/deepmd-kit/source/install \ - && export CC = /dt9/usr/bin/gcc \ - && export CXX = /dt9/usr/bin/g++ \ - && /bin/sh package_c.sh" + && CC = /dt9/usr/bin/gcc \ + CXX = /dt9/usr/bin/g++ \ + /bin/sh package_c.sh" From 130f4a433bc1f0515095828997f7eaae8f48f38e Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 16:40:20 -0400 Subject: [PATCH 07/19] do not use containers for cc Signed-off-by: Jinzhe Zeng --- .github/workflows/build_cc.yml | 7 ++++--- .github/workflows/test_cc.yml | 8 ++++---- source/install/docker_package_c.sh | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml index f2b0945d7e..25d0c34132 100644 --- a/.github/workflows/build_cc.yml +++ b/.github/workflows/build_cc.yml @@ -6,7 +6,6 @@ jobs: buildcc: name: Build C++ runs-on: ubuntu-latest - container: ghcr.io/deepmodeling/deepmd-kit-test-environment:py3.11-gcc8-tf strategy: matrix: include: @@ -21,11 +20,13 @@ jobs: - variant: clang dp_variant: clang steps: - - name: work around permission issue - run: git config --global --add safe.directory /__w/deepmd-kit/deepmd-kit - uses: actions/checkout@v3 with: submodules: true + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + - run: python -m pip install tensorflow - run: apt-get update && apt-get install -y nvidia-cuda-toolkit if: matrix.variant == 'cuda' - run: | diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml index 45f2fe2fb5..0c9b2ed487 100644 --- a/.github/workflows/test_cc.yml +++ b/.github/workflows/test_cc.yml @@ -6,11 +6,12 @@ jobs: testcc: name: Test C++ runs-on: ubuntu-latest - container: ghcr.io/deepmodeling/deepmd-kit-test-environment:py3.11-gcc8-tf steps: - - name: work around permission issue - run: git config --global --add safe.directory /__w/deepmd-kit/deepmd-kit - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + - run: python -m pip install tensorflow - run: source/install/test_cc_local.sh env: OMP_NUM_THREADS: 1 @@ -18,7 +19,6 @@ jobs: TF_INTER_OP_PARALLELISM_THREADS: 1 tensorflow_root: /usr/local # test lammps - - run: apt-get update && apt-get install -y python3-pip python3-venv curl - run: source/install/build_lammps.sh - run: | python -m pip install -U pip diff --git a/source/install/docker_package_c.sh b/source/install/docker_package_c.sh index ca75f97055..d6fb269acd 100755 --- a/source/install/docker_package_c.sh +++ b/source/install/docker_package_c.sh @@ -6,6 +6,6 @@ docker run --rm -v ${SCRIPT_PATH}/../..:/root/deepmd-kit -w /root/deepmd-kit \ tensorflow/build:2.13-python3.11 \ /bin/sh -c "pip install tensorflow cmake \ && cd /root/deepmd-kit/source/install \ - && CC = /dt9/usr/bin/gcc \ - CXX = /dt9/usr/bin/g++ \ + && CC=/dt9/usr/bin/gcc \ + CXX=/dt9/usr/bin/g++ \ /bin/sh package_c.sh" From 92a6206a6633661742638f42f320d3bd1ec7dcf8 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 16:50:02 -0400 Subject: [PATCH 08/19] fix permission Signed-off-by: Jinzhe Zeng --- .github/workflows/build_cc.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml index 25d0c34132..762f9d6dfa 100644 --- a/.github/workflows/build_cc.yml +++ b/.github/workflows/build_cc.yml @@ -27,27 +27,27 @@ jobs: with: python-version: '3.11' - run: python -m pip install tensorflow - - run: apt-get update && apt-get install -y nvidia-cuda-toolkit + - run: sudo apt-get update && apt-get install -y nvidia-cuda-toolkit if: matrix.variant == 'cuda' - run: | - apt-get update \ + sudo apt-get update \ && apt-get -y install wget \ && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \ && dpkg -i cuda-keyring_1.0-1_all.deb \ - && apt-get update \ - && apt-get -y install cuda-12-0 + && sudo apt-get update \ + && sudo apt-get -y install cuda-12-0 if: matrix.variant == 'cuda120' env: DEBIAN_FRONTEND: noninteractive - run: | - apt-get update && apt-get install -y gnupg2 curl \ + sudo apt-get update && apt-get install -y gnupg2 curl \ && echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/5.3/ jammy main' | tee /etc/apt/sources.list.d/rocm.list \ && printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | tee /etc/apt/preferences.d/rocm-pin-600 \ && curl -s https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - \ - && apt-get update \ - && apt-get install -y rocm-dev hipcub-dev + && sudo apt-get update \ + && sudo apt-get install -y rocm-dev hipcub-dev if: matrix.variant == 'rocm' - - run: apt-get update && apt-get install -y clang + - run: sudo apt-get update && apt-get install -y clang if: matrix.variant == 'clang' - run: source/install/build_cc.sh env: From 2e3411799d79ff43a7a811f452bd348c80f439fa Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 16:56:39 -0400 Subject: [PATCH 09/19] upload before test Signed-off-by: Jinzhe Zeng --- .github/workflows/package_c.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml index 6cc5ed8dc8..982dfc44a2 100644 --- a/.github/workflows/package_c.yml +++ b/.github/workflows/package_c.yml @@ -12,13 +12,13 @@ jobs: - uses: actions/checkout@v3 - name: Package C library run: ./source/install/docker_package_c.sh - - name: Test C library - run: ./source/install/docker_test_package_c.sh # for download and debug - name: Upload artifact uses: actions/upload-artifact@v3 with: path: ./libdeepmd_c.tar.gz + - name: Test C library + run: ./source/install/docker_test_package_c.sh - name: Release uses: softprops/action-gh-release@v1 if: startsWith(github.ref, 'refs/tags/') From 6ba945a0c5ea603d3d36b855628e46fa035806d2 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 17:00:37 -0400 Subject: [PATCH 10/19] fix permission Signed-off-by: Jinzhe Zeng --- .github/workflows/build_cc.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml index 762f9d6dfa..6b715525cf 100644 --- a/.github/workflows/build_cc.yml +++ b/.github/workflows/build_cc.yml @@ -27,7 +27,7 @@ jobs: with: python-version: '3.11' - run: python -m pip install tensorflow - - run: sudo apt-get update && apt-get install -y nvidia-cuda-toolkit + - run: sudo apt-get update && sudo apt-get install -y nvidia-cuda-toolkit if: matrix.variant == 'cuda' - run: | sudo apt-get update \ @@ -40,14 +40,14 @@ jobs: env: DEBIAN_FRONTEND: noninteractive - run: | - sudo apt-get update && apt-get install -y gnupg2 curl \ + sudo apt-get update && sudo apt-get install -y gnupg2 \ && echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/5.3/ jammy main' | tee /etc/apt/sources.list.d/rocm.list \ && printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | tee /etc/apt/preferences.d/rocm-pin-600 \ && curl -s https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - \ && sudo apt-get update \ && sudo apt-get install -y rocm-dev hipcub-dev if: matrix.variant == 'rocm' - - run: sudo apt-get update && apt-get install -y clang + - run: sudo apt-get update && sudo apt-get install -y clang if: matrix.variant == 'clang' - run: source/install/build_cc.sh env: From 2844c69bc7cf6b6a435e34b4d6529019705b0a54 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 17:17:10 -0400 Subject: [PATCH 11/19] fix permission Signed-off-by: Jinzhe Zeng --- .github/workflows/build_cc.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml index 6b715525cf..8186f93ae4 100644 --- a/.github/workflows/build_cc.yml +++ b/.github/workflows/build_cc.yml @@ -30,9 +30,7 @@ jobs: - run: sudo apt-get update && sudo apt-get install -y nvidia-cuda-toolkit if: matrix.variant == 'cuda' - run: | - sudo apt-get update \ - && apt-get -y install wget \ - && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \ + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \ && dpkg -i cuda-keyring_1.0-1_all.deb \ && sudo apt-get update \ && sudo apt-get -y install cuda-12-0 @@ -40,8 +38,7 @@ jobs: env: DEBIAN_FRONTEND: noninteractive - run: | - sudo apt-get update && sudo apt-get install -y gnupg2 \ - && echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/5.3/ jammy main' | tee /etc/apt/sources.list.d/rocm.list \ + echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/5.3/ jammy main' | tee /etc/apt/sources.list.d/rocm.list \ && printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | tee /etc/apt/preferences.d/rocm-pin-600 \ && curl -s https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - \ && sudo apt-get update \ From 0c8f50684604693fedf46487ac396c3f750b8b3b Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 17:35:21 -0400 Subject: [PATCH 12/19] fix runpath Signed-off-by: Jinzhe Zeng --- source/install/package_c.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/install/package_c.sh b/source/install/package_c.sh index 6a3371eeda..10f78dcab3 100755 --- a/source/install/package_c.sh +++ b/source/install/package_c.sh @@ -25,4 +25,9 @@ make install #------------------ +# fix runpath +for ii in ${INSTALL_PREFIX}/lib/*; do + patchelf --set-rpath \$ORIGIN $ii +done + tar vczf ${SCRIPT_PATH}/../../libdeepmd_c.tar.gz -C ${BUILD_TMP_DIR} libdeepmd_c From ddbd273b242e9c246abb07fb992415d60d377d82 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 17:42:36 -0400 Subject: [PATCH 13/19] update documentation Signed-off-by: Jinzhe Zeng --- doc/install/install-from-source.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/doc/install/install-from-source.md b/doc/install/install-from-source.md index 8ce2524806..e326be1598 100644 --- a/doc/install/install-from-source.md +++ b/doc/install/install-from-source.md @@ -39,7 +39,7 @@ deactivate ``` If one has multiple python interpreters named something like python3.x, it can be specified by, for example ```bash -virtualenv -p python3.7 $tensorflow_venv +virtualenv -p python3.8 $tensorflow_venv ``` If one does not need the GPU support of DeePMD-kit and is concerned about package size, the CPU-only version of TensorFlow should be installed by ```bash @@ -142,7 +142,7 @@ Available Tensor Operations: [X] Gloo ``` -From version 2.0.1, Horovod and mpi4py with MPICH support are shipped with the installer. +Since version 2.0.1, Horovod and mpi4py with MPICH support are shipped with the installer. If you don't install Horovod, DeePMD-kit will fall back to serial mode. @@ -150,7 +150,9 @@ If you don't install Horovod, DeePMD-kit will fall back to serial mode. If one does not need to use DeePMD-kit with Lammps or I-Pi, then the python interface installed in the previous section does everything and he/she can safely skip this section. -### Install Tensorflow's C++ interface +### Install Tensorflow's C++ interface (optional) + +Since TensorFlow 2.12, TensorFlow C++ library (`libtensorflow_cc`) is packaged inside the Python library. Thus, you can skip building TensorFlow C++ library manually. If that does not work for you, you can still build it manually. The C++ interface of DeePMD-kit was tested with compiler GCC >= 4.8. It is noticed that the I-Pi support is only compiled with GCC >= 4.8. Note that TensorFlow may have specific requirements for the compiler version. @@ -164,11 +166,12 @@ cd $deepmd_source_dir/source mkdir build cd build ``` -I assume you want to install DeePMD-kit into path `$deepmd_root`, then execute CMake +I assume you have activated the TensorFlow Python environment and want to install DeePMD-kit into path `$deepmd_root`, then execute CMake ```bash -cmake -DTENSORFLOW_ROOT=$tensorflow_root -DCMAKE_INSTALL_PREFIX=$deepmd_root .. +cmake -DUSE_TF_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=$deepmd_root .. ``` -where the variable `tensorflow_root` stores the location where TensorFlow's C++ interface is installed. + +If you specify `-DUSE_TF_PYTHON_LIBS=FALSE`, you need to give the location where TensorFlow's C++ interface is installed to `-DTENSORFLOW_ROOT=${tensorflow_root}`. One may add the following arguments to `cmake`: From 301292ff23fee46c952881618fe65349a04736f1 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 17:52:46 -0400 Subject: [PATCH 14/19] fix bug Signed-off-by: Jinzhe Zeng --- source/install/package_c.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/install/package_c.sh b/source/install/package_c.sh index 10f78dcab3..18c4943db3 100755 --- a/source/install/package_c.sh +++ b/source/install/package_c.sh @@ -26,7 +26,7 @@ make install #------------------ # fix runpath -for ii in ${INSTALL_PREFIX}/lib/*; do +for ii in ${INSTALL_PREFIX}/lib/*.so*; do patchelf --set-rpath \$ORIGIN $ii done From fd464a881efb236dd31e6674eff83495baf8db42 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 17:55:38 -0400 Subject: [PATCH 15/19] fix permission Signed-off-by: Jinzhe Zeng --- .github/workflows/build_cc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml index 8186f93ae4..f7c88b86c1 100644 --- a/.github/workflows/build_cc.yml +++ b/.github/workflows/build_cc.yml @@ -31,7 +31,7 @@ jobs: if: matrix.variant == 'cuda' - run: | wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \ - && dpkg -i cuda-keyring_1.0-1_all.deb \ + && sudo dpkg -i cuda-keyring_1.0-1_all.deb \ && sudo apt-get update \ && sudo apt-get -y install cuda-12-0 if: matrix.variant == 'cuda120' From dadb3d62b43330d953328296d095128b79f3b73d Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 18:02:40 -0400 Subject: [PATCH 16/19] fix permission Signed-off-by: Jinzhe Zeng --- .github/workflows/build_cc.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml index f7c88b86c1..c2d2613464 100644 --- a/.github/workflows/build_cc.yml +++ b/.github/workflows/build_cc.yml @@ -38,9 +38,9 @@ jobs: env: DEBIAN_FRONTEND: noninteractive - run: | - echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/5.3/ jammy main' | tee /etc/apt/sources.list.d/rocm.list \ - && printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | tee /etc/apt/preferences.d/rocm-pin-600 \ - && curl -s https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - \ + echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/5.3/ jammy main' | sudo tee /etc/apt/sources.list.d/rocm.list \ + && printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600 \ + && curl -s https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add - \ && sudo apt-get update \ && sudo apt-get install -y rocm-dev hipcub-dev if: matrix.variant == 'rocm' From 85ea2a2c50860b5a23964a07b20cf5f474d75e00 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 18:08:48 -0400 Subject: [PATCH 17/19] fix runpath Signed-off-by: Jinzhe Zeng --- source/install/package_c.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/install/package_c.sh b/source/install/package_c.sh index 18c4943db3..c250956e19 100755 --- a/source/install/package_c.sh +++ b/source/install/package_c.sh @@ -26,7 +26,7 @@ make install #------------------ # fix runpath -for ii in ${INSTALL_PREFIX}/lib/*.so*; do +for ii in ${BUILD_TMP_DIR}/libdeepmd_c/lib/*.so*; do patchelf --set-rpath \$ORIGIN $ii done From 49b5aed1d999c06a83d2d085ec4c2354a3c83fff Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 23 Mar 2023 18:39:25 -0400 Subject: [PATCH 18/19] revert aarch64 Python version to 3.10 per https://github.com/grpc/grpc/issues/32454 Signed-off-by: Jinzhe Zeng --- .github/workflows/build_wheel.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index 9729aac14e..b1b705fd49 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -29,7 +29,7 @@ jobs: dp_variant: cpu # linux-aarch64 - os: ubuntu-latest - python: 311 + python: 310 platform_id: manylinux_aarch64 dp_variant: cpu steps: From c052091661941394555125d03d582a05e5fde181 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 24 Mar 2023 16:04:51 -0400 Subject: [PATCH 19/19] update documentation of lammps Signed-off-by: Jinzhe Zeng --- doc/install/install-lammps.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/install/install-lammps.md b/doc/install/install-lammps.md index eb3dc0fea6..22262d9b95 100644 --- a/doc/install/install-lammps.md +++ b/doc/install/install-lammps.md @@ -69,7 +69,7 @@ ${deepmd_root}/bin/lmp -h ``` :::{note} -If `${tensorflow_root}` or `${deepmd_root}` is different from the prefix of LAMMPS, you need to append the library path to [`RUNPATH`](https://man7.org/linux/man-pages/man8/ld.so.8.html) of `liblammps.so`. For example, +If `${tensorflow_root}`, `${deepmd_root}`, or the path to TensorFlow Python package if applicable is different from the prefix of LAMMPS, you need to append the library path to [`RUNPATH`](https://man7.org/linux/man-pages/man8/ld.so.8.html) of `liblammps.so`. For example, ```sh patchelf --set-rpath "${tensorflow_root}/lib" liblammps.so