From 8dfa6c0cbf8cd3d67f98b2493feca02046d071dc Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Mon, 10 Feb 2025 12:00:02 +0800 Subject: [PATCH 1/5] fix --- .github/workflows/build_and_test.yml | 2 +- .github/workflows/build_python_connect.yml | 2 +- .github/workflows/build_python_connect35.yml | 2 +- .github/workflows/python_macos_test.yml | 2 +- dev/spark-test-image/python-309/Dockerfile | 2 +- dev/spark-test-image/python-310/Dockerfile | 2 +- dev/spark-test-image/python-311/Dockerfile | 2 +- dev/spark-test-image/python-312/Dockerfile | 2 +- dev/spark-test-image/python-313/Dockerfile | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 6d39ae900927d..3f4e4702c17e1 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -979,7 +979,7 @@ jobs: # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5 python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' python3.9 -m pip install ipython_genutils # See SPARK-38517 - python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' + python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly==5.24.1' python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421 - name: List Python packages run: python3.9 -m pip list diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml index d437763d36d35..6c3aab73f35de 100644 --- a/.github/workflows/build_python_connect.yml +++ b/.github/workflows/build_python_connect.yml @@ -72,7 +72,7 @@ jobs: python packaging/connect/setup.py sdist cd dist pip install pyspark*connect-*.tar.gz - pip install 'six==1.16.0' 'pandas==2.2.3' scipy 'plotly>=4.8' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' torch torchvision torcheval deepspeed unittest-xml-reporting 'plotly>=4.8' + pip install 'six==1.16.0' 'pandas==2.2.3' scipy 'plotly==5.24.1' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' torch torchvision torcheval deepspeed unittest-xml-reporting 'plotly>=4.8' - name: Run tests env: SPARK_TESTING: 1 diff --git a/.github/workflows/build_python_connect35.yml b/.github/workflows/build_python_connect35.yml index ba77f2dff75a9..a4c4c9b610e31 100644 --- a/.github/workflows/build_python_connect35.yml +++ b/.github/workflows/build_python_connect35.yml @@ -68,7 +68,7 @@ jobs: ./build/sbt -Phive Test/package - name: Install Python dependencies run: | - pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting plotly>=4.8 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*' + pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting 'plotly==5.24.1' 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*' # Add Python deps for Spark Connect. pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' diff --git a/.github/workflows/python_macos_test.yml b/.github/workflows/python_macos_test.yml index 231816750236b..1c4af41cdf1b8 100644 --- a/.github/workflows/python_macos_test.yml +++ b/.github/workflows/python_macos_test.yml @@ -133,7 +133,7 @@ jobs: run: | python${{matrix.python}} -m pip install --ignore-installed 'blinker>=1.6.2' python${{matrix.python}} -m pip install --ignore-installed 'six==1.16.0' - python${{matrix.python}} -m pip install numpy 'pyarrow>=15.0.0' 'six==1.16.0' 'pandas==2.2.3' scipy 'plotly>=4.8' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \ + python${{matrix.python}} -m pip install numpy 'pyarrow>=15.0.0' 'six==1.16.0' 'pandas==2.2.3' scipy 'plotly==5.24.1' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \ python${{matrix.python}} -m pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' && \ python${{matrix.python}} -m pip cache purge && \ python${{matrix.python}} -m pip list diff --git a/dev/spark-test-image/python-309/Dockerfile b/dev/spark-test-image/python-309/Dockerfile index bfe23bf572add..aeea03a23fb2d 100644 --- a/dev/spark-test-image/python-309/Dockerfile +++ b/dev/spark-test-image/python-309/Dockerfile @@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly==5.24.1 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" # Python deps for Spark Connect ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" diff --git a/dev/spark-test-image/python-310/Dockerfile b/dev/spark-test-image/python-310/Dockerfile index b9875ba969f8d..c5399be15b710 100644 --- a/dev/spark-test-image/python-310/Dockerfile +++ b/dev/spark-test-image/python-310/Dockerfile @@ -63,7 +63,7 @@ RUN apt-get update && apt-get install -y \ && rm -rf /var/lib/apt/lists/* -ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly==5.24.1 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" # Python deps for Spark Connect ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" diff --git a/dev/spark-test-image/python-311/Dockerfile b/dev/spark-test-image/python-311/Dockerfile index 48f1fede03c05..10bbed393a509 100644 --- a/dev/spark-test-image/python-311/Dockerfile +++ b/dev/spark-test-image/python-311/Dockerfile @@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \ && rm -rf /var/lib/apt/lists/* -ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly==5.24.1 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" # Python deps for Spark Connect ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" diff --git a/dev/spark-test-image/python-312/Dockerfile b/dev/spark-test-image/python-312/Dockerfile index 090c20742e652..b691c6e30f71d 100644 --- a/dev/spark-test-image/python-312/Dockerfile +++ b/dev/spark-test-image/python-312/Dockerfile @@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \ && rm -rf /var/lib/apt/lists/* -ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly==5.24.1 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" # Python deps for Spark Connect ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" diff --git a/dev/spark-test-image/python-313/Dockerfile b/dev/spark-test-image/python-313/Dockerfile index 473f3df8fdb7c..6967475888bfc 100644 --- a/dev/spark-test-image/python-313/Dockerfile +++ b/dev/spark-test-image/python-313/Dockerfile @@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \ && rm -rf /var/lib/apt/lists/* -ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly==5.24.1 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" From 7ede1755ecb202e1ed44183022fff59c866956ad Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Mon, 10 Feb 2025 12:03:27 +0800 Subject: [PATCH 2/5] req --- dev/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/requirements.txt b/dev/requirements.txt index 36548c2eae408..aa651b47aa559 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -7,7 +7,7 @@ pyarrow>=11.0.0 six==1.16.0 pandas>=2.2.0 scipy -plotly>=4.8 +plotly==5.24.1 mlflow>=2.3.1 scikit-learn matplotlib From f0769a8457f7254d18651c9da2f9fa943ab254ca Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Mon, 10 Feb 2025 15:01:27 +0800 Subject: [PATCH 3/5] fix --- .github/workflows/build_and_test.yml | 2 +- .github/workflows/build_python_connect.yml | 2 +- .github/workflows/build_python_connect35.yml | 2 +- .github/workflows/python_macos_test.yml | 2 +- dev/requirements.txt | 4 ++-- dev/spark-test-image/python-309/Dockerfile | 4 ++-- dev/spark-test-image/python-310/Dockerfile | 4 ++-- dev/spark-test-image/python-311/Dockerfile | 4 ++-- dev/spark-test-image/python-312/Dockerfile | 4 ++-- dev/spark-test-image/python-313/Dockerfile | 2 +- 10 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 3f4e4702c17e1..8ec6b384a6403 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -979,7 +979,7 @@ jobs: # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5 python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' python3.9 -m pip install ipython_genutils # See SPARK-38517 - python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly==5.24.1' + python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly<6.0.0' python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421 - name: List Python packages run: python3.9 -m pip list diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml index 6c3aab73f35de..f27e11c938509 100644 --- a/.github/workflows/build_python_connect.yml +++ b/.github/workflows/build_python_connect.yml @@ -72,7 +72,7 @@ jobs: python packaging/connect/setup.py sdist cd dist pip install pyspark*connect-*.tar.gz - pip install 'six==1.16.0' 'pandas==2.2.3' scipy 'plotly==5.24.1' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' torch torchvision torcheval deepspeed unittest-xml-reporting 'plotly>=4.8' + pip install 'six==1.16.0' 'pandas==2.2.3' scipy 'plotly==5.24.1' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting 'plotly>=4.8' - name: Run tests env: SPARK_TESTING: 1 diff --git a/.github/workflows/build_python_connect35.yml b/.github/workflows/build_python_connect35.yml index a4c4c9b610e31..7b854f8b1a28c 100644 --- a/.github/workflows/build_python_connect35.yml +++ b/.github/workflows/build_python_connect35.yml @@ -68,7 +68,7 @@ jobs: ./build/sbt -Phive Test/package - name: Install Python dependencies run: | - pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting 'plotly==5.24.1' 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*' + pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting 'plotly<6.0.0' 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*' # Add Python deps for Spark Connect. pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' diff --git a/.github/workflows/python_macos_test.yml b/.github/workflows/python_macos_test.yml index 1c4af41cdf1b8..cb6b33fb2a508 100644 --- a/.github/workflows/python_macos_test.yml +++ b/.github/workflows/python_macos_test.yml @@ -133,7 +133,7 @@ jobs: run: | python${{matrix.python}} -m pip install --ignore-installed 'blinker>=1.6.2' python${{matrix.python}} -m pip install --ignore-installed 'six==1.16.0' - python${{matrix.python}} -m pip install numpy 'pyarrow>=15.0.0' 'six==1.16.0' 'pandas==2.2.3' scipy 'plotly==5.24.1' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \ + python${{matrix.python}} -m pip install numpy 'pyarrow>=15.0.0' 'six==1.16.0' 'pandas==2.2.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \ python${{matrix.python}} -m pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' && \ python${{matrix.python}} -m pip cache purge && \ python${{matrix.python}} -m pip list diff --git a/dev/requirements.txt b/dev/requirements.txt index aa651b47aa559..1ed5b4f72d655 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -7,7 +7,7 @@ pyarrow>=11.0.0 six==1.16.0 pandas>=2.2.0 scipy -plotly==5.24.1 +plotly<6.0.0 mlflow>=2.3.1 scikit-learn matplotlib @@ -73,7 +73,7 @@ graphviz==0.20.3 flameprof==0.4 # TorchDistributor dependencies -torch +torch<2.6.0 torchvision torcheval diff --git a/dev/spark-test-image/python-309/Dockerfile b/dev/spark-test-image/python-309/Dockerfile index aeea03a23fb2d..7559e1e5c3a3c 100644 --- a/dev/spark-test-image/python-309/Dockerfile +++ b/dev/spark-test-image/python-309/Dockerfile @@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly==5.24.1 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" # Python deps for Spark Connect ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" @@ -75,6 +75,6 @@ ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 goog RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9 RUN python3.9 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this RUN python3.9 -m pip install --force $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS && \ - python3.9 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \ + python3.9 -m pip install torch<2.6.0 torchvision --index-url https://download.pytorch.org/whl/cpu && \ python3.9 -m pip install torcheval && \ python3.9 -m pip cache purge diff --git a/dev/spark-test-image/python-310/Dockerfile b/dev/spark-test-image/python-310/Dockerfile index c5399be15b710..51ee0d30e3b57 100644 --- a/dev/spark-test-image/python-310/Dockerfile +++ b/dev/spark-test-image/python-310/Dockerfile @@ -63,7 +63,7 @@ RUN apt-get update && apt-get install -y \ && rm -rf /var/lib/apt/lists/* -ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly==5.24.1 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" # Python deps for Spark Connect ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" @@ -72,6 +72,6 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 RUN python3.10 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this RUN python3.10 -m pip install --ignore-installed 'six==1.16.0' # Avoid `python3-six` installation RUN python3.10 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS && \ - python3.10 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \ + python3.10 -m pip install torch<2.6.0 torchvision --index-url https://download.pytorch.org/whl/cpu && \ python3.10 -m pip install deepspeed torcheval && \ python3.10 -m pip cache purge diff --git a/dev/spark-test-image/python-311/Dockerfile b/dev/spark-test-image/python-311/Dockerfile index 10bbed393a509..9fd2c18133d8c 100644 --- a/dev/spark-test-image/python-311/Dockerfile +++ b/dev/spark-test-image/python-311/Dockerfile @@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \ && rm -rf /var/lib/apt/lists/* -ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly==5.24.1 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" # Python deps for Spark Connect ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" @@ -75,6 +75,6 @@ ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 goog RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11 RUN python3.11 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this RUN python3.11 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS && \ - python3.11 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \ + python3.11 -m pip install torch<2.6.0 torchvision --index-url https://download.pytorch.org/whl/cpu && \ python3.11 -m pip install deepspeed torcheval && \ python3.11 -m pip cache purge diff --git a/dev/spark-test-image/python-312/Dockerfile b/dev/spark-test-image/python-312/Dockerfile index b691c6e30f71d..db3c05040df5f 100644 --- a/dev/spark-test-image/python-312/Dockerfile +++ b/dev/spark-test-image/python-312/Dockerfile @@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \ && rm -rf /var/lib/apt/lists/* -ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly==5.24.1 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" # Python deps for Spark Connect ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" @@ -75,6 +75,6 @@ ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 goog RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12 RUN python3.12 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this RUN python3.12 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS lxml && \ - python3.12 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \ + python3.12 -m pip install torch<2.6.0 torchvision --index-url https://download.pytorch.org/whl/cpu && \ python3.12 -m pip install torcheval && \ python3.12 -m pip cache purge diff --git a/dev/spark-test-image/python-313/Dockerfile b/dev/spark-test-image/python-313/Dockerfile index 6967475888bfc..6ad741d890da7 100644 --- a/dev/spark-test-image/python-313/Dockerfile +++ b/dev/spark-test-image/python-313/Dockerfile @@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \ && rm -rf /var/lib/apt/lists/* -ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly==5.24.1 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" From d818fdc4093e79e37841ae05c2204d64ea5d2ae3 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Mon, 10 Feb 2025 19:01:55 +0800 Subject: [PATCH 4/5] fix --- dev/spark-test-image/python-309/Dockerfile | 2 +- dev/spark-test-image/python-310/Dockerfile | 2 +- dev/spark-test-image/python-311/Dockerfile | 2 +- dev/spark-test-image/python-312/Dockerfile | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/spark-test-image/python-309/Dockerfile b/dev/spark-test-image/python-309/Dockerfile index 7559e1e5c3a3c..c8709205b8e38 100644 --- a/dev/spark-test-image/python-309/Dockerfile +++ b/dev/spark-test-image/python-309/Dockerfile @@ -75,6 +75,6 @@ ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 goog RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9 RUN python3.9 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this RUN python3.9 -m pip install --force $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS && \ - python3.9 -m pip install torch<2.6.0 torchvision --index-url https://download.pytorch.org/whl/cpu && \ + python3.9 -m pip install 'torch<2.6.0' torchvision --index-url https://download.pytorch.org/whl/cpu && \ python3.9 -m pip install torcheval && \ python3.9 -m pip cache purge diff --git a/dev/spark-test-image/python-310/Dockerfile b/dev/spark-test-image/python-310/Dockerfile index 51ee0d30e3b57..a44a8b4a2691b 100644 --- a/dev/spark-test-image/python-310/Dockerfile +++ b/dev/spark-test-image/python-310/Dockerfile @@ -72,6 +72,6 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 RUN python3.10 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this RUN python3.10 -m pip install --ignore-installed 'six==1.16.0' # Avoid `python3-six` installation RUN python3.10 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS && \ - python3.10 -m pip install torch<2.6.0 torchvision --index-url https://download.pytorch.org/whl/cpu && \ + python3.10 -m pip install 'torch<2.6.0' torchvision --index-url https://download.pytorch.org/whl/cpu && \ python3.10 -m pip install deepspeed torcheval && \ python3.10 -m pip cache purge diff --git a/dev/spark-test-image/python-311/Dockerfile b/dev/spark-test-image/python-311/Dockerfile index 9fd2c18133d8c..646d5a63fc510 100644 --- a/dev/spark-test-image/python-311/Dockerfile +++ b/dev/spark-test-image/python-311/Dockerfile @@ -75,6 +75,6 @@ ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 goog RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11 RUN python3.11 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this RUN python3.11 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS && \ - python3.11 -m pip install torch<2.6.0 torchvision --index-url https://download.pytorch.org/whl/cpu && \ + python3.11 -m pip install 'torch<2.6.0' torchvision --index-url https://download.pytorch.org/whl/cpu && \ python3.11 -m pip install deepspeed torcheval && \ python3.11 -m pip cache purge diff --git a/dev/spark-test-image/python-312/Dockerfile b/dev/spark-test-image/python-312/Dockerfile index db3c05040df5f..c2c9fe211695a 100644 --- a/dev/spark-test-image/python-312/Dockerfile +++ b/dev/spark-test-image/python-312/Dockerfile @@ -75,6 +75,6 @@ ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 goog RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12 RUN python3.12 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this RUN python3.12 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS lxml && \ - python3.12 -m pip install torch<2.6.0 torchvision --index-url https://download.pytorch.org/whl/cpu && \ + python3.12 -m pip install 'torch<2.6.0' torchvision --index-url https://download.pytorch.org/whl/cpu && \ python3.12 -m pip install torcheval && \ python3.12 -m pip cache purge From 51a1df9bb9cbef2c01948c2aba49c23ac7d7d036 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Mon, 10 Feb 2025 19:23:47 +0800 Subject: [PATCH 5/5] nit --- .github/workflows/build_python_connect.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml index f27e11c938509..b85acad5b4ede 100644 --- a/.github/workflows/build_python_connect.yml +++ b/.github/workflows/build_python_connect.yml @@ -72,7 +72,7 @@ jobs: python packaging/connect/setup.py sdist cd dist pip install pyspark*connect-*.tar.gz - pip install 'six==1.16.0' 'pandas==2.2.3' scipy 'plotly==5.24.1' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting 'plotly>=4.8' + pip install 'six==1.16.0' 'pandas==2.2.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting 'plotly>=4.8' - name: Run tests env: SPARK_TESTING: 1