Skip to content

Commit b74fcf7

Browse files
authored
[BEAM-11731][BEAM-10582] Allow pyarrow<4,numpy<1.21.0, improve pyarrow verification (#13892)
1 parent 2151954 commit b74fcf7

File tree

11 files changed

+52
-23
lines changed

11 files changed

+52
-23
lines changed

sdks/python/apache_beam/coders/row_coder_test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
class RowCoderTest(unittest.TestCase):
5858
JON_SNOW = Person(
5959
name="Jon Snow",
60-
age=23,
60+
age=np.int32(23),
6161
address=None,
6262
aliases=["crow", "wildling"],
6363
knows_javascript=False,
@@ -69,7 +69,7 @@ class RowCoderTest(unittest.TestCase):
6969
JON_SNOW,
7070
Person(
7171
"Daenerys Targaryen",
72-
25,
72+
np.int32(25),
7373
"Westeros",
7474
["Mother of Dragons"],
7575
False,
@@ -79,7 +79,7 @@ class RowCoderTest(unittest.TestCase):
7979
),
8080
Person(
8181
"Michael Bluth",
82-
30,
82+
np.int32(30),
8383
None, [],
8484
True,
8585
b"I've made a huge mistake", {},

sdks/python/apache_beam/dataframe/io_test.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
import pandas as pd
3333
import pandas.testing
34+
import pytest
3435
from pandas.testing import assert_frame_equal
3536
from parameterized import parameterized
3637

@@ -77,6 +78,11 @@ def test_read_write_csv(self):
7778
self.assertCountEqual(['a,b,c', '1,2,3', '3,4,7'],
7879
set(self.read_all_lines(output + 'out.csv*')))
7980

81+
@pytest.mark.uses_pyarrow
82+
def test_read_write_parquet(self):
83+
self._run_read_write_test(
84+
'parquet', {}, {}, dict(check_index=False), ['pyarrow'])
85+
8086
@parameterized.expand([
8187
('csv', dict(index_col=0)),
8288
('csv', dict(index_col=0, splittable=True)),
@@ -100,7 +106,6 @@ def test_read_write_csv(self):
100106
dict(check_index=False)),
101107
('html', dict(index_col=0), {}, {}, ['lxml']),
102108
('excel', dict(index_col=0), {}, {}, ['openpyxl', 'xlrd']),
103-
('parquet', {}, {}, dict(check_index=False), ['pyarrow']),
104109
])
105110
# pylint: disable=dangerous-default-value
106111
def test_read_write(
@@ -110,6 +115,18 @@ def test_read_write(
110115
write_kwargs={},
111116
check_options={},
112117
requires=()):
118+
self._run_read_write_test(
119+
format, read_kwargs, write_kwargs, check_options, requires)
120+
121+
# pylint: disable=dangerous-default-value
122+
def _run_read_write_test(
123+
self,
124+
format,
125+
read_kwargs={},
126+
write_kwargs={},
127+
check_options={},
128+
requires=()):
129+
113130
for module in requires:
114131
try:
115132
importlib.import_module(module)

sdks/python/apache_beam/dataframe/schemas.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@
8989

9090
# Generate type map (presented visually in the docstring)
9191
_BIDIRECTIONAL = [
92-
(np.bool, np.bool),
92+
(bool, bool),
9393
(np.int8, np.int8),
9494
(np.int16, np.int16),
9595
(np.int32, np.int32),
@@ -100,9 +100,9 @@
100100
(pd.Int64Dtype(), Optional[np.int64]),
101101
(np.float32, Optional[np.float32]),
102102
(np.float64, Optional[np.float64]),
103-
(np.object, Any),
103+
(object, Any),
104104
(pd.StringDtype(), Optional[str]),
105-
(pd.BooleanDtype(), Optional[np.bool]),
105+
(pd.BooleanDtype(), Optional[bool]),
106106
]
107107

108108
PANDAS_TO_BEAM = {
@@ -173,7 +173,7 @@ def generate_proxy(element_type):
173173
for name, typehint in fields:
174174
# Default to np.object. This is lossy, we won't be able to recover
175175
# the type at the output.
176-
dtype = BEAM_TO_PANDAS.get(typehint, np.object)
176+
dtype = BEAM_TO_PANDAS.get(typehint, object)
177177
proxy[name] = proxy[name].astype(dtype)
178178

179179
return proxy

sdks/python/apache_beam/dataframe/schemas_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,8 @@ def check_df_pcoll_equal(actual):
7575
([375, 24, None, 10, 16], pd.Int64Dtype(), 'i64_nullable'),
7676
([375., 24., None, 10., 16.], np.float64, 'f64'),
7777
([375., 24., None, 10., 16.], np.float32, 'f32'),
78-
([True, False, True, True, False], np.bool, 'bool'),
79-
(['Falcon', 'Ostrich', None, 3.14, 0], np.object, 'any'),
78+
([True, False, True, True, False], bool, 'bool'),
79+
(['Falcon', 'Ostrich', None, 3.14, 0], object, 'any'),
8080
([True, False, True, None, False], pd.BooleanDtype(), 'bool_nullable'),
8181
(['Falcon', 'Ostrich', None, 'Aardvark', 'Elephant'],
8282
pd.StringDtype(),

sdks/python/apache_beam/io/parquetio_test.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
import hamcrest as hc
3030
import pandas
31+
import pytest
3132
from parameterized import param
3233
from parameterized import parameterized
3334

@@ -62,6 +63,7 @@
6263

6364

6465
@unittest.skipIf(pa is None, "PyArrow is not installed.")
66+
@pytest.mark.uses_pyarrow
6567
class TestParquet(unittest.TestCase):
6668
@classmethod
6769
def setUpClass(cls):

sdks/python/apache_beam/testing/synthetic_pipeline_test.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,11 @@
3636
from apache_beam.testing.util import equal_to
3737

3838
try:
39-
import numpy as np
39+
import numpy # pylint: disable=unused-import
4040
except ImportError:
41-
np = None
41+
NP_INSTALLED = False
42+
else:
43+
NP_INSTALLED = True
4244

4345

4446
def input_spec(
@@ -60,7 +62,8 @@ def input_spec(
6062
}
6163

6264

63-
@unittest.skipIf(np is None, 'Synthetic source dependencies are not installed')
65+
@unittest.skipIf(
66+
not NP_INSTALLED, 'Synthetic source dependencies are not installed')
6467
class SyntheticPipelineTest(unittest.TestCase):
6568

6669
# pylint: disable=expression-not-assigned

sdks/python/pytest.ini

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ markers =
3030
# Tests using this marker conflict with the xdist plugin in some way, such
3131
# as enabling save_main_session.
3232
no_xdist: run without pytest-xdist plugin
33+
# We run these tests with multiple major pyarrow versions (BEAM-11211)
34+
uses_pyarrow: tests that utilize pyarrow in some way
3335

3436
# Default timeout intended for unit tests.
3537
# If certain tests need a different value, please see the docs on how to

sdks/python/setup.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,12 +143,11 @@ def get_version():
143143
'hdfs>=2.1.0,<3.0.0',
144144
'httplib2>=0.8,<0.18.0',
145145
'mock>=1.0.1,<3.0.0',
146-
# TODO(BEAM-11731): Support numpy 1.20.0
147-
'numpy>=1.14.3,<1.20.0',
146+
'numpy>=1.14.3,<1.21.0',
148147
'pymongo>=3.8.0,<4.0.0',
149148
'oauth2client>=2.0.1,<5',
150149
'protobuf>=3.12.2,<4',
151-
'pyarrow>=0.15.1,<3.0.0',
150+
'pyarrow>=0.15.1,<4.0.0',
152151
'pydot>=1.2.0,<2',
153152
'python-dateutil>=2.8.0,<3',
154153
'pytz>=2018.3',

sdks/python/test-suites/tox/common.gradle

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ project.task("preCommitPy${pythonVersionSuffix}") {
3434
// Generates coverage reports only once, in Py38, to remove duplicated work
3535
if (pythonVersionSuffix.equals('38')) {
3636
dependsOn = ["testPy38CloudCoverage", "testPy38Cython",
37-
"testPy38pyarrow-0", "testPy38pyarrow-1", "testPy38pyarrow-2"]
37+
"testPy38pyarrow-0", "testPy38pyarrow-1", "testPy38pyarrow-2",
38+
"testPy38pyarrow-3"]
3839
} else {
3940
dependsOn = ["testPy${pythonVersionSuffix}Cloud", "testPy${pythonVersionSuffix}Cython"]
4041
}

sdks/python/test-suites/tox/py38/build.gradle

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,11 @@ testPy38Cython.mustRunAfter testPython38, testPy38CloudCoverage
3737
toxTask "testPy38pyarrow-0", "py38-pyarrow-0"
3838
toxTask "testPy38pyarrow-1", "py38-pyarrow-1"
3939
toxTask "testPy38pyarrow-2", "py38-pyarrow-2"
40+
toxTask "testPy38pyarrow-3", "py38-pyarrow-3"
4041
test.dependsOn "testPy38pyarrow-0"
4142
test.dependsOn "testPy38pyarrow-1"
4243
test.dependsOn "testPy38pyarrow-2"
44+
test.dependsOn "testPy38pyarrow-3"
4345

4446
toxTask "whitespacelint", "whitespacelint"
4547

0 commit comments

Comments
 (0)