From a1f323c88559218efa2222aad1c74cfb5e0e6970 Mon Sep 17 00:00:00 2001 From: Dan Nugent Date: Fri, 13 Mar 2020 03:25:33 -0400 Subject: [PATCH 1/3] ARROW-8105: [Python] Fix segfault when shrunken masked array is passed to pyarrow.array pyarrow.array now checks whether the mask of the masked array is the np.ma.nomask constant, which indicates that the mask was shrunk, and passes no mask in that case --- python/pyarrow/array.pxi | 3 ++- python/pyarrow/tests/test_array.py | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index bca67cc0bf96..85b5a44ac977 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -208,7 +208,8 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None, raise ValueError("Cannot pass a numpy masked array and " "specify a mask at the same time") else: - mask = values.mask + # don't use shrunken masks + mask = None if values.mask is np.ma.nomask else values.mask values = values.data if hasattr(values, '__arrow_array__'): diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 9f1808e77dd3..47dffa6a16b1 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -1655,6 +1655,12 @@ def test_array_from_masked(): with pytest.raises(ValueError, match="Cannot pass a numpy masked array"): pa.array(ma, mask=np.array([True, False, False, False])) +def test_array_from_shrunken_masked(): + ma = np.ma.array([0]) + result = pa.array(ma) + expected = pa.array([0], type='int64') + assert expected.equals(result) + def test_array_from_invalid_dim_raises(): msg = "only handle 1-dimensional arrays" From ea9f56a76c5bea29264c004524fcf4abe233d3b6 Mon Sep 17 00:00:00 2001 From: Daniel Nugent Date: Fri, 13 Mar 2020 03:40:13 -0400 Subject: [PATCH 2/3] added missing newline for linter --- python/pyarrow/tests/test_array.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyarrow/tests/test_array.py 
b/python/pyarrow/tests/test_array.py index 47dffa6a16b1..8288c6781867 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -1655,6 +1655,7 @@ def test_array_from_masked(): with pytest.raises(ValueError, match="Cannot pass a numpy masked array"): pa.array(ma, mask=np.array([True, False, False, False])) + def test_array_from_shrunken_masked(): ma = np.ma.array([0]) result = pa.array(ma) From aa923d5fcea2dd2cd36734127cb7de942ff65cd8 Mon Sep 17 00:00:00 2001 From: Dan Nugent Date: Tue, 17 Mar 2020 06:07:38 -0400 Subject: [PATCH 3/3] Fix type of masked array to int64 --- python/pyarrow/tests/test_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 8288c6781867..b13c6e3b7c85 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -1657,7 +1657,7 @@ def test_array_from_masked(): def test_array_from_shrunken_masked(): - ma = np.ma.array([0]) + ma = np.ma.array([0], dtype='int64') result = pa.array(ma) expected = pa.array([0], type='int64') assert expected.equals(result)