apache · AnkitAhlawat7742 · May 22, 2026 · May 22, 2026
@@ -401,7 +401,15 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None,
             result = _sequence_to_array(obj, mask, size, type, pool, c_from_pandas)
 
     if extension_type is not None:
-        result = ExtensionArray.from_storage(extension_type, result)
+        if isinstance(result, ChunkedArray):
+            # Storage was split into chunks (e.g. int32 offsets overflowed).
+            chunks = []
+            for chunk in result.chunks:
+                ext_chunk = ExtensionArray.from_storage(extension_type, chunk)
+                chunks.append(ext_chunk)
+            result = chunked_array(chunks, type=extension_type)
+        else:
+            result = ExtensionArray.from_storage(extension_type, result)
     return result
 
 

@@ -2120,3 +2120,80 @@ def test_json(storage_type, pickle_module):
                 pa.ArrowInvalid,
                 match=f"Invalid storage type for JsonExtensionType: {storage_type}"):
             pa.json_(storage_type)
+
+
+class ListExtensionType(pa.ExtensionType):
+    """Parameter-free extension type over ``struct<data: list<uint8>>``."""
+
+    def __init__(self):
+        # The list field lets tests push storage past the int32 offset limit.
+        storage = pa.struct({"data": pa.list_(pa.uint8())})
+        super().__init__(storage, "test_list_ext")
+
+    def __arrow_ext_serialize__(self):
+        return b""  # no parameters to persist
+
+    @classmethod
+    def __arrow_ext_deserialize__(cls, storage_type, serialized):
+        # Arguments are ignored: the type has exactly one form.
+        return cls()
+
+
+@pytest.mark.large_memory
+@pytest.mark.numpy
+def test_extension_type_list_overflow():
+    """
+    Test that extension types with list fields handle int32 offset overflow.
+    """
+    # Not registered on purpose: that is only needed to rebuild the type from
+    # serialized metadata (IPC) and would leak into the global registry.
+    ext_type = ListExtensionType()
+    schema = pa.schema({"col": ext_type})
+
+    # 5 rows x 500M uint8 values = 2.5B list elements, which is more than
+    # int32 offsets can address (max 2,147,483,647). Every row shares one
+    # buffer, so the input itself only needs ~500 MB.
+    arr = np.zeros(500_000_000, dtype=np.uint8)
+    rows = [{"col": {"data": arr}} for _ in range(5)]
+
+    result = pa.Table.from_pylist(rows, schema=schema)
+
+    assert result.num_rows == 5
+    assert result.num_columns == 1
+    assert result.schema[0].type == ext_type
+
+    col = result.column(0)
+    assert isinstance(col, pa.ChunkedArray)
+    assert col.type == ext_type
+    # More than one chunk proves the overflow path was actually exercised.
+    assert col.num_chunks > 1
+
+    for chunk in col.chunks:
+        assert chunk.type == ext_type
+
+
+@pytest.mark.numpy
+def test_extension_type_no_overflow():
+    """Test that extension types work normally when there's no overflow."""
+    # Not registered on purpose: that is only needed to rebuild the type from
+    # serialized metadata (IPC) and would leak into the global registry.
+    ext_type = ListExtensionType()
+    schema = pa.schema({"col": ext_type})
+
+    # Small data that won't overflow
+    arr = np.array([1, 2, 3], dtype=np.uint8)
+    rows = [{"col": {"data": arr}} for _ in range(3)]
+
+    result = pa.Table.from_pylist(rows, schema=schema)
+
+    assert result.num_rows == 3
+    assert result.num_columns == 1
+    assert result.schema[0].type == ext_type
+
+    # The column should be a ChunkedArray with a single chunk
+    col = result.column(0)
+    assert isinstance(col, pa.ChunkedArray)
+    assert col.type == ext_type
+    assert col.num_chunks == 1
+    # The values must survive the trip through the extension storage.
+    assert col.chunk(0).storage.to_pylist() == [{"data": [1, 2, 3]}] * 3