Commit 37d70c8

use schema_to_pyarrow directly for backporting
1 parent e24541b

File tree

2 files changed: +9 −5 lines changed

pyiceberg/io/pyarrow.py

Lines changed: 1 addition & 1 deletion
@@ -1721,7 +1721,7 @@ def write_file(table: Table, tasks: Iterator[WriteTask]) -> Iterator[DataFile]:
     parquet_writer_kwargs = _get_parquet_writer_kwargs(table.properties)

     file_path = f'{table.location()}/data/{task.generate_data_file_filename("parquet")}'
-    file_schema = table.schema().as_arrow()
+    file_schema = schema_to_pyarrow(table.schema())

     fo = table.io.new_output(file_path)
     row_group_size = PropertyUtil.property_as_int(
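The write path previously went through the Schema.as_arrow() convenience method; calling schema_to_pyarrow directly, presumably so the backport does not depend on that newer helper, produces the same pyarrow.Schema for the Parquet writer's file_schema. A minimal sketch of that conversion, using a hypothetical two-field schema (field names and types are illustrative, not from this commit):

    import pyarrow as pa

    from pyiceberg.io.pyarrow import schema_to_pyarrow
    from pyiceberg.schema import Schema
    from pyiceberg.types import LongType, NestedField, StringType

    # Hypothetical Iceberg schema: a required id and an optional name.
    iceberg_schema = Schema(
        NestedField(field_id=1, name="id", field_type=LongType(), required=True),
        NestedField(field_id=2, name="name", field_type=StringType(), required=False),
    )

    # schema_to_pyarrow maps each Iceberg field onto an Arrow field,
    # preserving field names and nullability.
    arrow_schema: pa.Schema = schema_to_pyarrow(iceberg_schema)
    print(arrow_schema)  # e.g. id: int64 not null, name: string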

pyiceberg/table/__init__.py

Lines changed: 8 additions & 4 deletions
@@ -1052,10 +1052,12 @@ def append(self, df: pa.Table) -> None:
         if len(self.spec().fields) > 0:
             raise ValueError("Cannot write to partitioned tables")

+        from pyiceberg.io.pyarrow import schema_to_pyarrow
+
         _check_schema_compatible(self.schema(), other_schema=df.schema)
         # cast if the two schemas are compatible but not equal
-        if self.schema().as_arrow() != df.schema:
-            df = df.cast(self.schema().as_arrow())
+        if schema_to_pyarrow(self.schema()) != df.schema:
+            df = df.cast(schema_to_pyarrow(self.schema()))

         merge = _MergingSnapshotProducer(operation=Operation.APPEND, table=self)

@@ -1090,10 +1092,12 @@ def overwrite(self, df: pa.Table, overwrite_filter: BooleanExpression = ALWAYS_T
         if len(self.spec().fields) > 0:
             raise ValueError("Cannot write to partitioned tables")

+        from pyiceberg.io.pyarrow import schema_to_pyarrow
+
         _check_schema_compatible(self.schema(), other_schema=df.schema)
         # cast if the two schemas are compatible but not equal
-        if self.schema().as_arrow() != df.schema:
-            df = df.cast(self.schema().as_arrow())
+        if schema_to_pyarrow(self.schema()) != df.schema:
+            df = df.cast(schema_to_pyarrow(self.schema()))

         merge = _MergingSnapshotProducer(
             operation=Operation.OVERWRITE if self.current_snapshot() is not None else Operation.APPEND,
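Both append and overwrite now apply the same guard: convert the table's Iceberg schema to Arrow, then cast the incoming pa.Table only when the two schemas are compatible but not identical. A minimal sketch of that pattern, where the helper name _align_to_table_schema is invented for illustration:

    import pyarrow as pa

    from pyiceberg.io.pyarrow import schema_to_pyarrow
    from pyiceberg.table import Table

    def _align_to_table_schema(table: Table, df: pa.Table) -> pa.Table:
        # Convert the table's Iceberg schema to its Arrow equivalent once.
        expected = schema_to_pyarrow(table.schema())
        # pa.Table.cast rewrites columns to the expected Arrow types when the
        # schemas differ only in representation (e.g. nullability flags).
        return df if df.schema == expected else df.cast(expected)

Computing the converted schema once, as in this sketch, also avoids the repeated schema_to_pyarrow call that the inline version in the diff performs.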
