1010import numpy as np
1111import pandas
1212import pandas .api .types
13- import pandas .util . testing as tm
13+ import pandas .testing as tm
1414from pandas import DataFrame , NaT
1515
1616try :
2121import pytz
2222
2323from pandas_gbq import gbq
24+ from pandas_gbq .features import FEATURES
2425import pandas_gbq .schema
2526
2627
@@ -32,6 +33,18 @@ def test_imports():
3233 gbq ._test_google_api_imports ()
3334
3435
def make_mixed_dataframe_v1():
    """Return a small, fixed DataFrame mixing float, string and date columns.

    Re-implementation of the private ``pandas.util.testing.makeMixedDataFrame``
    helper, so the tests do not depend on a non-public pandas API.

    Returns:
        pandas.DataFrame: five rows with columns ``"A"`` and ``"B"``
        (floats), ``"C"`` (strings) and ``"D"`` (five consecutive business
        days starting at 2009-01-01).
    """
    return pandas.DataFrame(
        {
            "A": [0.0, 1.0, 2.0, 3.0, 4.0],
            "B": [0.0, 1.0, 0.0, 1.0, 0.0],
            "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
            # bdate_range skips weekends, so the dates are not contiguous.
            "D": pandas.bdate_range("1/1/2009", periods=5),
        }
    )
46+
47+
3548def make_mixed_dataframe_v2 (test_size ):
3649 # create df to test for all BQ datatypes except RECORD
3750 bools = np .random .randint (2 , size = (1 , test_size )).astype (bool )
@@ -168,7 +181,7 @@ def test_should_properly_handle_valid_integers(self, project_id):
168181 credentials = self .credentials ,
169182 dialect = "standard" ,
170183 )
171- tm .assert_frame_equal (df , DataFrame ({"valid_integer" : [3 ]}))
184+ tm .assert_frame_equal (df , DataFrame ({"valid_integer" : [3 ]}, dtype = "Int64" ))
172185
173186 def test_should_properly_handle_nullable_integers (self , project_id ):
174187 query = """SELECT * FROM
@@ -194,7 +207,7 @@ def test_should_properly_handle_valid_longs(self, project_id):
194207 credentials = self .credentials ,
195208 dialect = "standard" ,
196209 )
197- tm .assert_frame_equal (df , DataFrame ({"valid_long" : [1 << 62 ]}))
210+ tm .assert_frame_equal (df , DataFrame ({"valid_long" : [1 << 62 ]}, dtype = "Int64" ))
198211
199212 def test_should_properly_handle_nullable_longs (self , project_id ):
200213 query = """SELECT * FROM
@@ -433,7 +446,10 @@ def test_should_properly_handle_null_boolean(self, project_id):
433446 credentials = self .credentials ,
434447 dialect = "legacy" ,
435448 )
436- tm .assert_frame_equal (df , DataFrame ({"null_boolean" : [None ]}))
449+ expected_dtype = "boolean" if FEATURES .pandas_has_boolean_dtype else None
450+ tm .assert_frame_equal (
451+ df , DataFrame ({"null_boolean" : [None ]}, dtype = expected_dtype )
452+ )
437453
438454 def test_should_properly_handle_nullable_booleans (self , project_id ):
439455 query = """SELECT * FROM
@@ -445,8 +461,9 @@ def test_should_properly_handle_nullable_booleans(self, project_id):
445461 credentials = self .credentials ,
446462 dialect = "legacy" ,
447463 )
464+ expected_dtype = "boolean" if FEATURES .pandas_has_boolean_dtype else None
448465 tm .assert_frame_equal (
449- df , DataFrame ({"nullable_boolean" : [True , None ]}). astype ( object )
466+ df , DataFrame ({"nullable_boolean" : [True , None ]}, dtype = expected_dtype )
450467 )
451468
452469 def test_unicode_string_conversion_and_normalization (self , project_id ):
@@ -629,7 +646,7 @@ def test_one_row_one_column(self, project_id):
629646 credentials = self .credentials ,
630647 dialect = "standard" ,
631648 )
632- expected_result = DataFrame (dict (v = [3 ]))
649+ expected_result = DataFrame (dict (v = [3 ]), dtype = "Int64" )
633650 tm .assert_frame_equal (df , expected_result )
634651
635652 def test_legacy_sql (self , project_id ):
@@ -719,7 +736,7 @@ def test_query_with_parameters(self, project_id):
719736 configuration = config ,
720737 dialect = "legacy" ,
721738 )
722- tm .assert_frame_equal (df , DataFrame ({"valid_result" : [3 ]}))
739+ tm .assert_frame_equal (df , DataFrame ({"valid_result" : [3 ]}, dtype = "Int64" ))
723740
724741 def test_query_inside_configuration (self , project_id ):
725742 query_no_use = 'SELECT "PI_WRONG" AS valid_string'
@@ -842,7 +859,11 @@ def test_struct(self, project_id):
842859 dialect = "standard" ,
843860 )
844861 expected = DataFrame (
845- [[1 , {"letter" : "a" , "num" : 1 }]], columns = ["int_field" , "struct_field" ],
862+ {
863+ "int_field" : pandas .Series ([1 ], dtype = "Int64" ),
864+ "struct_field" : [{"letter" : "a" , "num" : 1 }],
865+ },
866+ columns = ["int_field" , "struct_field" ],
846867 )
847868 tm .assert_frame_equal (df , expected )
848869
@@ -874,7 +895,12 @@ def test_array_length_zero(self, project_id):
874895 dialect = "standard" ,
875896 )
876897 expected = DataFrame (
877- [["a" , ["" ], 1 ], ["b" , [], 0 ]], columns = ["letter" , "array_field" , "len" ],
898+ {
899+ "letter" : ["a" , "b" ],
900+ "array_field" : [["" ], []],
901+ "len" : pandas .Series ([1 , 0 ], dtype = "Int64" ),
902+ },
903+ columns = ["letter" , "array_field" , "len" ],
878904 )
879905 tm .assert_frame_equal (df , expected )
880906
@@ -908,7 +934,13 @@ def test_array_of_floats(self, project_id):
908934 credentials = self .credentials ,
909935 dialect = "standard" ,
910936 )
911- tm .assert_frame_equal (df , DataFrame ([[[1.1 , 2.2 , 3.3 ], 4 ]], columns = ["a" , "b" ]))
937+ tm .assert_frame_equal (
938+ df ,
939+ DataFrame (
940+ {"a" : [[1.1 , 2.2 , 3.3 ]], "b" : pandas .Series ([4 ], dtype = "Int64" )},
941+ columns = ["a" , "b" ],
942+ ),
943+ )
912944
913945 def test_tokyo (self , tokyo_dataset , tokyo_table , project_id ):
914946 df = gbq .read_gbq (
@@ -1021,7 +1053,7 @@ def test_upload_data_if_table_exists_append(self, project_id):
10211053 test_id = "3"
10221054 test_size = 10
10231055 df = make_mixed_dataframe_v2 (test_size )
1024- df_different_schema = tm . makeMixedDataFrame ()
1056+ df_different_schema = make_mixed_dataframe_v1 ()
10251057
10261058 # Initialize table with sample data
10271059 gbq .to_gbq (
@@ -1101,7 +1133,7 @@ def test_upload_data_if_table_exists_replace(self, project_id):
11011133 test_id = "4"
11021134 test_size = 10
11031135 df = make_mixed_dataframe_v2 (test_size )
1104- df_different_schema = tm . makeMixedDataFrame ()
1136+ df_different_schema = make_mixed_dataframe_v1 ()
11051137
11061138 # Initialize table with sample data
11071139 gbq .to_gbq (
@@ -1225,7 +1257,7 @@ def test_upload_data_with_newlines(self, project_id):
12251257 result = result_df ["s" ].sort_values ()
12261258 expected = df ["s" ].sort_values ()
12271259
1228- tm .assert_numpy_array_equal (expected . values , result . values )
1260+ tm .assert_series_equal (expected , result )
12291261
12301262 def test_upload_data_flexible_column_order (self , project_id ):
12311263 test_id = "13"
@@ -1254,7 +1286,7 @@ def test_upload_data_flexible_column_order(self, project_id):
12541286 def test_upload_data_with_valid_user_schema (self , project_id ):
12551287 # Issue #46; tests test scenarios with user-provided
12561288 # schemas
1257- df = tm . makeMixedDataFrame ()
1289+ df = make_mixed_dataframe_v1 ()
12581290 test_id = "18"
12591291 test_schema = [
12601292 {"name" : "A" , "type" : "FLOAT" },
@@ -1276,7 +1308,7 @@ def test_upload_data_with_valid_user_schema(self, project_id):
12761308 )
12771309
12781310 def test_upload_data_with_invalid_user_schema_raises_error (self , project_id ):
1279- df = tm . makeMixedDataFrame ()
1311+ df = make_mixed_dataframe_v1 ()
12801312 test_id = "19"
12811313 test_schema = [
12821314 {"name" : "A" , "type" : "FLOAT" },
@@ -1295,7 +1327,7 @@ def test_upload_data_with_invalid_user_schema_raises_error(self, project_id):
12951327 )
12961328
12971329 def test_upload_data_with_missing_schema_fields_raises_error (self , project_id ):
1298- df = tm . makeMixedDataFrame ()
1330+ df = make_mixed_dataframe_v1 ()
12991331 test_id = "20"
13001332 test_schema = [
13011333 {"name" : "A" , "type" : "FLOAT" },
@@ -1351,7 +1383,7 @@ def test_upload_data_with_timestamp(self, project_id):
13511383 tm .assert_series_equal (expected , result )
13521384
13531385 def test_upload_data_with_different_df_and_user_schema (self , project_id ):
1354- df = tm . makeMixedDataFrame ()
1386+ df = make_mixed_dataframe_v1 ()
13551387 df ["A" ] = df ["A" ].astype (str )
13561388 df ["B" ] = df ["B" ].astype (str )
13571389 test_id = "22"
@@ -1460,13 +1492,13 @@ def test_dataset_does_not_exist(gbq_dataset, random_dataset_id):
14601492
14611493
def test_create_table(gbq_table):
    """Creating a table from a generated schema makes it report as existing.

    Args:
        gbq_table: object exposing ``create`` and ``exists`` (presumably a
            pytest fixture defined elsewhere -- confirm against conftest).
    """
    # Use the local helper: ``tm`` is ``pandas.testing``, which does not
    # provide ``makeMixedDataFrame``.
    schema = gbq._generate_bq_schema(make_mixed_dataframe_v1())
    gbq_table.create("test_create_table", schema)
    assert gbq_table.exists("test_create_table")
14661498
14671499
def test_create_table_already_exists(gbq_table):
    """Creating the same table twice raises ``gbq.TableCreationError``.

    Args:
        gbq_table: object exposing ``create`` (presumably a pytest fixture
            defined elsewhere -- confirm against conftest).
    """
    # Use the local helper: ``tm`` is ``pandas.testing``, which does not
    # provide ``makeMixedDataFrame``.
    schema = gbq._generate_bq_schema(make_mixed_dataframe_v1())
    gbq_table.create("test_create_table_exists", schema)
    # The second create with the same table id is the call expected to fail.
    with pytest.raises(gbq.TableCreationError):
        gbq_table.create("test_create_table_exists", schema)
0 commit comments