From 86c504d1cd583323502a4b16a21e9d82f03f2577 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 16 Jan 2025 15:49:12 +0000 Subject: [PATCH 1/2] Updates most of external_catalog_table_options --- google/cloud/bigquery/external_config.py | 107 ++++++++++++++++++ google/cloud/bigquery/table.py | 24 ++++ tests/unit/test_external_config.py | 137 +++++++++++++++++++++++ 3 files changed, 268 insertions(+) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 7f2b58f2b..02a479558 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -30,6 +30,7 @@ from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery import _helpers from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions +from google.cloud.bigquery import schema from google.cloud.bigquery.schema import SchemaField @@ -1077,3 +1078,109 @@ def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions: config = cls() config._properties = api_repr return config + + +class ExternalCatalogTableOptions: + """Metadata about open source compatible table. The fields contained in these + options correspond to hive metastore's table level properties. + + Args: + connection_id (Optional[str]): The connection specifying the credentials to be + used to read external storage, such as Azure Blob, Cloud Storage, or + S3. The connection is needed to read the open source table from + BigQuery Engine. The connection_id can have the form + `{project_id}.{location_id}.{connection_id}` or + `projects/{project_id}/locations/{location_id}/connections/{connection_id}`. + parameters (Union[Dict[str, Any], None]): A map of key value pairs defining the parameters + and properties of the open source table. Corresponds with hive meta + store table parameters. Maximum size of 4Mib. + storage_descriptor (Optional[StorageDescriptor]): A storage descriptor containing information + about the physical storage of this table. 
+ """ + + def __init__( + self, + connection_id: Optional[str] = None, + parameters: Union[Dict[str, Any], None] = None, + storage_descriptor: Optional[schema.StorageDescriptor] = None, + ): + self._properties: Dict[str, Any] = {} + self.connection_id = connection_id + self.parameters = parameters + self.storage_descriptor = storage_descriptor + + @property + def connection_id(self): + """Optional. The connection specifying the credentials to be + used to read external storage, such as Azure Blob, Cloud Storage, or + S3. The connection is needed to read the open source table from + BigQuery Engine. The connection_id can have the form + `{project_id}.{location_id}.{connection_id}` or + `projects/{project_id}/locations/{location_id}/connections/{connection_id}`. + """ + + return self._properties.get("connectionId") + + @connection_id.setter + def connection_id(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["connectionId"] = value + + @property + def parameters(self) -> Any: + """Optional. A map of key value pairs defining the parameters and + properties of the open source table. Corresponds with hive meta + store table parameters. Maximum size of 4Mib. + """ + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: Union[Dict[str, Any], None]): + value = _helpers._isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value + + @property + def storage_descriptor(self) -> Any: + """Optional. 
A storage descriptor containing information about the + physical storage of this table.""" + + prop = _helpers._get_sub_prop(self._properties, ["storageDescriptor"]) + + if prop is not None: + return schema.StorageDescriptor.from_api_repr(prop) + return None + + @storage_descriptor.setter + def storage_descriptor(self, value): + value = _helpers._isinstance_or_raise( + value, (schema.StorageDescriptor, dict), none_allowed=True + ) + if isinstance(value, schema.StorageDescriptor): + self._properties["storageDescriptor"] = value.to_api_repr() + else: + self._properties["storageDescriptor"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: dict) -> ExternalCatalogTableOptions: + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + api_repr (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'api_repr'. 
+ """ + config = cls() + config._properties = api_repr + return config diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 80ab330ba..ac27030e5 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -69,6 +69,7 @@ from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields +from google.cloud.bigquery import external_config if typing.TYPE_CHECKING: # pragma: NO COVER # Unconditionally import optional dependencies again to tell pytype that @@ -408,6 +409,7 @@ class Table(_TableBase): "require_partition_filter": "requirePartitionFilter", "table_constraints": "tableConstraints", "max_staleness": "maxStaleness", + "external_catalog_table_options": "externalCatalogTableOptions", } def __init__(self, table_ref, schema=None) -> None: @@ -1023,6 +1025,28 @@ def table_constraints(self) -> Optional["TableConstraints"]: table_constraints = TableConstraints.from_api_repr(table_constraints) return table_constraints + @property + def external_catalog_table_options(self): + """Options defining open source compatible datasets living in the + BigQuery catalog. 
Contains metadata of open source database, schema + or namespace represented by the current dataset.""" + + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ) + if prop is not None: + prop = external_config.ExternalCatalogTableOptions.from_api_repr(prop) + return prop + + @external_catalog_table_options.setter + def external_catalog_table_options(self, value): + value = _helpers._isinstance_or_raise( + value, external_config.ExternalCatalogTableOptions, none_allowed=True + ) + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ] = value.to_api_repr() + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 0c27d8e56..7f84a9f5b 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -14,6 +14,7 @@ import base64 import copy +from typing import Any, Dict, Optional import unittest from google.cloud.bigquery import external_config @@ -979,3 +980,139 @@ def test_from_api_repr(self): assert isinstance(result, external_config.ExternalCatalogDatasetOptions) assert result._properties == api_repr + + +class TestExternalCatalogTableOptions: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.external_config import ExternalCatalogTableOptions + + return ExternalCatalogTableOptions + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + storage_descriptor_repr = { + "inputFormat": "testpath.to.OrcInputFormat", + "locationUri": "gs://test/path/", + "outputFormat": "testpath.to.OrcOutputFormat", + "serDeInfo": { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_lib_name", + "parameters": {"key": "value"}, + }, + } + + CONNECTIONID = "connection123" + PARAMETERS = {"key": "value"} + STORAGEDESCRIPTOR = 
schema.StorageDescriptor.from_api_repr(storage_descriptor_repr) + EXTERNALCATALOGTABLEOPTIONS = { + "connectionId": "connection123", + "parameters": {"key": "value"}, + "storageDescriptor": STORAGEDESCRIPTOR.to_api_repr(), + } + + @pytest.mark.parametrize( + "connection_id,parameters,storage_descriptor", + [ + ( + CONNECTIONID, + PARAMETERS, + STORAGEDESCRIPTOR, + ), # set all parameters at once + (CONNECTIONID, None, None), # set only one parameter at a time + (None, PARAMETERS, None), + (None, None, STORAGEDESCRIPTOR), # set storage descriptor using obj + (None, None, storage_descriptor_repr), # set storage descriptor using dict + (None, None, None), # use default parameters + ], + ) + def test_ctor_initialization( + self, + connection_id, + parameters, + storage_descriptor, + ): + instance = self._make_one( + connection_id=connection_id, + parameters=parameters, + storage_descriptor=storage_descriptor, + ) + + assert instance.connection_id == connection_id + assert instance.parameters == parameters + + if isinstance(storage_descriptor, schema.StorageDescriptor): + assert ( + instance.storage_descriptor.to_api_repr() + == storage_descriptor.to_api_repr() + ) + elif isinstance(storage_descriptor, dict): + assert instance.storage_descriptor.to_api_repr() == storage_descriptor + else: + assert instance.storage_descriptor is None + + @pytest.mark.parametrize( + "connection_id,parameters,storage_descriptor", + [ + pytest.param( + 123, + PARAMETERS, + STORAGEDESCRIPTOR, + id="connection_id-invalid-type", + ), + pytest.param( + CONNECTIONID, + 123, + STORAGEDESCRIPTOR, + id="parameters-invalid-type", + ), + pytest.param( + CONNECTIONID, + PARAMETERS, + 123, + id="storage_descriptor-invalid-type", + ), + ], + ) + def test_ctor_invalid_input( + self, + connection_id: str, + parameters: Dict[str, Any], + storage_descriptor: Optional[schema.StorageDescriptor], + ): + with pytest.raises(TypeError) as e: + external_config.ExternalCatalogTableOptions( + 
connection_id=connection_id, + parameters=parameters, + storage_descriptor=storage_descriptor, + ) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + instance = self._make_one( + connection_id=self.CONNECTIONID, + parameters=self.PARAMETERS, + storage_descriptor=self.STORAGEDESCRIPTOR, + ) + + result = instance.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + + assert result == expected + + def test_from_api_repr(self): + result = self._make_one( + connection_id=self.CONNECTIONID, + parameters=self.PARAMETERS, + storage_descriptor=self.STORAGEDESCRIPTOR, + ) + + instance = self._make_one() + api_repr = self.EXTERNALCATALOGTABLEOPTIONS + result = instance.from_api_repr(api_repr) + + assert isinstance(result, external_config.ExternalCatalogTableOptions) + assert result._properties == api_repr From c928620e3f0b7ed4c5734017ee5632e6829a1c7b Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Jan 2025 14:59:15 +0000 Subject: [PATCH 2/2] Adds ExternalCatalogTableOptions and tests --- google/cloud/bigquery/external_config.py | 6 +- google/cloud/bigquery/magics/magics.py | 2 +- google/cloud/bigquery/table.py | 27 +++++--- tests/unit/test_table.py | 87 ++++++++++++++++++++++++ 4 files changed, 110 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 02a479558..73c4acabf 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -1109,7 +1109,7 @@ def __init__( self.storage_descriptor = storage_descriptor @property - def connection_id(self): + def connection_id(self) -> Optional[str]: """Optional. The connection specifying the credentials to be used to read external storage, such as Azure Blob, Cloud Storage, or S3. 
The connection is needed to read the open source table from @@ -1125,7 +1125,7 @@ def connection_id(self, value: Optional[str]): self._properties["connectionId"] = value @property - def parameters(self) -> Any: + def parameters(self) -> Union[Dict[str, Any], None]: """Optional. A map of key value pairs defining the parameters and properties of the open source table. Corresponds with hive meta store table parameters. Maximum size of 4Mib. @@ -1150,7 +1150,7 @@ def storage_descriptor(self) -> Any: return None @storage_descriptor.setter - def storage_descriptor(self, value): + def storage_descriptor(self, value: Union[schema.StorageDescriptor, dict, None]): value = _helpers._isinstance_or_raise( value, (schema.StorageDescriptor, dict), none_allowed=True ) diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index b153d959a..a5be95185 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -56,7 +56,7 @@ bigquery_magics = None -IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) +IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) # type: ignore class Context(object): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index ac27030e5..fa8d81962 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1026,7 +1026,9 @@ def table_constraints(self) -> Optional["TableConstraints"]: return table_constraints @property - def external_catalog_table_options(self): + def external_catalog_table_options( + self, + ) -> Optional[external_config.ExternalCatalogTableOptions]: """Options defining open source compatible datasets living in the BigQuery catalog. 
Contains metadata of open source database, schema or namespace represented by the current dataset.""" @@ -1035,17 +1037,26 @@ def external_catalog_table_options(self): self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] ) if prop is not None: - prop = external_config.ExternalCatalogTableOptions.from_api_repr(prop) - return prop + return external_config.ExternalCatalogTableOptions.from_api_repr(prop) + return None @external_catalog_table_options.setter - def external_catalog_table_options(self, value): + def external_catalog_table_options( + self, value: Union[external_config.ExternalCatalogTableOptions, dict, None] + ): value = _helpers._isinstance_or_raise( - value, external_config.ExternalCatalogTableOptions, none_allowed=True + value, + (external_config.ExternalCatalogTableOptions, dict), + none_allowed=True, ) - self._properties[ - self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] - ] = value.to_api_repr() + if isinstance(value, external_config.ExternalCatalogTableOptions): + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ] = value.to_api_repr() + else: + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ] = value @classmethod def from_string(cls, full_table_id: str) -> "Table": diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index e9d461e9d..de8b331f5 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -30,6 +30,7 @@ from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import exceptions +from google.cloud.bigquery import external_config from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference @@ -5879,6 +5880,92 @@ def test_from_api_repr_only_foreign_keys_resource(self): self.assertIsNotNone(instance.foreign_keys) +class TestExternalCatalogTableOptions: + PROJECT = "test-project" + DATASET_ID = "test_dataset" + TABLE_ID = "coffee_table" + DATASET = 
DatasetReference(PROJECT, DATASET_ID) + TABLEREF = DATASET.table(TABLE_ID) + + @staticmethod + def _get_target_class(self): + from google.cloud.bigquery.table import Table + + return Table + + def _make_one(self, *args, **kw): + return self._get_target_class(self)(*args, **kw) + + EXTERNALCATALOGTABLEOPTIONS = { + "connection_id": "connection123", + "parameters": {"key": "value"}, + "storage_descriptor": { + "input_format": "testpath.to.OrcInputFormat", + "location_uri": "gs://test/path/", + "output_format": "testpath.to.OrcOutputFormat", + "serde_info": { + "serialization_library": "testpath.to.LazySimpleSerDe", + "name": "serde_lib_name", + "parameters": {"key": "value"}, + }, + }, + } + + def test_external_catalog_table_options_default_initialization(self): + table = self._make_one(self.TABLEREF) + + assert table.external_catalog_table_options is None + + def test_external_catalog_table_options_valid_inputs(self): + table = self._make_one(self.TABLEREF) + + # supplied in api_repr format + table.external_catalog_table_options = self.EXTERNALCATALOGTABLEOPTIONS + result = table.external_catalog_table_options.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + assert result == expected + + # supplied in obj format + ecto = external_config.ExternalCatalogTableOptions.from_api_repr( + self.EXTERNALCATALOGTABLEOPTIONS + ) + assert isinstance(ecto, external_config.ExternalCatalogTableOptions) + + table.external_catalog_table_options = ecto + result = table.external_catalog_table_options.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + assert result == expected + + def test_external_catalog_table_options_invalid_input(self): + table = self._make_one(self.TABLEREF) + + # invalid on the whole + with pytest.raises(TypeError) as e: + table.external_catalog_table_options = 123 + + # Looking for the first word from the string "Pass as..." 
+ assert "Pass " in str(e.value) + + def test_external_catalog_table_options_to_api_repr(self): + table = self._make_one(self.TABLEREF) + + table.external_catalog_table_options = self.EXTERNALCATALOGTABLEOPTIONS + result = table.external_catalog_table_options.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + assert result == expected + + def test_external_catalog_table_options_from_api_repr(self): + table = self._make_one(self.TABLEREF) + + table.external_catalog_table_options = self.EXTERNALCATALOGTABLEOPTIONS + ecto = external_config.ExternalCatalogTableOptions.from_api_repr( + self.EXTERNALCATALOGTABLEOPTIONS + ) + result = ecto.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + assert result == expected + + @pytest.mark.parametrize( "table_path", (