From 1a9431d9e02eeb99e4712b61c623f9cca80134a6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 30 Sep 2021 11:53:30 -0500 Subject: [PATCH] feat: add `AvroOptions` to configure AVRO external data (#994) * feat: add `AvroOptions` to configure AVRO external data Also: * Unify `ExternalConfig` class to use `_properties` for everything. This does result in more code, but it should make maintenance easier as it aligns with our other mutable resource classes. * Adds `bigtable_options`, `csv_options`, and `google_sheets_options` properties. This aligns with `parquet_options`. * remove unnecessary check for options in to_api_repr * add missing tests for to_api_repr * remove redundant type identifiers --- docs/format_options.rst | 6 + docs/reference.rst | 5 + google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/external_config.py | 149 +++++++++-- google/cloud/bigquery/format_options.py | 54 +++- tests/unit/test_external_config.py | 327 +++++++++++++++++++++-- tests/unit/test_format_options.py | 23 ++ 7 files changed, 518 insertions(+), 48 deletions(-) create mode 100644 docs/format_options.rst diff --git a/docs/format_options.rst b/docs/format_options.rst new file mode 100644 index 000000000..b3948209e --- /dev/null +++ b/docs/format_options.rst @@ -0,0 +1,6 @@ +BigQuery Format Options +======================= + +.. automodule:: google.cloud.bigquery.format_options + :members: + :undoc-members: diff --git a/docs/reference.rst b/docs/reference.rst index f4f11abc9..d2d2eed31 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -167,6 +167,11 @@ External Configuration external_config.CSVOptions external_config.GoogleSheetsOptions +.. toctree:: + :maxdepth: 2 + + format_options + Magics ====== diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 931e0f3e6..d2b1dd26d 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -50,6 +50,7 @@ from google.cloud.bigquery.external_config import CSVOptions from google.cloud.bigquery.external_config import GoogleSheetsOptions from google.cloud.bigquery.external_config import ExternalSourceFormat +from google.cloud.bigquery.format_options import AvroOptions from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import Compression from google.cloud.bigquery.job import CopyJob @@ -144,6 +145,7 @@ "PolicyTagList", "UDFResource", "ExternalConfig", + "AvroOptions", "BigtableOptions", "BigtableColumnFamily", "BigtableColumn", diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index f1692ba50..5f284c639 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -22,13 +22,13 @@ import base64 import copy -from typing import FrozenSet, Iterable, Optional +from typing import FrozenSet, Iterable, Optional, Union from google.cloud.bigquery._helpers import _to_bytes from google.cloud.bigquery._helpers import _bytes_to_json from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery._helpers import _str_or_none -from google.cloud.bigquery.format_options import ParquetOptions +from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions from google.cloud.bigquery.schema import SchemaField @@ -548,7 +548,13 @@ def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions": return config -_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions, ParquetOptions) +_OPTION_CLASSES = ( + AvroOptions, + BigtableOptions, + CSVOptions, + GoogleSheetsOptions, + ParquetOptions, +) class HivePartitioningOptions(object): @@ -646,11 +652,6 @@ class ExternalConfig(object): def __init__(self, source_format): self._properties = {"sourceFormat": source_format} - self._options = None - for optcls in _OPTION_CLASSES: - if source_format == optcls._SOURCE_FORMAT: - self._options = optcls() - break @property def source_format(self): @@ -663,9 +664,17 @@ def source_format(self): return self._properties["sourceFormat"] @property - def options(self): - """Optional[Dict[str, Any]]: Source-specific options.""" - return self._options + def options(self) -> Optional[Union[_OPTION_CLASSES]]: + """Source-specific options.""" + for optcls in _OPTION_CLASSES: + if self.source_format == optcls._SOURCE_FORMAT: + options = optcls() + self._properties.setdefault(optcls._RESOURCE_NAME, {}) + options._properties = self._properties[optcls._RESOURCE_NAME] + return options + + # No matching source format found. + return None @property def autodetect(self): @@ -815,23 +824,120 @@ def schema(self, value): self._properties["schema"] = prop @property - def parquet_options(self): - """Optional[google.cloud.bigquery.format_options.ParquetOptions]: Additional - properties to set if ``sourceFormat`` is set to PARQUET. + def avro_options(self) -> Optional[AvroOptions]: + """Additional properties to set if ``sourceFormat`` is set to AVRO. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.avro_options + """ + if self.source_format == ExternalSourceFormat.AVRO: + self._properties.setdefault(AvroOptions._RESOURCE_NAME, {}) + resource = self._properties.get(AvroOptions._RESOURCE_NAME) + if resource is None: + return None + options = AvroOptions() + options._properties = resource + return options + + @avro_options.setter + def avro_options(self, value): + if self.source_format != ExternalSourceFormat.AVRO: + msg = f"Cannot set Avro options, source format is {self.source_format}" + raise TypeError(msg) + self._properties[AvroOptions._RESOURCE_NAME] = value._properties + + @property + def bigtable_options(self) -> Optional[BigtableOptions]: + """Additional properties to set if ``sourceFormat`` is set to BIGTABLE. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.bigtable_options + """ + if self.source_format == ExternalSourceFormat.BIGTABLE: + self._properties.setdefault(BigtableOptions._RESOURCE_NAME, {}) + resource = self._properties.get(BigtableOptions._RESOURCE_NAME) + if resource is None: + return None + options = BigtableOptions() + options._properties = resource + return options + + @bigtable_options.setter + def bigtable_options(self, value): + if self.source_format != ExternalSourceFormat.BIGTABLE: + msg = f"Cannot set Bigtable options, source format is {self.source_format}" + raise TypeError(msg) + self._properties[BigtableOptions._RESOURCE_NAME] = value._properties + + @property + def csv_options(self) -> Optional[CSVOptions]: + """Additional properties to set if ``sourceFormat`` is set to CSV. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.csv_options + """ + if self.source_format == ExternalSourceFormat.CSV: + self._properties.setdefault(CSVOptions._RESOURCE_NAME, {}) + resource = self._properties.get(CSVOptions._RESOURCE_NAME) + if resource is None: + return None + options = CSVOptions() + options._properties = resource + return options + + @csv_options.setter + def csv_options(self, value): + if self.source_format != ExternalSourceFormat.CSV: + msg = f"Cannot set CSV options, source format is {self.source_format}" + raise TypeError(msg) + self._properties[CSVOptions._RESOURCE_NAME] = value._properties + + @property + def google_sheets_options(self) -> Optional[GoogleSheetsOptions]: + """Additional properties to set if ``sourceFormat`` is set to + GOOGLE_SHEETS. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.google_sheets_options + """ + if self.source_format == ExternalSourceFormat.GOOGLE_SHEETS: + self._properties.setdefault(GoogleSheetsOptions._RESOURCE_NAME, {}) + resource = self._properties.get(GoogleSheetsOptions._RESOURCE_NAME) + if resource is None: + return None + options = GoogleSheetsOptions() + options._properties = resource + return options + + @google_sheets_options.setter + def google_sheets_options(self, value): + if self.source_format != ExternalSourceFormat.GOOGLE_SHEETS: + msg = f"Cannot set Google Sheets options, source format is {self.source_format}" + raise TypeError(msg) + self._properties[GoogleSheetsOptions._RESOURCE_NAME] = value._properties + + @property + def parquet_options(self) -> Optional[ParquetOptions]: + """Additional properties to set if ``sourceFormat`` is set to PARQUET. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.parquet_options """ - if self.source_format != ExternalSourceFormat.PARQUET: + if self.source_format == ExternalSourceFormat.PARQUET: + self._properties.setdefault(ParquetOptions._RESOURCE_NAME, {}) + resource = self._properties.get(ParquetOptions._RESOURCE_NAME) + if resource is None: return None - return self._options + options = ParquetOptions() + options._properties = resource + return options @parquet_options.setter def parquet_options(self, value): if self.source_format != ExternalSourceFormat.PARQUET: msg = f"Cannot set Parquet options, source format is {self.source_format}" raise TypeError(msg) - self._options = value + self._properties[ParquetOptions._RESOURCE_NAME] = value._properties def to_api_repr(self) -> dict: """Build an API representation of this object. @@ -841,10 +947,6 @@ def to_api_repr(self) -> dict: A dictionary in the format used by the BigQuery API. """ config = copy.deepcopy(self._properties) - if self.options is not None: - r = self.options.to_api_repr() - if r != {}: - config[self.options._RESOURCE_NAME] = r return config @classmethod @@ -862,10 +964,5 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig": ExternalConfig: Configuration parsed from ``resource``. """ config = cls(resource["sourceFormat"]) - for optcls in _OPTION_CLASSES: - opts = resource.get(optcls._RESOURCE_NAME) - if opts is not None: - config._options = optcls.from_api_repr(opts) - break config._properties = copy.deepcopy(resource) return config diff --git a/google/cloud/bigquery/format_options.py b/google/cloud/bigquery/format_options.py index 2c9a2ce20..1208565a9 100644 --- a/google/cloud/bigquery/format_options.py +++ b/google/cloud/bigquery/format_options.py @@ -13,7 +13,59 @@ # limitations under the License. import copy -from typing import Dict +from typing import Dict, Optional + + +class AvroOptions: + """Options if source format is set to AVRO.""" + + _SOURCE_FORMAT = "AVRO" + _RESOURCE_NAME = "avroOptions" + + def __init__(self): + self._properties = {} + + @property + def use_avro_logical_types(self) -> Optional[bool]: + """[Optional] If sourceFormat is set to 'AVRO', indicates whether to + interpret logical types as the corresponding BigQuery data type (for + example, TIMESTAMP), instead of using the raw type (for example, + INTEGER). + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#AvroOptions.FIELDS.use_avro_logical_types + """ + return self._properties.get("useAvroLogicalTypes") + + @use_avro_logical_types.setter + def use_avro_logical_types(self, value): + self._properties["useAvroLogicalTypes"] = value + + @classmethod + def from_api_repr(cls, resource: Dict[str, bool]) -> "AvroOptions": + """Factory: construct an instance from a resource dict. + + Args: + resource (Dict[str, bool]): + Definition of a :class:`~.format_options.AvroOptions` instance in + the same representation as is returned from the API. + + Returns: + :class:`~.format_options.AvroOptions`: + Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, bool]: + A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) class ParquetOptions: diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 3dc9dd179..3ef61d738 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -163,7 +163,7 @@ def test_to_api_repr_sheets(self): options = external_config.GoogleSheetsOptions() options.skip_leading_rows = 123 options.range = "Sheet1!A5:B10" - ec._options = options + ec.google_sheets_options = options exp_resource = { "sourceFormat": "GOOGLE_SHEETS", @@ -277,7 +277,7 @@ def test_to_api_repr_csv(self): options.quote_character = "quote" options.skip_leading_rows = 123 options.allow_jagged_rows = False - ec._options = options + ec.csv_options = options exp_resource = { "sourceFormat": "CSV", @@ -368,7 +368,7 @@ def test_to_api_repr_bigtable(self): options = external_config.BigtableOptions() options.ignore_unspecified_column_families = True options.read_rowkey_as_string = False - ec._options = options + ec.bigtable_options = options fam1 = external_config.BigtableColumnFamily() fam1.family_id = "familyId" @@ -425,10 +425,166 @@ def test_to_api_repr_bigtable(self): self.assertEqual(got_resource, exp_resource) - def test_parquet_options_getter(self): + def test_avro_options_getter_and_setter(self): + from google.cloud.bigquery.external_config import AvroOptions + + options = AvroOptions.from_api_repr({"useAvroLogicalTypes": True}) + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.AVRO) + + self.assertIsNone(ec.avro_options.use_avro_logical_types) + + ec.avro_options = options + + self.assertTrue(ec.avro_options.use_avro_logical_types) + self.assertIs( + ec.options._properties, ec._properties[AvroOptions._RESOURCE_NAME] + ) + self.assertIs( + ec.avro_options._properties, ec._properties[AvroOptions._RESOURCE_NAME] + ) + + def test_avro_options_getter_empty(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.AVRO) + self.assertIsNotNone(ec.avro_options) + + def test_avro_options_getter_wrong_format(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + self.assertIsNone(ec.avro_options) + + def test_avro_options_setter_wrong_format(self): + from google.cloud.bigquery.format_options import AvroOptions + + options = AvroOptions() + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + + with self.assertRaisesRegex(TypeError, "Cannot set.*source format is CSV"): + ec.avro_options = options + + def test_bigtable_options_getter_and_setter(self): + from google.cloud.bigquery.external_config import BigtableOptions + + options = BigtableOptions.from_api_repr( + {"ignoreUnspecifiedColumnFamilies": True, "readRowkeyAsString": False} + ) + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.BIGTABLE + ) + + self.assertIsNone(ec.bigtable_options.ignore_unspecified_column_families) + self.assertIsNone(ec.bigtable_options.read_rowkey_as_string) + + ec.bigtable_options = options + + self.assertTrue(ec.bigtable_options.ignore_unspecified_column_families) + self.assertFalse(ec.bigtable_options.read_rowkey_as_string) + self.assertIs( + ec.options._properties, ec._properties[BigtableOptions._RESOURCE_NAME] + ) + self.assertIs( + ec.bigtable_options._properties, + ec._properties[BigtableOptions._RESOURCE_NAME], + ) + + def test_bigtable_options_getter_empty(self): + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.BIGTABLE + ) + self.assertIsNotNone(ec.bigtable_options) + + def test_bigtable_options_getter_wrong_format(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + self.assertIsNone(ec.bigtable_options) + + def test_bigtable_options_setter_wrong_format(self): + from google.cloud.bigquery.external_config import BigtableOptions + + options = BigtableOptions() + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + + with self.assertRaisesRegex(TypeError, "Cannot set.*source format is CSV"): + ec.bigtable_options = options + + def test_csv_options_getter_and_setter(self): + from google.cloud.bigquery.external_config import CSVOptions + + options = CSVOptions.from_api_repr( + {"allowJaggedRows": True, "allowQuotedNewlines": False} + ) + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + + self.assertIsNone(ec.csv_options.allow_jagged_rows) + self.assertIsNone(ec.csv_options.allow_quoted_newlines) + + ec.csv_options = options + + self.assertTrue(ec.csv_options.allow_jagged_rows) + self.assertFalse(ec.csv_options.allow_quoted_newlines) + self.assertIs(ec.options._properties, ec._properties[CSVOptions._RESOURCE_NAME]) + self.assertIs( + ec.csv_options._properties, ec._properties[CSVOptions._RESOURCE_NAME] + ) + + def test_csv_options_getter_empty(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + self.assertIsNotNone(ec.csv_options) + + def test_csv_options_getter_wrong_format(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.AVRO) + self.assertIsNone(ec.csv_options) + + def test_csv_options_setter_wrong_format(self): + from google.cloud.bigquery.external_config import CSVOptions + + options = CSVOptions() + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.AVRO) + + with self.assertRaisesRegex(TypeError, "Cannot set.*source format is AVRO"): + ec.csv_options = options + + def test_google_sheets_options_getter_and_setter(self): + from google.cloud.bigquery.external_config import GoogleSheetsOptions + + options = GoogleSheetsOptions.from_api_repr({"skipLeadingRows": "123"}) + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.GOOGLE_SHEETS + ) + + self.assertIsNone(ec.google_sheets_options.skip_leading_rows) + + ec.google_sheets_options = options + + self.assertEqual(ec.google_sheets_options.skip_leading_rows, 123) + self.assertIs( + ec.options._properties, ec._properties[GoogleSheetsOptions._RESOURCE_NAME] + ) + self.assertIs( + ec.google_sheets_options._properties, + ec._properties[GoogleSheetsOptions._RESOURCE_NAME], + ) + + def test_google_sheets_options_getter_empty(self): + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.GOOGLE_SHEETS + ) + self.assertIsNotNone(ec.google_sheets_options) + + def test_google_sheets_options_getter_wrong_format(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + self.assertIsNone(ec.google_sheets_options) + + def test_google_sheets_options_setter_wrong_format(self): + from google.cloud.bigquery.external_config import GoogleSheetsOptions + + options = GoogleSheetsOptions() + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + + with self.assertRaisesRegex(TypeError, "Cannot set.*source format is CSV"): + ec.google_sheets_options = options + + def test_parquet_options_getter_and_setter(self): from google.cloud.bigquery.format_options import ParquetOptions - parquet_options = ParquetOptions.from_api_repr( + options = ParquetOptions.from_api_repr( {"enumAsString": True, "enableListInference": False} ) ec = external_config.ExternalConfig( @@ -438,32 +594,50 @@ def test_parquet_options_getter(self): self.assertIsNone(ec.parquet_options.enum_as_string) self.assertIsNone(ec.parquet_options.enable_list_inference) - ec._options = parquet_options + ec.parquet_options = options self.assertTrue(ec.parquet_options.enum_as_string) self.assertFalse(ec.parquet_options.enable_list_inference) + self.assertIs( + ec.options._properties, ec._properties[ParquetOptions._RESOURCE_NAME] + ) + self.assertIs( + ec.parquet_options._properties, + ec._properties[ParquetOptions._RESOURCE_NAME], + ) - self.assertIs(ec.parquet_options, ec.options) - - def test_parquet_options_getter_non_parquet_format(self): - ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) - self.assertIsNone(ec.parquet_options) + def test_parquet_options_set_properties(self): + """Check that setting sub-properties works without having to create a + new ParquetOptions instance. - def test_parquet_options_setter(self): - from google.cloud.bigquery.format_options import ParquetOptions + This is required for compatibility with previous + ExternalConfig._options implementation. + """ - parquet_options = ParquetOptions.from_api_repr( - {"enumAsString": False, "enableListInference": True} - ) ec = external_config.ExternalConfig( external_config.ExternalSourceFormat.PARQUET ) - ec.parquet_options = parquet_options + self.assertIsNone(ec.parquet_options.enum_as_string) + self.assertIsNone(ec.parquet_options.enable_list_inference) + + ec.parquet_options.enum_as_string = True + ec.parquet_options.enable_list_inference = False + + self.assertTrue(ec.options.enum_as_string) + self.assertFalse(ec.options.enable_list_inference) + self.assertTrue(ec.parquet_options.enum_as_string) + self.assertFalse(ec.parquet_options.enable_list_inference) + + def test_parquet_options_getter_empty(self): + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.PARQUET + ) + self.assertIsNotNone(ec.parquet_options) - # Setting Parquet options should be reflected in the generic options attribute. - self.assertFalse(ec.options.enum_as_string) - self.assertTrue(ec.options.enable_list_inference) + def test_parquet_options_getter_non_parquet_format(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + self.assertIsNone(ec.parquet_options) def test_parquet_options_setter_non_parquet_format(self): from google.cloud.bigquery.format_options import ParquetOptions @@ -514,7 +688,7 @@ def test_to_api_repr_parquet(self): options = ParquetOptions.from_api_repr( dict(enumAsString=False, enableListInference=True) ) - ec._options = options + ec.parquet_options = options exp_resource = { "sourceFormat": external_config.ExternalSourceFormat.PARQUET, @@ -584,6 +758,117 @@ def test_to_api_repr_decimal_target_types_unset(self): ec.decimal_target_types = None # No error if unsetting when already unset. +class BigtableOptions(unittest.TestCase): + def test_to_api_repr(self): + options = external_config.BigtableOptions() + family1 = external_config.BigtableColumnFamily() + column1 = external_config.BigtableColumn() + column1.qualifier_string = "col1" + column1.field_name = "bqcol1" + column1.type_ = "FLOAT" + column1.encoding = "TEXT" + column1.only_read_latest = True + column2 = external_config.BigtableColumn() + column2.qualifier_encoded = b"col2" + column2.field_name = "bqcol2" + column2.type_ = "STRING" + column2.only_read_latest = False + family1.family_id = "family1" + family1.type_ = "INTEGER" + family1.encoding = "BINARY" + family1.columns = [column1, column2] + family1.only_read_latest = False + family2 = external_config.BigtableColumnFamily() + column3 = external_config.BigtableColumn() + column3.qualifier_string = "col3" + family2.family_id = "family2" + family2.type_ = "BYTES" + family2.encoding = "TEXT" + family2.columns = [column3] + family2.only_read_latest = True + options.column_families = [family1, family2] + options.ignore_unspecified_column_families = False + options.read_rowkey_as_string = True + + resource = options.to_api_repr() + + expected_column_families = [ + { + "familyId": "family1", + "type": "INTEGER", + "encoding": "BINARY", + "columns": [ + { + "qualifierString": "col1", + "fieldName": "bqcol1", + "type": "FLOAT", + "encoding": "TEXT", + "onlyReadLatest": True, + }, + { + "qualifierEncoded": "Y29sMg==", + "fieldName": "bqcol2", + "type": "STRING", + "onlyReadLatest": False, + }, + ], + "onlyReadLatest": False, + }, + { + "familyId": "family2", + "type": "BYTES", + "encoding": "TEXT", + "columns": [{"qualifierString": "col3"}], + "onlyReadLatest": True, + }, + ] + self.maxDiff = None + self.assertEqual( + resource, + { + "columnFamilies": expected_column_families, + "ignoreUnspecifiedColumnFamilies": False, + "readRowkeyAsString": True, + }, + ) + + +class CSVOptions(unittest.TestCase): + def test_to_api_repr(self): + options = external_config.CSVOptions() + options.field_delimiter = "\t" + options.skip_leading_rows = 42 + options.quote_character = '"' + options.allow_quoted_newlines = True + options.allow_jagged_rows = False + options.encoding = "UTF-8" + + resource = options.to_api_repr() + + self.assertEqual( + resource, + { + "fieldDelimiter": "\t", + "skipLeadingRows": "42", + "quote": '"', + "allowQuotedNewlines": True, + "allowJaggedRows": False, + "encoding": "UTF-8", + }, + ) + + +class TestGoogleSheetsOptions(unittest.TestCase): + def test_to_api_repr(self): + options = external_config.GoogleSheetsOptions() + options.range = "sheet1!A1:B20" + options.skip_leading_rows = 107 + + resource = options.to_api_repr() + + self.assertEqual(resource, {"range": "sheet1!A1:B20", "skipLeadingRows": "107"}) + + def _copy_and_update(d, u): d = copy.deepcopy(d) d.update(u) diff --git a/tests/unit/test_format_options.py b/tests/unit/test_format_options.py index ab5f9e05c..c8fecbfa6 100644 --- a/tests/unit/test_format_options.py +++ b/tests/unit/test_format_options.py @@ -13,6 +13,29 @@ # limitations under the License. +class TestAvroOptions: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.format_options import AvroOptions + + return AvroOptions + + def test_ctor(self): + config = self._get_target_class()() + assert config.use_avro_logical_types is None + + def test_from_api_repr(self): + config = self._get_target_class().from_api_repr({"useAvroLogicalTypes": True}) + assert config.use_avro_logical_types + + def test_to_api_repr(self): + config = self._get_target_class()() + config.use_avro_logical_types = False + + result = config.to_api_repr() + assert result == {"useAvroLogicalTypes": False} + + class TestParquetOptions: @staticmethod def _get_target_class():