diff --git a/azure-pipelines-cloud-db-integration.yml b/azure-pipelines-cloud-db-integration.yml new file mode 100644 index 000000000000..a27dc5047b95 --- /dev/null +++ b/azure-pipelines-cloud-db-integration.yml @@ -0,0 +1,49 @@ +stages: + - stage: cloud_db_integration + pool: + vmImage: 'ubuntu-latest' + + jobs: + - job: bigquery_expectations + timeoutInMinutes: 0 # Maximize the time that pipelines remain open (6 hours currently) + variables: + python.version: '3.8' + + strategy: + matrix: + expectations_cfe: + test_script: 'tests/test_definitions/test_expectations_cfe.py' + expectations: + test_script: 'tests/test_definitions/test_expectations.py' + maxParallel: 1 + + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '$(python.version)' + displayName: 'Use Python $(python.version)' + + - bash: python -m pip install --upgrade pip==20.2.4 + displayName: 'Update pip' + + - script: | + pip install -r requirements-dev.txt + + displayName: 'Install dependencies' + + - task: DownloadSecureFile@1 + name: gcp_authkey + displayName: 'Download Google Service Account' + inputs: + secureFile: 'superconductive-service-acct.json' + retryCount: '2' + + - script: | + pip install pytest pytest-azurepipelines + pytest -v --no-spark --no-postgresql --bigquery --napoleon-docstrings --junitxml=junit/test-results.xml --cov=. --cov-report=xml --cov-report=html --ignore=tests/cli --ignore=tests/integration/usage_statistics $(test_script) + + displayName: 'pytest' + env: + GOOGLE_APPLICATION_CREDENTIALS: $(gcp_authkey.secureFilePath) + GE_TEST_BIGQUERY_PROJECT: $(GE_TEST_BIGQUERY_PROJECT) + GE_TEST_BIGQUERY_DATASET: $(GE_TEST_BIGQUERY_DATASET) diff --git a/docs/contributing/contributing_test.md b/docs/contributing/contributing_test.md index 22aa456ce417..2d98e80f93c8 100644 --- a/docs/contributing/contributing_test.md +++ b/docs/contributing/contributing_test.md @@ -17,6 +17,22 @@ For example, you can run `pytest --no-spark --no-sqlalchemy` to skip all local b Note: as of early 2020, the tests generate many warnings. Most of these are generated by dependencies (pandas, sqlalchemy, etc.) You can suppress them with pytest’s `--disable-pytest-warnings` flag: `pytest --no-spark --no-sqlalchemy --disable-pytest-warnings`. +#### BigQuery tests + +To run BigQuery tests, you first need to complete the following steps: + +1. [Select or create a Cloud Platform project](https://console.cloud.google.com/project). +2. [Set up authentication](https://googleapis.dev/python/google-api-core/latest/auth.html). +3. In your project, [create a BigQuery dataset](https://cloud.google.com/bigquery/docs/datasets) named `test_ci` and [set the dataset default table expiration](https://cloud.google.com/bigquery/docs/updating-datasets#table-expiration) to `.1` days (2.4 hours). + +After setting up authentication, you can run the tests against your own project by setting the environment variable `GE_TEST_BIGQUERY_PROJECT`, e.g. + +```bash +GE_TEST_BIGQUERY_PROJECT=<YOUR_GCP_PROJECT> pytest tests/test_definitions/test_expectations_cfe.py --bigquery --no-spark --no-postgresql +``` + +If you prefer to use a dataset other than "test_ci", you can specify it with the environment variable `GE_TEST_BIGQUERY_DATASET`. + ### Writing unit and integration tests Production code in Great Expectations must be thoroughly tested. In general, we insist on unit tests for all branches of every method, including likely error states. Most new feature contributions should include several unit tests.
Contributions that modify or extend existing features should include a test of the new behavior. @@ -25,7 +42,7 @@ Experimental code in Great Expectations need only be tested lightly. We are movi Most of Great Expectations’ integration testing is in the CLI, which naturally exercises most of the core code paths. Because integration tests require a lot of developer time to maintain, most contributions should not include new integration tests, unless they change the CLI itself. -Note: we do not currently test Great Expectations against all types of SQL database. CI test coverage for SQL is limited to postgresql and sqlite. We have observed some bugs because of unsupported features or differences in SQL dialects, and we are actively working to improve dialect-specific support and testing. +Note: we do not currently test Great Expectations against all types of SQL databases. CI test coverage for SQL is limited to postgresql, sqlite, mssql, and BigQuery. We have observed some bugs because of unsupported features or differences in SQL dialects, and we are actively working to improve dialect-specific support and testing. ### Unit tests for Expectations One of Great Expectations’ important promises is that the same Expectation will produce the same result across all supported execution environments: pandas, sqlalchemy, and Spark. diff --git a/docs_rtd/contributing/testing.rst b/docs_rtd/contributing/testing.rst index d43d68223938..70eda29047f6 100644 --- a/docs_rtd/contributing/testing.rst +++ b/docs_rtd/contributing/testing.rst @@ -21,7 +21,7 @@ If you did not configure optional backends for testing, tests against these back You can suppress these tests by adding the following flags: - ``--no-postgresql`` will skip postgres tests -- ``--no-spark`` will skip spark tests +- ``--no-spark`` will skip spark tests - ``--no-sqlalchemy`` will skip all tests using sqlalchemy (i.e. all database backends) For example, you can run ``pytest --no-spark --no-sqlalchemy`` to skip all local backend tests (with the exception of the pandas backend). Please note that these tests will still be run by the CI as soon as you open a PR, so some tests might fail if your code changes affected them. @@ -34,6 +34,28 @@ Note: as of early 2020, the tests generate many warnings. Most of these are gene .. _contributing_testing__writing_unit_tests: +Running BigQuery tests +---------------------- + +To run BigQuery tests, you first need to complete the following steps: + +1. `Select or create a Cloud Platform project.`_ +2. `Set up authentication.`_ +3. `In your project, create a BigQuery dataset named "test_ci"`_ and `set the dataset default table expiration to .1 days`_ (2.4 hours). + +.. _Select or create a Cloud Platform project.: https://console.cloud.google.com/project +.. _Set up authentication.: https://googleapis.dev/python/google-api-core/latest/auth.html +.. _`In your project, create a BigQuery dataset named "test_ci"`: https://cloud.google.com/bigquery/docs/datasets +.. 
_`set the dataset default table expiration to .1 days`: https://cloud.google.com/bigquery/docs/updating-datasets#table-expiration + +After setting up authentication, you can run the tests against your own project by setting the environment variable ``GE_TEST_BIGQUERY_PROJECT``, e.g. + +.. code-block:: bash + + GE_TEST_BIGQUERY_PROJECT=<YOUR_GCP_PROJECT> pytest tests/test_definitions/test_expectations_cfe.py --bigquery --no-spark --no-postgresql -k bigquery + +If you prefer to use a dataset other than "test_ci", you can specify it with the environment variable ``GE_TEST_BIGQUERY_DATASET``. + Writing unit and integration tests ---------------------------------- @@ -43,7 +65,7 @@ Experimental code in Great Expectations need only be tested lightly. We are movi Most of Great Expectations' integration testing is in the CLI, which naturally exercises most of the core code paths. Because integration tests require a lot of developer time to maintain, most contributions should *not* include new integration tests, unless they change the CLI itself. -Note: we do not currently test Great Expectations against all types of SQL database. CI test coverage for SQL is limited to postgresql and sqlite. We have observed some bugs because of unsupported features or differences in SQL dialects, and we are actively working to improve dialect-specific support and testing. +Note: we do not currently test Great Expectations against all types of SQL databases. CI test coverage for SQL is limited to postgresql, sqlite, mssql, and BigQuery. We have observed some bugs because of unsupported features or differences in SQL dialects, and we are actively working to improve dialect-specific support and testing. Unit tests for Expectations @@ -113,7 +135,7 @@ Each item under ``datasets`` includes three entries: ``data``, ``schemas``, and **tests** ...define the tests to be executed against the dataframe. Each item in ``tests`` must have ``title``, ``exact_match_out``, ``in``, and ``out``. The test runner will execute the named Expectation once for each item, with the values in ``in`` supplied as kwargs. - + The test passes if the values in the expectation validation result correspond with the values in ``out``. If ``exact_match_out`` is true, then every field in the Expectation output must have a corresponding, matching field in ``out``. If it's false, then only the fields specified in ``out`` need to match. For most use cases, false is a better fit, because it allows narrower targeting of the relevant output. ``suppress_test_for`` is an optional parameter to disable an Expectation for a specific list of backends. diff --git a/great_expectations/dataset/sqlalchemy_dataset.py b/great_expectations/dataset/sqlalchemy_dataset.py index 2a98c35e29fd..301ada2221c2 100644 --- a/great_expectations/dataset/sqlalchemy_dataset.py +++ b/great_expectations/dataset/sqlalchemy_dataset.py @@ -93,6 +93,16 @@ try: import pybigquery.sqlalchemy_bigquery + ### + # NOTE: 20210816 - jdimatteo: A convention we rely on is for SqlAlchemy dialects + # to define an attribute "dialect". A PR has been submitted to fix this upstream + # with https://github.com/googleapis/python-bigquery-sqlalchemy/pull/251. If that + # fix isn't present, add this "dialect" attribute here: + if not hasattr(pybigquery.sqlalchemy_bigquery, "dialect"): + pybigquery.sqlalchemy_bigquery.dialect = ( + pybigquery.sqlalchemy_bigquery.BigQueryDialect + ) + + # Sometimes "pybigquery.sqlalchemy_bigquery" fails to self-register in certain environments, so we do it explicitly. 
# (see https://stackoverflow.com/questions/53284762-nosuchmoduleerror-cant-load-plugin-sqlalchemy-dialectssnowflake) registry.register("bigquery", "pybigquery.sqlalchemy_bigquery", "BigQueryDialect") @@ -542,32 +552,32 @@ def __init__( self._table = sa.Table(table_name, sa.MetaData(), schema=schema) # Get the dialect **for purposes of identifying types** - if self.engine.dialect.name.lower() in [ + dialect_name: str = self.engine.dialect.name.lower() + + if dialect_name in [ "postgresql", "mysql", "sqlite", "oracle", "mssql", ]: # These are the officially included and supported dialects by sqlalchemy self.dialect = import_library_module( module_name="sqlalchemy.dialects." + self.engine.dialect.name ) - - elif self.engine.dialect.name.lower() == "snowflake": + elif dialect_name == "snowflake": self.dialect = import_library_module( module_name="snowflake.sqlalchemy.snowdialect" ) - - elif self.engine.dialect.name.lower() == "redshift": + elif dialect_name == "redshift": self.dialect = import_library_module( module_name="sqlalchemy_redshift.dialect" ) - elif self.engine.dialect.name.lower() == "bigquery": + elif dialect_name == "bigquery": self.dialect = import_library_module( module_name="pybigquery.sqlalchemy_bigquery" ) - elif self.engine.dialect.name.lower() == "awsathena": + elif dialect_name == "awsathena": self.dialect = import_library_module( module_name="pyathena.sqlalchemy_athena" ) @@ -2093,14 +2104,16 @@ def _get_dialect_like_pattern_expression(self, column, like_pattern, positive=Tr try: # Bigquery - if isinstance( - self.sql_engine_dialect, pybigquery.sqlalchemy_bigquery.BigQueryDialect - ): + if hasattr(self.sql_engine_dialect, "BigQueryDialect"): dialect_supported = True except ( AttributeError, TypeError, ): # TypeError can occur if the driver was not installed and so is None + logger.debug( + "Unable to load BigQueryDialect dialect while running _get_dialect_like_pattern_expression", + exc_info=True, + ) pass if isinstance( diff --git a/great_expectations/execution_engine/sqlalchemy_execution_engine.py b/great_expectations/execution_engine/sqlalchemy_execution_engine.py index 44d87824c147..d7d3d57a533e 100644 --- a/great_expectations/execution_engine/sqlalchemy_execution_engine.py +++ b/great_expectations/execution_engine/sqlalchemy_execution_engine.py @@ -85,10 +85,20 @@ try: import pybigquery.sqlalchemy_bigquery - # Sometimes "pybigquery.sqlalchemy_bigquery" fails to self-register in certain environments, so we do it explicitly. + ### + # NOTE: 20210816 - jdimatteo: A convention we rely on is for SqlAlchemy dialects + # to define an attribute "dialect". A PR has been submitted to fix this upstream + # with https://github.com/googleapis/python-bigquery-sqlalchemy/pull/251. If that + # fix isn't present, add this "dialect" attribute here: + if not hasattr(pybigquery.sqlalchemy_bigquery, "dialect"): + pybigquery.sqlalchemy_bigquery.dialect = ( + pybigquery.sqlalchemy_bigquery.BigQueryDialect + ) + + # Sometimes "pybigquery.sqlalchemy_bigquery" fails to self-register in Azure (our CI/CD pipeline) in certain cases, so we do it explicitly. 
# (see https://stackoverflow.com/questions/53284762/nosuchmoduleerror-cant-load-plugin-sqlalchemy-dialectssnowflake) sa.dialects.registry.register( - "bigquery", "pybigquery.sqlalchemy_bigquery", "BigQueryDialect" + "bigquery", "pybigquery.sqlalchemy_bigquery", "dialect" ) try: getattr(pybigquery.sqlalchemy_bigquery, "INTEGER") diff --git a/great_expectations/expectations/core/expect_column_values_to_be_of_type.py b/great_expectations/expectations/core/expect_column_values_to_be_of_type.py index 194c9112624b..f04f505cc968 100644 --- a/great_expectations/expectations/core/expect_column_values_to_be_of_type.py +++ b/great_expectations/expectations/core/expect_column_values_to_be_of_type.py @@ -56,9 +56,19 @@ try: import pybigquery.sqlalchemy_bigquery - # Sometimes "pybigquery.sqlalchemy_bigquery" fails to self-register in certain environments, so we do it explicitly. + ### + # NOTE: 20210816 - jdimatteo: A convention we rely on is for SqlAlchemy dialects + # to define an attribute "dialect". A PR has been submitted to fix this upstream + # with https://github.com/googleapis/python-bigquery-sqlalchemy/pull/251. If that + # fix isn't present, add this "dialect" attribute here: + if not hasattr(pybigquery.sqlalchemy_bigquery, "dialect"): + pybigquery.sqlalchemy_bigquery.dialect = ( + pybigquery.sqlalchemy_bigquery.BigQueryDialect + ) + + # Sometimes "pybigquery.sqlalchemy_bigquery" fails to self-register in Azure (our CI/CD pipeline) in certain cases, so we do it explicitly. # (see https://stackoverflow.com/questions/53284762/nosuchmoduleerror-cant-load-plugin-sqlalchemy-dialectssnowflake) - registry.register("bigquery", "pybigquery.sqlalchemy_bigquery", "BigQueryDialect") + registry.register("bigquery", "pybigquery.sqlalchemy_bigquery", "dialect") try: getattr(pybigquery.sqlalchemy_bigquery, "INTEGER") bigquery_types_tuple = None diff --git a/great_expectations/expectations/metrics/util.py b/great_expectations/expectations/metrics/util.py index 3a364b161a0c..1de6cfb99b08 100644 --- a/great_expectations/expectations/metrics/util.py +++ b/great_expectations/expectations/metrics/util.py @@ -49,9 +49,18 @@ try: import pybigquery.sqlalchemy_bigquery - # Sometimes "pybigquery.sqlalchemy_bigquery" fails to self-register in certain environments, so we do it explicitly. + ### + # NOTE: 20210816 - jdimatteo: A convention we rely on is for SqlAlchemy dialects + # to define an attribute "dialect". A PR has been submitted to fix this upstream + # with https://github.com/googleapis/python-bigquery-sqlalchemy/pull/251. If that + # fix isn't present, add this "dialect" attribute here: + if not hasattr(pybigquery.sqlalchemy_bigquery, "dialect"): + pybigquery.sqlalchemy_bigquery.dialect = ( + pybigquery.sqlalchemy_bigquery.BigQueryDialect + ) + # Sometimes "pybigquery.sqlalchemy_bigquery" fails to self-register in Azure (our CI/CD pipeline) in certain cases, so we do it explicitly. 
# (see https://stackoverflow.com/questions/53284762/nosuchmoduleerror-cant-load-plugin-sqlalchemy-dialectssnowflake) - registry.register("bigquery", "pybigquery.sqlalchemy_bigquery", "BigQueryDialect") + registry.register("bigquery", "pybigquery.sqlalchemy_bigquery", "dialect") try: getattr(pybigquery.sqlalchemy_bigquery, "INTEGER") bigquery_types_tuple = None @@ -123,7 +132,7 @@ def get_dialect_regex_expression(column, regex, dialect, positive=True): try: # Bigquery - if issubclass(dialect.dialect, pybigquery.sqlalchemy_bigquery.BigQueryDialect): + if hasattr(dialect, "BigQueryDialect"): if positive: return sa.func.REGEXP_CONTAINS(column, literal(regex)) else: @@ -132,6 +141,10 @@ def get_dialect_regex_expression(column, regex, dialect, positive=True): AttributeError, TypeError, ): # TypeError can occur if the driver was not installed and so is None + logger.debug( + "Unable to load BigQueryDialect dialect while running get_dialect_regex_expression in expectations.metrics.util", + exc_info=True, + ) pass return None @@ -251,10 +264,10 @@ def column_reflection_fallback( columns_query: str = f""" SELECT SCHEMA_NAME(tab.schema_id) AS schema_name, - tab.name AS table_name, + tab.name AS table_name, col.column_id AS column_id, - col.name AS column_name, - t.name AS column_data_type, + col.name AS column_name, + t.name AS column_data_type, col.max_length AS column_max_length, col.precision AS column_precision FROM sys.tables AS tab @@ -264,7 +277,7 @@ def column_reflection_fallback( ON col.user_type_id = t.user_type_id WHERE tab.name = '{selectable}' ORDER BY schema_name, - table_name, + table_name, column_id """ col_info_query: TextClause = sa.text(columns_query) @@ -301,7 +314,7 @@ def get_dialect_like_pattern_expression(column, dialect, like_pattern, positive= try: # Bigquery - if isinstance(dialect, pybigquery.sqlalchemy_bigquery.BigQueryDialect): + if hasattr(dialect, "BigQueryDialect"): dialect_supported = True except ( AttributeError, diff --git a/great_expectations/self_check/util.py b/great_expectations/self_check/util.py index bc4563052e6f..e1c1e103651b 100644 --- a/great_expectations/self_check/util.py +++ b/great_expectations/self_check/util.py @@ -99,6 +99,59 @@ sqliteDialect = None SQLITE_TYPES = {} +try: + import pybigquery.sqlalchemy_bigquery + import pybigquery.sqlalchemy_bigquery as BigQueryDialect + + ### + # NOTE: 20210816 - jdimatteo: A convention we rely on is for SqlAlchemy dialects + # to define an attribute "dialect". A PR has been submitted to fix this upstream + # with https://github.com/googleapis/python-bigquery-sqlalchemy/pull/251. If that + # fix isn't present, add this "dialect" attribute here: + if not hasattr(pybigquery.sqlalchemy_bigquery, "dialect"): + pybigquery.sqlalchemy_bigquery.dialect = ( + pybigquery.sqlalchemy_bigquery.BigQueryDialect + ) + + # Sometimes "pybigquery.sqlalchemy_bigquery" fails to self-register in Azure (our CI/CD pipeline) in certain cases, so we do it explicitly. 
+ # (see https://stackoverflow.com/questions/53284762/nosuchmoduleerror-cant-load-plugin-sqlalchemy-dialectssnowflake) + sqlalchemy.dialects.registry.register( + "bigquery", "pybigquery.sqlalchemy_bigquery", "dialect" + ) + try: + getattr(pybigquery.sqlalchemy_bigquery, "INTEGER") + bigquery_types_tuple = {} + BIGQUERY_TYPES = { + "INTEGER": pybigquery.sqlalchemy_bigquery.INTEGER, + "NUMERIC": pybigquery.sqlalchemy_bigquery.NUMERIC, + "STRING": pybigquery.sqlalchemy_bigquery.STRING, + "BIGNUMERIC": pybigquery.sqlalchemy_bigquery.BIGNUMERIC, + "BYTES": pybigquery.sqlalchemy_bigquery.BYTES, + "BOOL": pybigquery.sqlalchemy_bigquery.BOOL, + "BOOLEAN": pybigquery.sqlalchemy_bigquery.BOOLEAN, + "TIMESTAMP": pybigquery.sqlalchemy_bigquery.TIMESTAMP, + "TIME": pybigquery.sqlalchemy_bigquery.TIME, + "FLOAT": pybigquery.sqlalchemy_bigquery.FLOAT, + "DATE": pybigquery.sqlalchemy_bigquery.DATE, + "DATETIME": pybigquery.sqlalchemy_bigquery.DATETIME, + } + except AttributeError: + # In older versions of the pybigquery driver, types were not exported, so we use a hack + logger.warning( + "Old pybigquery driver version detected. Consider upgrading to 0.4.14 or later." + ) + from collections import namedtuple + + BigQueryTypes = namedtuple( + "BigQueryTypes", sorted(pybigquery.sqlalchemy_bigquery._type_map) + ) + bigquery_types_tuple = BigQueryTypes(**pybigquery.sqlalchemy_bigquery._type_map) +except (ImportError, AttributeError): + bigquery_types_tuple = None + BigQueryDialect = None + pybigquery = None + + try: import sqlalchemy.dialects.postgresql as postgresqltypes from sqlalchemy.dialects.postgresql import dialect as postgresqlDialect @@ -249,7 +302,7 @@ def get_dataset( table_name=None, sqlite_db_path=None, ): - """Utility to create datasets for json-formatted tests.""" + """Utility to create datasets for json-formatted tests""" df = pd.DataFrame(data) if dataset_type == "PandasDataset": if schemas and "pandas" in schemas: @@ -466,6 +519,30 @@ def get_dataset( return SqlAlchemyDataset( custom_sql=custom_sql, engine=engine, profiler=profiler, caching=caching ) + elif dataset_type == "bigquery": + if not create_engine: + return None + engine = _create_bigquery_engine() + schema = None + if schemas and dataset_type in schemas: + schema = schemas[dataset_type] + # BigQuery does not allow for column names to have spaces + schema = {k.replace(" ", "_"): v for k, v in schema.items()} + + df.columns = df.columns.str.replace(" ", "_") + + if table_name is None: + table_name = generate_test_table_name() + df.to_sql( + name=table_name, + con=engine, + index=False, + if_exists="replace", + ) + custom_sql = f"SELECT * FROM {_bigquery_dataset()}.{table_name}" + return SqlAlchemyDataset( + custom_sql=custom_sql, engine=engine, profiler=profiler, caching=caching + ) elif dataset_type == "mssql": if not create_engine: @@ -639,7 +716,6 @@ def get_dataset( columns = list(data.keys()) spark_df = spark.createDataFrame(data_reshaped, columns) return SparkDFDataset(spark_df, profiler=profiler, caching=caching) - else: raise ValueError("Unknown dataset_type " + str(dataset_type)) @@ -689,7 +765,7 @@ def get_test_validator_with_data( return build_pandas_validator_with_data(df=df) - elif execution_engine in ["sqlite", "postgresql", "mysql", "mssql"]: + elif execution_engine in ["sqlite", "postgresql", "mysql", "mssql", "bigquery"]: if not create_engine: return None return build_sa_validator_with_data( @@ -839,12 +915,14 @@ def build_sa_validator_with_data( "postgresql": postgresqltypes.dialect, "mysql": mysqltypes.dialect, 
"mssql": mssqltypes.dialect, + "bigquery": pybigquery.sqlalchemy_bigquery.BigQueryDialect, } dialect_types = { "sqlite": SQLITE_TYPES, "postgresql": POSTGRESQL_TYPES, "mysql": MYSQL_TYPES, "mssql": MSSQL_TYPES, + "bigquery": BIGQUERY_TYPES, } db_hostname = os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost") if sa_engine_name == "sqlite": @@ -861,6 +939,8 @@ def build_sa_validator_with_data( "for SQL Server&charset=utf8&autocommit=true", # echo=True, ) + elif sa_engine_name == "bigquery": + engine = _create_bigquery_engine() else: engine = None @@ -870,6 +950,15 @@ def build_sa_validator_with_data( # Add the data to the database as a new table + if sa_engine_name == "bigquery": + schema = None + if schemas and sa_engine_name in schemas: + schema = schemas[sa_engine_name] + # bigquery does not allow column names to have spaces + schema = {k.replace(" ", "_"): v for k, v in schema.items()} + + df.columns = df.columns.str.replace(" ", "_") + sql_dtypes = {} if ( schemas @@ -877,6 +966,7 @@ def build_sa_validator_with_data( and isinstance(engine.dialect, dialect_classes.get(sa_engine_name)) ): schema = schemas[sa_engine_name] + sql_dtypes = { col: dialect_types.get(sa_engine_name)[dtype] for (col, dtype) in schema.items() @@ -1072,51 +1162,15 @@ def candidate_getter_is_on_temporary_notimplemented_list(context, getter): def candidate_test_is_on_temporary_notimplemented_list(context, expectation_type): if context in ["sqlite", "postgresql", "mysql", "mssql"]: return expectation_type in [ - # "expect_column_to_exist", - # "expect_table_row_count_to_be_between", - # "expect_table_row_count_to_equal", - # "expect_table_columns_to_match_ordered_list", - # "expect_table_columns_to_match_set", - # "expect_column_values_to_be_unique", - # "expect_column_values_to_not_be_null", - # "expect_column_values_to_be_null", - # "expect_column_values_to_be_of_type", - # "expect_column_values_to_be_in_type_list", - # "expect_column_values_to_be_in_set", - # "expect_column_values_to_not_be_in_set", - # "expect_column_distinct_values_to_be_in_set", - # "expect_column_distinct_values_to_equal_set", - # "expect_column_distinct_values_to_contain_set", - # "expect_column_values_to_be_between", "expect_column_values_to_be_increasing", "expect_column_values_to_be_decreasing", - # "expect_column_value_lengths_to_be_between", - # "expect_column_value_lengths_to_equal", - # "expect_column_values_to_match_regex", - # "expect_column_values_to_not_match_regex", - # "expect_column_values_to_match_regex_list", - # "expect_column_values_to_not_match_regex_list", - # "expect_column_values_to_match_like_pattern", - # "expect_column_values_to_not_match_like_pattern", - # "expect_column_values_to_match_like_pattern_list", - # "expect_column_values_to_not_match_like_pattern_list", "expect_column_values_to_match_strftime_format", "expect_column_values_to_be_dateutil_parseable", "expect_column_values_to_be_json_parseable", "expect_column_values_to_match_json_schema", - # "expect_column_mean_to_be_between", - # "expect_column_median_to_be_between", - # "expect_column_quantile_values_to_be_between", "expect_column_stdev_to_be_between", - # "expect_column_unique_value_count_to_be_between", - # "expect_column_proportion_of_unique_values_to_be_between", "expect_column_most_common_value_to_be_in_set", - # "expect_column_sum_to_be_between", - # "expect_column_min_to_be_between", - # "expect_column_max_to_be_between", - # "expect_column_chisquare_test_p_value_to_be_greater_than", "expect_column_bootstrapped_ks_test_p_value_to_be_greater_than", - 
# "expect_column_kl_divergence_to_be_less_than", "expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than", "expect_column_pair_values_to_be_equal", "expect_column_pair_values_A_to_be_greater_than_B", @@ -1125,62 +1179,47 @@ def candidate_test_is_on_temporary_notimplemented_list(context, expectation_type "expect_compound_columns_to_be_unique", "expect_multicolumn_values_to_be_unique", "expect_column_pair_cramers_phi_value_to_be_less_than", - # "expect_table_row_count_to_equal_other_table", "expect_multicolumn_sum_to_equal", ] + if context in ["bigquery"]: + return expectation_type in [ + "expect_column_values_to_be_increasing", + "expect_column_values_to_be_decreasing", + "expect_column_values_to_match_strftime_format", + "expect_column_values_to_be_dateutil_parseable", + "expect_column_values_to_be_json_parseable", + "expect_column_values_to_match_json_schema", + "expect_column_stdev_to_be_between", + "expect_column_most_common_value_to_be_in_set", + "expect_column_bootstrapped_ks_test_p_value_to_be_greater_than", + "expect_column_kl_divergence_to_be_less_than", + "expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than", + "expect_column_chisquare_test_p_value_to_be_greater_than", + "expect_column_pair_values_to_be_equal", + "expect_column_pair_values_A_to_be_greater_than_B", + "expect_column_pair_values_to_be_in_set", + "expect_select_column_values_to_be_unique_within_record", + "expect_compound_columns_to_be_unique", + "expect_multicolumn_values_to_be_unique", + "expect_column_pair_cramers_phi_value_to_be_less_than", + "expect_multicolumn_sum_to_equal", + "expect_column_values_to_be_between", # unique to bigquery -- https://github.com/great-expectations/great_expectations/issues/3261 + "expect_column_values_to_be_of_type", # unique to bigquery -- https://github.com/great-expectations/great_expectations/issues/3261 + "expect_column_values_to_be_in_set", # unique to bigquery -- https://github.com/great-expectations/great_expectations/issues/3261 + "expect_column_values_to_be_in_type_list", # unique to bigquery -- https://github.com/great-expectations/great_expectations/issues/3261 + "expect_column_values_to_match_like_pattern_list", # unique to bigquery -- https://github.com/great-expectations/great_expectations/issues/3261 + "expect_column_values_to_not_match_like_pattern_list", # unique to bigquery -- https://github.com/great-expectations/great_expectations/issues/3261 + ] + if context == "SparkDFDataset": return expectation_type in [ - # "expect_column_to_exist", - # "expect_table_row_count_to_be_between", - # "expect_table_row_count_to_equal", - # "expect_table_columns_to_match_ordered_list", - # "expect_table_columns_to_match_set", - # "expect_column_values_to_be_unique", - # "expect_column_values_to_not_be_null", - # "expect_column_values_to_be_null", - # "expect_column_values_to_be_of_type", - # "expect_column_values_to_be_in_type_list", - # "expect_column_values_to_be_in_set", - # "expect_column_values_to_not_be_in_set", - # "expect_column_distinct_values_to_be_in_set", - # "expect_column_distinct_values_to_equal_set", - # "expect_column_distinct_values_to_contain_set", - # "expect_column_values_to_be_between", - # "expect_column_values_to_be_increasing", - # "expect_column_values_to_be_decreasing", - # "expect_column_value_lengths_to_be_between", - # "expect_column_value_lengths_to_equal", - # "expect_column_values_to_match_regex", - # "expect_column_values_to_not_match_regex", - # "expect_column_values_to_match_regex_list", - # 
"expect_column_values_to_not_match_regex_list", - # "expect_column_values_to_match_strftime_format", "expect_column_values_to_be_dateutil_parseable", "expect_column_values_to_be_json_parseable", - # "expect_column_values_to_match_json_schema", - # "expect_column_mean_to_be_between", - # "expect_column_median_to_be_between", - # "expect_column_quantile_values_to_be_between", - # "expect_column_stdev_to_be_between", - # "expect_column_unique_value_count_to_be_between", - # "expect_column_proportion_of_unique_values_to_be_between", - # "expect_column_most_common_value_to_be_in_set", - # "expect_column_sum_to_be_between", - # "expect_column_min_to_be_between", - # "expect_column_max_to_be_between", - # "expect_column_chisquare_test_p_value_to_be_greater_than", "expect_column_bootstrapped_ks_test_p_value_to_be_greater_than", - # "expect_column_kl_divergence_to_be_less_than", "expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than", - # "expect_column_pair_values_to_be_equal", - # "expect_column_pair_values_A_to_be_greater_than_B", - # "expect_column_pair_values_to_be_in_set", - # "expect_select_column_values_to_be_unique_within_record", "expect_compound_columns_to_be_unique", - # "expect_multicolumn_values_to_be_unique", "expect_column_pair_cramers_phi_value_to_be_less_than", "expect_table_row_count_to_equal_other_table", - # "expect_multicolumn_sum_to_equal", ] if context == "PandasDataset": return expectation_type in [ @@ -1193,58 +1232,58 @@ def candidate_test_is_on_temporary_notimplemented_list_cfe(context, expectation_ if context in ["sqlite", "postgresql", "mysql", "mssql"]: return expectation_type in [ "expect_select_column_values_to_be_unique_within_record", - # "expect_table_columns_to_match_set", - # "expect_table_column_count_to_be_between", - # "expect_table_column_count_to_equal", - # "expect_column_to_exist", - # "expect_table_columns_to_match_ordered_list", - # "expect_table_row_count_to_be_between", - # "expect_table_row_count_to_equal", - # "expect_table_row_count_to_equal_other_table", - # "expect_column_values_to_be_unique", - # "expect_column_values_to_not_be_null", - # "expect_column_values_to_be_null", - # "expect_column_values_to_be_of_type", - # "expect_column_values_to_be_in_type_list", - # "expect_column_values_to_be_in_set", - # "expect_column_values_to_not_be_in_set", - # "expect_column_values_to_be_between", "expect_column_values_to_be_increasing", "expect_column_values_to_be_decreasing", - # "expect_column_value_lengths_to_be_between", - # "expect_column_value_lengths_to_equal", - # "expect_column_values_to_match_regex", - # "expect_column_values_to_not_match_regex", - # "expect_column_values_to_match_regex_list", - # "expect_column_values_to_not_match_regex_list", - # "expect_column_values_to_match_like_pattern", - # "expect_column_values_to_not_match_like_pattern", - # "expect_column_values_to_match_like_pattern_list", - # "expect_column_values_to_not_match_like_pattern_list", "expect_column_values_to_match_strftime_format", "expect_column_values_to_be_dateutil_parseable", "expect_column_values_to_be_json_parseable", "expect_column_values_to_match_json_schema", - # "expect_column_distinct_values_to_be_in_set", - # "expect_column_distinct_values_to_contain_set", - # "expect_column_distinct_values_to_equal_set", - # "expect_column_mean_to_be_between", - # "expect_column_median_to_be_between", - # "expect_column_quantile_values_to_be_between", "expect_column_stdev_to_be_between", - # "expect_column_unique_value_count_to_be_between", - # 
"expect_column_proportion_of_unique_values_to_be_between", - # "expect_column_most_common_value_to_be_in_set", - # "expect_column_max_to_be_between", - # "expect_column_min_to_be_between", - # "expect_column_sum_to_be_between", "expect_column_pair_values_A_to_be_greater_than_B", "expect_column_pair_values_to_be_equal", "expect_column_pair_values_to_be_in_set", "expect_multicolumn_values_to_be_unique", "expect_multicolumn_sum_to_equal", "expect_column_pair_cramers_phi_value_to_be_less_than", - # "expect_column_kl_divergence_to_be_less_than", + "expect_column_bootstrapped_ks_test_p_value_to_be_greater_than", + "expect_column_chisquare_test_p_value_to_be_greater_than", + "expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than", + "expect_compound_columns_to_be_unique", + ] + + if context == "bigquery": + ### + # NOTE: 20210729 - jdimatteo: It is relatively slow to create tables for + # all these tests in BigQuery, and if you want to run a single test then + # you can uncomment and modify the below line (which results in only the + # tests for "expect_column_values_to_not_be_null" being run): + # return expectation_type != "expect_column_values_to_not_be_null" + ### + # NOTE: 20210729 - jdimatteo: Below are temporarily not being tested + # with BigQuery. For each disabled test below, please include a link to + # a github issue tracking adding the test with BigQuery. + ### + return expectation_type in [ + "expect_column_kl_divergence_to_be_less_than", # TODO: Takes over 64 minutes to "collect" (haven't actually seen it complete yet) -- https://github.com/great-expectations/great_expectations/issues/3260 + "expect_column_values_to_be_in_set", # TODO: No matching signature for operator and AssertionError: expected ['2018-01-01T00:00:00'] but got ['2018-01-01'] -- https://github.com/great-expectations/great_expectations/issues/3260 + "expect_column_values_to_be_in_type_list", # TODO: AssertionError -- https://github.com/great-expectations/great_expectations/issues/3260 + "expect_column_values_to_be_between", # TODO: "400 No matching signature for operator >=" -- https://github.com/great-expectations/great_expectations/issues/3260 + "expect_column_quantile_values_to_be_between", # TODO: takes over 15 minutes to "collect" (haven't actually seen it complete yet) -- https://github.com/great-expectations/great_expectations/issues/3260 + "expect_column_mean_to_be_between", # TODO: "400 No matching signature for operator *" -- https://github.com/great-expectations/great_expectations/issues/3260 + "expect_select_column_values_to_be_unique_within_record", + "expect_column_values_to_be_increasing", + "expect_column_values_to_be_decreasing", + "expect_column_values_to_match_strftime_format", + "expect_column_values_to_be_dateutil_parseable", + "expect_column_values_to_be_json_parseable", + "expect_column_values_to_match_json_schema", + "expect_column_stdev_to_be_between", + "expect_column_pair_values_A_to_be_greater_than_B", + "expect_column_pair_values_to_be_equal", + "expect_column_pair_values_to_be_in_set", + "expect_multicolumn_values_to_be_unique", + "expect_multicolumn_sum_to_equal", + "expect_column_pair_cramers_phi_value_to_be_less_than", "expect_column_bootstrapped_ks_test_p_value_to_be_greater_than", "expect_column_chisquare_test_p_value_to_be_greater_than", "expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than", @@ -1253,58 +1292,21 @@ def candidate_test_is_on_temporary_notimplemented_list_cfe(context, expectation_ if context == "spark": return 
expectation_type in [ "expect_select_column_values_to_be_unique_within_record", - # "expect_table_columns_to_match_set", - # "expect_table_column_count_to_be_between", - # "expect_table_column_count_to_equal", - # "expect_column_to_exist", - # "expect_table_columns_to_match_ordered_list", - # "expect_table_row_count_to_be_between", - # "expect_table_row_count_to_equal", "expect_table_row_count_to_equal_other_table", - # "expect_column_values_to_be_unique", - # "expect_column_values_to_not_be_null", - # "expect_column_values_to_be_null", - # "expect_column_values_to_be_of_type", - # "expect_column_values_to_be_in_type_list", "expect_column_values_to_be_in_set", "expect_column_values_to_not_be_in_set", - # "expect_column_values_to_be_between", - # "expect_column_values_to_be_increasing", - # "expect_column_values_to_be_decreasing", - # "expect_column_value_lengths_to_be_between", - # "expect_column_value_lengths_to_equal", - # "expect_column_values_to_match_regex", - # "expect_column_values_to_not_match_regex", - # "expect_column_values_to_match_regex_list", "expect_column_values_to_not_match_regex_list", "expect_column_values_to_match_like_pattern", "expect_column_values_to_not_match_like_pattern", "expect_column_values_to_match_like_pattern_list", "expect_column_values_to_not_match_like_pattern_list", - # "expect_column_values_to_match_strftime_format", "expect_column_values_to_be_dateutil_parseable", - # "expect_column_values_to_be_json_parseable", - # "expect_column_values_to_match_json_schema", - # "expect_column_distinct_values_to_be_in_set", - # "expect_column_distinct_values_to_contain_set", - # "expect_column_distinct_values_to_equal_set", - # "expect_column_mean_to_be_between", - # "expect_column_median_to_be_between", - # "expect_column_quantile_values_to_be_between", - # "expect_column_stdev_to_be_between", - # "expect_column_unique_value_count_to_be_between", - # "expect_column_proportion_of_unique_values_to_be_between", - # "expect_column_most_common_value_to_be_in_set", - # "expect_column_max_to_be_between", - # "expect_column_min_to_be_between", - # "expect_column_sum_to_be_between", "expect_column_pair_values_A_to_be_greater_than_B", "expect_column_pair_values_to_be_equal", "expect_column_pair_values_to_be_in_set", "expect_multicolumn_values_to_be_unique", "expect_multicolumn_sum_to_equal", "expect_column_pair_cramers_phi_value_to_be_less_than", - # "expect_column_kl_divergence_to_be_less_than", "expect_column_bootstrapped_ks_test_p_value_to_be_greater_than", "expect_column_chisquare_test_p_value_to_be_greater_than", "expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than", @@ -1312,64 +1314,20 @@ def candidate_test_is_on_temporary_notimplemented_list_cfe(context, expectation_ ] if context == "pandas": return expectation_type in [ - # "expect_table_columns_to_match_set", - # "expect_select_column_values_to_be_unique_within_record", - # "expect_table_column_count_to_be_between", - # "expect_table_column_count_to_equal", - # "expect_column_to_exist", - # "expect_table_columns_to_match_ordered_list", - # "expect_table_row_count_to_be_between", - # "expect_table_row_count_to_equal", "expect_table_row_count_to_equal_other_table", - # "expect_column_values_to_be_unique", - # "expect_column_values_to_not_be_null", - # "expect_column_values_to_be_null", - # "expect_column_values_to_be_of_type", - # "expect_column_values_to_be_in_type_list", - # "expect_column_values_to_be_in_set", - # "expect_column_values_to_not_be_in_set", - # 
"expect_column_values_to_be_between", - # "expect_column_values_to_be_increasing", - # "expect_column_values_to_be_decreasing", - # "expect_column_value_lengths_to_be_between", - # "expect_column_value_lengths_to_equal", - # "expect_column_values_to_match_regex", - # "expect_column_values_to_not_match_regex", - # "expect_column_values_to_match_regex_list", - # "expect_column_values_to_not_match_regex_list", "expect_column_values_to_match_like_pattern", "expect_column_values_to_not_match_like_pattern", "expect_column_values_to_match_like_pattern_list", "expect_column_values_to_not_match_like_pattern_list", - # "expect_column_values_to_match_strftime_format", - # "expect_column_values_to_be_dateutil_parseable", - # "expect_column_values_to_be_json_parseable", - # "expect_column_values_to_match_json_schema", - # "expect_column_distinct_values_to_be_in_set", - # "expect_column_distinct_values_to_contain_set", - # "expect_column_distinct_values_to_equal_set", - # "expect_column_mean_to_be_between", - # "expect_column_median_to_be_between", - # "expect_column_quantile_values_to_be_between", - # "expect_column_stdev_to_be_between", - # "expect_column_unique_value_count_to_be_between", - # "expect_column_proportion_of_unique_values_to_be_between", - # "expect_column_most_common_value_to_be_in_set", - # "expect_column_max_to_be_between", - # "expect_column_min_to_be_between", - # "expect_column_sum_to_be_between", "expect_column_pair_values_A_to_be_greater_than_B", - # "expect_column_pair_values_to_be_equal", "expect_column_pair_values_to_be_in_set", "expect_multicolumn_values_to_be_unique", - # "expect_multicolumn_sum_to_equal", "expect_column_pair_cramers_phi_value_to_be_less_than", - # "expect_column_kl_divergence_to_be_less_than", "expect_column_bootstrapped_ks_test_p_value_to_be_greater_than", "expect_column_chisquare_test_p_value_to_be_greater_than", "expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than", - # "expect_compound_columns_to_be_unique", ] + return False @@ -1380,6 +1338,7 @@ def build_test_backends_list( include_postgresql=False, include_mysql=False, include_mssql=False, + include_bigquery=False, ): test_backends = [] @@ -1453,6 +1412,17 @@ def build_test_backends_list( ) test_backends += ["mssql"] + if include_bigquery: + try: + engine = _create_bigquery_engine() + conn = engine.connect() + conn.close() + except (ImportError, sa.exc.SQLAlchemyError) as e: + raise ImportError( + "bigquery tests are requested, but unable to connect" + ) from e + test_backends += ["bigquery"] + return test_backends @@ -2004,3 +1974,16 @@ def generate_test_table_name( [random.choice(string.ascii_letters + string.digits) for _ in range(8)] ) return table_name + + +def _create_bigquery_engine() -> Engine: + gcp_project = os.getenv("GE_TEST_BIGQUERY_PROJECT") + if not gcp_project: + raise ValueError( + "Environment Variable GE_TEST_BIGQUERY_PROJECT is required to run expectation tests" + ) + return create_engine(f"bigquery://{gcp_project}/{_bigquery_dataset()}") + + +def _bigquery_dataset() -> str: + return os.getenv("GE_TEST_BIGQUERY_DATASET") diff --git a/tests/conftest.py b/tests/conftest.py index 23b36f033ce9..6ca24b8d5ad2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -102,6 +102,11 @@ def pytest_addoption(parser): action="store_true", help="If set, execute tests against mssql", ) + parser.addoption( + "--bigquery", + action="store_true", + help="If set, execute tests against bigquery", + ) parser.addoption( "--aws-integration", action="store_true", @@ -134,6 
+139,7 @@ def build_test_backends_list_cfe(metafunc): include_postgresql = not metafunc.config.getoption("--no-postgresql") include_mysql: bool = metafunc.config.getoption("--mysql") include_mssql: bool = metafunc.config.getoption("--mssql") + include_bigquery: bool = metafunc.config.getoption("--bigquery") test_backend_names: List[str] = build_test_backends_list_v3( include_pandas=include_pandas, include_spark=include_spark, @@ -141,6 +147,7 @@ include_postgresql=include_postgresql, include_mysql=include_mysql, include_mssql=include_mssql, + include_bigquery=include_bigquery, ) return test_backend_names diff --git a/tests/expectations/test_util.py b/tests/expectations/test_util.py index 17143039c384..f685ed68f633 100644 --- a/tests/expectations/test_util.py +++ b/tests/expectations/test_util.py @@ -320,6 +320,7 @@ def test_table_column_reflection_fallback(test_backends, sa): include_postgresql: bool = "postgresql" in test_backends include_mysql: bool = "mysql" in test_backends include_mssql: bool = "mssql" in test_backends + include_bigquery: bool = "bigquery" in test_backends if not create_engine: pytest.skip("Unable to import sqlalchemy.create_engine() -- skipping.") @@ -331,6 +332,7 @@ def test_table_column_reflection_fallback(test_backends, sa): include_postgresql=include_postgresql, include_mysql=include_mysql, include_mssql=include_mssql, + include_bigquery=include_bigquery, ) df: pd.DataFrame = pd.DataFrame( diff --git a/tests/test_definitions/column_aggregate_expectations/expect_column_mean_to_be_between.json b/tests/test_definitions/column_aggregate_expectations/expect_column_mean_to_be_between.json index 1a3085a1ab03..3ed511376899 100644 --- a/tests/test_definitions/column_aggregate_expectations/expect_column_mean_to_be_between.json +++ b/tests/test_definitions/column_aggregate_expectations/expect_column_mean_to_be_between.json @@ -165,8 +165,8 @@ "success": true, "observed_value": 0.5 }, - "suppress_test_for": ["postgresql", "mssql", "spark"], - "_comment": "postgresql, mssql, and spark will not allow coercing the boolean type to an average" + "suppress_test_for": ["postgresql", "mssql", "spark", "bigquery"], + "_comment": "postgresql, mssql, bigquery, and spark will not allow coercing the boolean type to an average" }, { "title": "coerced_types_true_false_and_null", @@ -180,8 +180,8 @@ "success": true, "observed_value": 0.5 }, - "suppress_test_for": ["postgresql", "mssql", "spark"], - "_comment": "postgresql, mssql, and spark will not allow coercing the boolean type to an average" + "suppress_test_for": ["postgresql", "mssql", "spark", "bigquery"], + "_comment": "postgresql, mssql, bigquery, and spark will not allow coercing the boolean type to an average" }, { "title": "catch_exceptions___non_number_min_value", diff --git a/tests/test_definitions/column_aggregate_expectations/expect_column_sum_to_be_between.json b/tests/test_definitions/column_aggregate_expectations/expect_column_sum_to_be_between.json index 4dfc3f2e61b1..8f25b7f055e1 100644 --- a/tests/test_definitions/column_aggregate_expectations/expect_column_sum_to_be_between.json +++ b/tests/test_definitions/column_aggregate_expectations/expect_column_sum_to_be_between.json @@ -29,6 +29,15 @@ "a": "INTEGER", "b": "INTEGER" }, + "bigquery": { + "w": "INTEGER", + "x": "INTEGER", + "y": "INTEGER", + "z": "STRING", + "zz": "TIMESTAMP", + "a": "INTEGER", + "b": "INTEGER" + }, "spark": { "w": "IntegerType", "x": "IntegerType", diff --git 
a/tests/test_definitions/column_map_expectations/expect_column_values_to_match_regex.json b/tests/test_definitions/column_map_expectations/expect_column_values_to_match_regex.json index dc6ce3f8eab8..be1741fa51ce 100644 --- a/tests/test_definitions/column_map_expectations/expect_column_values_to_match_regex.json +++ b/tests/test_definitions/column_map_expectations/expect_column_values_to_match_regex.json @@ -59,7 +59,7 @@ "unexpected_index_list": [4], "unexpected_list": ["bee"] }, - "suppress_test_for": ["sqlite", "mssql"] + "suppress_test_for": ["sqlite", "mssql", "bigquery"] }, { "title": "positive_test_sufficient_mostly_w_one_non_matching_value", diff --git a/tests/test_definitions/column_map_expectations/expect_column_values_to_match_regex_list.json b/tests/test_definitions/column_map_expectations/expect_column_values_to_match_regex_list.json index 335a1721a7da..d01c73c7f94a 100644 --- a/tests/test_definitions/column_map_expectations/expect_column_values_to_match_regex_list.json +++ b/tests/test_definitions/column_map_expectations/expect_column_values_to_match_regex_list.json @@ -100,7 +100,7 @@ "unexpected_index_list": [], "success": true }, - "suppress_test_for": ["sqlite", "mssql"] + "suppress_test_for": ["sqlite", "mssql", "bigquery"] }] }] } diff --git a/tests/test_definitions/test_expectations.py b/tests/test_definitions/test_expectations.py index 76982a2d804d..0a7aae43dd66 100644 --- a/tests/test_definitions/test_expectations.py +++ b/tests/test_definitions/test_expectations.py @@ -12,6 +12,7 @@ from great_expectations.dataset import PandasDataset, SparkDFDataset, SqlAlchemyDataset from great_expectations.self_check.util import ( + BigQueryDialect, candidate_test_is_on_temporary_notimplemented_list, evaluate_json_test, get_dataset, @@ -38,7 +39,6 @@ def pytest_generate_tests(metafunc): parametrized_tests = [] ids = [] - for expectation_category in expectation_dirs: test_configuration_files = glob.glob( @@ -136,6 +136,15 @@ def pytest_generate_tests(metafunc): ) ): generate_test = True + elif ( + "bigquery" in test["only_for"] + and BigQueryDialect is not None + and isinstance(data_asset, SqlAlchemyDataset) + and hasattr(data_asset.engine.dialect, "name") + and data_asset.engine.dialect.name.lower() + == "bigquery" + ): + generate_test = True elif isinstance(data_asset, PandasDataset): if "pandas" in test["only_for"]: generate_test = True @@ -186,6 +195,13 @@ def pytest_generate_tests(metafunc): and isinstance(data_asset, SqlAlchemyDataset) and isinstance(data_asset.engine.dialect, mssqlDialect) ) + or ( + "bigquery" in test["suppress_test_for"] + and BigQueryDialect is not None + and isinstance(data_asset, SqlAlchemyDataset) + and hasattr(data_asset.engine.dialect, "name") + and data_asset.engine.dialect.name.lower() == "bigquery" + ) or ( "pandas" in test["suppress_test_for"] and isinstance(data_asset, PandasDataset) diff --git a/tests/test_definitions/test_expectations_cfe.py b/tests/test_definitions/test_expectations_cfe.py index ddc9b33bc943..e8ab6b2c854a 100644 --- a/tests/test_definitions/test_expectations_cfe.py +++ b/tests/test_definitions/test_expectations_cfe.py @@ -13,17 +13,16 @@ SqlAlchemyBatchData, ) from great_expectations.self_check.util import ( + BigQueryDialect, candidate_test_is_on_temporary_notimplemented_list_cfe, evaluate_json_test_cfe, get_test_validator_with_data, + mssqlDialect, + mysqlDialect, + postgresqlDialect, + sqliteDialect, ) from tests.conftest import build_test_backends_list_cfe -from tests.test_definitions.test_expectations import 
mssqlDialect as mssqlDialect -from tests.test_definitions.test_expectations import mysqlDialect as mysqlDialect -from tests.test_definitions.test_expectations import ( - postgresqlDialect as postgresqlDialect, -) -from tests.test_definitions.test_expectations import sqliteDialect as sqliteDialect from tests.test_definitions.test_expectations import tmp_dir @@ -35,11 +34,9 @@ def pytest_generate_tests(metafunc): for dir_ in os.listdir(dir_path) if os.path.isdir(os.path.join(dir_path, dir_)) ] - parametrized_tests = [] ids = [] backends = build_test_backends_list_cfe(metafunc) - for expectation_category in expectation_dirs: test_configuration_files = glob.glob( @@ -144,6 +141,18 @@ def pytest_generate_tests(metafunc): ) ): generate_test = True + elif ( + "bigquery" in test["only_for"] + and BigQueryDialect is not None + and hasattr( + validator_with_data.execution_engine.active_batch_data.sql_engine_dialect, + "name", + ) + and validator_with_data.execution_engine.active_batch_data.sql_engine_dialect.name + == "bigquery" + ): + generate_test = True + elif validator_with_data and isinstance( validator_with_data.execution_engine.active_batch_data, PandasBatchData, @@ -230,6 +239,21 @@ def pytest_generate_tests(metafunc): mssqlDialect, ) ) + or ( + "bigquery" in test["suppress_test_for"] + and BigQueryDialect is not None + and validator_with_data + and isinstance( + validator_with_data.execution_engine.active_batch_data, + SqlAlchemyBatchData, + ) + and hasattr( + validator_with_data.execution_engine.active_batch_data.sql_engine_dialect, + "name", + ) + and validator_with_data.execution_engine.active_batch_data.sql_engine_dialect.name + == "bigquery" + ) or ( "pandas" in test["suppress_test_for"] and validator_with_data