Merge pull request #1266 from opendatacube/typehint_compliance

Typehint cleanup - mypy compliance

SpacemanPaul committed Jun 6, 2022
2 parents 68603e3 + b600a93 commit ec9a8c6

Showing 63 changed files with 349 additions and 255 deletions.
2 changes: 2 additions & 0 deletions datacube/_celery_runner.py
@@ -2,6 +2,8 @@
#
# Copyright (c) 2015-2020 ODC Contributors
# SPDX-License-Identifier: Apache-2.0
#
# type: ignore
import cloudpickle
import logging
from celery import Celery
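_celery_runner.py (and executor.py further down) are opted out of checking wholesale with a file-level "# type: ignore". A minimal sketch of the pattern follows; the module below is invented for illustration and is not part of datacube.

# example_runner.py -- illustrative module only.
#
# A file-level "type: ignore" must come before any executable code
# (docstrings, imports, assignments); only comments and blank lines may
# precede it.  mypy then suppresses every error it would report for this
# module, a pragmatic escape hatch for code that is hard to annotate.
# type: ignore
import json


def build_task(payload):            # unannotated signatures are fine here
    return json.dumps({"task": payload})


if __name__ == "__main__":
    print(build_task("reprocess-tile"))
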
8 changes: 4 additions & 4 deletions datacube/api/core.py
@@ -5,7 +5,7 @@
import uuid
import collections.abc
from itertools import groupby
from typing import Union, Optional, Dict, Tuple
from typing import Set, Union, Optional, Dict, Tuple, cast
import datetime

import numpy
@@ -927,13 +927,13 @@ def _calculate_chunk_sizes(sources: xarray.DataArray,
geobox: GeoBox,
dask_chunks: Dict[str, Union[str, int]],
extra_dims: Optional[ExtraDimensions] = None):
extra_dim_names = ()
extra_dim_shapes = ()
extra_dim_names: Tuple[str, ...] = ()
extra_dim_shapes: Tuple[int, ...] = ()
if extra_dims is not None:
extra_dim_names, extra_dim_shapes = extra_dims.chunk_size()

valid_keys = sources.dims + extra_dim_names + geobox.dimensions
bad_keys = set(dask_chunks) - set(valid_keys)
bad_keys = cast(Set[str], set(dask_chunks)) - cast(Set[str], set(valid_keys))
if bad_keys:
raise KeyError('Unknown dask_chunk dimension {}. Valid dimensions are: {}'.format(bad_keys, valid_keys))

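Two recurring fixes appear here: annotating an empty-tuple default so mypy does not pin it to the empty-tuple type, and typing.cast to settle the element type of the two sets (presumably needed because sources.dims is typed as a tuple of Hashable rather than str). A standalone sketch with invented dimension names:

from typing import Dict, Optional, Set, Tuple, cast


def check_chunk_keys(dask_chunks: Dict[str, int],
                     source_dims: Tuple[str, ...],
                     extra: Optional[Tuple[Tuple[str, ...], Tuple[int, ...]]] = None) -> None:
    # Without the annotations, mypy infers the empty tuples as the empty-tuple
    # type and rejects the re-assignment from `extra` below.
    extra_dim_names: Tuple[str, ...] = ()
    extra_dim_shapes: Tuple[int, ...] = ()
    if extra is not None:
        extra_dim_names, extra_dim_shapes = extra

    valid_keys = source_dims + extra_dim_names + ("y", "x")
    # cast() is purely a static assertion: it tells mypy both sets hold str
    # and costs nothing at runtime.
    bad_keys = cast(Set[str], set(dask_chunks)) - cast(Set[str], set(valid_keys))
    if bad_keys:
        raise KeyError(f"Unknown dask_chunk dimension {bad_keys}. "
                       f"Valid dimensions are: {valid_keys}")


check_chunk_keys({"time": 1, "x": 512}, ("time",), (("z",), (7,)))
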
27 changes: 18 additions & 9 deletions datacube/config.py
@@ -10,10 +10,11 @@
from pathlib import Path
import configparser
from urllib.parse import unquote_plus, urlparse, parse_qsl
from typing import Optional, Iterable, Union, Any, Tuple, Dict
from typing import Any, Dict, Iterable, MutableMapping, Optional, Tuple, Union, cast

PathLike = Union[str, 'os.PathLike[Any]']

ConfigDict = Dict[str, Union[str, int, bool]]

ENVIRONMENT_VARNAME = 'DATACUBE_CONFIG_PATH'
#: Config locations in order. Properties found in latter locations override
@@ -161,7 +162,7 @@ def __repr__(self) -> str:
DB_KEYS = ('hostname', 'port', 'database', 'username', 'password')


def parse_connect_url(url: str) -> Dict[str, str]:
def parse_connect_url(url: str) -> ConfigDict:
""" Extract database,hostname,port,username,password from db URL.
Example: postgresql://username:password@hostname:port/database
@@ -181,7 +182,7 @@ def split2(s: str, separator: str) -> Tuple[str, str]:
user, password = '', ''
host, port = split2(netloc, ':')

oo = dict(hostname=host, database=db)
oo: ConfigDict = dict(hostname=host, database=db)

if port:
oo['port'] = port
@@ -202,15 +203,15 @@ def split2(s: str, separator: str) -> Tuple[str, str]:
return oo


def parse_env_params() -> Dict[str, str]:
def parse_env_params() -> ConfigDict:
"""
- Read DATACUBE_IAM_* environment variables.
- Extract parameters from DATACUBE_DB_URL if present
- Else look for DB_HOSTNAME, DB_USERNAME, DB_PASSWORD, DB_DATABASE
- Return {} otherwise
"""
# Handle environment vars that cannot fit in the DB URL
non_url_params = {}
non_url_params: MutableMapping[str, Union[bool, int]] = {}
iam_auth = os.environ.get('DATACUBE_IAM_AUTHENTICATION')
if iam_auth is not None and iam_auth.lower() in ['y', 'yes']:
non_url_params["iam_authentication"] = True
@@ -232,13 +233,21 @@ def parse_env_params() -> Dict[str, str]:
return params


def _cfg_from_env_opts(opts: Dict[str, str],
def _cfg_from_env_opts(opts: ConfigDict,
base: configparser.ConfigParser) -> LocalConfig:
base['default'] = {'db_'+k: v for k, v in opts.items()}
def stringify(vin: Union[int, str, bool]) -> str:
if isinstance(vin, bool):
if vin:
return "yes"
else:
return "no"
else:
return str(vin)
base['default'] = {'db_'+k: stringify(v) for k, v in opts.items()}
return LocalConfig(base, files_loaded=[], env='default')


def render_dc_config(params: Dict[str, Any],
def render_dc_config(params: ConfigDict,
section_name: str = 'default') -> str:
""" Render output of parse_env_params to a string that can be written to config file.
"""
@@ -263,7 +272,7 @@ def auto_config() -> str:
option3:
default config
"""
cfg_path = os.environ.get('DATACUBE_CONFIG_PATH', None)
cfg_path: Optional[PathLike] = os.environ.get('DATACUBE_CONFIG_PATH', None)
cfg_path = Path(cfg_path) if cfg_path else Path.home()/'.datacube.conf'

if cfg_path.exists():
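The ConfigDict alias spells out that parsed parameters may be strings, ints or bools, and the new stringify helper exists because configparser sections can only hold strings. A self-contained sketch; the option names are examples rather than the full set datacube reads:

import configparser
from typing import Dict, Union

ConfigDict = Dict[str, Union[str, int, bool]]


def stringify(vin: Union[str, int, bool]) -> str:
    # bool is checked first (it is a subclass of int); "yes"/"no" round-trip
    # cleanly through ConfigParser.getboolean().
    if isinstance(vin, bool):
        return "yes" if vin else "no"
    return str(vin)


opts: ConfigDict = {"hostname": "db.example.com", "port": 5432, "iam_authentication": True}

base = configparser.ConfigParser()
# ConfigParser only stores strings, hence the conversion before assignment.
base["default"] = {"db_" + k: stringify(v) for k, v in opts.items()}

assert base["default"]["db_port"] == "5432"
assert base.getboolean("default", "db_iam_authentication") is True
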
5 changes: 3 additions & 2 deletions datacube/drivers/indexes.py
@@ -6,6 +6,7 @@

from ._tools import singleton_setup
from .driver_cache import load_drivers
from ..index.abstract import AbstractIndexDriver


class IndexDriverCache(object):
@@ -21,7 +22,7 @@ def __init__(self, group: str) -> None:
for alias in driver.aliases:
self._drivers[alias] = driver

def __call__(self, name: str) -> "datacube.index.abstract.AbstractIndexDriver":
def __call__(self, name: str) -> AbstractIndexDriver:
"""
:returns: None if driver with a given name is not found
@@ -50,7 +51,7 @@ def index_drivers() -> List[str]:
return index_cache().drivers()


def index_driver_by_name(name: str) -> Optional["datacube.index.AbstractIndexDriver"]:
def index_driver_by_name(name: str) -> Optional[AbstractIndexDriver]:
""" Lookup writer driver by name
:returns: Initialised writer driver instance
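The quoted, dotted forward references ("datacube.index.abstract.AbstractIndexDriver") are replaced by a real import so mypy can resolve and check the annotation. A self-contained sketch of the idea, with an invented driver class and registry:

from abc import ABC, abstractmethod
from typing import Dict, Optional


class AbstractDriver(ABC):
    @abstractmethod
    def connect(self) -> str: ...


class PostgresDriver(AbstractDriver):
    def connect(self) -> str:
        return "connected"


_DRIVERS: Dict[str, AbstractDriver] = {"default": PostgresDriver()}


# Before: -> Optional["some.package.AbstractDriver"], a dotted string that
# mypy flags as an undefined name unless it is importable in this module.
# After: the class is imported (or, here, defined) locally, so the return
# annotation is a real, checkable type.
def driver_by_name(name: str) -> Optional[AbstractDriver]:
    return _DRIVERS.get(name)


assert driver_by_name("default") is not None
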
2 changes: 1 addition & 1 deletion datacube/drivers/netcdf/_safestrings.py
@@ -10,7 +10,7 @@
For more information see https://github.com/Unidata/netcdf4-python/issues/448
"""
import netCDF4
import netCDF4 # type: ignore[import]


class _VariableProxy(object):
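netCDF4 (like rasterio further down) ships without type stubs, so mypy flags the bare import; the error-code-qualified ignore silences exactly that complaint and nothing else. A sketch of the pattern, which runs only where netCDF4 is installed:

# The [import] qualifier keeps the ignore narrow: only the missing-stubs
# error on this line is suppressed; anything else mypy finds is still
# reported.  The imported module is treated as Any by mypy.
import netCDF4  # type: ignore[import]

print(netCDF4.__version__)
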
2 changes: 1 addition & 1 deletion datacube/drivers/postgis/_api.py
@@ -308,7 +308,7 @@ def get_datasets_for_location(self, uri, mode=None):

def all_dataset_ids(self, archived: bool):
query = select(
DATASET.c.id
DATASET.c.id # type: ignore[arg-type]
).select_from(
DATASET
)
18 changes: 12 additions & 6 deletions datacube/drivers/postgis/_schema.py
@@ -49,7 +49,8 @@
Column('metadata', postgres.JSONB, nullable=False),

# The metadata format expected (eg. what fields to search by)
Column('metadata_type_ref', None, ForeignKey(METADATA_TYPE.c.id), nullable=False),
# Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern.
Column('metadata_type_ref', None, ForeignKey(METADATA_TYPE.c.id), nullable=False), # type: ignore[call-overload]

Column('definition', postgres.JSONB, nullable=False),

@@ -68,8 +69,10 @@
'dataset', _core.METADATA,
Column('id', postgres.UUID(as_uuid=True), primary_key=True),

Column('metadata_type_ref', None, ForeignKey(METADATA_TYPE.c.id), nullable=False),
Column('dataset_type_ref', None, ForeignKey(PRODUCT.c.id), index=True, nullable=False),
# Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern.
Column('metadata_type_ref', None, ForeignKey(METADATA_TYPE.c.id), nullable=False), # type: ignore[call-overload]
# Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern.
Column('dataset_type_ref', None, ForeignKey(PRODUCT.c.id), index=True, nullable=False), # type: ignore[call-overload]

Column('metadata', postgres.JSONB, index=False, nullable=False),

@@ -87,7 +90,8 @@
DATASET_LOCATION = Table(
'dataset_location', _core.METADATA,
Column('id', Integer, primary_key=True, autoincrement=True),
Column('dataset_ref', None, ForeignKey(DATASET.c.id), index=True, nullable=False),
# Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern.
Column('dataset_ref', None, ForeignKey(DATASET.c.id), index=True, nullable=False), # type: ignore[call-overload]

# The base URI to find the dataset.
#
@@ -115,13 +119,15 @@
# Link datasets to their source datasets.
DATASET_SOURCE = Table(
'dataset_source', _core.METADATA,
Column('dataset_ref', None, ForeignKey(DATASET.c.id), nullable=False),
# Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern.
Column('dataset_ref', None, ForeignKey(DATASET.c.id), nullable=False), # type: ignore[call-overload]

# An identifier for this source dataset.
# -> Usually it's the dataset type ('ortho', 'nbar'...), as there's typically only one source
# of each type.
Column('classifier', String, nullable=False),
Column('source_dataset_ref', None, ForeignKey(DATASET.c.id), nullable=False),
# Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern.
Column('source_dataset_ref', None, ForeignKey(DATASET.c.id), nullable=False), # type: ignore[call-overload]

PrimaryKeyConstraint('dataset_ref', 'classifier'),
UniqueConstraint('source_dataset_ref', 'dataset_ref'),
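The repeated "# type: ignore[call-overload]" markers cover a legitimate SQLAlchemy idiom: passing None as the column type and letting the ForeignKey supply it, which sqlalchemy-stubs has no overload for. A minimal self-contained sketch with invented table names (requires SQLAlchemy to run):

from sqlalchemy import Column, ForeignKey, Integer, MetaData, String, Table

metadata = MetaData()

PARENT = Table(
    'parent', metadata,
    Column('id', Integer, primary_key=True),
    Column('name', String),
)

CHILD = Table(
    'child', metadata,
    Column('id', Integer, primary_key=True),
    # The column type is taken from the referenced column at runtime, but the
    # stubs offer no matching Column() overload, hence the targeted ignore.
    Column('parent_ref', None, ForeignKey(PARENT.c.id), nullable=False),  # type: ignore[call-overload]
)

print(sorted(metadata.tables))   # ['child', 'parent']
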
2 changes: 1 addition & 1 deletion datacube/drivers/postgis/sql.py
@@ -99,7 +99,7 @@ def __init__(self, *args, **kwargs):

# pylint: disable=too-many-ancestors
class Float8Range(GenericFunction):
type = FLOAT8RANGE
type = FLOAT8RANGE # type: ignore[assignment]
package = 'odc'
identifier = 'float8range'
inherit_cache = False
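Assigning FLOAT8RANGE to the inherited type attribute is fine at runtime but does not match how the stubs declare GenericFunction.type, so a code-qualified [assignment] ignore is used instead of a blanket one. A dependency-free illustration of the same situation, with all class names invented:

from typing import ClassVar


class TypeEngine:
    """Stand-in for a base 'column type' class."""


class Float8Range(TypeEngine):
    """Stand-in for a custom range type."""


class GenericFunction:
    # The base class declares the attribute as an *instance* of TypeEngine...
    type: ClassVar[TypeEngine] = TypeEngine()


class Float8RangeFunction(GenericFunction):
    # ...while the subclass assigns the class object itself.  Harmless at
    # runtime for this use, but an incompatible assignment as far as mypy
    # can tell, so the error code is suppressed on just this line.
    type = Float8Range  # type: ignore[assignment]
    identifier = "float8range"


print(Float8RangeFunction.type)   # <class '__main__.Float8Range'>
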
2 changes: 1 addition & 1 deletion datacube/drivers/postgres/_api.py
@@ -308,7 +308,7 @@ def get_datasets_for_location(self, uri, mode=None):

def all_dataset_ids(self, archived: bool):
query = select(
DATASET.c.id
DATASET.c.id # type: ignore[arg-type]
).select_from(
DATASET
)
18 changes: 12 additions & 6 deletions datacube/drivers/postgres/_schema.py
@@ -49,7 +49,8 @@
Column('metadata', postgres.JSONB, nullable=False),

# The metadata format expected (eg. what fields to search by)
Column('metadata_type_ref', None, ForeignKey(METADATA_TYPE.c.id), nullable=False),
# Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern.
Column('metadata_type_ref', None, ForeignKey(METADATA_TYPE.c.id), nullable=False), # type: ignore[call-overload]

Column('definition', postgres.JSONB, nullable=False),

@@ -68,8 +69,10 @@
'dataset', _core.METADATA,
Column('id', postgres.UUID(as_uuid=True), primary_key=True),

Column('metadata_type_ref', None, ForeignKey(METADATA_TYPE.c.id), nullable=False),
Column('dataset_type_ref', None, ForeignKey(PRODUCT.c.id), index=True, nullable=False),
# Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern.
Column('metadata_type_ref', None, ForeignKey(METADATA_TYPE.c.id), nullable=False), # type: ignore[call-overload]
# Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern.
Column('dataset_type_ref', None, ForeignKey(PRODUCT.c.id), index=True, nullable=False), # type: ignore[call-overload]

Column('metadata', postgres.JSONB, index=False, nullable=False),

@@ -87,7 +90,8 @@
DATASET_LOCATION = Table(
'dataset_location', _core.METADATA,
Column('id', Integer, primary_key=True, autoincrement=True),
Column('dataset_ref', None, ForeignKey(DATASET.c.id), index=True, nullable=False),
# Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern.
Column('dataset_ref', None, ForeignKey(DATASET.c.id), index=True, nullable=False), # type: ignore[call-overload]

# The base URI to find the dataset.
#
@@ -115,13 +119,15 @@
# Link datasets to their source datasets.
DATASET_SOURCE = Table(
'dataset_source', _core.METADATA,
Column('dataset_ref', None, ForeignKey(DATASET.c.id), nullable=False),
# Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern.
Column('dataset_ref', None, ForeignKey(DATASET.c.id), nullable=False), # type: ignore[call-overload]

# An identifier for this source dataset.
# -> Usually it's the dataset type ('ortho', 'nbar'...), as there's typically only one source
# of each type.
Column('classifier', String, nullable=False),
Column('source_dataset_ref', None, ForeignKey(DATASET.c.id), nullable=False),
# Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern.
Column('source_dataset_ref', None, ForeignKey(DATASET.c.id), nullable=False), # type: ignore[call-overload]

PrimaryKeyConstraint('dataset_ref', 'classifier'),
UniqueConstraint('source_dataset_ref', 'dataset_ref'),
2 changes: 1 addition & 1 deletion datacube/drivers/postgres/sql.py
@@ -99,7 +99,7 @@ def __init__(self, *args, **kwargs):

# pylint: disable=too-many-ancestors
class Float8Range(GenericFunction):
type = FLOAT8RANGE
type = FLOAT8RANGE # type: ignore[assignment]
package = 'agdc'
identifier = 'float8range'
inherit_cache = False
6 changes: 3 additions & 3 deletions datacube/drivers/rio/_reader.py
@@ -11,9 +11,9 @@
import numpy as np
from affine import Affine
from concurrent.futures import ThreadPoolExecutor
import rasterio
from rasterio.io import DatasetReader
import rasterio.crs
import rasterio # type: ignore[import]
from rasterio.io import DatasetReader # type: ignore[import]
import rasterio.crs # type: ignore[import]

from datacube.storage import BandInfo
from datacube.utils.geometry import CRS
2 changes: 2 additions & 0 deletions datacube/executor.py
@@ -2,6 +2,8 @@
#
# Copyright (c) 2015-2020 ODC Contributors
# SPDX-License-Identifier: Apache-2.0
#
# type: ignore
import sys

_REMOTE_LOG_FORMAT_STRING = '%(asctime)s {} %(process)d %(name)s %(levelname)s %(message)s'
2 changes: 1 addition & 1 deletion datacube/helpers.py
@@ -9,7 +9,7 @@
"""

import numpy as np
import rasterio
import rasterio # type: ignore[import]
import warnings

DEFAULT_PROFILE = {
3 changes: 2 additions & 1 deletion datacube/index/_api.py
@@ -9,7 +9,6 @@
import logging

from datacube.config import LocalConfig
from datacube.drivers import index_driver_by_name, index_drivers
from datacube.index.abstract import AbstractIndex

_LOG = logging.getLogger(__name__)
@@ -29,6 +28,8 @@ def index_connect(local_config: LocalConfig = None,
:param validate_connection: Validate database connection and schema immediately
:raises datacube.index.Exceptions.IndexSetupError:
"""
from datacube.drivers import index_driver_by_name, index_drivers

if local_config is None:
local_config = LocalConfig.find()

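The driver-registry import moves from module level into index_connect(), the usual way to defer an import, typically to break an import cycle; the motivation is not stated in the diff. A single-file sketch of the pattern, with json standing in for the deferred dependency so the example runs:

from typing import Any


def index_connect(name: str = "default") -> Any:
    # Importing inside the function delays resolution until first call:
    # by then both modules involved in a cycle are fully initialised, and
    # callers that never use this function never pay for the import.
    from json import dumps
    return dumps({"index": name})


print(index_connect())          # {"index": "default"}
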
