From ae2c96c32430caec2d178bb7af2ff43fe5428c97 Mon Sep 17 00:00:00 2001 From: Eric Lidwa Date: Tue, 7 Mar 2023 16:06:59 +0000 Subject: [PATCH 1/5] metadata file for landsat --- utils/landsat.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/utils/landsat.py b/utils/landsat.py index 57fddd3..9fa743d 100644 --- a/utils/landsat.py +++ b/utils/landsat.py @@ -38,7 +38,7 @@ def get_temp_creds(provider): catalog = Client.open("https://cmr.earthdata.nasa.gov/stac/LPCLOUD") - timeRange = '2021-01-01/2022-01-01' + timeRange = '2021-01-01/2021-02-01' geometry = BuildSquare(-178, 50, 1) mybbox = [-179,41,-177,51] @@ -48,10 +48,12 @@ def get_temp_creds(provider): # datetime=timeRange) # print(f"Results matched: {results.matched()}") + print("Searching with Intersects...") results = catalog.search(collections=['HLSS30.v2.0', 'HLSL30.v2.0'], datetime=timeRange, - intersects=geometry) + intersects=geometry, + fields=['HORIZONTAL_CS_CODE', 'HORIZONTAL_CS_NAME']) # print(f"{results.url_with_parameters()}") print(f"Results matched: {results.matched()}") @@ -66,6 +68,7 @@ def get_temp_creds(provider): urlList = [] + for i in reversed(range(len(itemsDict["features"]))): del itemsDict["features"][i]["links"] del itemsDict["features"][i]["stac_version"] @@ -73,10 +76,16 @@ def get_temp_creds(provider): del itemsDict["features"][i]["collection"] del itemsDict["features"][i]["bbox"] del itemsDict["features"][i]["assets"]["browse"] - del itemsDict["features"][i]["assets"]["metadata"] propertiesDict = itemsDict["features"][i]["properties"] assetsDict = itemsDict["features"][i]["assets"] + metaFileUrl = assetsDict["metadata"]["href"] + # we may have to read metaFile, get some algo related attributes from it + # and add them to properties ie: + # propertiesDict['algo1'] = someAlgo1value + + del itemsDict["features"][i]["assets"]["metadata"] + for val in assetsDict: if "href" in assetsDict[val]: # add raster url to properties @@ -88,7 +97,7 @@ def get_temp_creds(provider): - # Dumped trimmed dictionary as geojson file, for testing + # Dump trimmed dictionary as geojson file, for testing file = 'hls_trimmed.geojson' print(f"Writing reults to file {file}") with open(file, 'w') as fp: From cfae5be3b21be8bb56ce203cbbc995029e49def4 Mon Sep 17 00:00:00 2001 From: JP Swinski Date: Wed, 8 Mar 2023 23:42:42 +0000 Subject: [PATCH 2/5] added stac support to cmr queries --- sliderule/earthdata.py | 264 ++++++++++++++++++++++++++++------------- sliderule/sliderule.py | 2 +- utils/query_stac.py | 38 ++++++ utils/stac.py | 68 +++++++++++ 4 files changed, 291 insertions(+), 81 deletions(-) create mode 100644 utils/query_stac.py create mode 100644 utils/stac.py diff --git a/sliderule/earthdata.py b/sliderule/earthdata.py index eb5915f..f79fa45 100644 --- a/sliderule/earthdata.py +++ b/sliderule/earthdata.py @@ -32,6 +32,7 @@ import json import ssl import urllib.request +import requests import datetime import logging import warnings @@ -55,20 +56,24 @@ # best effort match of datasets to providers and versions for earthdata DATASETS = { - "ATL03": {"provider": "NSIDC_ECS", "version": "005"}, - "ATL06": {"provider": "NSIDC_ECS", "version": "005"}, - "ATL08": {"provider": "NSIDC_ECS", "version": "005"}, - "GEDI01_B": {"provider": "LPDAAC_ECS", "version": "002"}, - "GEDI02_A": {"provider": "LPDAAC_ECS", "version": "002"}, - "GEDI02_B": {"provider": "LPDAAC_ECS", "version": "002"}, - "GEDI_L3_LandSurface_Metrics_V2_1952": {"provider": "ORNL_CLOUD", "version": None}, - "GEDI_L4A_AGB_Density_V2_1_2056": {"provider": "ORNL_CLOUD", "version": None}, - "GEDI_L4B_Gridded_Biomass_2017": {"provider": "ORNL_CLOUD", "version": None} + "ATL03": {"provider": "NSIDC_ECS", "version": "005", "stac": False, "collections": [] }, + "ATL06": {"provider": "NSIDC_ECS", "version": "005", "stac": False, "collections": []}, + "ATL08": {"provider": "NSIDC_ECS", "version": "005", "stac": False, "collections": []}, + "GEDI01_B": {"provider": "LPDAAC_ECS", "version": "002", "stac": False, "collections": []}, + "GEDI02_A": {"provider": "LPDAAC_ECS", "version": "002", "stac": False, "collections": []}, + "GEDI02_B": {"provider": "LPDAAC_ECS", "version": "002", "stac": False, "collections": []}, + "GEDI_L3_LandSurface_Metrics_V2_1952": {"provider": "ORNL_CLOUD", "version": None, "stac": False, "collections": []}, + "GEDI_L4A_AGB_Density_V2_1_2056": {"provider": "ORNL_CLOUD", "version": None, "stac": False, "collections": []}, + "GEDI_L4B_Gridded_Biomass_2017": {"provider": "ORNL_CLOUD", "version": None, "stac": False, "collections": []}, + "HLS": {"provider": "LPCLOUD", "version": None, "stac": True, "collections": ["HLSS30.v2.0", "HLSL30.v2.0"]} } # page size for requests CMR_PAGE_SIZE = 2000 +# upper limit on stac resources returned from stac query per request +MAX_STAC_RESOURCES_PER_PAGE = 100 + ############################################################################### # NSIDC UTILITIES ############################################################################### @@ -170,7 +175,7 @@ def __cmr_granule_metadata(search_results): # - polygons as geodataframe geometry return granule_metadata -def __cmr_search(provider, short_name, version, time_start, time_end, **kwargs): +def __cmr_query(provider, short_name, version, time_start, time_end, **kwargs): """Perform a scrolling CMR query for files matching input criteria.""" kwargs.setdefault('polygon',None) kwargs.setdefault('name_filter',None) @@ -227,6 +232,157 @@ def __cmr_search(provider, short_name, version, time_start, time_end, **kwargs): return (urls,metadata) +############################################################################### +# CMR UTILITIES +############################################################################### + +def __cmr_search(provider, short_name, version, polygons, time_start, time_end, return_metadata, name_filter): + + # attempt to fill in version + if version == None: + if short_name in DATASETS: + version = DATASETS[short_name]["version"] + else: + raise sliderule.FatalError("Unable to determine version for CMR query") + + # initialize return value + resources = {} # [] = + + # iterate through each polygon (or none if none supplied) + for polygon in polygons: + urls = [] + metadata = sliderule.emptyframe() + + # issue CMR request + for tolerance in [0.0001, 0.001, 0.01, 0.1, 1.0, None]: + + # convert polygon list into string + polystr = None + if polygon: + flatpoly = [] + for p in polygon: + flatpoly.append(p["lon"]) + flatpoly.append(p["lat"]) + polystr = str(flatpoly)[1:-1] + polystr = polystr.replace(" ", "") # remove all spaces as this will be embedded in a url + + # call into NSIDC routines to make CMR request + try: + urls,metadata = __cmr_query(provider, short_name, version, time_start, time_end, polygon=polystr, return_metadata=return_metadata, name_filter=name_filter) + break # exit loop because cmr search was successful + except urllib.error.HTTPError as e: + logger.error('HTTP Request Error: {}'.format(e.reason)) + except RuntimeError as e: + logger.error("Runtime Error:", e) + + # simplify polygon + if polygon and tolerance: + raw_multi_polygon = [[(tuple([(c['lon'], c['lat']) for c in polygon]), [])]] + shape = MultiPolygon(*raw_multi_polygon) + buffered_shape = shape.buffer(tolerance) + simplified_shape = buffered_shape.simplify(tolerance) + simplified_coords = list(simplified_shape.exterior.coords) + logger.warning('Using simplified polygon (for CMR request only!), {} points using tolerance of {}'.format(len(simplified_coords), tolerance)) + region = [] + for coord in simplified_coords: + point = {"lon": coord[0], "lat": coord[1]} + region.insert(0,point) + polygon = region + else: + break # exit here because nothing can be done + + # populate resources + for i,url, in enumerate(urls): + resources[url] = metadata.iloc[i] + + # build return lists + url_list = list(resources.keys()) + meta_list = list(resources.values()) + + if return_metadata: + return (url_list,meta_list) + else: + return url_list + +############################################################################### +# STAC UTILITIES +############################################################################### + +def __build_geojson(rsps): + geojson = rsps.json() + del geojson["links"] + if 'numberMatched' in geojson: + del geojson['numberMatched'] + if 'numberReturned' in geojson: + del geojson['numberReturned'] + for i in reversed(range(len(geojson["features"]))): + del geojson["features"][i]["links"] + del geojson["features"][i]["stac_version"] + del geojson["features"][i]["stac_extensions"] + del geojson["features"][i]["collection"] + del geojson["features"][i]["bbox"] + del geojson["features"][i]["assets"]["browse"] + del geojson["features"][i]["assets"]["metadata"] + propertiesDict = geojson["features"][i]["properties"] + assetsDict = geojson["features"][i]["assets"] + for val in assetsDict: + if "href" in assetsDict[val]: + propertiesDict[val] = assetsDict[val]["href"] + del geojson["features"][i]["assets"] + return geojson + +def __stac_search(provider, short_name, collections, polygons, time_start, time_end): + global max_requested_resources + + # attempt to fill in collections + if collections == None: + if short_name in DATASETS: + collections = DATASETS[short_name]["collections"] + else: + raise sliderule.FatalError("Unable to determine collections for CMR query") + + # create requests context + context = requests.Session() + context.trust_env = False # prevent requests from attaching credentials from environment + + # build stac request + url = 'https://cmr.earthdata.nasa.gov/stac/{}/search'.format(provider) + headers = {'Content-Type': 'application/json'} + rqst = { + "limit": MAX_STAC_RESOURCES_PER_PAGE, + "datetime": '{}/{}'.format(time_start, time_end), + "collections": collections, + } + + # add polygon if provided + if polygons: + rqst["intersects"] = { + "type": "Polygon", + "coordinates": [[[coord["lon"], coord["lat"]] for coord in polygon] for polygon in polygons] + } + + # make initial stac request + data = context.post(url, data=json.dumps(rqst), headers=headers) + data.raise_for_status() + geojson = __build_geojson(data) + + # iterate through additional pages if not all returned + num_returned = geojson["context"]["returned"] + num_matched = geojson["context"]["matched"] + if num_matched > max_requested_resources: + logger.warn("Number of matched resources truncated from {} to {}".format(num_matched, max_requested_resources)) + num_matched = max_requested_resources + num_pages = int((num_matched + (num_returned - 1)) / num_returned) + for page in range(2, num_pages+1): + rqst["page"] = page + data = context.post(url, data=json.dumps(rqst), headers=headers) + data.raise_for_status() + _geojson = __build_geojson(data) + geojson["features"] += _geojson["features"] + geojson["context"]["returned"] = num_matched + + # return geojson dictionary + return geojson ############################################################################### # APIs @@ -255,22 +411,30 @@ def set_max_resources (max_resources): # # Common Metadata Repository # -def cmr(provider=None, short_name=None, version=None, polygon=None, time_start='2018-01-01T00:00:00Z', time_end=datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"), return_metadata=False, name_filter=None): +def cmr(provider=None, short_name=None, version=None, polygon=None, time_start='2018-01-01T00:00:00Z', time_end=datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"), return_metadata=False, name_filter=None, collections=None): ''' - Query the `NASA Common Metadata Repository (CMR) `_ for a list of data within temporal and spatial parameters + Query the `NASA Common Metadata Repository (CMR) `_ for a list of data within temporal and spatial parameters Parameters ---------- + short_name: str + dataset short name as defined in the `NASA CMR Directory `_ + provider: str + dataset provider as specified by CMR, leave as None to use default provider associated with short_name + version: str + dataset version string, leave as None to get latest support version polygon: list either a single list of longitude,latitude in counter-clockwise order with first and last point matching, defining region of interest (see `polygons `_), or a list of such lists when the region includes more than one polygon time_start: str starting time for query in format ``--T::Z`` time_end: str ending time for query in format ``--T::Z`` - version: str - dataset version as found in the `NASA CMR Directory `_ - short_name: str - dataset short name as defined in the `NASA CMR Directory `_ + collections: list + list of dataset collections as specified by CMR, leave as None to use defaults + return_metadata: bool + flag indicating whether metadata associated with the query is returned back to the user + name_filter: str + filter to apply to resources returned by query Returns ------- @@ -300,16 +464,6 @@ def cmr(provider=None, short_name=None, version=None, polygon=None, time_start=' else: raise sliderule.FatalError("Unable to determine provider for CMR query") - # attempt to fil in provider - if version == None: - if short_name in DATASETS: - version = DATASETS[short_name]["version"] - else: - raise sliderule.FatalError("Unable to determine version for CMR query") - - # initialize return value - resources = {} # [] = - # create list of polygons polygons = [None] if polygon and len(polygon) > 0: @@ -318,58 +472,8 @@ def cmr(provider=None, short_name=None, version=None, polygon=None, time_start=' elif type(polygon[0] == list): polygons = copy.deepcopy(polygon) - # iterate through each polygon (or none if none supplied) - for polygon in polygons: - urls = [] - metadata = sliderule.emptyframe() - - # issue CMR request - for tolerance in [0.0001, 0.001, 0.01, 0.1, 1.0, None]: - - # convert polygon list into string - polystr = None - if polygon: - flatpoly = [] - for p in polygon: - flatpoly.append(p["lon"]) - flatpoly.append(p["lat"]) - polystr = str(flatpoly)[1:-1] - polystr = polystr.replace(" ", "") # remove all spaces as this will be embedded in a url - - # call into NSIDC routines to make CMR request - try: - urls,metadata = __cmr_search(provider, short_name, version, time_start, time_end, polygon=polystr, return_metadata=return_metadata, name_filter=name_filter) - break # exit loop because cmr search was successful - except urllib.error.HTTPError as e: - logger.error('HTTP Request Error: {}'.format(e.reason)) - except RuntimeError as e: - logger.error("Runtime Error:", e) - - # simplify polygon - if polygon and tolerance: - raw_multi_polygon = [[(tuple([(c['lon'], c['lat']) for c in polygon]), [])]] - shape = MultiPolygon(*raw_multi_polygon) - buffered_shape = shape.buffer(tolerance) - simplified_shape = buffered_shape.simplify(tolerance) - simplified_coords = list(simplified_shape.exterior.coords) - logger.warning('Using simplified polygon (for CMR request only!), {} points using tolerance of {}'.format(len(simplified_coords), tolerance)) - region = [] - for coord in simplified_coords: - point = {"lon": coord[0], "lat": coord[1]} - region.insert(0,point) - polygon = region - else: - break # exit here because nothing can be done - - # populate resources - for i,url, in enumerate(urls): - resources[url] = metadata.iloc[i] - - # build return lists - url_list = list(resources.keys()) - meta_list = list(resources.values()) - - if return_metadata: - return (url_list,meta_list) + # perform query + if short_name in DATASETS and DATASETS[short_name]["stac"]: + return __stac_search(provider, short_name, collections, polygons, time_start, time_end) else: - return url_list + return __cmr_search(provider, short_name, version, polygons, time_start, time_end, return_metadata, name_filter) \ No newline at end of file diff --git a/sliderule/sliderule.py b/sliderule/sliderule.py index 0c07a55..f20cc06 100644 --- a/sliderule/sliderule.py +++ b/sliderule/sliderule.py @@ -1066,7 +1066,7 @@ def toregion(source, tolerance=0.0, cellsize=0.01, n_clusters=1): dict a list of longitudes and latitudes containing the region of interest that can be used for the **poly** and **raster** parameters in a processing request to SlideRule. - region = {"poly": [{"lat": , "lon": , ... }], "clusters": [{"lat": , "lon": , ... }, {"lat": , "lon": , ... }, ...], "raster": {"data": , "length": , "cellsize": }} + region = {"poly": [{"lat": , "lon": }, ...], "clusters": [[{"lat": , "lon": }, ...], [{"lat": , "lon": }, ...]], "raster": {"data": , "length": , "cellsize": }} Examples -------- diff --git a/utils/query_stac.py b/utils/query_stac.py new file mode 100644 index 0000000..46ebad0 --- /dev/null +++ b/utils/query_stac.py @@ -0,0 +1,38 @@ +# +# Imports +# +import sys +import json +import sliderule +from utils import parse_command_line +from sliderule import earthdata + +############################################################################### +# MAIN +############################################################################### + +if __name__ == '__main__': + + # Defaults + cfg = { + "dataset": "HLS", + "region": "examples/grandmesa.geojson", + "time_start": "2021-01-01T00:00:00Z", + "time_end": "2022-01-01T23:59:59Z", + "display_all": False + } + + # Command line parameters + parse_command_line(sys.argv, cfg) + + # Region of interest + polygon = sliderule.toregion(cfg["region"])["poly"] + + # Query CMR for list of resources + geojson = earthdata.cmr(short_name=cfg["dataset"], polygon=polygon, time_start=cfg["time_start"], time_end=cfg["time_end"]) + + # Display Results + print("Returned {} features".format(geojson["context"]["returned"])) + if cfg["display_all"]: + print(json.dumps(geojson, indent=2)) + diff --git a/utils/stac.py b/utils/stac.py new file mode 100644 index 0000000..c88a01d --- /dev/null +++ b/utils/stac.py @@ -0,0 +1,68 @@ +from pystac_client import Client +import sliderule +import requests +import json +import sys + +# parameters +debug = False +client = False +manual = True +page = None +url = 'https://cmr.earthdata.nasa.gov/stac/LPCLOUD' +headers = {'Content-Type': 'application/json'} +limit = 200 +collections = ["HLSS30.v2.0", "HLSL30.v2.0"] +time_start = "2021-01-01T00:00:00Z" +time_end = "2022-01-01T23:59:59Z" + +polygons = [[{"lon": -177.0000000001, "lat": 51.0000000001}, + {"lon": -179.0000000001, "lat": 51.0000000001}, + {"lon": -179.0000000001, "lat": 49.0000000001}, + {"lon": -177.0000000001, "lat": 49.0000000001}, + {"lon": -177.0000000001, "lat": 51.0000000001}]] +coordinates = [[[coord["lon"], coord["lat"]] for coord in polygon] for polygon in polygons] +coordinates = [[[coord["lon"], coord["lat"]] for coord in polygon] for polygon in [sliderule.toregion("examples/grandmesa.geojson")["poly"]]] + +# debug output +if debug: + import logging + import http.client as http_client + http_client.HTTPConnection.debuglevel = 1 + logging.basicConfig() + logging.getLogger().setLevel(logging.DEBUG) + requests_log = logging.getLogger("requests.packages.urllib3") + requests_log.setLevel(logging.DEBUG) + requests_log.propagate = True + + +# client stac request +if client: + catalog = Client.open(url) + timeRange = '{}/{}'.format(time_start, time_end) + geometry = {"type": "Polygon", "coordinates": coordinates} + results = catalog.search(collections=collections, datetime=timeRange, intersects=geometry) + items = results.get_all_items_as_dict() + print(f"Returned {results.matched()} features") + + +# manual stac request +if manual: + rqst = { + "limit": 100, + "datetime": '{}/{}'.format(time_start, time_end), + "collections": collections, + "intersects": { + "type": "Polygon", + "coordinates": coordinates + } + } + if page: + rqst["page"] = page + context = requests.Session() + context.trust_env = False + data = context.post(url+"/search", data=json.dumps(rqst), headers=headers) + data.raise_for_status() + geojson = data.json() + print("Returned {} features".format(geojson["context"]["returned"])) + From 622691106f5e96080788238f52efbc2ecdf61a81 Mon Sep 17 00:00:00 2001 From: JP Swinski Date: Thu, 9 Mar 2023 13:41:14 +0000 Subject: [PATCH 3/5] updates to earthdata stac interface --- sliderule/earthdata.py | 197 ++++++++++++++++++++++++++++++---------- sliderule/icesat2.py | 4 + tests/test_arcticdem.py | 2 +- tests/test_landsat.py | 24 +++++ utils/query_cmr.py | 3 +- utils/query_stac.py | 2 +- 6 files changed, 182 insertions(+), 50 deletions(-) create mode 100644 tests/test_landsat.py diff --git a/sliderule/earthdata.py b/sliderule/earthdata.py index f79fa45..70e445e 100644 --- a/sliderule/earthdata.py +++ b/sliderule/earthdata.py @@ -236,15 +236,11 @@ def __cmr_query(provider, short_name, version, time_start, time_end, **kwargs): # CMR UTILITIES ############################################################################### +# +# Perform a CMR Search Request +# def __cmr_search(provider, short_name, version, polygons, time_start, time_end, return_metadata, name_filter): - # attempt to fill in version - if version == None: - if short_name in DATASETS: - version = DATASETS[short_name]["version"] - else: - raise sliderule.FatalError("Unable to determine version for CMR query") - # initialize return value resources = {} # [] = @@ -304,10 +300,9 @@ def __cmr_search(provider, short_name, version, polygons, time_start, time_end, else: return url_list -############################################################################### -# STAC UTILITIES -############################################################################### - +# +# Build a GeoJSON Response from STAC Query Response +# def __build_geojson(rsps): geojson = rsps.json() del geojson["links"] @@ -331,6 +326,11 @@ def __build_geojson(rsps): del geojson["features"][i]["assets"] return geojson +# +# Perform a STAC Query +# +# See https://cmr.earthdata.nasa.gov/stac/docs/index.html for details on API +# def __stac_search(provider, short_name, collections, polygons, time_start, time_end): global max_requested_resources @@ -380,10 +380,47 @@ def __stac_search(provider, short_name, collections, polygons, time_start, time_ _geojson = __build_geojson(data) geojson["features"] += _geojson["features"] geojson["context"]["returned"] = num_matched + geojson["context"]["limit"] = max_requested_resources # return geojson dictionary return geojson +# +# Get Provider from DATASETS +# +def __get_provider(short_name): + + # check parameters + if short_name == None: + raise sliderule.FatalError("Must supply short name to CMR query") + elif short_name not in DATASETS: + raise sliderule.FatalError("Must supply a supported dataset: " + short_name) + + # attempt to fill in provider + provider = DATASETS[short_name]["provider"] + if provider == None: + raise sliderule.FatalError("Unable to determine provider for CMR query") + + # return provider string (cannot be None) + return provider + +# +# Format Polygons for Request +# +def __format_polygons(polygon): + + polygons = [None] + + # create list of polygons + if polygon and len(polygon) > 0: + if type(polygon[0]) == dict: + polygons = [copy.deepcopy(polygon)] + elif type(polygon[0] == list): + polygons = copy.deepcopy(polygon) + + # return list of polygons + return polygons + ############################################################################### # APIs ############################################################################### @@ -411,26 +448,22 @@ def set_max_resources (max_resources): # # Common Metadata Repository # -def cmr(provider=None, short_name=None, version=None, polygon=None, time_start='2018-01-01T00:00:00Z', time_end=datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"), return_metadata=False, name_filter=None, collections=None): +def cmr(short_name=None, version=None, polygon=None, time_start='2018-01-01T00:00:00Z', time_end=datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"), return_metadata=False, name_filter=None): ''' Query the `NASA Common Metadata Repository (CMR) `_ for a list of data within temporal and spatial parameters Parameters ---------- - short_name: str - dataset short name as defined in the `NASA CMR Directory `_ - provider: str - dataset provider as specified by CMR, leave as None to use default provider associated with short_name - version: str - dataset version string, leave as None to get latest support version - polygon: list - either a single list of longitude,latitude in counter-clockwise order with first and last point matching, defining region of interest (see `polygons `_), or a list of such lists when the region includes more than one polygon - time_start: str - starting time for query in format ``--T::Z`` - time_end: str - ending time for query in format ``--T::Z`` - collections: list - list of dataset collections as specified by CMR, leave as None to use defaults + short_name: str + dataset short name as defined in the `NASA CMR Directory `_ + version: str + dataset version string, leave as None to get latest support version + polygon: list + either a single list of longitude,latitude in counter-clockwise order with first and last point matching, defining region of interest (see `polygons `_), or a list of such lists when the region includes more than one polygon + time_start: str + starting time for query in format ``--T::Z`` + time_end: str + ending time for query in format ``--T::Z`` return_metadata: bool flag indicating whether metadata associated with the query is returned back to the user name_filter: str @@ -443,37 +476,109 @@ def cmr(provider=None, short_name=None, version=None, polygon=None, time_start=' Examples -------- - >>> from sliderule import icesat2 + >>> from sliderule import earthdata >>> region = [ {"lon": -108.3435200747503, "lat": 38.89102961045247}, ... {"lon": -107.7677425431139, "lat": 38.90611184543033}, ... {"lon": -107.7818591266989, "lat": 39.26613714985466}, ... {"lon": -108.3605610678553, "lat": 39.25086131372244}, ... {"lon": -108.3435200747503, "lat": 38.89102961045247} ] - >>> granules = icesat2.cmr(polygon=region) + >>> granules = earthdata.cmr(short_name='ATL06', polygon=region) >>> granules ['ATL03_20181017222812_02950102_003_01.h5', 'ATL03_20181110092841_06530106_003_01.h5', ... 'ATL03_20201111102237_07370902_003_01.h5'] ''' - # check parameters - if short_name == None: - raise sliderule.FatalError("Must supply short name to CMR query") - # attempt to fil in provider - if provider == None: - if short_name in DATASETS: - provider = DATASETS[short_name]["provider"] - else: - raise sliderule.FatalError("Unable to determine provider for CMR query") + # get provider + provider = __get_provider(short_name) + + # attempt to fill in version + if version == None: + version = DATASETS[short_name]["version"] # create list of polygons - polygons = [None] - if polygon and len(polygon) > 0: - if type(polygon[0]) == dict: - polygons = [copy.deepcopy(polygon)] - elif type(polygon[0] == list): - polygons = copy.deepcopy(polygon) + polygons = __format_polygons(polygon) + + # perform query + return __cmr_search(provider, short_name, version, polygons, time_start, time_end, return_metadata, name_filter) + +# +# SpatioTemporal Asset Catalog +# +def stac(short_name=None, collections=None, polygon=None, time_start='2018-01-01T00:00:00Z', time_end=datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"), as_str=False): + ''' + Perform a STAC query of the `NASA Common Metadata Repository (CMR) `_ catalog for a list of data within temporal and spatial parameters + + Parameters + ---------- + short_name: str + dataset short name as defined in the `NASA CMR Directory `_ + collections: list + list of dataset collections as specified by CMR, leave as None to use defaults + polygon: list + either a single list of longitude,latitude in counter-clockwise order with first and last point matching, defining region of interest (see `polygons `_), or a list of such lists when the region includes more than one polygon + time_start: str + starting time for query in format ``--T::Z`` + time_end: str + ending time for query in format ``--T::Z`` + as_str: bool + whether to return geojson as a dictionary or string + + Returns + ------- + str + geojson of the feature set returned by the query + { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "id": "", + "geometry": { + "type": "Polygon", + "coordinates": [..] + }, + "properties": { + "datetime": "YYYY-MM-DDTHH:MM:SS.SSSZ", + "start_datetime": "YYYY-MM-DDTHH:MM:SS.SSSZ", + "end_datetime": "YYYY-MM-DDTHH:MM:SS.SSSZ", + "": "", + .. + } + }, + .. + ], + "stac_version": "" + "context": { + "returned": , + "limit": , + "matched": + } + } + + Examples + -------- + >>> from sliderule import earthdata + >>> region = [ {"lon": -108.3435200747503, "lat": 38.89102961045247}, + ... {"lon": -107.7677425431139, "lat": 38.90611184543033}, + ... {"lon": -107.7818591266989, "lat": 39.26613714985466}, + ... {"lon": -108.3605610678553, "lat": 39.25086131372244}, + ... {"lon": -108.3435200747503, "lat": 38.89102961045247} ] + >>> geojson = earthdata.stac(short_name='HLS', polygon=region) + ''' + # get provider + provider = __get_provider(short_name) + + # check collections + if collections == None: + collections = DATASETS[short_name]["collections"] + + # create list of polygons + polygons = __format_polygons(polygon) # perform query - if short_name in DATASETS and DATASETS[short_name]["stac"]: - return __stac_search(provider, short_name, collections, polygons, time_start, time_end) + geojson = __stac_search(provider, short_name, collections, polygons, time_start, time_end) + + # return + if as_str: + return json.dumps(geojson) else: - return __cmr_search(provider, short_name, version, polygons, time_start, time_end, return_metadata, name_filter) \ No newline at end of file + return geojson diff --git a/sliderule/icesat2.py b/sliderule/icesat2.py index dc79ff8..b23b571 100644 --- a/sliderule/icesat2.py +++ b/sliderule/icesat2.py @@ -155,6 +155,10 @@ def __todataframe(columns, time_key="time", lon_key="lon", lat_key="lat", **kwar # Generate Time Column columns['time'] = columns[time_key].astype('datetime64[ns]') + # Temporary code for backward compatibility + if 'delta_time' in columns: + del columns['delta_time'] + # Generate Geometry Column geometry = geopandas.points_from_xy(columns[lon_key], columns[lat_key]) del columns[lon_key] diff --git a/tests/test_arcticdem.py b/tests/test_arcticdem.py index 44e2be6..040f69a 100644 --- a/tests/test_arcticdem.py +++ b/tests/test_arcticdem.py @@ -9,7 +9,7 @@ TESTDIR = Path(__file__).parent @pytest.mark.network -class TestVrt: +class TestMosaic: def test_vrt(self, domain, organization): icesat2.init(domain, organization=organization) rqst = {"samples": {"asset": "arcticdem-mosaic"}, "coordinates": [[-178.0,51.7]]} diff --git a/tests/test_landsat.py b/tests/test_landsat.py new file mode 100644 index 0000000..fdb688d --- /dev/null +++ b/tests/test_landsat.py @@ -0,0 +1,24 @@ +"""Tests for sliderule-python landsat raster support.""" + +import pytest +from pathlib import Path +import os.path +from sliderule import sliderule, earthdata + +TESTDIR = Path(__file__).parent + +@pytest.mark.network +class TestHLS: + def test_samples(self, domain, organization): + sliderule.init(domain, organization=organization) + time_start = "2021-01-01T00:00:00Z" + time_end = "2022-01-01T23:59:59Z" + polygon = [ {"lon": -177.0000000001, "lat": 51.0000000001}, + {"lon": -179.0000000001, "lat": 51.0000000001}, + {"lon": -179.0000000001, "lat": 49.0000000001}, + {"lon": -177.0000000001, "lat": 49.0000000001}, + {"lon": -177.0000000001, "lat": 51.0000000001} ] + catalog = earthdata.stac(short_name="HLS", polygon=polygon, time_start=time_start, time_end=time_end, as_str=True) + print(catalog) + rqst = {"samples": {"asset": "landsat-hls", "catalog": catalog}, "coordinates": [[-178.0, 51.7]]} + rsps = sliderule.source("samples", rqst) diff --git a/utils/query_cmr.py b/utils/query_cmr.py index 2746531..5dbe090 100644 --- a/utils/query_cmr.py +++ b/utils/query_cmr.py @@ -18,7 +18,6 @@ "tolerance": 0.0, "dataset": "ATL03", "version": "005", - "provider": "NSIDC_ECS" } # Command line parameters @@ -28,7 +27,7 @@ region = sliderule.toregion(cfg["region"], cfg["tolerance"]) # Query CMR for list of resources - resources = earthdata.cmr(provider=cfg["provider"], short_name=cfg["dataset"], version=cfg["version"], polygon=region["poly"]) + resources = earthdata.cmr(short_name=cfg["dataset"], version=cfg["version"], polygon=region["poly"]) print("Region: {} points, {} files".format(len(region["poly"]), len(resources))) for resource in resources: print(resource) diff --git a/utils/query_stac.py b/utils/query_stac.py index 46ebad0..7b60052 100644 --- a/utils/query_stac.py +++ b/utils/query_stac.py @@ -29,7 +29,7 @@ polygon = sliderule.toregion(cfg["region"])["poly"] # Query CMR for list of resources - geojson = earthdata.cmr(short_name=cfg["dataset"], polygon=polygon, time_start=cfg["time_start"], time_end=cfg["time_end"]) + geojson = earthdata.stac(short_name=cfg["dataset"], polygon=polygon, time_start=cfg["time_start"], time_end=cfg["time_end"]) # Display Results print("Returned {} features".format(geojson["context"]["returned"])) From 61134d0cbcd7c61cbd8ce550d326f46ddf134e57 Mon Sep 17 00:00:00 2001 From: JP Swinski Date: Thu, 9 Mar 2023 15:43:44 +0000 Subject: [PATCH 4/5] added ndvi pytest --- tests/test_landsat.py | 25 +++++++++++++++++++++---- utils/stac.py | 7 ------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/tests/test_landsat.py b/tests/test_landsat.py index fdb688d..721c518 100644 --- a/tests/test_landsat.py +++ b/tests/test_landsat.py @@ -3,7 +3,7 @@ import pytest from pathlib import Path import os.path -from sliderule import sliderule, earthdata +from sliderule import sliderule, earthdata, icesat2 TESTDIR = Path(__file__).parent @@ -12,13 +12,30 @@ class TestHLS: def test_samples(self, domain, organization): sliderule.init(domain, organization=organization) time_start = "2021-01-01T00:00:00Z" - time_end = "2022-01-01T23:59:59Z" + time_end = "2021-02-01T23:59:59Z" polygon = [ {"lon": -177.0000000001, "lat": 51.0000000001}, {"lon": -179.0000000001, "lat": 51.0000000001}, {"lon": -179.0000000001, "lat": 49.0000000001}, {"lon": -177.0000000001, "lat": 49.0000000001}, {"lon": -177.0000000001, "lat": 51.0000000001} ] catalog = earthdata.stac(short_name="HLS", polygon=polygon, time_start=time_start, time_end=time_end, as_str=True) - print(catalog) - rqst = {"samples": {"asset": "landsat-hls", "catalog": catalog}, "coordinates": [[-178.0, 51.7]]} + rqst = {"samples": {"asset": "landsat-hls", "catalog": catalog, "bands": ["B02"]}, "coordinates": [[-178.0, 51.7]]} rsps = sliderule.source("samples", rqst) + + def test_ndvi(self, domain, asset, organization): + icesat2.init(domain, organization=organization) + region = sliderule.toregion(os.path.join(TESTDIR, "data/grandmesa.geojson")) + resource = "ATL03_20181017222812_02950102_005_01.h5" + time_start = "2021-01-01T00:00:00Z" + time_end = "2021-02-01T23:59:59Z" + catalog = earthdata.stac(short_name="HLS", polygon=region['poly'], time_start=time_start, time_end=time_end, as_str=True) + parms = { "poly": region['poly'], + "raster": region['raster'], + "cnf": "atl03_high", + "ats": 20.0, + "cnt": 10, + "len": 40.0, + "res": 20.0, + "maxi": 1, + "samples": {"ndvi": {"asset": "landsat-hls", "catalog": catalog, "bands": ["NDVI"]}} } + gdf = icesat2.atl06p(parms, asset=asset, resources=[resource]) diff --git a/utils/stac.py b/utils/stac.py index c88a01d..e51ee6e 100644 --- a/utils/stac.py +++ b/utils/stac.py @@ -15,13 +15,6 @@ collections = ["HLSS30.v2.0", "HLSL30.v2.0"] time_start = "2021-01-01T00:00:00Z" time_end = "2022-01-01T23:59:59Z" - -polygons = [[{"lon": -177.0000000001, "lat": 51.0000000001}, - {"lon": -179.0000000001, "lat": 51.0000000001}, - {"lon": -179.0000000001, "lat": 49.0000000001}, - {"lon": -177.0000000001, "lat": 49.0000000001}, - {"lon": -177.0000000001, "lat": 51.0000000001}]] -coordinates = [[[coord["lon"], coord["lat"]] for coord in polygon] for polygon in polygons] coordinates = [[[coord["lon"], coord["lat"]] for coord in polygon] for polygon in [sliderule.toregion("examples/grandmesa.geojson")["poly"]]] # debug output From 5fb74e380fa3383e741f411c01b82d971a0b5f0b Mon Sep 17 00:00:00 2001 From: JP Swinski Date: Thu, 9 Mar 2023 16:00:31 +0000 Subject: [PATCH 5/5] moved sample to inside poly in the landsat pytest --- tests/test_landsat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_landsat.py b/tests/test_landsat.py index 721c518..97642e2 100644 --- a/tests/test_landsat.py +++ b/tests/test_landsat.py @@ -19,7 +19,7 @@ def test_samples(self, domain, organization): {"lon": -177.0000000001, "lat": 49.0000000001}, {"lon": -177.0000000001, "lat": 51.0000000001} ] catalog = earthdata.stac(short_name="HLS", polygon=polygon, time_start=time_start, time_end=time_end, as_str=True) - rqst = {"samples": {"asset": "landsat-hls", "catalog": catalog, "bands": ["B02"]}, "coordinates": [[-178.0, 51.7]]} + rqst = {"samples": {"asset": "landsat-hls", "catalog": catalog, "bands": ["B02"]}, "coordinates": [[-178.0, 50.7]]} rsps = sliderule.source("samples", rqst) def test_ndvi(self, domain, asset, organization):