From 11080296724eda8148d044ad17d53a9f6fda6f62 Mon Sep 17 00:00:00 2001 From: To Huynh Date: Sun, 17 Jul 2022 13:50:06 -0700 Subject: [PATCH 1/3] Optimize get spreadsheet title --- .../google_sheets_institution_extracter.py | 54 +++++++++++++------ 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/siglatools/institution_extracters/google_sheets_institution_extracter.py b/siglatools/institution_extracters/google_sheets_institution_extracter.py index c4d2c3a..942b2be 100644 --- a/siglatools/institution_extracters/google_sheets_institution_extracter.py +++ b/siglatools/institution_extracters/google_sheets_institution_extracter.py @@ -300,27 +300,46 @@ def __init__(self, credentials_path: str): # Store the spreadsheets service self.spreadsheets = service.spreadsheets() + def _get_spreadsheet(self, spreadsheet_id: str) -> Any: + """ + Get the spreadsheet from a spreadsheet_id + + Parameters + ---------- + spreadsheet_id: str + The id of the spreadsheet + + Returns + ------- + spreadsheet: The spreadsheet. + See https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets#Spreadsheet. + + """ + # Get the spreadsheet + return self.spreadsheets.get(spreadsheetId=spreadsheet_id).execute() + + def _get_spreadsheet_tile(self, spreadsheet: Any) -> str: + """Get the title of a spreadsheet""" + return spreadsheet.get("properties").get("title") + def _get_meta_data_a1_notations( self, - spreadsheet_id: str, + spreadsheet: Any, ) -> List[A1Notation]: """ Construct an A1Notation for each sheet from its first two rows of meta data Parameters ---------- - spreadsheet_id: str - The id of a spreadsheet + spreadsheet: Any + The spreadsheet object. + See https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets#Spreadsheet Returns ------- a1_notations: List[A1Notatoin] The list of A1Notations, one for each sheet. 
""" - # Get the spreadsheet - spreadsheet_response = self.spreadsheets.get( - spreadsheetId=spreadsheet_id - ).execute() # Create an A1Notation for each sheet's meta data return [ A1Notation( @@ -329,7 +348,7 @@ def _get_meta_data_a1_notations( start_row=1, end_row=2, ) - for sheet in spreadsheet_response.get("sheets") + for sheet in spreadsheet.get("sheets") ] def _get_meta_data( @@ -378,7 +397,9 @@ def _get_data_a1_notations( return bounding_box_a1_notations - def _get_data(self, spreadsheet_id: str, a1_notations: List[A1Notation]) -> List[List[List[Any]]]: + def _get_data( + self, spreadsheet_id: str, a1_notations: List[A1Notation] + ) -> List[List[List[Any]]]: data_response = ( self.spreadsheets.values() .batchGet( @@ -448,15 +469,13 @@ def get_spreadsheet_data(self, spreadsheet_id: str) -> List[SheetData]: The spreadsheet data. Please the SheetData class to view its attributes. """ try: + spreadsheet = self._get_spreadsheet(spreadsheet_id=spreadsheet_id) # Get an A1Notation for each sheet's meta data meta_data_a1_notations = self._get_meta_data_a1_notations( - spreadsheet_id=spreadsheet_id + spreadsheet=spreadsheet ) # Get the spreadsheet title - spreadsheet_response = self.spreadsheets.get( - spreadsheetId=spreadsheet_id - ).execute() - spreadsheet_title = spreadsheet_response.get("properties").get("title") + spreadsheet_title = self._get_spreadsheet_tile(spreadsheet=spreadsheet) # Get the meta data for each sheet meta_data = self._get_meta_data( spreadsheet_id=spreadsheet_id, @@ -484,7 +503,12 @@ def get_spreadsheet_data(self, spreadsheet_id: str) -> List[SheetData]: ) except HttpError as http_error: raise exceptions.UnableToAccessSpreadsheet( - ErrorInfo({GoogleSheetsInfoField.spreadsheet_title: spreadsheet_title, "reason": f"{http_error}"}) + ErrorInfo( + { + GoogleSheetsInfoField.spreadsheet_title: spreadsheet_title, + "reason": f"{http_error}", + } + ) ) next_uv_date_data_iter = iter(next_uv_date_data) From baf61ba96a2ac5d5a5cf5056db34ae03223e7eee Mon 
Sep 17 00:00:00 2001 From: To Huynh Date: Sun, 17 Jul 2022 13:56:27 -0700 Subject: [PATCH 2/3] Don't get next uv dates if list is empty --- .../google_sheets_institution_extracter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/siglatools/institution_extracters/google_sheets_institution_extracter.py b/siglatools/institution_extracters/google_sheets_institution_extracter.py index 942b2be..8124ace 100644 --- a/siglatools/institution_extracters/google_sheets_institution_extracter.py +++ b/siglatools/institution_extracters/google_sheets_institution_extracter.py @@ -439,6 +439,9 @@ def _get_next_uv_dates_a1_annotations( def _get_next_uv_dates_data( self, spreadsheet_id: str, a1_notations: List[A1Notation] ) -> List[List[Any]]: + if not a1_notations: + return [] + next_uv_date_response = ( self.spreadsheets.values() .batchGet( From aa3e28120d8c2a29777fa0301e2c42172e69900b Mon Sep 17 00:00:00 2001 From: To Huynh Date: Sun, 17 Jul 2022 16:05:27 -0700 Subject: [PATCH 3/3] Add num retries --- .../google_sheets_institution_extracter.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/siglatools/institution_extracters/google_sheets_institution_extracter.py b/siglatools/institution_extracters/google_sheets_institution_extracter.py index 8124ace..0ba43f9 100644 --- a/siglatools/institution_extracters/google_sheets_institution_extracter.py +++ b/siglatools/institution_extracters/google_sheets_institution_extracter.py @@ -296,7 +296,15 @@ def __init__(self, credentials_path: str): self._credentials_path, scopes=GOOGLE_API_SCOPES ) # Construct a Resource for interacting with Google Sheets API - service = build("sheets", "v4", credentials=credentials, cache_discovery=False) + # `num_retries` downstreams + # See https://github.com/googleapis/google-api-python-client/issues/1049#issuecomment-702893972 + service = build( + "sheets", + "v4", + credentials=credentials, + cache_discovery=False, + num_retries=3, + ) # Store the spreadsheets 
service self.spreadsheets = service.spreadsheets() @@ -441,7 +449,7 @@ def _get_next_uv_dates_data( ) -> List[List[Any]]: if not a1_notations: return [] - + next_uv_date_response = ( self.spreadsheets.values() .batchGet(