From f9ad429c00d09ec7c15b54afa981611318bdea93 Mon Sep 17 00:00:00 2001
From: Seltyk
Date: Mon, 7 Aug 2023 16:17:09 -0400
Subject: [PATCH 001/122] Add endpoint to get repo by ID

Also wrote API documentation for it using an example 3scale repo in my testing

Signed-off-by: Seltyk
---
 augur/api/routes/util.py      |  62 +++++++++++++---
 docs/source/rest-api/spec.yml | 129 ++++++++++++++++++++++------------
 2 files changed, 137 insertions(+), 54 deletions(-)

diff --git a/augur/api/routes/util.py b/augur/api/routes/util.py
index cd6a8ad3bc..8a32618554 100644
--- a/augur/api/routes/util.py
+++ b/augur/api/routes/util.py
@@ -1,10 +1,11 @@
 #SPDX-License-Identifier: MIT
+from augur.api.routes import AUGUR_API_VERSION
+from ..server import app, engine
 import base64
 import sqlalchemy as s
 import pandas as pd
 import json
 from flask import Response
-import logging
 
 from augur.application.db.session import DatabaseSession
 from augur.application.logs import AugurLogger
@@ -12,10 +13,6 @@
 
 logger = AugurLogger("augur").get_logger()
 
-from augur.api.routes import AUGUR_API_VERSION
-from ..server import app, engine
-
-
 @app.route('/{}/repo-groups'.format(AUGUR_API_VERSION))
 def get_all_repo_groups(): #TODO: make this name automatic - wrapper?
 
     repoGroupsSQL = s.sql.text("""
@@ -52,9 +49,9 @@ def get_all_repos():
             (select * from api_get_all_repos_issues) b
             on repo.repo_id = b.repo_id
-        left outer join
-            (select * from api_get_all_repo_prs) c
-            on repo.repo_id=c.repo_id
+        left outer join
+            (select * from api_get_all_repo_prs) c
+            on repo.repo_id=c.repo_id
         JOIN repo_groups ON repo_groups.repo_group_id = repo.repo_group_id
         order by repo_name
     """)
@@ -91,9 +88,9 @@ def get_repos_in_repo_group(repo_group_id):
             (select * from api_get_all_repos_issues) b
             on repo.repo_id = b.repo_id
-        left outer join
-            (select * from api_get_all_repo_prs) c
-            on repo.repo_id=c.repo_id
+        left outer join
+            (select * from api_get_all_repo_prs) c
+            on repo.repo_id=c.repo_id
         JOIN repo_groups ON repo_groups.repo_group_id = repo.repo_group_id
         WHERE repo_groups.repo_group_id = :repo_group_id
@@ -106,6 +103,49 @@ def get_repos_in_repo_group(repo_group_id):
                     status=200,
                     mimetype="application/json")
 
+@app.route('/{}/repos/<repo_id>'.format(AUGUR_API_VERSION))
+def get_repo_by_id(repo_id: int) -> Response:
+    repo_by_id_SQL = s.sql.text("""
+        SELECT
+            repo.repo_id,
+            repo.repo_name,
+            repo.description,
+            repo.repo_git AS url,
+            a.commits_all_time,
+            b.issues_all_time,
+            c.pull_requests_all_time,
+            rg_name,
+            repo.repo_group_id
+        FROM
+            repo
+        LEFT OUTER JOIN
+            (SELECT * FROM api_get_all_repos_commits) a
+            ON repo.repo_id = a.repo_id
+        LEFT OUTER JOIN
+            (SELECT * FROM api_get_all_repos_issues) b
+            ON repo.repo_id = b.repo_id
+        LEFT OUTER JOIN
+            (SELECT * FROM api_get_all_repo_prs) c
+            ON repo.repo_id = c.repo_id
+        JOIN repo_groups ON repo_groups.repo_group_id = repo.repo_group_id
+        WHERE
+            repo.repo_id = :id
+    """)
+
+    results = pd.read_sql(repo_by_id_SQL, engine, params={"id": repo_id})
+    results["url"] = results["url"].apply(lambda datum: datum.split("//")[1]) # cut "https://" off the URL
+    results["base64_url"] = [base64.b64encode(results.at[i, "url"].encode()) for i in results.index]
+    data = results.to_json(orient="records", date_format="iso", date_unit="ms")
+
+    if not data or data == "[]":
+        return Response(response='{"status": "Repository ' + str(repo_id) + ' does not exist"}',
+                        status=400,
+                        mimetype="application/json")
+
+    return Response(response=data[1:-1], # cut off brackets at each end, turns list of length 1 into single value
+                    status=200,
+                    mimetype="application/json")
+
 @app.route('/{}/owner/<owner>/repo/<repo>'.format(AUGUR_API_VERSION))
 def get_repo_by_git_name(owner, repo):
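
For reference, a minimal sketch of how the new endpoint could be exercised once
this patch is applied. The host, port, and "api/unstable" version prefix are
illustrative assumptions (AUGUR_API_VERSION is resolved at runtime); the ID and
the expected values come from the 3scale example repo used in the spec below:

    import base64
    import requests  # assumes the requests package is available

    resp = requests.get("http://localhost:5000/api/unstable/repos/25551")
    repo = resp.json()        # a single JSON object, not a one-element list
    print(repo["repo_name"])  # 3scale-operator-metadata
    print(repo["url"])        # github.com/3scale/3scale-operator-metadata

    # base64_url is the base64 encoding of the protocol-stripped url:
    print(base64.b64encode(repo["url"].encode()).decode())
    # Z2l0aHViLmNvbS8zc2NhbGUvM3NjYWxlLW9wZXJhdG9yLW1ldGFkYXRh

An unknown ID yields a 400 response carrying a short JSON status message rather
than an empty list.
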
diff --git a/docs/source/rest-api/spec.yml b/docs/source/rest-api/spec.yml
index ae583dbbbb..50643d9164 100644
--- a/docs/source/rest-api/spec.yml
+++ b/docs/source/rest-api/spec.yml
@@ -96,6 +96,49 @@ paths:
             type: array
       tags:
         - utility
+  /repos/:id:
+    get:
+      description: Get a downloaded repo by its ID in Augur.
+      operationId: Get Repo By ID
+      responses:
+        '200':
+          description: OK
+          scema:
+            items:
+              properties:
+                base64_url:
+                  description: 'Base64 encoding of the full URL. Example: Z2l0aHViLmNvbS8zc2NhbGUvM3NjYWxlLW9wZXJhdG9yLW1ldGFkYXRh'
+                  type: string
+                description:
+                  description: 'Repository description. Example: null'
+                  type: string
+                commits_all_time:
+                  description: 'How many commits have been made to this repository. Example: 24'
+                  type: integer
+                issues_all_time:
+                  description: 'How many issues have been raised on this repository. Example: 1'
+                  type: integer
+                pull_requests_all_time:
+                  description: 'How many pull requests have been made to this repository. Example: 7'
+                  type: integer
+                repo_id:
+                  description: 'Repository ID, should match provided URL parameter. Example: 25551'
+                  type: integer
+                repo_name:
+                  description: 'Name of the provided repository. Example: 3scale-operator-metadata'
+                  type: string
+                rg_name:
+                  description: 'Name of the repository group containing this repo. Example: 3scale'
+                  type: string
+                repo_group_id:
+                  description: 'ID of the repository group containing this repo. Example: 25431'
+                  type: integer
+                url:
+                  description: 'URL of this repository, sans leading protocol. Example: github.com/3scale/3scale-operator-metadata'
+                  type: string
+            type: object
+      tags:
+        - utility
   /owner/:owner/repo/:repo:
     get:
       description: Get the repo_group_id and repo_id of a particular repo.
@@ -284,12 +327,12 @@ paths:
       tags:
         - utility
   #risk endpoints
-  /metadata/repo_info:
-    get:
-      description: 'Returns the metadata about all repositories in an Augur instance, using information from a git platform (GitHub, GitLab, etc.). Each record includes the default branch name, repository license file, forks, stars, watchers, and committers. Also includes metadata about current repository issue and pull request status and counts.'
+  /metadata/repo_info:
+    get:
+      description: 'Returns the metadata about all repositories in an Augur instance, using information from a git platform (GitHub, GitLab, etc.). Each record includes the default branch name, repository license file, forks, stars, watchers, and committers. Also includes metadata about current repository issue and pull request status and counts.'
externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/business-risk.md + url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/business-risk.md operationId: Activity Metadata (Repo) responses: '200': @@ -315,45 +358,45 @@ paths: fork_count: description: 'Example: 554' type: integer - watchers_count: + watchers_count: description: 'Example: 424' type: integer - stars_count: + stars_count: description: 'Example: 443' type: integer - commits_count: + commits_count: description: '4434' - type: integer - committers_count: + type: integer + committers_count: description: 'Example: 42' type: integer - open_issues: + open_issues: description: 'Example: 7' - type: integer - issues_count: + type: integer + issues_count: description: 'Example: 23332' type: integer - issues_closed: + issues_closed: description: 'Example: 23322' type: integer - pull_request_count: + pull_request_count: description: 'Example: 19445' type: integer - pull_requests_open: + pull_requests_open: description: 'Example: 10' type: integer - pull_requests_closed: + pull_requests_closed: description: 'Example: 19435' type: integer - pull_requests_merged: + pull_requests_merged: description: 'Example: 17473' type: integer type: array tags: - risk - /metadata/contributions_count: - get: - description: 'Returns a list of repositories contributed to by all the contributors in an Augur Instance: INCLUDING all repositories on a platform, *not* merely those repositories in the Augur Instance. Numerical totals represent total CONTRIBUTIONS.' + /metadata/contributions_count: + get: + description: 'Returns a list of repositories contributed to by all the contributors in an Augur Instance: INCLUDING all repositories on a platform, *not* merely those repositories in the Augur Instance. Numerical totals represent total CONTRIBUTIONS.' externalDocs: description: CHAOSS Metric Definition url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/business-risk.md @@ -373,9 +416,9 @@ paths: type: array tags: - risk - /metadata/contributors_count: - get: - description: 'Returns a list of repositories contributed to by all the contributors in an Augur Instance: INCLUDING all repositories on a platform, *not* merely those repositories in the Augur Instance. Numerical totals represent total CONTRIBUTORS.' + /metadata/contributors_count: + get: + description: 'Returns a list of repositories contributed to by all the contributors in an Augur Instance: INCLUDING all repositories on a platform, *not* merely those repositories in the Augur Instance. Numerical totals represent total CONTRIBUTORS.' externalDocs: description: CHAOSS Metric Definition url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/business-risk.md @@ -5144,9 +5187,9 @@ paths: type: object tags: - visualizations - /complexity/project_lines: - get: - description: 'Returns project line data for all repositories in an Augur instance, using information from a git platform (GitHub, GitLab, etc.). Each record includes the total and average number of lines in the project repository.' + /complexity/project_lines: + get: + description: 'Returns project line data for all repositories in an Augur instance, using information from a git platform (GitHub, GitLab, etc.). Each record includes the total and average number of lines in the project repository.' 
operationId: Total Lines (repo) responses: '200': @@ -5172,9 +5215,9 @@ paths: type: array tags: - complexity - /complexity/project_file_complexity: - get: - description: 'Returns project file complexity data for all repositories in an Augur instance, using information from a git platform (GitHub, GitLab, etc.). Each record includes the total and average file complexity of the project repository.' + /complexity/project_file_complexity: + get: + description: 'Returns project file complexity data for all repositories in an Augur instance, using information from a git platform (GitHub, GitLab, etc.). Each record includes the total and average file complexity of the project repository.' operationId: File Complexity (repo) responses: '200': @@ -5200,9 +5243,9 @@ paths: type: array tags: - complexity - /complexity/project_blank_lines: - get: - description: 'Returns project blank line data for all repositories in an Augur instance, using information from a git platform (GitHub, GitLab, etc.). Each record includes the total and average number of blank lines in the project repository.' + /complexity/project_blank_lines: + get: + description: 'Returns project blank line data for all repositories in an Augur instance, using information from a git platform (GitHub, GitLab, etc.). Each record includes the total and average number of blank lines in the project repository.' operationId: Total Blank Lines (repo) responses: '200': @@ -5228,9 +5271,9 @@ paths: type: array tags: - complexity - /complexity/project_comment_lines: - get: - description: 'Returns project comment line data for all repositories in an Augur instance, using information from a git platform (GitHub, GitLab, etc.). Each record includes the total and average number of comment lines in the project repository.' + /complexity/project_comment_lines: + get: + description: 'Returns project comment line data for all repositories in an Augur instance, using information from a git platform (GitHub, GitLab, etc.). Each record includes the total and average number of comment lines in the project repository.' operationId: Total Comment Lines (repo) responses: '200': @@ -5256,9 +5299,9 @@ paths: type: array tags: - complexity - /complexity/project_files: - get: - description: 'Returns project file data for all repositories in an Augur instance, using information from a git platform (GitHub, GitLab, etc.). Each record includes the total number of files in the project repository.' + /complexity/project_files: + get: + description: 'Returns project file data for all repositories in an Augur instance, using information from a git platform (GitHub, GitLab, etc.). Each record includes the total number of files in the project repository.' operationId: Total Files (repo) responses: '200': @@ -5281,9 +5324,9 @@ paths: type: array tags: - complexity - /complexity/project_languages: - get: - description: 'Returns project language data for all repositories in an Augur instance, using information from a git platform (GitHub, GitLab, etc.). Each record includes the lines and files of a language in a repository.' + /complexity/project_languages: + get: + description: 'Returns project language data for all repositories in an Augur instance, using information from a git platform (GitHub, GitLab, etc.). Each record includes the lines and files of a language in a repository.' operationId: Project Languages (repo) responses: '200': @@ -5317,7 +5360,7 @@ paths: description: 'The number of messages exchanged for a repository group over a specified period.' 
externalDocs: description: CHAOSS Metric Definition - url: + url: operationId: Repository Messages (Repo Group) parameters: - description: Repository Group ID @@ -5633,4 +5676,4 @@ paths: type: string enum: ["Missing argument"] tags: - - DEI Badging \ No newline at end of file + - DEI Badging From a83fee5c2a5d71922277478763b8f1451501eb32 Mon Sep 17 00:00:00 2001 From: Seltyk Date: Tue, 8 Aug 2023 09:25:58 -0400 Subject: [PATCH 002/122] It would maybe help if I spelled "schema" right Signed-off-by: Seltyk --- docs/source/rest-api/spec.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/rest-api/spec.yml b/docs/source/rest-api/spec.yml index 50643d9164..c7ed3601a7 100644 --- a/docs/source/rest-api/spec.yml +++ b/docs/source/rest-api/spec.yml @@ -103,7 +103,7 @@ paths: responses: '200': description: OK - scema: + schema: items: properties: base64_url: From 577c4f8994c350de7f6070fb9f9ecbb127fce242 Mon Sep 17 00:00:00 2001 From: Seltyk Date: Tue, 8 Aug 2023 09:59:26 -0400 Subject: [PATCH 003/122] Pretend the schema is an array for rendering I'm honestly not sure why "object" doesn't work out, so for the first time in my life, I had to write docs for the docs in the docs! Signed-off-by: Seltyk --- docs/source/rest-api/spec.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/rest-api/spec.yml b/docs/source/rest-api/spec.yml index c7ed3601a7..15423e9a64 100644 --- a/docs/source/rest-api/spec.yml +++ b/docs/source/rest-api/spec.yml @@ -98,7 +98,7 @@ paths: - utility /repos/:id: get: - description: Get a downloaded repo by its ID in Augur. + description: Get a downloaded repo by its ID in Augur. The schema block below says it is an array, but it is not. operationId: Get Repo By ID responses: '200': @@ -136,7 +136,7 @@ paths: url: description: 'URL of this repository, sans leading protocol. Example: github.com/3scale/3scale-operator-metadata' type: string - type: object + type: array tags: - utility /owner/:owner/repo/:repo: From 6101789eb3e52204420b03116bb7413339ab7fa3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:37:50 +0000 Subject: [PATCH 004/122] Bump tornado from 6.1 to 6.3.3 Bumps [tornado](https://github.com/tornadoweb/tornado) from 6.1 to 6.3.3. - [Changelog](https://github.com/tornadoweb/tornado/blob/master/docs/releases.rst) - [Commits](https://github.com/tornadoweb/tornado/compare/v6.1.0...v6.3.3) --- updated-dependencies: - dependency-name: tornado dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cd95eabaee..51f2d81c3d 100644 --- a/setup.py +++ b/setup.py @@ -80,7 +80,7 @@ "httpx==0.23.0", # 0.23.0 "eventlet==0.33.3", "flower==1.2.0", - "tornado==6.1", # added because it sometimes errors when tornado is not 6.1 even though nothing we install depends on it + "tornado==6.3.3", # added because it sometimes errors when tornado is not 6.1 even though nothing we install depends on it "pylint==2.15.5", "dnspython==2.2.1", 'Werkzeug~=2.0.0', From cf8bf5be98813eada2f2ae3aad46936bd5435d7e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Nov 2023 19:01:56 +0000 Subject: [PATCH 005/122] Bump joblib in /augur/tasks/data_analysis/message_insights Bumps [joblib](https://github.com/joblib/joblib) from 1.0.1 to 1.2.0. 
- [Release notes](https://github.com/joblib/joblib/releases) - [Changelog](https://github.com/joblib/joblib/blob/master/CHANGES.rst) - [Commits](https://github.com/joblib/joblib/compare/1.0.1...1.2.0) --- updated-dependencies: - dependency-name: joblib dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- augur/tasks/data_analysis/message_insights/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/data_analysis/message_insights/setup.py b/augur/tasks/data_analysis/message_insights/setup.py index e3dedb4191..8ad12349dc 100644 --- a/augur/tasks/data_analysis/message_insights/setup.py +++ b/augur/tasks/data_analysis/message_insights/setup.py @@ -41,7 +41,7 @@ def read(filename): 'tensorflow==2.13.1', 'h5py==3.10.0', 'scikit-image==0.19.1', - 'joblib==1.0.1', + 'joblib==1.2.0', 'xgboost', 'bs4==0.0.1', 'xlrd==2.0.1', From 08e7a31332fdf8c5d9f5cda0192c345add3302d3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Nov 2023 19:01:58 +0000 Subject: [PATCH 006/122] Bump scipy in /augur/tasks/data_analysis/discourse_analysis Bumps [scipy](https://github.com/scipy/scipy) from 1.7.3 to 1.10.0. - [Release notes](https://github.com/scipy/scipy/releases) - [Commits](https://github.com/scipy/scipy/compare/v1.7.3...v1.10.0) --- updated-dependencies: - dependency-name: scipy dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- augur/tasks/data_analysis/discourse_analysis/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/data_analysis/discourse_analysis/setup.py b/augur/tasks/data_analysis/discourse_analysis/setup.py index f109164ffd..6e992eb857 100644 --- a/augur/tasks/data_analysis/discourse_analysis/setup.py +++ b/augur/tasks/data_analysis/discourse_analysis/setup.py @@ -28,7 +28,7 @@ def read(filename): 'requests==2.28.0', 'psycopg2-binary==2.9.3', 'click==8.0.3', - 'scipy==1.7.3', + 'scipy==1.10.0', 'nltk==3.6.6', 'pandas==1.5.3', 'scikit-learn==1.1.3', From cb7c63256887816a1d17b8673658da1260c71048 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Nov 2023 19:01:59 +0000 Subject: [PATCH 007/122] Bump scipy in /augur/tasks/data_analysis/pull_request_analysis_worker Bumps [scipy](https://github.com/scipy/scipy) from 1.7.3 to 1.10.0. - [Release notes](https://github.com/scipy/scipy/releases) - [Commits](https://github.com/scipy/scipy/compare/v1.7.3...v1.10.0) --- updated-dependencies: - dependency-name: scipy dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- augur/tasks/data_analysis/pull_request_analysis_worker/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py index 5132f29d2e..d61540afbf 100644 --- a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py +++ b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py @@ -34,7 +34,7 @@ def read(filename): 'emoji==1.2.0', 'joblib==1.0.1', 'xgboost==1.4.2', - 'scipy==1.7.3' + 'scipy==1.10.0' ], classifiers=[ 'Development Status :: 2 - Pre-Alpha', From b819f4d40184fa88829372a5e7eed981af3b1790 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Nov 2023 19:02:10 +0000 Subject: [PATCH 008/122] Bump scipy from 1.7.3 to 1.10.0 Bumps [scipy](https://github.com/scipy/scipy) from 1.7.3 to 1.10.0. - [Release notes](https://github.com/scipy/scipy/releases) - [Commits](https://github.com/scipy/scipy/compare/v1.7.3...v1.10.0) --- updated-dependencies: - dependency-name: scipy dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a456352065..f6e560b092 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,7 @@ "distributed >= 2021.03.0", # 2022.8.1 "nltk==3.6.6", # 3.7 "h5py==3.10.0", # 3.7 - "scipy==1.7.3", # 1.9.0 + "scipy==1.10.0", # 1.9.0 "blinker==1.4", # 1.5 "protobuf<3.22", # 4.21.5 "slack==0.0.2", # 0.0.2 From dc3468b3f6468871aa17e7caae76e72c1c615b70 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Nov 2023 19:02:14 +0000 Subject: [PATCH 009/122] Bump scipy in /augur/tasks/data_analysis/message_insights Bumps [scipy](https://github.com/scipy/scipy) from 1.7.3 to 1.10.0. - [Release notes](https://github.com/scipy/scipy/releases) - [Commits](https://github.com/scipy/scipy/compare/v1.7.3...v1.10.0) --- updated-dependencies: - dependency-name: scipy dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- augur/tasks/data_analysis/message_insights/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/data_analysis/message_insights/setup.py b/augur/tasks/data_analysis/message_insights/setup.py index e3dedb4191..fc2950978c 100644 --- a/augur/tasks/data_analysis/message_insights/setup.py +++ b/augur/tasks/data_analysis/message_insights/setup.py @@ -30,7 +30,7 @@ def read(filename): 'requests==2.28.0', 'psycopg2-binary==2.9.3', 'click==8.0.3', - 'scipy==1.7.3', + 'scipy==1.10.0', 'scikit-learn==1.1.3', #0.24.2', 'numpy==1.22.0', 'nltk==3.6.6', From 8e36b394a881d3a042b51261d5c2a8ac38263526 Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Wed, 1 Nov 2023 16:59:19 -0500 Subject: [PATCH 010/122] Work on adding ORM syntax into RLC Signed-off-by: Ulincsys --- augur/api/view/utils.py | 55 ++++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/augur/api/view/utils.py b/augur/api/view/utils.py index 76551a6ab9..2c557ae635 100644 --- a/augur/api/view/utils.py +++ b/augur/api/view/utils.py @@ -9,11 +9,13 @@ from augur.application.db.session import DatabaseSession from augur.application.db.engine import DatabaseEngine from augur.application.db.models import User, Repo, RepoGroup, UserGroup, UserRepo -from sqlalchemy import Column, Table, Integer, MetaData, or_, Label +from sqlalchemy import Column, Table, Integer, MetaData, or_ from sqlalchemy.sql.operators import ilike_op, distinct_op from sqlalchemy.sql.functions import coalesce from augur.application.db.models.base import Base +from sqlalchemy.orm import Query + init_logging() from .init import logger @@ -327,7 +329,7 @@ def renderLoading(dest, query, request): """ ---------------------------------------------------------------- """ def load_repos_test(count = False, source = None, **kwargs): - columns: list[Label] = [ + columns: list[Column] = [ Repo.repo_id.distinct().label("repo_id"), Repo.description.label("description"), Repo.repo_git.label("url"), @@ -339,22 +341,44 @@ def load_repos_test(count = False, source = None, **kwargs): RepoGroup.repo_group_id.label("repo_group_id") ] - def get_colum_by_label(label: str)-> Label: + def get_colum_by_label(label: str)-> Column: for column in columns: if column.name == label: return column - repos = db_session.query(*columns)\ + repos: Query = db_session.query(*columns)\ .outerjoin(commits_materialized_view, Repo.repo_id == commits_materialized_view.columns.repo_id)\ .outerjoin(issues_materialized_view, Repo.repo_id == issues_materialized_view.columns.repo_id)\ .join(RepoGroup, Repo.repo_group_id == RepoGroup.repo_group_id) - user: User = kwargs.get("user") - if user: + if source == "user": + user: User = kwargs.get("user") + + if not user: + return None, {"status": "User not passed when trying to get user repos"} + if not user.groups: + return None, {"status": "No data"} + repos = repos.join(UserRepo, Repo.repo_id == UserRepo.repo_id)\ .join(UserGroup, UserGroup.group_id == UserRepo.group_id)\ .filter(UserGroup.user_id == user.user_id) + elif source == "group": + user: User = kwargs.get("user") + + if not user: + return None, {"status": "User not specified"} + group_name = kwargs.get("group_name") + if not group_name: + return None, {"status": "Group name not specified"} + + group_id = ... 
# UserGroup.convert_group_name_to_id(self.session, user.user_id, group_name) + if group_id is None: + return None, {"status": "Group does not exists"} + + repos = repos.join(UserRepo, Repo.repo_id == UserRepo.repo_id)\ + .filter(UserRepo.group_id == group_id) + search = kwargs.get("search") qkey = kwargs.get("query_key") or ["repo_name", "repo_owner"] if search: @@ -367,9 +391,22 @@ def get_colum_by_label(label: str)-> Label: if count: c = repos.count() return math.ceil(c / page_size) - 1 - - page: int = kwargs.get("page") or 0 - offset = page * page_size + else: + page: int = kwargs.get("page") or 0 + offset = page * page_size + direction = kwargs.get("direction") or "ASC" + order_by = kwargs.get("order_by") or "repo_id" + + if direction not in ["ASC", "DESC"]: + return None, None, {"status": "Invalid direction"} + + if order_by not in ["repo_id", "repo_name", "repo_owner", "commits_all_time", "issues_all_time"]: + return None, None, {"status": "Invalid order by"} + + # Find the column named in the 'order_by', and get its asc() or desc() method + directive: function = getattr(get_colum_by_label(order_by), direction.lower()) + + repos = repos.order_by(directive()) return repos.slice(offset, offset + page_size) From c21c8b37635bc5ab05d3425d5f48a84b1b623324 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Thu, 2 Nov 2023 17:43:30 +0000 Subject: [PATCH 011/122] updating joblib version requirement in pr analysis Signed-off-by: Sean P. Goggins --- augur/tasks/data_analysis/pull_request_analysis_worker/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py index d61540afbf..e6a8927998 100644 --- a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py +++ b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py @@ -32,7 +32,7 @@ def read(filename): 'numpy==1.22.0', 'pandas==1.5.3', 'emoji==1.2.0', - 'joblib==1.0.1', + 'joblib==1.2.0', 'xgboost==1.4.2', 'scipy==1.10.0' ], From 9e023b13b0a4189a8ab8191e47969ab1287a45d4 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Fri, 3 Nov 2023 17:29:38 -0500 Subject: [PATCH 012/122] Update repo_load_controller.py to use orm syntax. 
Signed-off-by: Andrew Brain Co-authored-by: John McGinness --- augur/api/view/routes.py | 16 +-- augur/api/view/utils.py | 97 -------------- augur/util/repo_load_controller.py | 205 ++++++++++++----------------- 3 files changed, 93 insertions(+), 225 deletions(-) diff --git a/augur/api/view/routes.py b/augur/api/view/routes.py index ed617cd36c..bf6e8fc056 100644 --- a/augur/api/view/routes.py +++ b/augur/api/view/routes.py @@ -74,17 +74,15 @@ def repo_table_view(): pagination_offset = config.get_value("frontend", "pagination_offset") if current_user.is_authenticated: - data = load_repos_test(user = current_user, search = query, page = page, sort = sorting, direction = direction, source = "user") - page_count = load_repos_test(user = current_user, search = query, count = True, source = "user") - # data = current_user.get_repos(page = page, sort = sorting, direction = direction, search=query)[0] - # page_count = (current_user.get_repo_count(search = query)[0] or 0) // pagination_offset + data = current_user.get_repos(page = page, sort = sorting, direction = direction, search=query)[0] + repos_count = (current_user.get_repo_count(search = query)[0] or 0) else: - data = load_repos_test(search = query, page = page, sort = sorting, direction = direction) - page_count = load_repos_test(search = query, count = True) - # data = get_all_repos(page = page, sort = sorting, direction = direction, search=query)[0] - # page_count = (get_all_repos_count(search = query)[0] or 0) // pagination_offset + data = get_all_repos(page = page, sort = sorting, direction = direction, search=query)[0] + repos_count = (get_all_repos_count(search = query)[0] or 0) + + page_count = math.ceil(repos_count / pagination_offset) - 1 - if not data.count(): + if not data: data = None diff --git a/augur/api/view/utils.py b/augur/api/view/utils.py index 2c557ae635..7712873b55 100644 --- a/augur/api/view/utils.py +++ b/augur/api/view/utils.py @@ -312,101 +312,4 @@ def render_module(module, **args): return render_template('index.j2', **args) """ ---------------------------------------------------------------- - No longer used """ -# My attempt at a loading page -def renderLoading(dest, query, request): - cache_files_requested.append(request) - return render_template('index.j2', body="loading", title="Loading", d=dest, query_key=query, api_url=getSetting('serving')) - -with DatabaseEngine() as engine: - augur_data_schema = MetaData(schema = "augur_data") - augur_data_schema.reflect(bind = engine, views = True) - - commits_materialized_view: Table = augur_data_schema.tables["augur_data.api_get_all_repos_commits"] - issues_materialized_view: Table = augur_data_schema.tables["augur_data.api_get_all_repos_issues"] - -""" ---------------------------------------------------------------- -""" -def load_repos_test(count = False, source = None, **kwargs): - columns: list[Column] = [ - Repo.repo_id.distinct().label("repo_id"), - Repo.description.label("description"), - Repo.repo_git.label("url"), - coalesce(commits_materialized_view.columns.commits_all_time, 0).label("commits_all_time"), - coalesce(issues_materialized_view.columns.issues_all_time, 0).label("issues_all_time"), - RepoGroup.rg_name.label("rg_name"), - Repo.repo_git.regexp_replace('.*github\.com\/[A-Za-z0-9 \- _]+\/([A-Za-z0-9 \- _ .]+)$', "\\1").label("repo_name"), - Repo.repo_git.regexp_replace('.*github\.com\/([A-Za-z0-9 \- _]+)\/[A-Za-z0-9 \- _ .]+$', "\\1").label("repo_owner"), - RepoGroup.repo_group_id.label("repo_group_id") - ] - - def get_colum_by_label(label: str)-> 
Column: - for column in columns: - if column.name == label: - return column - - repos: Query = db_session.query(*columns)\ - .outerjoin(commits_materialized_view, Repo.repo_id == commits_materialized_view.columns.repo_id)\ - .outerjoin(issues_materialized_view, Repo.repo_id == issues_materialized_view.columns.repo_id)\ - .join(RepoGroup, Repo.repo_group_id == RepoGroup.repo_group_id) - - if source == "user": - user: User = kwargs.get("user") - - if not user: - return None, {"status": "User not passed when trying to get user repos"} - if not user.groups: - return None, {"status": "No data"} - - repos = repos.join(UserRepo, Repo.repo_id == UserRepo.repo_id)\ - .join(UserGroup, UserGroup.group_id == UserRepo.group_id)\ - .filter(UserGroup.user_id == user.user_id) - - elif source == "group": - user: User = kwargs.get("user") - - if not user: - return None, {"status": "User not specified"} - group_name = kwargs.get("group_name") - if not group_name: - return None, {"status": "Group name not specified"} - - group_id = ... # UserGroup.convert_group_name_to_id(self.session, user.user_id, group_name) - if group_id is None: - return None, {"status": "Group does not exists"} - - repos = repos.join(UserRepo, Repo.repo_id == UserRepo.repo_id)\ - .filter(UserRepo.group_id == group_id) - - search = kwargs.get("search") - qkey = kwargs.get("query_key") or ["repo_name", "repo_owner"] - if search: - if isinstance(qkey, list) and len(qkey) > 0: - repos = repos.filter(or_(ilike_op(get_colum_by_label(filter_column), f"%{search}%") for filter_column in qkey)) - else: - repos = repos.filter(ilike_op(get_colum_by_label(qkey), f"%{search}%")) - - page_size: int = kwargs.get("page_size") or 25 - if count: - c = repos.count() - return math.ceil(c / page_size) - 1 - else: - page: int = kwargs.get("page") or 0 - offset = page * page_size - direction = kwargs.get("direction") or "ASC" - order_by = kwargs.get("order_by") or "repo_id" - - if direction not in ["ASC", "DESC"]: - return None, None, {"status": "Invalid direction"} - - if order_by not in ["repo_id", "repo_name", "repo_owner", "commits_all_time", "issues_all_time"]: - return None, None, {"status": "Invalid order by"} - - # Find the column named in the 'order_by', and get its asc() or desc() method - directive: function = getattr(get_colum_by_label(order_by), direction.lower()) - - repos = repos.order_by(directive()) - - return repos.slice(offset, offset + page_size) - diff --git a/augur/util/repo_load_controller.py b/augur/util/repo_load_controller.py index f6841d976e..b5ff5d07a0 100644 --- a/augur/util/repo_load_controller.py +++ b/augur/util/repo_load_controller.py @@ -12,10 +12,23 @@ from augur.application.db.models.augur_operations import retrieve_owner_repos from augur.application.db.util import execute_session_query +from sqlalchemy import Column, Table, MetaData, or_ +from sqlalchemy.sql.operators import ilike_op +from sqlalchemy.sql.functions import coalesce +from sqlalchemy.orm import Query + logger = logging.getLogger(__name__) +with DatabaseEngine() as engine: + augur_data_schema = MetaData(schema = "augur_data") + augur_data_schema.reflect(bind = engine, views = True) + + commits_materialized_view: Table = augur_data_schema.tables["augur_data.api_get_all_repos_commits"] + issues_materialized_view: Table = augur_data_schema.tables["augur_data.api_get_all_repos_issues"] + + class RepoLoadController: def __init__(self, gh_session): @@ -131,34 +144,33 @@ def paginate_repos(self, source, page=0, page_size=25, sort="repo_id", direction order_by = sort if 
sort else "repo_id" order_direction = direction if direction else "ASC" - query, query_args, result = self.generate_repo_query(source, count=False, order_by=order_by, direction=order_direction, + + query, result = self.generate_repo_query(source, count=False, order_by=order_by, direction=order_direction, page=page, page_size=page_size, **kwargs) + + + # query, query_args, result = self.generate_repo_query(source, count=False, order_by=order_by, direction=order_direction, + # page=page, page_size=page_size, **kwargs) if not query: return None, {"status": result["status"]} if result["status"] == "No data": return [], {"status": "No data"} - get_page_of_repos_sql = s.sql.text(query) - - with DatabaseEngine(connection_pool_size=1).connect() as conn: + # get_page_of_repos_sql = s.sql.text(query) - results = pd.read_sql(get_page_of_repos_sql, conn, params=query_args) + # with DatabaseEngine(connection_pool_size=1).connect() as conn: - results['url'] = results['url'].apply(lambda datum: datum.split('//')[1]) + # results = pd.read_sql(get_page_of_repos_sql, conn, params=query_args) - b64_urls = [] - for i in results.index: - b64_urls.append(base64.b64encode((results.at[i, 'url']).encode())) - results['base64_url'] = b64_urls + results = [dict(x._mapping) for x in query] - data = results.to_dict(orient="records") + for row in results: - # The SELECT statement in generate_repo_query has been updated to include `repo_name` - # for row in data: - # row["repo_name"] = re.search(r"github\.com\/[A-Za-z0-9 \- _]+\/([A-Za-z0-9 \- _ .]+)$", row["url"]).groups()[0] + row["url"] = row["url"].split('//')[1] + row["base64_url"] = base64.b64encode(row["url"].encode()) - return data, {"status": "success"} + return results, {"status": "success"} def get_repo_count(self, source, **kwargs): @@ -169,142 +181,97 @@ def get_repo_count(self, source, **kwargs): if source not in ["all", "user", "group"]: print("Func: get_repo_count. 
Error: Invalid source") return None, {"status": "Invalid source"} - - query, query_args, result = self.generate_repo_query(source, count=True, **kwargs) + + query, result = self.generate_repo_query(source, count=True, **kwargs) if not query: return None, result if result["status"] == "No data": return 0, {"status": "No data"} - - # surround query with count query so we just get the count of the rows - final_query = f"SELECT count(*) FROM ({query}) a;" - - get_page_of_repos_sql = s.sql.text(final_query) - - result = self.session.execute(get_page_of_repos_sql, query_args).fetchall() + + count = query.count() - return result[0]["count"], {"status": "success"} + return count, {"status": "success"} def generate_repo_query(self, source, count, **kwargs): - # TODO: need more flexible way of calculating count for variable column queries - - query_args = {} - - if count: - # only query for repos ids so the query is faster for getting the count - select = """ DISTINCT(augur_data.repo.repo_id), - (regexp_match(augur_data.repo.repo_git, 'github\.com\/[A-Za-z0-9 \- _]+\/([A-Za-z0-9 \- _ .]+)$'))[1] as repo_name, - (regexp_match(augur_data.repo.repo_git, 'github\.com\/([A-Za-z0-9 \- _]+)\/[A-Za-z0-9 \- _ .]+$'))[1] as repo_owner""" - else: - - select = """ DISTINCT(augur_data.repo.repo_id), - augur_data.repo.description, - augur_data.repo.repo_git AS url, - COALESCE(a.commits_all_time, 0) as commits_all_time, - COALESCE(b.issues_all_time, 0) as issues_all_time, - rg_name, - (regexp_match(augur_data.repo.repo_git, 'github\.com\/[A-Za-z0-9 \- _]+\/([A-Za-z0-9 \- _ .]+)$'))[1] as repo_name, - (regexp_match(augur_data.repo.repo_git, 'github\.com\/([A-Za-z0-9 \- _]+)\/[A-Za-z0-9 \- _ .]+$'))[1] as repo_owner, - augur_data.repo.repo_group_id""" - - query = f""" - SELECT - {select} - FROM - augur_data.repo - LEFT OUTER JOIN augur_data.api_get_all_repos_commits a ON augur_data.repo.repo_id = a.repo_id - LEFT OUTER JOIN augur_data.api_get_all_repos_issues b ON augur_data.repo.repo_id = b.repo_id - JOIN augur_data.repo_groups ON augur_data.repo.repo_group_id = augur_data.repo_groups.repo_group_id\n""" - + + columns: list[Column] = [ + Repo.repo_id.distinct().label("repo_id"), + Repo.description.label("description"), + Repo.repo_git.label("url"), + coalesce(commits_materialized_view.columns.commits_all_time, 0).label("commits_all_time"), + coalesce(issues_materialized_view.columns.issues_all_time, 0).label("issues_all_time"), + RepoGroup.rg_name.label("rg_name"), + Repo.repo_git.regexp_replace('.*github\.com\/[A-Za-z0-9 \- _]+\/([A-Za-z0-9 \- _ .]+)$', "\\1").label("repo_name"), + Repo.repo_git.regexp_replace('.*github\.com\/([A-Za-z0-9 \- _]+)\/[A-Za-z0-9 \- _ .]+$', "\\1").label("repo_owner"), + RepoGroup.repo_group_id.label("repo_group_id") + ] + + def get_colum_by_label(label: str)-> Column: + for column in columns: + if column.name == label: + return column + + repos: Query = self.session.query(*columns)\ + .outerjoin(commits_materialized_view, Repo.repo_id == commits_materialized_view.columns.repo_id)\ + .outerjoin(issues_materialized_view, Repo.repo_id == issues_materialized_view.columns.repo_id)\ + .join(RepoGroup, Repo.repo_group_id == RepoGroup.repo_group_id) + if source == "user": + user: User = kwargs.get("user") - user = kwargs.get("user") if not user: - print("Func: generate_repo_query. 
Error: User not passed when trying to get user repos") return None, {"status": "User not passed when trying to get user repos"} - if not user.groups: return None, {"status": "No data"} - - query += "\t\t JOIN augur_operations.user_repos ON augur_data.repo.repo_id = augur_operations.user_repos.repo_id\n" - query += "\t\t JOIN augur_operations.user_groups ON augur_operations.user_repos.group_id = augur_operations.user_groups.group_id\n" - query += "\t\t WHERE augur_operations.user_groups.user_id = :user_id\n" - - query_args["user_id"] = user.user_id - + + repos = repos.join(UserRepo, Repo.repo_id == UserRepo.repo_id)\ + .join(UserGroup, UserGroup.group_id == UserRepo.group_id)\ + .filter(UserGroup.user_id == user.user_id) + elif source == "group": - - user = kwargs.get("user") + user: User = kwargs.get("user") + if not user: - print("Func: generate_repo_query. Error: User not specified") return None, {"status": "User not specified"} - group_name = kwargs.get("group_name") if not group_name: - print("Func: generate_repo_query. Error: Group name not specified") return None, {"status": "Group name not specified"} - + group_id = UserGroup.convert_group_name_to_id(self.session, user.user_id, group_name) if group_id is None: - print("Func: generate_repo_query. Error: Group does not exist") return None, {"status": "Group does not exists"} - - query += "\t\t JOIN augur_operations.user_repos ON augur_data.repo.repo_id = augur_operations.user_repos.repo_id\n" - query += "\t\t WHERE augur_operations.user_repos.group_id = :group_id \n" - - query_args["group_id"] = group_id + + repos = repos.join(UserRepo, Repo.repo_id == UserRepo.repo_id)\ + .filter(UserRepo.group_id == group_id) - # implement sorting by query_key search = kwargs.get("search") qkey = kwargs.get("query_key") or ["repo_name", "repo_owner"] - if search: - # The WHERE clause cannot use a column alias created in the directly preceeding SELECT clause - # We must wrap the query in an additional SELECT with a table alias - # This way, we can use WHERE with the computed repo_name column alias - query = f"""\tSELECT * from ( - {query} - ) res\n""" - # This is done so repos with a NULL repo_name can still be sorted. 
- # "res" here is a randomly chosen table alias, short for "result" - # It is only included because it is required by the SQL syntax - if isinstance(qkey, list) and len(qkey) > 0: - query += f"\tWHERE :qkey_where ilike :search\n" - query_args["qkey_where"] = qkey.pop(0) - - for i, key in enumerate(qkey): - param_name = f"qkey_or_{i}" - query += f"OR :{param_name} ilike :search\n" - query_args[param_name] = key + repos = repos.filter(or_(ilike_op(get_colum_by_label(filter_column), f"%{search}%") for filter_column in qkey)) else: - query += f"\tWHERE :qkey ilike :search\n" - query_args["qkey"] = qkey - - query_args["search"] = f'%{search}%' - - - if not count: - order_by = kwargs.get("order_by") or "repo_id" - page = kwargs.get("page") or 0 - page_size = kwargs.get("page_size") or 25 + repos = repos.filter(ilike_op(get_colum_by_label(qkey), f"%{search}%")) + + page_size: int = kwargs.get("page_size") or 25 + if count: + return repos, {"status": "success"} + else: + page: int = kwargs.get("page") or 0 + offset = page * page_size direction = kwargs.get("direction") or "ASC" - + order_by = kwargs.get("order_by") or "repo_id" + if direction not in ["ASC", "DESC"]: - return None, None, {"status": "Invalid direction"} + return None, {"status": "Invalid direction"} if order_by not in ["repo_id", "repo_name", "repo_owner", "commits_all_time", "issues_all_time"]: - return None, None, {"status": "Invalid order by"} - - offset = page*page_size - - query += f"\tORDER BY {order_by} {direction}\n" - query += "\tLIMIT :page_size\n" - query += "\tOFFSET :offset;\n" - - query_args["page_size"] = page_size - query_args["offset"] = offset + return None, {"status": "Invalid order by"} + + # Find the column named in the 'order_by', and get its asc() or desc() method + directive: function = getattr(get_colum_by_label(order_by), direction.lower()) + + repos = repos.order_by(directive()) - return query, query_args, {"status": "success"} + return repos.slice(offset, offset + page_size), {"status": "success"} From 631aea266bedc95d221da6819e8716abb3f931e7 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 4 Nov 2023 07:41:10 -0500 Subject: [PATCH 013/122] Fix orm warnings --- augur/application/db/models/augur_data.py | 10 +++--- .../application/db/models/augur_operations.py | 32 +++++++++---------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 3ed584ee14..f5efceb4e7 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -564,6 +564,8 @@ class RepoGroup(Base): data_source = Column(String) data_collection_date = Column(TIMESTAMP(precision=0)) + repo = relationship("Repo", back_populates="repo_group") + @staticmethod def is_valid_repo_group_id(session, repo_group_id: int) -> bool: """Deterime is repo_group_id exists. 
@@ -866,8 +868,8 @@ class Repo(Base): TIMESTAMP(precision=0), server_default=text("CURRENT_TIMESTAMP") ) - repo_group = relationship("RepoGroup") - user_repo = relationship("UserRepo") + repo_group = relationship("RepoGroup", back_populates="repo") + user_repo = relationship("UserRepo", back_populates="repo") collection_status = relationship("CollectionStatus", back_populates="repo") issues = relationship("Issue", back_populates="repo") prs = relationship("PullRequest", back_populates="repo") @@ -1208,10 +1210,6 @@ class Commit(Base): primaryjoin="Commit.cmt_author_platform_username == Contributor.cntrb_login", back_populates="commits" ) - contributor1 = relationship( - "Contributor", - primaryjoin="Commit.cmt_author_platform_username == Contributor.cntrb_login", - ) repo = relationship("Repo", back_populates="commits") message_ref = relationship("CommitCommentRef", back_populates="cmt") diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index f702d829a3..86a96743b9 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -271,9 +271,9 @@ class User(Base): {"schema": "augur_operations"} ) - groups = relationship("UserGroup") - tokens = relationship("UserSessionToken") - applications = relationship("ClientApplication") + groups = relationship("UserGroup", back_populates="user") + tokens = relationship("UserSessionToken", back_populates="user") + applications = relationship("ClientApplication", back_populates="user") _is_authenticated = False _is_active = True @@ -628,8 +628,8 @@ class UserGroup(Base): {"schema": "augur_operations"} ) - user = relationship("User") - repos = relationship("UserRepo") + user = relationship("User", back_populates="groups") + user_repos = relationship("UserRepo", back_populates="group") @staticmethod def insert(session, user_id:int, group_name:str) -> dict: @@ -739,8 +739,8 @@ class UserRepo(Base): ForeignKey("augur_data.repo.repo_id", name="user_repo_user_id_fkey"), primary_key=True, nullable=False ) - repo = relationship("Repo") - group = relationship("UserGroup") + repo = relationship("Repo", back_populates="user_repo") + group = relationship("UserGroup", back_populates="user_repos") @staticmethod def insert(session, repo_id: int, group_id:int = 1) -> bool: @@ -949,9 +949,9 @@ class UserSessionToken(Base): application_id = Column(ForeignKey("augur_operations.client_applications.id", name="user_session_token_application_id_fkey"), nullable=False) created_at = Column(BigInteger) - user = relationship("User") - application = relationship("ClientApplication") - refresh_tokens = relationship("RefreshToken") + user = relationship("User", back_populates="tokens") + application = relationship("ClientApplication", back_populates="sessions") + refresh_tokens = relationship("RefreshToken", back_populates="user_session") @staticmethod def create(session, user_id, application_id, seconds_to_expire=86400): @@ -991,9 +991,9 @@ class ClientApplication(Base): redirect_url = Column(String, nullable=False) api_key = Column(String, nullable=False) - user = relationship("User") + user = relationship("User", back_populates="applications") sessions = relationship("UserSessionToken") - subscriptions = relationship("Subscription") + subscriptions = relationship("Subscription", back_populates="application") def __eq__(self, other): return isinstance(other, ClientApplication) and str(self.id) == str(other.id) @@ -1013,8 +1013,8 @@ class Subscription(Base): 
application_id = Column(ForeignKey("augur_operations.client_applications.id", name="subscriptions_application_id_fkey"), primary_key=True) type_id = Column(ForeignKey("augur_operations.subscription_types.id", name="subscriptions_type_id_fkey"), primary_key=True) - application = relationship("ClientApplication") - type = relationship("SubscriptionType") + application = relationship("ClientApplication", back_populates="subscriptions") + type = relationship("SubscriptionType", back_populates="subscriptions") class SubscriptionType(Base): __tablename__ = "subscription_types" @@ -1027,7 +1027,7 @@ class SubscriptionType(Base): id = Column(BigInteger, primary_key=True) name = Column(String, nullable=False) - subscriptions = relationship("Subscription") + subscriptions = relationship("Subscription", back_populates="type") class RefreshToken(Base): @@ -1040,7 +1040,7 @@ class RefreshToken(Base): id = Column(String, primary_key=True) user_session_token = Column(ForeignKey("augur_operations.user_session_tokens.token", name="refresh_token_session_token_id_fkey"), nullable=False) - user_session = relationship("UserSessionToken") + user_session = relationship("UserSessionToken", back_populates="refresh_tokens") @staticmethod def create(session, user_session_token_id): From de5a69f92450c2bce86a12d3934d0a05c9ed9d35 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 4 Nov 2023 08:51:15 -0500 Subject: [PATCH 014/122] Fix issue when merging branches together Signed-off-by: Andrew Brain --- augur/application/db/models/augur_operations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index 86a96743b9..63b9144b35 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -629,7 +629,7 @@ class UserGroup(Base): ) user = relationship("User", back_populates="groups") - user_repos = relationship("UserRepo", back_populates="group") + repos = relationship("UserRepo", back_populates="group") @staticmethod def insert(session, user_id:int, group_name:str) -> dict: @@ -740,7 +740,7 @@ class UserRepo(Base): ) repo = relationship("Repo", back_populates="user_repo") - group = relationship("UserGroup", back_populates="user_repos") + group = relationship("UserGroup", back_populates="repos") @staticmethod def insert(session, repo_id: int, group_id:int = 1) -> bool: From d18baa344af6cfaff572e279b593f7df6d67b82a Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 27 Nov 2023 19:48:00 -0600 Subject: [PATCH 015/122] Start renaming things to be github specific and adding gitlab functions --- augur/api/routes/user.py | 4 +- augur/application/db/models/augur_data.py | 24 +++ .../application/db/models/augur_operations.py | 23 ++- augur/tasks/frontend.py | 25 ++- .../github/util/github_api_key_handler.py | 2 +- augur/tasks/gitlab/gitlab_api_key_handler.py | 178 ++++++++++++++++++ augur/tasks/gitlab/gitlab_random_key_auth.py | 28 +++ augur/tasks/gitlab/gitlab_task_session.py | 44 +++++ 8 files changed, 312 insertions(+), 16 deletions(-) create mode 100644 augur/tasks/gitlab/gitlab_api_key_handler.py create mode 100644 augur/tasks/gitlab/gitlab_random_key_auth.py create mode 100644 augur/tasks/gitlab/gitlab_task_session.py diff --git a/augur/api/routes/user.py b/augur/api/routes/user.py index dfaeb81f7f..62bc44068a 100644 --- a/augur/api/routes/user.py +++ b/augur/api/routes/user.py @@ -227,7 +227,7 @@ def add_user_repo(): repo = 
request.args.get("repo_url") group_name = request.args.get("group_name") - result = current_user.add_repo(group_name, repo) + result = current_user.add_github_repo(group_name, repo) return jsonify(result[1]) @@ -260,7 +260,7 @@ def add_user_org(): org = request.args.get("org_url") group_name = request.args.get("group_name") - result = current_user.add_org(group_name, org) + result = current_user.add_github_org(group_name, org) return jsonify(result[1]) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index f5efceb4e7..cbfd8d6afb 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -949,6 +949,30 @@ def parse_github_repo_url(url: str) -> tuple: capturing_groups = result.groups() + owner = capturing_groups[0] + repo = capturing_groups[1] + + return owner, repo + + @staticmethod + def parse_gitlab_repo_url(url: str) -> tuple: + """ Gets the owner and repo from a gitlab url. + + Args: + url: Gitlab url + + Returns: + Tuple of owner and repo. Or a tuple of None and None if the url is invalid. + """ + + result = re.search(r"https?:\/\/gitlab\.com\/([A-Za-z0-9 \- _]+)\/([A-Za-z0-9 \- _ \.]+)(.git)?\/?$", url) + + if not result: + return None, None + + capturing_groups = result.groups() + + owner = capturing_groups[0] repo = capturing_groups[1] diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index 63b9144b35..00d3fa3ab3 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -449,17 +449,30 @@ def remove_group(self, group_name): return result - def add_repo(self, group_name, repo_url): + def add_github_repo(self, group_name, repo_url): from augur.tasks.github.util.github_task_session import GithubTaskSession from augur.tasks.github.util.github_api_key_handler import NoValidKeysError try: with GithubTaskSession(logger) as session: - result = UserRepo.add(session, repo_url, self.user_id, group_name) + result = UserRepo.add_github_repo(session, repo_url, self.user_id, group_name) except NoValidKeysError: return False, {"status": "No valid keys"} return result + + def add_gitlab_repo(self, group_name, repo_url): + + from augur.tasks.gitlab.gitlab_task_session import GitLabTaskSession + from augur.tasks.github.util.github_api_key_handler import NoValidKeysError + try: + with GitLabTaskSession(logger) as session: + result = UserRepo.add_gitlab_repo(session, repo_url, self.user_id, group_name) + except NoValidKeysError: + return False, {"status": "No valid keys"} + + return result + def remove_repo(self, group_name, repo_id): @@ -468,7 +481,7 @@ def remove_repo(self, group_name, repo_id): return result - def add_org(self, group_name, org_url): + def add_github_org(self, group_name, org_url): from augur.tasks.github.util.github_task_session import GithubTaskSession from augur.tasks.github.util.github_api_key_handler import NoValidKeysError @@ -771,7 +784,7 @@ def insert(session, repo_id: int, group_id:int = 1) -> bool: return data[0]["group_id"] == group_id and data[0]["repo_id"] == repo_id @staticmethod - def add(session, url: List[str], user_id: int, group_name=None, group_id=None, from_org_list=False, repo_type=None, repo_group_id=None) -> dict: + def add_github_repo(session, url: List[str], user_id: int, group_name=None, group_id=None, from_org_list=False, repo_type=None, repo_group_id=None) -> dict: """Add repo to the user repo table Args: @@ -911,7 +924,7 @@ def 
add_org_repos(session, url: List[str], user_id: int, group_name: int): failed_repos = [] for repo in repos: - result = UserRepo.add(session, repo, user_id, group_id=group_id, from_org_list=True, repo_type=type, repo_group_id=repo_group_id) + result = UserRepo.add_github_repo(session, repo, user_id, group_id=group_id, from_org_list=True, repo_type=type, repo_group_id=repo_group_id) # keep track of all the repos that failed if not result[0]: diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index b8eb8b203c..2adc806534 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -30,15 +30,15 @@ def add_org_repo_list(user_id, group_name, urls): valid_repos = [] for url in urls: - # matches https://github.com/{org}/ or htts://github.com/{org} + # matches https://github.com/{org}/ or http://github.com/{org} if Repo.parse_github_org_url(url): - added = user.add_org(group_name, url)[0] + added = user.add_github_org(group_name, url)[0] if added: valid_orgs.append(url) - # matches https://github.com/{org}/{repo}/ or htts://github.com/{org}/{repo} + # matches https://github.com/{org}/{repo}/ or http://github.com/{org}/{repo} elif Repo.parse_github_repo_url(url)[0]: - added = user.add_repo(group_name, url)[0] + added = user.add_github_repo(group_name, url)[0] if added: valid_repos.append(url) @@ -46,7 +46,7 @@ def add_org_repo_list(user_id, group_name, urls): elif (match := parse_org_and_repo_name(url)): org, repo = match.groups() repo_url = f"https://github.com/{org}/{repo}/" - added = user.add_repo(group_name, repo_url)[0] + added = user.add_github_repo(group_name, repo_url)[0] if added: valid_repos.append(url) @@ -54,9 +54,17 @@ def add_org_repo_list(user_id, group_name, urls): elif (match := parse_org_name(url)): org = match.group(1) org_url = f"https://github.com/{org}/" - added = user.add_org(group_name, org_url)[0] + added = user.add_github_org(group_name, org_url)[0] if added: valid_orgs.append(url) + + # matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo} + elif Repo.parse_gitlab_repo_url(url)[0]: + + #added = user.add_github_repo(group_name, url)[0] + if added: + valid_repos.append(url) + else: invalid_urls.append(url) @@ -66,18 +74,19 @@ def add_org_repo_list(user_id, group_name, urls): - +# TODO: Change to github specific @celery.task def add_repo(user_id, group_name, repo_url): logger = logging.getLogger(add_org.__name__) with GithubTaskSession(logger) as session: - result = UserRepo.add(session, repo_url, user_id, group_name) + result = UserRepo.add_github_repo(session, repo_url, user_id, group_name) print(repo_url, result) +# TODO: Change to github specific @celery.task def add_org(user_id, group_name, org_url): diff --git a/augur/tasks/github/util/github_api_key_handler.py b/augur/tasks/github/util/github_api_key_handler.py index 8a19430e87..20ce07f066 100644 --- a/augur/tasks/github/util/github_api_key_handler.py +++ b/augur/tasks/github/util/github_api_key_handler.py @@ -32,7 +32,7 @@ def __init__(self, session: DatabaseSession): self.logger = session.logger self.config = AugurConfig(self.logger, session) - self.oauth_redis_key = "oauth_keys_list" + self.oauth_redis_key = "github_oauth_keys_list" self.redis_key_list = RedisList(self.oauth_redis_key) diff --git a/augur/tasks/gitlab/gitlab_api_key_handler.py b/augur/tasks/gitlab/gitlab_api_key_handler.py new file mode 100644 index 0000000000..e59a3620ef --- /dev/null +++ b/augur/tasks/gitlab/gitlab_api_key_handler.py @@ -0,0 +1,178 @@ +import httpx +import time +import random + +from 
typing import Optional, List
+
+from augur.tasks.util.redis_list import RedisList
+from augur.application.db.session import DatabaseSession
+from augur.application.config import AugurConfig
+from sqlalchemy import func
+
+
+class NoValidKeysError(Exception):
+    pass
+
+
+class GitlabApiKeyHandler():
+    """Handles Gitlab API key retrieval from the database and redis
+
+    Attributes:
+        session (DatabaseSession): Database connection
+        logger (logging.Logger): Handles all logs
+        oauth_redis_key (str): The key where the github api keys are cached in redis
+        redis_key_list (RedisList): Acts like a python list, and interacts directly with the redis cache
+        config_key (str): The api key that is stored in the users config table
+        key: (List[str]): List of keys retrieve from database or cache
+    """
+
+    def __init__(self, session: DatabaseSession):
+
+        self.session = session
+        self.logger = session.logger
+        self.config = AugurConfig(self.logger, session)
+
+        self.oauth_redis_key = "gitlab_oauth_keys_list"
+
+        self.redis_key_list = RedisList(self.oauth_redis_key)
+
+        self.config_key = self.get_config_key()
+
+        self.keys = self.get_api_keys()
+
+        self.logger.info(f"Retrieved {len(self.keys)} gitlab api keys for use")
+
+    def get_random_key(self):
+        """Retrieves a random key from the list of keys
+
+        Returns:
+            A random github api key
+        """
+
+        return random.choice(self.keys)
+
+    def get_config_key(self) -> str:
+        """Retrieves the users github api key from their config table
+
+        Returns:
+            Github API key from config table
+        """
+
+        # TODO: Change to get the gitlab api key
+        return self.config.get_value("Keys", "github_api_key")
+
+    def get_api_keys_from_database(self) -> List[str]:
+        """Retieves all github api keys from database
+
+        Note:
+            It retrieves all the keys from the database except the one defined in the users config
+
+        Returns:
+            Github api keys that are in the database
+        """
+        from augur.application.db.models import WorkerOauth
+
+        select = WorkerOauth.access_token
+        # randomizing the order at db time
+        #select.order_by(func.random())
+        # TODO: Change to get gitlab api keys
+        where = [WorkerOauth.access_token != self.config_key, WorkerOauth.platform == 'github']
+
+        return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).order_by(func.random()).all()]
+        #return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).all()]
+
+
+    def get_api_keys(self) -> List[str]:
+        """Retrieves all valid Gitlab API Keys
+
+        Note:
+            It checks to see if the keys are in the redis cache first.
+            It removes bad keys before returning.
+            If keys were taken from the database, it caches all the valid keys that were found
+
+        Returns:
+            Valid Gitlab api keys
+        """
+
+        redis_keys = list(self.redis_key_list)
+
+        if redis_keys:
+            return redis_keys
+
+        attempts = 0
+        while attempts < 3:
+
+            try:
+                keys = self.get_api_keys_from_database()
+                break
+            except:
+                time.sleep(5)
+                attempts += 1
+
+        if self.config_key is not None:
+            keys += [self.config_key]
+
+        if len(keys) == 0:
+            return []
+
+        valid_keys = []
+        with httpx.Client() as client:
+
+            for key in keys:
+
+                # removes key if it returns "Bad Credentials"
+                if self.is_bad_api_key(client, key) is False:
+                    valid_keys.append(key)
+                else:
+                    print(f"WARNING: The key '{key}' is not a valid key. Hint: If valid in past it may have expired")
+
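The validation loop above is what keeps one expired token from being cached and then failing requests at random for every worker that shares the Redis list. A standalone sketch of the same probe against GitLab, assuming the `/user` endpoint and `Bearer` scheme that later commits in this series adopt (the token value is a placeholder):

```python
import httpx

def is_bad_gitlab_key(client: httpx.Client, token: str) -> bool:
    # GitLab answers 401 on /user for a revoked or expired token,
    # so the status code alone classifies the key
    response = client.get(
        "https://gitlab.com/api/v4/user",
        headers={"Authorization": f"Bearer {token}"},
        timeout=30,
    )
    return response.status_code == 401

with httpx.Client() as client:
    candidate_keys = ["glpat-placeholder-token"]  # hypothetical key list
    valid_keys = [k for k in candidate_keys if not is_bad_gitlab_key(client, k)]
```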
+        # just in case the multiprocessing adds extra values to the list.
+        # we are clearing it before we push the values we got
+        self.redis_key_list.clear()
+
+        # add all the keys to redis
+        self.redis_key_list.extend(valid_keys)
+
+        if not valid_keys:
+            raise NoValidKeysError("No valid github api keys found in the config or worker oauth table")
+
+
+        # shuffling the keys so not all processes get the same keys in the same order
+        valid_now = valid_keys
+        #try:
+            #self.logger.info(f'valid keys before shuffle: {valid_keys}')
+            #valid_keys = random.sample(valid_keys, len(valid_keys))
+            #self.logger.info(f'valid keys AFTER shuffle: {valid_keys}')
+        #except Exception as e:
+        #    self.logger.debug(f'{e}')
+        #    valid_keys = valid_now
+        #    pass
+
+        return valid_keys
+
+    # TODO: Change to use gitlab rate limit api
+    def is_bad_api_key(self, client: httpx.Client, oauth_key: str) -> bool:
+        """Determines if a Github API is bad
+
+        Args:
+            client: makes the http requests
+            oauth_key: github api key that is being tested
+
+        Returns:
+            True if key is bad. False if the key is good
+        """
+
+        # this endpoint allows us to check the rate limit, but it does not use one of our 5000 requests
+        url = "https://api.github.com/rate_limit"
+
+        headers = {'Authorization': f'token {oauth_key}'}
+
+        data = client.request(method="GET", url=url, headers=headers, timeout=180).json()
+
+        try:
+            if data["message"] == "Bad credentials":
+                return True
+        except KeyError:
+            pass
+
+        return False
\ No newline at end of file
diff --git a/augur/tasks/gitlab/gitlab_random_key_auth.py b/augur/tasks/gitlab/gitlab_random_key_auth.py
new file mode 100644
index 0000000000..9194a94fe8
--- /dev/null
+++ b/augur/tasks/gitlab/gitlab_random_key_auth.py
@@ -0,0 +1,28 @@
+"""Defines the GitlabRandomKeyAuth class"""
+
+from augur.tasks.util.random_key_auth import RandomKeyAuth
+from augur.tasks.gitlab.gitlab_api_key_handler import GitlabApiKeyHandler
+from augur.application.db.session import DatabaseSession
+
+
+class GitlabRandomKeyAuth(RandomKeyAuth):
+    """Defines a gitlab specific RandomKeyAuth class so
+    gitlab collections can have a class that randomly selects an api key for each request
+    """
+
+    def __init__(self, session: DatabaseSession, logger):
+        """Creates a GitlabRandomKeyAuth object and initializes the RandomKeyAuth parent class"""
+
+
+        # gets the github api keys from the database via the GithubApiKeyHandler
+        github_api_keys = GitlabApiKeyHandler(session).keys
+        #github_api_keys = random.sample(github_api_keys, len(github_api_keys))
+
+        if not github_api_keys:
+            print("Failed to find github api keys. 
This is usually because your key has expired") + + # TODO: Change to set key headers how gitlab expects + header_name = "Authorization" + key_format = "token {0}" + + super().__init__(github_api_keys, header_name, session.logger, key_format) \ No newline at end of file diff --git a/augur/tasks/gitlab/gitlab_task_session.py b/augur/tasks/gitlab/gitlab_task_session.py new file mode 100644 index 0000000000..a7a68ec6b5 --- /dev/null +++ b/augur/tasks/gitlab/gitlab_task_session.py @@ -0,0 +1,44 @@ +from logging import Logger + +from augur.tasks.gitlab.gitlab_random_key_auth import GitlabRandomKeyAuth +from augur.application.db.session import DatabaseSession + +class GitlabTaskManifest: + + def __init__(self, logger): + + from augur.tasks.init.celery_app import engine + from augur.application.db.session import DatabaseSession + + self.augur_db = DatabaseSession(logger, engine) + self.key_auth = GitlabRandomKeyAuth(self.augur_db.session, logger) + self.logger = logger + self.platform_id = 2 + + def __enter__(self): + + return self + + def __exit__(self, exception_type, exception_value, exception_traceback): + + self.augur_db.close() + + +class GithubTaskSession(DatabaseSession): + """ORM session used in gitlab tasks. + This class adds the platform_id and the gitlab key authentication class, + to the already existing DatabaseSession so there is a central location to access + api keys and a single platform_id reference + + Attributes: + oauths (GitlabRandomKeyAuth): Class that handles randomly assigning gitlab api keys to httpx requests + platform_id (int): The id that refers to the Gitlab platform + """ + + def __init__(self, logger: Logger, engine=None): + + super().__init__(logger, engine=engine) + + self.oauths = GitlabRandomKeyAuth(self, logger) + self.platform_id = 2 + From 2b25d5989cbd421b19813e394b235b099d6b2fca Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 27 Nov 2023 20:16:38 -0600 Subject: [PATCH 016/122] Continue changing code to insert gitlab repos Signed-off-by: Andrew Brain --- augur/api/routes/dei.py | 2 +- augur/application/db/models/augur_data.py | 93 ++++++++++++++++++- .../application/db/models/augur_operations.py | 71 +++++++++++++- augur/tasks/frontend.py | 4 +- augur/util/repo_load_controller.py | 2 +- .../test_models/test_augur_data/test_repo.py | 12 +-- .../test_augur_operations/test_user_repo.py | 14 +-- 7 files changed, 177 insertions(+), 21 deletions(-) diff --git a/augur/api/routes/dei.py b/augur/api/routes/dei.py index dea79b79c2..82324a8d62 100644 --- a/augur/api/routes/dei.py +++ b/augur/api/routes/dei.py @@ -52,7 +52,7 @@ def dei_track_repo(application: ClientApplication): return jsonify({"status": "Repo already exists"}) frontend_repo_group: RepoGroup = session.query(RepoGroup).filter(RepoGroup.rg_name == FRONTEND_REPO_GROUP_NAME).first() - repo_id = Repo.insert(session, repo_url, frontend_repo_group.repo_group_id, "API.DEI", repo_type="") + repo_id = Repo.insert_github_repo(session, repo_url, frontend_repo_group.repo_group_id, "API.DEI", repo_type="") if not repo_id: return jsonify({"status": "Error adding repo"}) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index cbfd8d6afb..9a2f14e8bb 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -929,6 +929,50 @@ def is_valid_github_repo(gh_session, url: str) -> bool: return False, {"status": f"Github Error: {data['message']}"} return True, {"status": "Valid repo", "repo_type": data["owner"]["type"]} + + # 
TODO: Change to use gitlab api
+    @staticmethod
+    def is_valid_gitlab_repo(gl_session, url: str) -> bool:
+        """Determine whether repo url is valid.
+
+        Args:
+            url: repo_url
+
+        Returns
+            True if repo url is valid and False if not
+        """
+        from augur.tasks.github.util.github_paginator import hit_api
+
+        REPO_ENDPOINT = "https://api.github.com/repos/{}/{}"
+
+        if not gl_session.oauths.list_of_keys:
+            return False, {"status": "No valid github api keys to retrieve data with"}
+
+        owner, repo = Repo.parse_gitlab_repo_url(url)
+        if not owner or not repo:
+            return False, {"status":"Invalid repo url"}
+
+        url = REPO_ENDPOINT.format(owner, repo)
+
+        attempts = 0
+        while attempts < 10:
+            result = hit_api(gl_session.oauths, url, logger)
+
+            # if result is None try again
+            if not result:
+                attempts+=1
+                continue
+
+            data = result.json()
+            # if there was an error return False
+            if "message" in data.keys():
+
+                if data["message"] == "Not Found":
+                    return False, {"status": "Invalid repo"}
+
+                return False, {"status": f"Gitlab Error: {data['message']}"}
+
+            return True, {"status": "Valid repo", "repo_type": data["owner"]["type"]}

     @staticmethod
     def parse_github_repo_url(url: str) -> tuple:
@@ -998,7 +1042,54 @@ def parse_github_org_url(url):
         return result.groups()[0]

     @staticmethod
-    def insert(session, url: str, repo_group_id: int, tool_source, repo_type):
+    def insert_githlab_repo(session, url: str, repo_group_id: int, tool_source, repo_type):
+        """Add a repo to the repo table.
+
+        Args:
+            url: repo url
+            repo_group_id: group to assign repo to
+
+        Note:
+            If repo row exists then it will update the repo_group_id if param repo_group_id is not a default. If it does not exist it will simply insert the repo.
+        """
+
+        if not isinstance(url, str) or not isinstance(repo_group_id, int) or not isinstance(tool_source, str) or not isinstance(repo_type, str):
+            return None
+
+        if not RepoGroup.is_valid_repo_group_id(session, repo_group_id):
+            return None
+
+        if url.endswith("/"):
+            url = url[:-1]
+
+        url = url.lower()
+
+        owner, repo = Repo.parse_gitlab_repo_url(url)
+        if not owner or not repo:
+            return None
+
+        repo_data = {
+            "repo_group_id": repo_group_id,
+            "repo_git": url,
+            "repo_path": f"gitlab.com/{owner}/",
+            "repo_name": repo,
+            "repo_type": repo_type,
+            "tool_source": tool_source,
+            "tool_version": "1.0",
+            "data_source": "Git"
+        }
+
+        repo_unique = ["repo_git"]
+        return_columns = ["repo_id"]
+        result = session.insert_data(repo_data, Repo, repo_unique, return_columns, on_conflict_update=False)
+
+        if not result:
+            return None
+
+        return result[0]["repo_id"]
+
+    @staticmethod
+    def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_type):
         """Add a repo to the repo table.

         Args:
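The new insert method above derives every row field from the owner/repo pair captured by `parse_gitlab_repo_url`. A minimal sketch of that derivation, using a simplified variant of the regex from this commit (the literal-space matching in the character classes is dropped for clarity; the example URL is illustrative):

```python
import re

def parse_gitlab_repo_url(url: str) -> tuple:
    # capture owner and repo from an https://gitlab.com/owner/repo url
    result = re.search(r"https?:\/\/gitlab\.com\/([A-Za-z0-9\-_]+)\/([A-Za-z0-9\-_\.]+?)(\.git)?\/?$", url)
    if not result:
        return None, None
    return result.groups()[0], result.groups()[1]

owner, repo = parse_gitlab_repo_url("https://gitlab.com/gitlab-org/gitaly")
repo_row = {
    "repo_git": f"https://gitlab.com/{owner}/{repo}",
    "repo_path": f"gitlab.com/{owner}/",  # same path layout the github rows use
    "repo_name": repo,                    # -> "gitaly"
}
```

Keying the upsert on `repo_git` (the `repo_unique` list above) is what lets the same URL be submitted twice without creating duplicate rows.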
diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py
index 00d3fa3ab3..37f0e2f420 100644
--- a/augur/application/db/models/augur_operations.py
+++ b/augur/application/db/models/augur_operations.py
@@ -488,7 +488,7 @@ def add_github_org(self, group_name, org_url):

         try:
             with GithubTaskSession(logger) as session:
-                result = UserRepo.add_org_repos(session, org_url, self.user_id, group_name)
+                result = UserRepo.add_github_org_repos(session, org_url, self.user_id, group_name)
         except NoValidKeysError:
             return False, {"status": "No valid keys"}

@@ -782,6 +782,71 @@ def insert(session, repo_id: int, group_id:int = 1) -> bool:
             return False

         return data[0]["group_id"] == group_id and data[0]["repo_id"] == repo_id
+
+    @staticmethod
+    def add_gitlab_repo(session, url: List[str], user_id: int, group_name=None, group_id=None, from_org_list=False, repo_type=None, repo_group_id=None) -> dict:
+        """Add repo to the user repo table
+
+        Args:
+            url: repo url
+            user_id: id of user_id from users table
+            group_name: name of group to add repo to.
+            group_id: id of the group
+            from_org_list: boolean that indicates whether the repo came from an org's repo list and has already been validated
+
+        Note:
+            Either the group_name or group_id can be passed not both
+
+        Returns:
+            Dict that contains the key "status" and additional useful data
+        """
+
+        if group_name and group_id:
+            return False, {"status": "Pass only the group name or group id not both"}
+
+        if not group_name and not group_id:
+            return False, {"status": "Need group name or group id to add a repo"}
+
+        if from_org_list and not repo_type:
+            return False, {"status": "Repo type must be passed if the repo is from an organization's list of repos"}
+
+        if group_id is None:
+
+            group_id = UserGroup.convert_group_name_to_id(session, user_id, group_name)
+            if group_id is None:
+                return False, {"status": "Invalid group name"}
+
+        if not from_org_list:
+            result = Repo.is_valid_gitlab_repo(session, url)
+            if not result[0]:
+                return False, {"status": result[1]["status"], "repo_url": url}
+
+            repo_type = result[1]["repo_type"]
+
+        # if no repo_group_id is passed then assign the repo to the frontend repo group
+        if repo_group_id is None:
+
+            frontend_repo_group = session.query(RepoGroup).filter(RepoGroup.rg_name == FRONTEND_REPO_GROUP_NAME).first()
+            if not frontend_repo_group:
+                return False, {"status": "Could not find repo group with name 'Frontend Repos'", "repo_url": url}
+
+            repo_group_id = frontend_repo_group.repo_group_id
+
+
+        repo_id = Repo.insert_gitlab_repo(session, url, repo_group_id, "Frontend", repo_type)
+        if not repo_id:
+            return False, {"status": "Repo insertion failed", "repo_url": url}
+
+        result = UserRepo.insert(session, repo_id, group_id)
+        if not result:
+            return False, {"status": "repo_user insertion failed", "repo_url": url}
+
+        #collection_status records are now only added during collection -IM 5/1/23
+        #status = CollectionStatus.insert(session, repo_id)
+        #if not status:
+        #    return False, {"status": "Failed to create status for repo", "repo_url": url}
+
+        return True, {"status": "Repo Added", "repo_url": url}

     @staticmethod
     def add_github_repo(session, url: List[str], user_id: int, group_name=None, group_id=None, from_org_list=False, repo_type=None, repo_group_id=None) -> dict:
@@ -833,7 +898,7 @@ def add_github_repo(session, url: List[str], user_id: int, group_name=None, grou
         repo_group_id = frontend_repo_group.repo_group_id


-        repo_id = Repo.insert(session, url, repo_group_id, "Frontend", repo_type)
+        repo_id = 
Repo.insert_github_repo(session, url, repo_group_id, "Frontend", repo_type) if not repo_id: return False, {"status": "Repo insertion failed", "repo_url": url} @@ -875,7 +940,7 @@ def delete(session, repo_id:int, user_id:int, group_name:str) -> dict: return True, {"status": "Repo Removed"} @staticmethod - def add_org_repos(session, url: List[str], user_id: int, group_name: int): + def add_github_org_repos(session, url: List[str], user_id: int, group_name: int): """Add list of orgs and their repos to a users repos. Args: diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index 2adc806534..fffd79d330 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -61,7 +61,7 @@ def add_org_repo_list(user_id, group_name, urls): # matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo} elif Repo.parse_gitlab_repo_url(url)[0]: - #added = user.add_github_repo(group_name, url)[0] + added = user.add_gitlab_repo(group_name, url)[0] if added: valid_repos.append(url) @@ -93,6 +93,6 @@ def add_org(user_id, group_name, org_url): logger = logging.getLogger(add_org.__name__) with GithubTaskSession(logger) as session: - result = UserRepo.add_org_repos(session, org_url, user_id, group_name) + result = UserRepo.add_github_org_repos(session, org_url, user_id, group_name) print(org_url, result) diff --git a/augur/util/repo_load_controller.py b/augur/util/repo_load_controller.py index b5ff5d07a0..943e3373a6 100644 --- a/augur/util/repo_load_controller.py +++ b/augur/util/repo_load_controller.py @@ -62,7 +62,7 @@ def add_cli_repo(self, repo_data: Dict[str, Any], from_org_list=False, repo_type # if the repo doesn't exist it adds it - repo_id = Repo.insert(self.session, url, repo_group_id, "CLI", repo_type) + repo_id = Repo.insert_github_repo(self.session, url, repo_group_id, "CLI", repo_type) if not repo_id: logger.warning(f"Invalid repo group id specified for {url}, skipping.") diff --git a/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py b/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py index bf22254244..1c32472f32 100644 --- a/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py +++ b/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py @@ -77,20 +77,20 @@ def test_insert_repo(test_db_engine): with DatabaseSession(logger, test_db_engine) as session: - assert Repo.insert(session, data["repo_urls"][0], data["rg_id"], data["tool_source"]) is not None - assert Repo.insert(session, data["repo_urls"][1], data["rg_id"], data["tool_source"]) is not None + assert Repo.insert_github_repo(session, data["repo_urls"][0], data["rg_id"], data["tool_source"]) is not None + assert Repo.insert_github_repo(session, data["repo_urls"][1], data["rg_id"], data["tool_source"]) is not None # invalid rg_id - assert Repo.insert(session, data["repo_urls"][0], 12, data["tool_source"]) is None + assert Repo.insert_github_repo(session, data["repo_urls"][0], 12, data["tool_source"]) is None # invalid type for repo url - assert Repo.insert(session, 1, data["rg_id"], data["tool_source"]) is None + assert Repo.insert_github_repo(session, 1, data["rg_id"], data["tool_source"]) is None # invalid type for rg_id - assert Repo.insert(session, data["repo_urls"][1], "1", data["tool_source"]) is None + assert Repo.insert_github_repo(session, data["repo_urls"][1], "1", data["tool_source"]) is None # invalid type for tool_source - assert Repo.insert(session, data["repo_urls"][1], data["rg_id"], 52) is None + assert 
Repo.insert_github_repo(session, data["repo_urls"][1], data["rg_id"], 52) is None with test_db_engine.connect() as connection: diff --git a/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_repo.py b/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_repo.py index 3fc5451791..4b288cbabb 100644 --- a/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_repo.py +++ b/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_repo.py @@ -124,7 +124,7 @@ def test_add_frontend_repos_with_invalid_repo(test_db_engine): with GithubTaskSession(logger, test_db_engine) as session: - result = UserRepo.add(session, url, data["user_id"], data["user_group_name"]) + result = UserRepo.add_github_repo(session, url, data["user_id"], data["user_group_name"]) assert result[1]["status"] == "Invalid repo" @@ -163,11 +163,11 @@ def test_add_frontend_repos_with_duplicates(test_db_engine): with GithubTaskSession(logger, test_db_engine) as session: - result = UserRepo.add(session, url, data["user_id"], data["user_group_name"]) - result2 = UserRepo.add(session, url, data["user_id"], data["user_group_name"]) + result = UserRepo.add_github_repo(session, url, data["user_id"], data["user_group_name"]) + result2 = UserRepo.add_github_repo(session, url, data["user_id"], data["user_group_name"]) # add repo with invalid group name - result3 = UserRepo.add(session, url, data["user_id"], "Invalid group name") + result3 = UserRepo.add_github_repo(session, url, data["user_id"], "Invalid group name") assert result[1]["status"] == "Repo Added" assert result2[1]["status"] == "Repo Added" @@ -263,11 +263,11 @@ def test_add_frontend_org_with_invalid_org(test_db_engine): with GithubTaskSession(logger, test_db_engine) as session: url = f"https://github.com/{data['org_name']}/" - result = UserRepo.add_org_repos(session, url, data["user_id"], data["user_group_name"]) + result = UserRepo.add_github_org_repos(session, url, data["user_id"], data["user_group_name"]) assert result[1]["status"] == "Invalid owner url" # test with invalid group name - result = UserRepo.add_org_repos(session, url, data["user_id"], "Invalid group name") + result = UserRepo.add_github_org_repos(session, url, data["user_id"], "Invalid group name") assert result[1]["status"] == "Invalid group name" with test_db_engine.connect() as connection: @@ -305,7 +305,7 @@ def test_add_frontend_org_with_valid_org(test_db_engine): with GithubTaskSession(logger, test_db_engine) as session: url = "https://github.com/{}/".format(data["org_name"]) - result = UserRepo.add_org_repos(session, url, data["user_id"], data["user_group_name"]) + result = UserRepo.add_github_org_repos(session, url, data["user_id"], data["user_group_name"]) assert result[1]["status"] == "Org repos added" with test_db_engine.connect() as connection: From ca7702904de7bb819b3021973bcbdd55a24b62cc Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 28 Nov 2023 17:19:47 -0600 Subject: [PATCH 017/122] Update gitlab api key handler and gitlab random key auth to use gitlab --- augur/tasks/gitlab/gitlab_api_key_handler.py | 25 +++++++------------- augur/tasks/gitlab/gitlab_random_key_auth.py | 3 +-- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/augur/tasks/gitlab/gitlab_api_key_handler.py b/augur/tasks/gitlab/gitlab_api_key_handler.py index e59a3620ef..fabc04543f 100644 --- a/augur/tasks/gitlab/gitlab_api_key_handler.py +++ b/augur/tasks/gitlab/gitlab_api_key_handler.py @@ -57,9 +57,7 @@ def get_config_key(self) -> 
str: Returns: Github API key from config table """ - - # TODO: Change to get the gitlab api key - return self.config.get_value("Keys", "github_api_key") + return self.config.get_value("Keys", "gitlab_api_key") def get_api_keys_from_database(self) -> List[str]: """Retieves all github api keys from database @@ -75,8 +73,7 @@ def get_api_keys_from_database(self) -> List[str]: select = WorkerOauth.access_token # randomizing the order at db time #select.order_by(func.random()) - # TODO: Change to get gitlab api keys - where = [WorkerOauth.access_token != self.config_key, WorkerOauth.platform == 'github'] + where = [WorkerOauth.access_token != self.config_key, WorkerOauth.platform == 'gitlab'] return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).order_by(func.random()).all()] #return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).all()] @@ -150,9 +147,8 @@ def get_api_keys(self) -> List[str]: return valid_keys - # TODO: Change to use gitlab rate limit api def is_bad_api_key(self, client: httpx.Client, oauth_key: str) -> bool: - """Determines if a Github API is bad + """Determines if a Gitlab API key is bad Args: client: makes the http requests @@ -162,17 +158,12 @@ def is_bad_api_key(self, client: httpx.Client, oauth_key: str) -> bool: True if key is bad. False if the key is good """ - # this endpoint allows us to check the rate limit, but it does not use one of our 5000 requests url = "https://api.github.com/rate_limit" - headers = {'Authorization': f'token {oauth_key}'} - - data = client.request(method="GET", url=url, headers=headers, timeout=180).json() - - try: - if data["message"] == "Bad credentials": - return True - except KeyError: - pass + headers = {'Authorization': f'Bearer {oauth_key}'} + response = client.request(method="GET", url=url, headers=headers, timeout=180) + if response.status_code == 401: + return True + return False \ No newline at end of file diff --git a/augur/tasks/gitlab/gitlab_random_key_auth.py b/augur/tasks/gitlab/gitlab_random_key_auth.py index 9194a94fe8..7543f5a310 100644 --- a/augur/tasks/gitlab/gitlab_random_key_auth.py +++ b/augur/tasks/gitlab/gitlab_random_key_auth.py @@ -21,8 +21,7 @@ def __init__(self, session: DatabaseSession, logger): if not github_api_keys: print("Failed to find github api keys. This is usually because your key has expired") - # TODO: Change to set key headers how gitlab expects header_name = "Authorization" - key_format = "token {0}" + key_format = "Bearer {0}" super().__init__(github_api_keys, header_name, session.logger, key_format) \ No newline at end of file From 63b6350fe1b32300c70bf7260bf885d4591560ec Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 28 Nov 2023 17:29:34 -0600 Subject: [PATCH 018/122] Change is_valid_gitlab_repo to use gitlab api --- augur/application/db/models/augur_data.py | 44 +++++++++++------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 9a2f14e8bb..a90a2254f4 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -930,49 +930,45 @@ def is_valid_github_repo(gh_session, url: str) -> bool: return True, {"status": "Valid repo", "repo_type": data["owner"]["type"]} - # TODO: Change to use gitlab api @staticmethod def is_valid_gitlab_repo(gl_session, url: str) -> bool: - """Determine whether repo url is valid. + """Determine whether a GitLab repo URL is valid. 
Args: - url: repo_url + gl_session: GitLab session object with API key + url: Repository URL - Returns - True if repo url is valid and False if not + Returns: + True if repo URL is valid, False otherwise """ - from augur.tasks.github.util.github_paginator import hit_api - - REPO_ENDPOINT = "https://api.github.com/repos/{}/{}" - - if not gl_session.oauths.list_of_keys: - return False, {"status": "No valid github api keys to retrieve data with"} + REPO_ENDPOINT = "https://gitlab.com/api/v4/projects/{}/{}" owner, repo = Repo.parse_gitlab_repo_url(url) if not owner or not repo: - return False, {"status":"Invalid repo url"} + return False, {"status": "Invalid repo URL"} - url = REPO_ENDPOINT.format(owner, repo) + # Encode namespace and project name for the API request + project_identifier = f"{owner}%2F{repo}" + url = REPO_ENDPOINT.format(project_identifier) attempts = 0 while attempts < 10: - result = hit_api(gl_session.oauths, url, logger) + response = gl_session.get(url) - # if result is None try again - if not result: - attempts+=1 + if response.status_code != 200: + attempts += 1 continue - data = result.json() - # if there was an error return False - if "message" in data.keys(): + if response.status_code == 404: + return False, {"status": "Invalid repo"} - if data["message"] == "Not Found": - return False, {"status": "Invalid repo"} + if response.status_code != 200: + return False, {"status": f"GitLab Error: {response.json().get('message', 'Unknown error')}"} - return False, {"status": f"Gitlab Error: {data['message']}"} + return True, {"status": "Valid repo"} + + return False, {"status": "Failed to validate repo after multiple attempts"} - return True, {"status": "Valid repo", "repo_type": data["owner"]["type"]} @staticmethod def parse_github_repo_url(url: str) -> tuple: From 8446c04e81a7a2ef9da38efa032ddb316337c63b Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 28 Nov 2023 17:32:39 -0600 Subject: [PATCH 019/122] Clean up method --- augur/application/db/models/augur_data.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index a90a2254f4..47eeffcdca 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -941,6 +941,8 @@ def is_valid_gitlab_repo(gl_session, url: str) -> bool: Returns: True if repo URL is valid, False otherwise """ + from augur.tasks.github.util.github_paginator import hit_api + REPO_ENDPOINT = "https://gitlab.com/api/v4/projects/{}/{}" owner, repo = Repo.parse_gitlab_repo_url(url) @@ -953,19 +955,15 @@ def is_valid_gitlab_repo(gl_session, url: str) -> bool: attempts = 0 while attempts < 10: - response = gl_session.get(url) - - if response.status_code != 200: - attempts += 1 - continue + response = hit_api(gl_session.oauths, url, logger) if response.status_code == 404: return False, {"status": "Invalid repo"} - if response.status_code != 200: - return False, {"status": f"GitLab Error: {response.json().get('message', 'Unknown error')}"} + if response.status_code == 200: + return True, {"status": "Valid repo"} - return True, {"status": "Valid repo"} + attempts += 1 return False, {"status": "Failed to validate repo after multiple attempts"} @@ -988,7 +986,6 @@ def parse_github_repo_url(url: str) -> tuple: capturing_groups = result.groups() - owner = capturing_groups[0] repo = capturing_groups[1] From 9a0fb0845ea9f88aabcc0e7265e7cd4e4d918208 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 28 
Nov 2023 17:37:22 -0600 Subject: [PATCH 020/122] Renaming --- augur/tasks/gitlab/gitlab_random_key_auth.py | 9 ++++----- augur/tasks/gitlab/gitlab_task_session.py | 3 +-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/augur/tasks/gitlab/gitlab_random_key_auth.py b/augur/tasks/gitlab/gitlab_random_key_auth.py index 7543f5a310..86ad64b056 100644 --- a/augur/tasks/gitlab/gitlab_random_key_auth.py +++ b/augur/tasks/gitlab/gitlab_random_key_auth.py @@ -14,14 +14,13 @@ def __init__(self, session: DatabaseSession, logger): """Creates a GitlabRandomKeyAuth object and initializes the RandomKeyAuth parent class""" - # gets the github api keys from the database via the GithubApiKeyHandler - github_api_keys = GitlabApiKeyHandler(session).keys - #github_api_keys = random.sample(github_api_keys, len(github_api_keys)) + # gets the gitlab api keys from the database via the GitlabApiKeyHandler + gitlab_api_keys = GitlabApiKeyHandler(session).keys - if not github_api_keys: + if not gitlab_api_keys: print("Failed to find github api keys. This is usually because your key has expired") header_name = "Authorization" key_format = "Bearer {0}" - super().__init__(github_api_keys, header_name, session.logger, key_format) \ No newline at end of file + super().__init__(gitlab_api_keys, header_name, session.logger, key_format) \ No newline at end of file diff --git a/augur/tasks/gitlab/gitlab_task_session.py b/augur/tasks/gitlab/gitlab_task_session.py index a7a68ec6b5..1871e46c50 100644 --- a/augur/tasks/gitlab/gitlab_task_session.py +++ b/augur/tasks/gitlab/gitlab_task_session.py @@ -23,8 +23,7 @@ def __exit__(self, exception_type, exception_value, exception_traceback): self.augur_db.close() - -class GithubTaskSession(DatabaseSession): +class GitlabTaskSession(DatabaseSession): """ORM session used in gitlab tasks. This class adds the platform_id and the gitlab key authentication class, to the already existing DatabaseSession so there is a central location to access From dccb8bc9989e92d05b8ae51401c566a63efd00fa Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 28 Nov 2023 17:42:56 -0600 Subject: [PATCH 021/122] Fix insert logic to work for gitlab --- augur/application/db/models/augur_data.py | 4 ++-- augur/application/db/models/augur_operations.py | 9 ++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 47eeffcdca..4f2bfec5ac 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -1035,7 +1035,7 @@ def parse_github_org_url(url): return result.groups()[0] @staticmethod - def insert_githlab_repo(session, url: str, repo_group_id: int, tool_source, repo_type): + def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source): """Add a repo to the repo table. 
Args:
@@ -1066,7 +1066,7 @@ def insert_githlab_repo(session, url: str, repo_group_id: int, tool_source, repo
             "repo_git": url,
             "repo_path": f"gitlab.com/{owner}/",
             "repo_name": repo,
-            "repo_type": repo_type,
+            "repo_type": None,
             "tool_source": tool_source,
             "tool_version": "1.0",
             "data_source": "Git"
diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py
index 37f0e2f420..a0b57ca4ee 100644
--- a/augur/application/db/models/augur_operations.py
+++ b/augur/application/db/models/augur_operations.py
@@ -784,7 +784,7 @@ def insert(session, repo_id: int, group_id:int = 1) -> bool:
         return data[0]["group_id"] == group_id and data[0]["repo_id"] == repo_id

     @staticmethod
-    def add_gitlab_repo(session, url: List[str], user_id: int, group_name=None, group_id=None, from_org_list=False, repo_type=None, repo_group_id=None) -> dict:
+    def add_gitlab_repo(session, url: List[str], user_id: int, group_name=None, group_id=None, from_org_list=False, repo_group_id=None) -> dict:
         """Add repo to the user repo table

         Args:
@@ -807,9 +807,6 @@ def add_gitlab_repo(session, url: List[str], user_id: int, group_name=None, grou
         if not group_name and not group_id:
             return False, {"status": "Need group name or group id to add a repo"}

-        if from_org_list and not repo_type:
-            return False, {"status": "Repo type must be passed if the repo is from an organization's list of repos"}
-
         if group_id is None:

@@ -821,8 +818,6 @@ def add_gitlab_repo(session, url: List[str], user_id: int, group_name=None, grou
             if not result[0]:
                 return False, {"status": result[1]["status"], "repo_url": url}

-            repo_type = result[1]["repo_type"]
-
         # if no repo_group_id is passed then assign the repo to the frontend repo group
         if repo_group_id is None:

@@ -833,7 +828,7 @@ def add_gitlab_repo(session, url: List[str], user_id: int, group_name=None, grou
             repo_group_id = frontend_repo_group.repo_group_id


-        repo_id = Repo.insert_gitlab_repo(session, url, repo_group_id, "Frontend", repo_type)
+        repo_id = Repo.insert_gitlab_repo(session, url, repo_group_id, "Frontend")

From ec0cfff791745ea9d02e991125cc54c914359c21 Mon Sep 17 00:00:00 2001
From: Andrew Brain
Date: Tue, 28 Nov 2023 19:02:37 -0600
Subject: [PATCH 022/122] Successfully insert gitlab repo

---
 augur/api/view/api.py                           | 13 ++++++++++++-
 augur/application/db/models/augur_data.py       |  4 ++--
 augur/application/db/models/augur_operations.py |  4 ++--
 augur/tasks/gitlab/gitlab_api_key_handler.py    |  2 +-
 4 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/augur/api/view/api.py b/augur/api/view/api.py
index 287b079436..598c0cdb6d 100644
--- a/augur/api/view/api.py
+++ b/augur/api/view/api.py
@@ -102,7 +102,18 @@ def av_add_user_repo():
                 if rg_obj:
                     # add the orgs repos to the group
                     add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id)
-
+
+            # matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo}
+            elif Repo.parse_gitlab_repo_url(url)[0]:
+
+                org_name, repo_name = Repo.parse_gitlab_repo_url(url)
+                repo_git = f"https://gitlab.com/{org_name}/{repo_name}"
+
+                # TODO: gitlab ensure the whole repo git is inserted so it can be found here
+                repo_obj = Repo.get_by_repo_git(session, repo_git)
+                if repo_obj:
+                    add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id)
+
             else:
                 invalid_urls.append(url)
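The endpoint rewrite in the next hunk reflects how GitLab's REST API addresses a project: not by separate owner and repo path segments as GitHub does, but by a single URL-encoded `namespace/project` path. A sketch of the equivalent encoding with the standard library (the manual `%2F` join used in the validation code produces the same string):

```python
from urllib.parse import quote

owner, repo = "gitlab-org", "gitaly"  # illustrative project
project_path = quote(f"{owner}/{repo}", safe="")  # -> "gitlab-org%2Fgitaly"
endpoint = f"https://gitlab.com/api/v4/projects/{project_path}"
# a GET on this endpoint returns project metadata, or 404 for an unknown path
print(endpoint)
```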
diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py
index 4f2bfec5ac..cfbcdca1d7 100644
--- a/augur/application/db/models/augur_data.py
+++ b/augur/application/db/models/augur_data.py
@@ -943,7 +943,7 @@ def is_valid_gitlab_repo(gl_session, url: str) -> bool:
         """
         from augur.tasks.github.util.github_paginator import hit_api

-        REPO_ENDPOINT = "https://gitlab.com/api/v4/projects/{}/{}"
+        REPO_ENDPOINT = "https://gitlab.com/api/v4/projects/{}/"

         owner, repo = Repo.parse_gitlab_repo_url(url)
         if not owner or not repo:
@@ -1046,7 +1046,7 @@ def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source):
             If repo row exists then it will update the repo_group_id if param repo_group_id is not a default. If it does not exist it will simply insert the repo.
         """

-        if not isinstance(url, str) or not isinstance(repo_group_id, int) or not isinstance(tool_source, str) or not isinstance(repo_type, str):
+        if not isinstance(url, str) or not isinstance(repo_group_id, int) or not isinstance(tool_source, str):
             return None

         if not RepoGroup.is_valid_repo_group_id(session, repo_group_id):
diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py
index a0b57ca4ee..8da2b397fa 100644
--- a/augur/application/db/models/augur_operations.py
+++ b/augur/application/db/models/augur_operations.py
@@ -463,10 +463,10 @@ def add_github_repo(self, group_name, repo_url):

     def add_gitlab_repo(self, group_name, repo_url):

-        from augur.tasks.gitlab.gitlab_task_session import GitLabTaskSession
+        from augur.tasks.gitlab.gitlab_task_session import GitlabTaskSession
         from augur.tasks.github.util.github_api_key_handler import NoValidKeysError
         try:
-            with GitLabTaskSession(logger) as session:
+            with GitlabTaskSession(logger) as session:
                 result = UserRepo.add_gitlab_repo(session, repo_url, self.user_id, group_name)
         except NoValidKeysError:
             return False, {"status": "No valid keys"}
diff --git a/augur/tasks/gitlab/gitlab_api_key_handler.py b/augur/tasks/gitlab/gitlab_api_key_handler.py
index fabc04543f..7e6b359f5e 100644
--- a/augur/tasks/gitlab/gitlab_api_key_handler.py
+++ b/augur/tasks/gitlab/gitlab_api_key_handler.py
@@ -158,7 +158,7 @@ def is_bad_api_key(self, client: httpx.Client, oauth_key: str) -> bool:
             True if key is bad. False if the key is good
         """

-        url = "https://api.github.com/rate_limit"
+        url = "https://gitlab.com/api/v4/user"

         headers = {'Authorization': f'Bearer {oauth_key}'}

From 291feae14238a43972d166002f13c5bc8c6b5793 Mon Sep 17 00:00:00 2001
From: Andrew Brain
Date: Tue, 28 Nov 2023 19:31:43 -0600
Subject: [PATCH 023/122] Ensure that gitlab repos aren't being started

---
 augur/tasks/util/collection_util.py | 39 +++++++++++++++--------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py
index 4d5b663a20..1d5ddd79c5 100644
--- a/augur/tasks/util/collection_util.py
+++ b/augur/tasks/util/collection_util.py
@@ -587,27 +587,28 @@ def send_messages(self):
         for col_hook in self.collection_hooks:

             self.logger.info(f"Starting collection on {len(col_hook.repo_list)} {col_hook.name} repos")
-
+
         for repo_git in col_hook.repo_list:
-
-            #repo = self.session.query(Repo).filter(Repo.repo_git == repo_git).one()
-            #repo_id = repo.repo_id
-
-            augur_collection_sequence = []
-            for job in col_hook.phases:
-                #Add the phase to the sequence in order as a celery task. 
- #The preliminary task creates the larger task chain - augur_collection_sequence.append(job(repo_git)) - - #augur_collection_sequence.append(core_task_success_util.si(repo_git)) - #Link all phases in a chain and send to celery - augur_collection_chain = chain(*augur_collection_sequence) - task_id = augur_collection_chain.apply_async().task_id - - self.logger.info(f"Setting repo {col_hook.name} status to collecting for repo: {repo_git}") - - #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated - yield repo_git, task_id, col_hook.name + repo = self.session.query(Repo).filter(Repo.repo_git == repo_git).one() + if "github" in repo.repo_git: + augur_collection_sequence = [] + for job in col_hook.phases: + #Add the phase to the sequence in order as a celery task. + #The preliminary task creates the larger task chain + augur_collection_sequence.append(job(repo_git)) + + #augur_collection_sequence.append(core_task_success_util.si(repo_git)) + #Link all phases in a chain and send to celery + augur_collection_chain = chain(*augur_collection_sequence) + task_id = augur_collection_chain.apply_async().task_id + + self.logger.info(f"Setting repo {col_hook.name} status to collecting for repo: {repo_git}") + + #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated + yield repo_git, task_id, col_hook.name + else: + print(f"Unable to start collection for {repo.repo_git}") #def start_block_of_repos(logger,session,repo_git_identifiers,phases,repos_type,hook="core"): # From 12a204a1d7029a313b19869c6ce867ed9e1f476d Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Wed, 29 Nov 2023 11:59:38 -0600 Subject: [PATCH 024/122] typing-extensions has to be 4.7.1 for two other packages --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8291d46fe8..bc229654c0 100644 --- a/setup.py +++ b/setup.py @@ -85,7 +85,8 @@ "dnspython==2.2.1", 'Werkzeug~=2.0.0', "pylint==2.15.5", - "mdpdf==0.0.18" + "mdpdf==0.0.18", + "typing-extensions==4.7.1" ], extras_require={ "dev": [ From 3c24954dd2244a6211cf0fed74603a318a8b7361 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Wed, 29 Nov 2023 12:05:18 -0600 Subject: [PATCH 025/122] tensorflow update --- augur/tasks/data_analysis/message_insights/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/data_analysis/message_insights/setup.py b/augur/tasks/data_analysis/message_insights/setup.py index 7609ead3c3..04c2822d35 100644 --- a/augur/tasks/data_analysis/message_insights/setup.py +++ b/augur/tasks/data_analysis/message_insights/setup.py @@ -38,7 +38,7 @@ def read(filename): 'emoji==1.2.0', 'Keras==2.13.1', 'Keras-Preprocessing', - 'tensorflow==2.13.1', + 'tensorflow==2.15.0', 'h5py==3.10.0', 'scikit-image==0.19.1', 'joblib==1.2.0', From 59486fec91ed5ae11c98bc6e665e9976ba4b12c9 Mon Sep 17 00:00:00 2001 From: "Sean P. 
Goggins" Date: Wed, 29 Nov 2023 12:11:05 -0600 Subject: [PATCH 026/122] scipy numpy updates --- augur/tasks/data_analysis/clustering_worker/setup.py | 2 +- augur/tasks/data_analysis/discourse_analysis/setup.py | 2 +- augur/tasks/data_analysis/insight_worker/setup.py | 4 ++-- augur/tasks/data_analysis/message_insights/setup.py | 4 ++-- .../tasks/data_analysis/pull_request_analysis_worker/setup.py | 4 ++-- setup.py | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/augur/tasks/data_analysis/clustering_worker/setup.py b/augur/tasks/data_analysis/clustering_worker/setup.py index 56e01e52f3..778d1f580b 100644 --- a/augur/tasks/data_analysis/clustering_worker/setup.py +++ b/augur/tasks/data_analysis/clustering_worker/setup.py @@ -29,7 +29,7 @@ def read(filename): 'psycopg2-binary==2.9.3', #'sklearn==0.0.0', 'scikit-learn==1.1.3', - 'numpy==1.22.0', + 'numpy==1.27.0', 'nltk==3.6.6', 'seaborn==0.11.1', 'pandas==1.5.3', diff --git a/augur/tasks/data_analysis/discourse_analysis/setup.py b/augur/tasks/data_analysis/discourse_analysis/setup.py index 6e992eb857..118c5bdf50 100644 --- a/augur/tasks/data_analysis/discourse_analysis/setup.py +++ b/augur/tasks/data_analysis/discourse_analysis/setup.py @@ -28,7 +28,7 @@ def read(filename): 'requests==2.28.0', 'psycopg2-binary==2.9.3', 'click==8.0.3', - 'scipy==1.10.0', + 'scipy>=1.10.0', 'nltk==3.6.6', 'pandas==1.5.3', 'scikit-learn==1.1.3', diff --git a/augur/tasks/data_analysis/insight_worker/setup.py b/augur/tasks/data_analysis/insight_worker/setup.py index 0eb35d8a78..74d02850f6 100644 --- a/augur/tasks/data_analysis/insight_worker/setup.py +++ b/augur/tasks/data_analysis/insight_worker/setup.py @@ -29,9 +29,9 @@ def read(filename): 'requests==2.28.0', 'psycopg2-binary==2.9.3', 'click==8.0.3', - 'scipy>=1.7.3', + 'scipy>=1.10.0', 'sklearn==0.0', - 'numpy==1.22.0', + 'numpy==1.27.0', ], entry_points={ 'console_scripts': [ diff --git a/augur/tasks/data_analysis/message_insights/setup.py b/augur/tasks/data_analysis/message_insights/setup.py index 04c2822d35..373e4fd203 100644 --- a/augur/tasks/data_analysis/message_insights/setup.py +++ b/augur/tasks/data_analysis/message_insights/setup.py @@ -30,9 +30,9 @@ def read(filename): 'requests==2.28.0', 'psycopg2-binary==2.9.3', 'click==8.0.3', - 'scipy==1.10.0', + 'scipy>=1.10.0', 'scikit-learn==1.1.3', #0.24.2', - 'numpy==1.22.0', + 'numpy==1.27.0', 'nltk==3.6.6', 'pandas==1.5.3', 'emoji==1.2.0', diff --git a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py index e6a8927998..738b3ae3a1 100644 --- a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py +++ b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py @@ -29,12 +29,12 @@ def read(filename): 'psycopg2-binary==2.9.3', 'sklearn==0.0', 'nltk==3.6.6', - 'numpy==1.22.0', + 'numpy==1.27.0', 'pandas==1.5.3', 'emoji==1.2.0', 'joblib==1.2.0', 'xgboost==1.4.2', - 'scipy==1.10.0' + 'scipy>=1.10.0' ], classifiers=[ 'Development Status :: 2 - Pre-Alpha', diff --git a/setup.py b/setup.py index bc229654c0..634e46ee88 100644 --- a/setup.py +++ b/setup.py @@ -49,7 +49,7 @@ "Flask-Login==0.5.0", "Flask-WTF==1.0.0", "pandas==1.5.3", # 1.4.3 - "numpy==1.22", # 1.23.2 + "numpy==1.27.0", # 1.23.2 "requests==2.28.0", # 2.28.1 "psycopg2-binary==2.9.3", #2.9.3 what is pscopg-binary 3.0.16 "click==8.0.3", # 8.1.3 @@ -66,7 +66,7 @@ "distributed >= 2021.03.0", # 2022.8.1 "nltk==3.6.6", # 3.7 "h5py==3.10.0", # 3.7 - "scipy==1.10.0", # 1.9.0 + "scipy>=1.10.0", # 
1.9.0 "blinker==1.4", # 1.5 "protobuf<3.22", # 4.21.5 "slack==0.0.2", # 0.0.2 From 82c1a1b4285a99d5cab8397991450e40d28dde60 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Wed, 29 Nov 2023 12:13:09 -0600 Subject: [PATCH 027/122] numpy --- augur/tasks/data_analysis/clustering_worker/setup.py | 2 +- augur/tasks/data_analysis/insight_worker/setup.py | 2 +- augur/tasks/data_analysis/message_insights/setup.py | 2 +- augur/tasks/data_analysis/pull_request_analysis_worker/setup.py | 2 +- setup.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/augur/tasks/data_analysis/clustering_worker/setup.py b/augur/tasks/data_analysis/clustering_worker/setup.py index 778d1f580b..06d2a5eeba 100644 --- a/augur/tasks/data_analysis/clustering_worker/setup.py +++ b/augur/tasks/data_analysis/clustering_worker/setup.py @@ -29,7 +29,7 @@ def read(filename): 'psycopg2-binary==2.9.3', #'sklearn==0.0.0', 'scikit-learn==1.1.3', - 'numpy==1.27.0', + 'numpy==1.26.0', 'nltk==3.6.6', 'seaborn==0.11.1', 'pandas==1.5.3', diff --git a/augur/tasks/data_analysis/insight_worker/setup.py b/augur/tasks/data_analysis/insight_worker/setup.py index 74d02850f6..1ee6e8a4bd 100644 --- a/augur/tasks/data_analysis/insight_worker/setup.py +++ b/augur/tasks/data_analysis/insight_worker/setup.py @@ -31,7 +31,7 @@ def read(filename): 'click==8.0.3', 'scipy>=1.10.0', 'sklearn==0.0', - 'numpy==1.27.0', + 'numpy==1.26.0', ], entry_points={ 'console_scripts': [ diff --git a/augur/tasks/data_analysis/message_insights/setup.py b/augur/tasks/data_analysis/message_insights/setup.py index 373e4fd203..ae3bdf254e 100644 --- a/augur/tasks/data_analysis/message_insights/setup.py +++ b/augur/tasks/data_analysis/message_insights/setup.py @@ -32,7 +32,7 @@ def read(filename): 'click==8.0.3', 'scipy>=1.10.0', 'scikit-learn==1.1.3', #0.24.2', - 'numpy==1.27.0', + 'numpy==1.26.0', 'nltk==3.6.6', 'pandas==1.5.3', 'emoji==1.2.0', diff --git a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py index 738b3ae3a1..3341f24ff1 100644 --- a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py +++ b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py @@ -29,7 +29,7 @@ def read(filename): 'psycopg2-binary==2.9.3', 'sklearn==0.0', 'nltk==3.6.6', - 'numpy==1.27.0', + 'numpy==1.26.0', 'pandas==1.5.3', 'emoji==1.2.0', 'joblib==1.2.0', diff --git a/setup.py b/setup.py index 634e46ee88..d2a149d93b 100644 --- a/setup.py +++ b/setup.py @@ -49,7 +49,7 @@ "Flask-Login==0.5.0", "Flask-WTF==1.0.0", "pandas==1.5.3", # 1.4.3 - "numpy==1.27.0", # 1.23.2 + "numpy==1.26.0", # 1.23.2 "requests==2.28.0", # 2.28.1 "psycopg2-binary==2.9.3", #2.9.3 what is pscopg-binary 3.0.16 "click==8.0.3", # 8.1.3 From 8d4c1d79d49d784009fe6565611934461331af30 Mon Sep 17 00:00:00 2001 From: "Sean P. 
Goggins" Date: Wed, 29 Nov 2023 12:14:43 -0600 Subject: [PATCH 028/122] keras --- augur/tasks/data_analysis/message_insights/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/data_analysis/message_insights/setup.py b/augur/tasks/data_analysis/message_insights/setup.py index ae3bdf254e..8f529ec536 100644 --- a/augur/tasks/data_analysis/message_insights/setup.py +++ b/augur/tasks/data_analysis/message_insights/setup.py @@ -36,7 +36,7 @@ def read(filename): 'nltk==3.6.6', 'pandas==1.5.3', 'emoji==1.2.0', - 'Keras==2.13.1', + 'keras>=2.15.0', 'Keras-Preprocessing', 'tensorflow==2.15.0', 'h5py==3.10.0', From d204673650dae6e5b2e43c77f8ec2abe62f8e001 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Thu, 30 Nov 2023 07:46:03 -0600 Subject: [PATCH 029/122] allowing matplotlib to go higher if compatible. --- augur/tasks/data_analysis/clustering_worker/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/data_analysis/clustering_worker/setup.py b/augur/tasks/data_analysis/clustering_worker/setup.py index 06d2a5eeba..78fb0b4b50 100644 --- a/augur/tasks/data_analysis/clustering_worker/setup.py +++ b/augur/tasks/data_analysis/clustering_worker/setup.py @@ -33,7 +33,7 @@ def read(filename): 'nltk==3.6.6', 'seaborn==0.11.1', 'pandas==1.5.3', - 'matplotlib==3.5.1' + 'matplotlib>=3.5.1' ], classifiers=[ 'Development Status :: 2 - Pre-Alpha', From e4d1375165aaa28a028cbbb9a86a1b309d02bd0c Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Thu, 30 Nov 2023 07:49:59 -0600 Subject: [PATCH 030/122] Finished compatibility updates for compilation on osx with python 3.11 --- augur/tasks/data_analysis/discourse_analysis/setup.py | 4 ++-- augur/tasks/data_analysis/message_insights/setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/augur/tasks/data_analysis/discourse_analysis/setup.py b/augur/tasks/data_analysis/discourse_analysis/setup.py index 118c5bdf50..37d6557ec5 100644 --- a/augur/tasks/data_analysis/discourse_analysis/setup.py +++ b/augur/tasks/data_analysis/discourse_analysis/setup.py @@ -33,8 +33,8 @@ def read(filename): 'pandas==1.5.3', 'scikit-learn==1.1.3', 'textblob==0.15.3', - 'python-crfsuite==0.9.8', - 'sklearn-crfsuite==0.3.6', + 'python-crfsuite>=0.9.8', + 'sklearn-crfsuite>=0.3.6', 'tabulate==0.8.9' ], # python-crfsuite-0.9.8 sklearn-crfsuite-0.3.6 tabulate-0.8.9 entry_points={ diff --git a/augur/tasks/data_analysis/message_insights/setup.py b/augur/tasks/data_analysis/message_insights/setup.py index 8f529ec536..a4f6a30c43 100644 --- a/augur/tasks/data_analysis/message_insights/setup.py +++ b/augur/tasks/data_analysis/message_insights/setup.py @@ -45,7 +45,7 @@ def read(filename): 'xgboost', 'bs4==0.0.1', 'xlrd==2.0.1', - 'gensim==4.2.0' + 'gensim>=4.2.0' ], classifiers=[ 'Development Status :: 3 - Alpha', From 574885ef7762b0c726edff1e3048971f1da00fff Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Thu, 30 Nov 2023 18:56:50 +0000 Subject: [PATCH 031/122] version bump Signed-off-by: Sean P. 
Goggins --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 075a397fc5..556f8c3611 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Augur NEW Release v0.60.2 +# Augur NEW Release v0.60.5 [![first-timers-only](https://img.shields.io/badge/first--timers--only-friendly-blue.svg?style=flat-square)](https://www.firsttimersonly.com/) We follow the [First Timers Only](https://www.firsttimersonly.com/) philosophy of tagging issues for first timers only, and walking one newcomer through the resolution process weekly. [You can find these issues tagged with "first timers only" on our issues list.](https://github.com/chaoss/augur/labels/first-timers-only). @@ -7,7 +7,7 @@ ## NEW RELEASE ALERT! ### [If you want to jump right in, updated docker build/compose and bare metal installation instructions are available here](docs/new-install.md) -Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.60.2 +Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.60.5 - The `main` branch is a stable version of our new architecture, which features: - Dramatic improvement in the speed of large scale data collection (100,000+ repos). All data is obtained for 100k+ repos within 2 weeks. - A new job management architecture that uses Celery and Redis to manage queues, and enables users to run a Flower job monitoring dashboard From a5d652257420477c56391dd005e84eff9a9324fc Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Thu, 30 Nov 2023 16:13:02 -0600 Subject: [PATCH 032/122] Update github words to gitlab --- augur/tasks/gitlab/gitlab_api_key_handler.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/augur/tasks/gitlab/gitlab_api_key_handler.py b/augur/tasks/gitlab/gitlab_api_key_handler.py index 7e6b359f5e..50efa446f8 100644 --- a/augur/tasks/gitlab/gitlab_api_key_handler.py +++ b/augur/tasks/gitlab/gitlab_api_key_handler.py @@ -20,7 +20,7 @@ class GitlabApiKeyHandler(): Attributes: session (DatabaseSession): Database connection logger (logging.Logger): Handles all logs - oauth_redis_key (str): The key where the github api keys are cached in redis + oauth_redis_key (str): The key where the gitlab api keys are cached in redis redis_key_list (RedisList): Acts like a python list, and interacts directly with the redis cache config_key (str): The api key that is stored in the users config table key: (List[str]): List of keys retrieve from database or cache @@ -46,13 +46,13 @@ def get_random_key(self): """Retrieves a random key from the list of keys Returns: - A random github api key + A random gitlab api key """ return random.choice(self.keys) def get_config_key(self) -> str: - """Retrieves the users github api key from their config table + """Retrieves the users gitlab api key from their config table Returns: Github API key from config table @@ -60,7 +60,7 @@ def get_config_key(self) -> str: return self.config.get_value("Keys", "gitlab_api_key") def get_api_keys_from_database(self) -> List[str]: - """Retieves all github api keys from database + """Retieves all gitlab api keys from database Note: It retrieves all the keys from the database except the one defined in the users config @@ -131,7 +131,7 @@ def get_api_keys(self) -> List[str]: self.redis_key_list.extend(valid_keys) if not valid_keys: - raise NoValidKeysError("No valid 
github api keys found in the config or worker oauth table") + raise NoValidKeysError("No valid gitlab api keys found in the config or worker oauth table") # shuffling the keys so not all processes get the same keys in the same order @@ -152,7 +152,7 @@ def is_bad_api_key(self, client: httpx.Client, oauth_key: str) -> bool: Args: client: makes the http requests - oauth_key: github api key that is being tested + oauth_key: gitlab api key that is being tested Returns: True if key is bad. False if the key is good From 33be983b79a862e078831e21ed7a0ebad244aa48 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Fri, 1 Dec 2023 08:00:42 -0600 Subject: [PATCH 033/122] Basic retrieval of merge requests Signed-off-by: Andrew Brain --- augur/application/db/data_parse.py | 54 +++++++++++++++++-- .../application/db/models/augur_operations.py | 23 ++++---- augur/tasks/github/util/util.py | 2 +- augur/tasks/init/celery_app.py | 4 +- augur/tasks/start_tasks.py | 16 +++++- augur/tasks/util/collection_util.py | 23 ++++++-- 6 files changed, 104 insertions(+), 18 deletions(-) diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index abdc6de54c..619542afbb 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -313,7 +313,7 @@ def extract_needed_pr_message_ref_data(comment: dict, pull_request_id: int, repo def extract_needed_pr_data(pr, repo_id, tool_source, tool_version): - pr_dict = { + pr = { 'repo_id': repo_id, 'pr_url': pr['url'], # 1-22-2022 inconsistent casting; sometimes int, sometimes float in bulk_insert @@ -367,7 +367,7 @@ def extract_needed_pr_data(pr, repo_id, tool_source, tool_version): 'data_source': 'GitHub API' } - return pr_dict + return pr def extract_needed_issue_data(issue: dict, repo_id: int, tool_source: str, tool_version: str, data_source: str): @@ -513,8 +513,56 @@ def extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id, return review_row +def extract_needed_pr_data_from_gitlab_merge_request(pr, repo_id, tool_source, tool_version): - + pr_dict = { + 'repo_id': repo_id, + 'pr_url': pr['web_url'], + 'pr_src_id': pr['id'], + 'pr_src_node_id': None, + 'pr_html_url': pr['web_url'], + 'pr_diff_url': None, + 'pr_patch_url': None, + 'pr_issue_url': None, + 'pr_augur_issue_id': None, + 'pr_src_number': pr['iid'], + 'pr_src_state': pr['state'], + 'pr_src_locked': pr['discussion_locked'], + 'pr_src_title': pr['title'], + # TODO: Add contributor logic for gitlab + 'pr_augur_contributor_id': None, + 'pr_body': pr['description'], + 'pr_created_at': pr['created_at'], + 'pr_updated_at': pr['updated_at'], + 'pr_closed_at': pr['closed_at'], + 'pr_merged_at': pr['merged_at'], + 'pr_merge_commit_sha': pr['merge_commit_sha'], + 'pr_teams': None, + 'pr_milestone': pr['milestone'].get('title') if pr['milestone'] else None, + 'pr_commits_url': None, + 'pr_review_comments_url': None, + 'pr_review_comment_url': None, + 'pr_comments_url': None, + 'pr_statuses_url': None, + 'pr_meta_head_id': None, + 'pr_meta_base_id': None, + 'pr_src_issue_url': None, + 'pr_src_comments_url': None, + 'pr_src_review_comments_url': None, + 'pr_src_commits_url': None, + 'pr_src_statuses_url': None, + 'pr_src_author_association': None, + 'tool_source': tool_source, + 'tool_version': tool_version, + 'data_source': 'Gitlab API' + } + + return pr_dict + + + + + diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index 8da2b397fa..99b0782968 100644 --- 
a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -1232,18 +1232,22 @@ def insert(session, repo_id): repo_git = repo.repo_git collection_status_unique = ["repo_id"] + pr_issue_count = 0 + github_weight = 0 - try: - pr_issue_count = get_repo_weight_by_issue(session.logger, repo_git) - #session.logger.info(f"date weight: {calculate_date_weight_from_timestamps(repo.repo_added, None)}") - github_weight = pr_issue_count - calculate_date_weight_from_timestamps(repo.repo_added, None) - except Exception as e: - pr_issue_count = None - github_weight = None - session.logger.error( - ''.join(traceback.format_exception(None, e, e.__traceback__))) + if "github" in repo_git: + try: + pr_issue_count = get_repo_weight_by_issue(session.logger, repo_git) + #session.logger.info(f"date weight: {calculate_date_weight_from_timestamps(repo.repo_added, None)}") + github_weight = pr_issue_count - calculate_date_weight_from_timestamps(repo.repo_added, None) + except Exception as e: + pr_issue_count = None + github_weight = None + session.logger.error( + ''.join(traceback.format_exception(None, e, e.__traceback__))) + record = { "repo_id": repo_id, "issue_pr_sum": pr_issue_count, @@ -1251,6 +1255,7 @@ def insert(session, repo_id): "secondary_weight": github_weight, "ml_weight": github_weight } + result = session.insert_data(record, CollectionStatus, collection_status_unique, on_conflict_update=False) diff --git a/augur/tasks/github/util/util.py b/augur/tasks/github/util/util.py index fbb23dd6e8..0400b82e1a 100644 --- a/augur/tasks/github/util/util.py +++ b/augur/tasks/github/util/util.py @@ -54,7 +54,7 @@ def parse_json_response(logger: logging.Logger, response: httpx.Response) -> dic try: return response.json() except json.decoder.JSONDecodeError as e: - logger.warning(f"invalid return from GitHub. Response was: {response.text}. Exception: {e}") + logger.warning(f"invalid return. Response was: {response.text}. 
Exception: {e}") return json.loads(json.dumps(response.text)) def get_repo_weight_by_issue(logger,repo_git): diff --git a/augur/tasks/init/celery_app.py b/augur/tasks/init/celery_app.py index 706541d1c7..fda25c46ab 100644 --- a/augur/tasks/init/celery_app.py +++ b/augur/tasks/init/celery_app.py @@ -50,6 +50,8 @@ class CollectionState(Enum): 'augur.tasks.github.pull_requests.commits_model.tasks', 'augur.tasks.github.traffic.tasks'] +gitlab_tasks = ['augur.tasks.gitlab.merge_request_task'] + git_tasks = ['augur.tasks.git.facade_tasks', 'augur.tasks.git.dependency_tasks.tasks', 'augur.tasks.git.dependency_libyear_tasks.tasks', @@ -66,7 +68,7 @@ class CollectionState(Enum): frontend_tasks = ['augur.tasks.frontend'] -tasks = start_tasks + github_tasks + git_tasks + materialized_view_tasks + frontend_tasks +tasks = start_tasks + github_tasks + gitlab_tasks + git_tasks + materialized_view_tasks + frontend_tasks if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1": tasks += data_analysis_tasks diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 225f78ffde..17de504b51 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -24,6 +24,7 @@ from augur.tasks.github.pull_requests.commits_model.tasks import process_pull_request_commits from augur.tasks.git.dependency_tasks.tasks import process_ossf_dependency_metrics from augur.tasks.github.traffic.tasks import collect_github_repo_clones_data +from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests from augur.tasks.git.facade_tasks import * from augur.tasks.db.refresh_materialized_views import * # from augur.tasks.data_analysis import * @@ -93,6 +94,16 @@ def primary_repo_collect_phase(repo_git): return repo_task_group +def primary_gitlab_repo_collect_phase(repo_git): + + logger = logging.getLogger(primary_gitlab_repo_collect_phase.__name__) + + jobs = group( + collect_gitlab_merge_requests.si(repo_git) + ) + + return jobs + #This phase creates the message for secondary collection tasks. #These are less important and have their own worker. @@ -146,20 +157,23 @@ def non_repo_domain_tasks(): def build_primary_repo_collect_request(session,enabled_phase_names, days_until_collect_again = 1): #Add all required tasks to a list and pass it to the CollectionRequest primary_enabled_phases = [] + primary_gitlab_enabled_phases = [] #Primary jobs if prelim_phase.__name__ in enabled_phase_names: primary_enabled_phases.append(prelim_phase) primary_enabled_phases.append(primary_repo_collect_phase) + primary_gitlab_enabled_phases.append(primary_gitlab_repo_collect_phase) #task success is scheduled no matter what the config says. 
def core_task_success_util_gen(repo_git): return core_task_success_util.si(repo_git) primary_enabled_phases.append(core_task_success_util_gen) + primary_gitlab_enabled_phases.append(core_task_success_util_gen) - primary_request = CollectionRequest("core",primary_enabled_phases,max_repo=40, days_until_collect_again=7) + primary_request = CollectionRequest("core",primary_enabled_phases,max_repo=40, days_until_collect_again=7, gitlab_phases=primary_gitlab_enabled_phases) primary_request.get_valid_repos(session) return primary_request diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py index 1d5ddd79c5..47705785e9 100644 --- a/augur/tasks/util/collection_util.py +++ b/augur/tasks/util/collection_util.py @@ -132,9 +132,10 @@ def get_required_conditions_for_ml_repos(allow_collected_before = False, days_un class CollectionRequest: - def __init__(self,name,phases,max_repo = 10,days_until_collect_again = 1): + def __init__(self,name,phases,max_repo = 10,days_until_collect_again = 1, gitlab_phases=None): self.name = name self.phases = phases + self.gitlab_phases = gitlab_phases self.max_repo = max_repo self.days_until_collect_again = days_until_collect_again self.new_status = CollectionState.PENDING.value @@ -603,12 +604,28 @@ def send_messages(self): augur_collection_chain = chain(*augur_collection_sequence) task_id = augur_collection_chain.apply_async().task_id - self.logger.info(f"Setting repo {col_hook.name} status to collecting for repo: {repo_git}") + self.logger.info(f"Setting github repo {col_hook.name} status to collecting for repo: {repo_git}") #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated yield repo_git, task_id, col_hook.name else: - print(f"Unable to start collection for {repo.repo_git}") + if col_hook.gitlab_phases is not None: + + augur_collection_sequence = [] + for job in col_hook.gitlab_phases: + #Add the phase to the sequence in order as a celery task. 
+ #The preliminary task creates the larger task chain + augur_collection_sequence.append(job(repo_git)) + + #augur_collection_sequence.append(core_task_success_util.si(repo_git)) + #Link all phases in a chain and send to celery + augur_collection_chain = chain(*augur_collection_sequence) + task_id = augur_collection_chain.apply_async().task_id + + self.logger.info(f"Setting gitlab repo {col_hook.name} status to collecting for repo: {repo_git}") + + #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated + yield repo_git, task_id, col_hook.name #def start_block_of_repos(logger,session,repo_git_identifiers,phases,repos_type,hook="core"): # From 31b7614e141f6db2388535dff36f8315d99e2509 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Fri, 1 Dec 2023 13:55:08 -0600 Subject: [PATCH 034/122] Start gitlab issues collection Signed-off-by: Andrew Brain --- augur/application/cli/backend.py | 9 +++-- augur/application/db/data_parse.py | 33 ++++++++++++++++--- .../application/db/models/augur_operations.py | 12 +++++-- augur/tasks/init/celery_app.py | 3 +- augur/tasks/start_tasks.py | 4 ++- 5 files changed, 50 insertions(+), 11 deletions(-) diff --git a/augur/application/cli/backend.py b/augur/application/cli/backend.py index 29afab2b0d..fc466f021c 100644 --- a/augur/application/cli/backend.py +++ b/augur/application/cli/backend.py @@ -91,9 +91,12 @@ def start(disable_collection, development, port): logger.info("Deleting old task schedule") os.remove("celerybeat-schedule.db") - celery_beat_process = None - celery_command = "celery -A augur.tasks.init.celery_app.celery_app beat -l debug" - celery_beat_process = subprocess.Popen(celery_command.split(" ")) + with DatabaseSession(logger) as db_session: + config = AugurConfig(logger, db_session) + log_level = config.get_value("Logging", "log_level") + celery_beat_process = None + celery_command = f"celery -A augur.tasks.init.celery_app.celery_app beat -l {log_level.lower()}" + celery_beat_process = subprocess.Popen(celery_command.split(" ")) if not disable_collection: diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index 619542afbb..4c618860ba 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -560,9 +560,34 @@ def extract_needed_pr_data_from_gitlab_merge_request(pr, repo_id, tool_source, t return pr_dict +def extract_needed_issue_data_from_gitlab_issue(issue: dict, repo_id: int, tool_source: str, tool_version: str, data_source: str): + + issue_dict = { + "repo_id": repo_id, + "reporter_id": None, + "pull_request": None, + "pull_request_id": None, + "created_at": issue['created_at'], + "issue_title": issue['title'], + "issue_body": issue['description'] if 'description' in issue else None, + "comment_count": issue['user_notes_count'], + "updated_at": issue['updated_at'], + "closed_at": issue['closed_at'], + "repository_url": issue['_links']['project'], + "issue_url": issue['_links']['self'], + "labels_url": None, + "comments_url": issue['_links']['notes'], + "events_url": None, + "html_url": issue['_links']['self'], + "issue_state": issue['state'], + "issue_node_id": None, + "gh_issue_id": issue['id'], + "gh_issue_number": issue['iid'], + "gh_user_id": issue['author']['id'], + "tool_source": tool_source, + "tool_version": tool_version, + "data_source": data_source + } - + return issue_dict - - - diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index 
99b0782968..cf9da75af6 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -1234,8 +1234,6 @@ def insert(session, repo_id): collection_status_unique = ["repo_id"] pr_issue_count = 0 github_weight = 0 - - if "github" in repo_git: try: @@ -1248,6 +1246,16 @@ def insert(session, repo_id): session.logger.error( ''.join(traceback.format_exception(None, e, e.__traceback__))) + try: + pr_issue_count = 0 + github_weight = pr_issue_count - calculate_date_weight_from_timestamps(repo.repo_added, None) + except Exception as e: + pr_issue_count = None + github_weight = None + session.logger.error( + ''.join(traceback.format_exception(None, e, e.__traceback__))) + + record = { "repo_id": repo_id, "issue_pr_sum": pr_issue_count, diff --git a/augur/tasks/init/celery_app.py b/augur/tasks/init/celery_app.py index fda25c46ab..a2b06a22a6 100644 --- a/augur/tasks/init/celery_app.py +++ b/augur/tasks/init/celery_app.py @@ -50,7 +50,8 @@ class CollectionState(Enum): 'augur.tasks.github.pull_requests.commits_model.tasks', 'augur.tasks.github.traffic.tasks'] -gitlab_tasks = ['augur.tasks.gitlab.merge_request_task'] +gitlab_tasks = ['augur.tasks.gitlab.merge_request_task', + 'augur.tasks.gitlab.issues_task'] git_tasks = ['augur.tasks.git.facade_tasks', 'augur.tasks.git.dependency_tasks.tasks', diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 17de504b51..b99032bb82 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -25,6 +25,7 @@ from augur.tasks.git.dependency_tasks.tasks import process_ossf_dependency_metrics from augur.tasks.github.traffic.tasks import collect_github_repo_clones_data from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests +from augur.tasks.gitlab.issues_task import collect_gitlab_issues from augur.tasks.git.facade_tasks import * from augur.tasks.db.refresh_materialized_views import * # from augur.tasks.data_analysis import * @@ -99,7 +100,8 @@ def primary_gitlab_repo_collect_phase(repo_git): logger = logging.getLogger(primary_gitlab_repo_collect_phase.__name__) jobs = group( - collect_gitlab_merge_requests.si(repo_git) + collect_gitlab_merge_requests.si(repo_git), + collect_gitlab_issues.si(repo_git) ) return jobs From 3a2a46de8c05a7e434f517407d0ff2aaf7a2de03 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Fri, 1 Dec 2023 14:52:36 -0600 Subject: [PATCH 035/122] Add missed files Signed-off-by: Andrew Brain --- augur/tasks/gitlab/gitlab_paginator.py | 642 +++++++++++++++++++++++ augur/tasks/gitlab/issues_task.py | 33 ++ augur/tasks/gitlab/merge_request_task.py | 87 +++ 3 files changed, 762 insertions(+) create mode 100644 augur/tasks/gitlab/gitlab_paginator.py create mode 100644 augur/tasks/gitlab/issues_task.py create mode 100644 augur/tasks/gitlab/merge_request_task.py diff --git a/augur/tasks/gitlab/gitlab_paginator.py b/augur/tasks/gitlab/gitlab_paginator.py new file mode 100644 index 0000000000..99638e9007 --- /dev/null +++ b/augur/tasks/gitlab/gitlab_paginator.py @@ -0,0 +1,642 @@ + +import collections +import httpx +import time +import json +import asyncio +import datetime +import logging + + +from typing import List, Optional, Union, Generator, Tuple +from urllib.parse import urlencode, urlparse, parse_qs, urlunparse +from enum import Enum + + +from augur.tasks.gitlab.gitlab_random_key_auth import GitlabRandomKeyAuth +from augur.tasks.github.util.util import parse_json_response + +class GitlabApiResult(Enum): + """All the different results of 
querying the Gitlab API.""" + + NEW_RESULT = -1 + SUCCESS = 0 + TIMEOUT = 1 + NO_MORE_ATTEMPTS = 2 + REPO_NOT_FOUND = 3 + SECONDARY_RATE_LIMIT = 4 + RATE_LIMIT_EXCEEDED = 5 + ABUSE_MECHANISM_TRIGGERED = 6 + BAD_CREDENTIALS = 7 + HTML = 8 + EMPTY_STRING = 9 + +def hit_api(key_manager, url: str, logger: logging.Logger, timeout: float = 10, method: str = 'GET', ) -> Optional[httpx.Response]: + """Ping the api and get the data back for the page. + + Returns: + A httpx response that contains the data. None if a timeout occurs + """ + # self.logger.info(f"Hitting endpoint with {method} request: {url}...\n") + + with httpx.Client() as client: + + try: + response = client.request( + method=method, url=url, auth=key_manager, timeout=timeout, follow_redirects=True) + + except TimeoutError: + logger.info(f"Request timed out. Sleeping {round(timeout)} seconds and trying again...\n") + time.sleep(round(timeout)) + return None + except httpx.TimeoutException: + logger.info(f"Request timed out. Sleeping {round(timeout)} seconds and trying again...\n") + time.sleep(round(timeout)) + return None + except httpx.NetworkError: + logger.info(f"Network Error. Sleeping {round(timeout)} seconds and trying again...\n") + time.sleep(round(timeout)) + return None + except httpx.ProtocolError: + logger.info(f"Protocol Error. Sleeping {round(timeout*1.5)} seconds and trying again...\n") + time.sleep(round(timeout*1.5)) + return None + + return response + + +def process_dict_response(logger: logging.Logger, response: httpx.Response, page_data: dict) -> Optional[str]: + """Process dict response from the api and return the status. + + Args: + logger: handles logging + response: used to access the url of the request and the headers + page_data: dict response from the api + + Returns: + A string explaining what happened is returned if what happened is determined, otherwise None is returned. + """ + #logger.info("Request returned a dict: {}\n".format(page_data)) + + status_code = response.status_code + if status_code == 429: + + current_epoch = int(time.time()) + epoch_when_key_resets = int(response.headers["X-RateLimit-Reset"]) + key_reset_time = epoch_when_key_resets - current_epoch + + if key_reset_time < 0: + logger.error(f"Key reset time was less than 0 setting it to 0.\nThe current epoch is {current_epoch} and the epoch that the key resets at is {epoch_when_key_resets}") + key_reset_time = 0 + + logger.info(f"\n\n\nGitlab API rate limit exceeded. Sleeping until the key resets ({key_reset_time} seconds)") + time.sleep(key_reset_time) + + return GitlabApiResult.RATE_LIMIT_EXCEEDED + + message = page_data.get('message') + errors = page_data.get('errors') + + if not message and not errors: + return GitlabApiResult.SUCCESS + + if message == "Not Found": + logger.error( + "Gitlab repo was not found or does not exist for endpoint: " + f"{response.url}" + ) + return GitlabApiResult.REPO_NOT_FOUND + + if message and "You have exceeded a secondary rate limit. 
Please wait a few minutes before you try again" in message:
+
+        # sleeps for the specified amount of time that github says to retry after
+        retry_after = int(response.headers["Retry-After"])
+        logger.info(
+            f'\n\n\n\nSleeping for {retry_after} seconds due to secondary rate limit issue.\n\n\n\n')
+        time.sleep(retry_after)
+
+        return GitlabApiResult.SECONDARY_RATE_LIMIT
+        # return "do_not_increase_attempts"
+
+    if message and "API rate limit exceeded for user" in message:
+
+        current_epoch = int(time.time())
+        epoch_when_key_resets = int(response.headers["X-RateLimit-Reset"])
+        key_reset_time = epoch_when_key_resets - current_epoch
+
+        if key_reset_time < 0:
+            logger.error(f"Key reset time was less than 0 setting it to 0.\nThe current epoch is {current_epoch} and the epoch that the key resets at is {epoch_when_key_resets}")
+            key_reset_time = 0
+
+        logger.info(f"\n\n\nAPI rate limit exceeded. Sleeping until the key resets ({key_reset_time} seconds)")
+        time.sleep(key_reset_time)
+
+        return GitlabApiResult.RATE_LIMIT_EXCEEDED
+
+    if message and "You have triggered an abuse detection mechanism." in message:
+        # self.update_rate_limit(response, temporarily_disable=True,platform=platform)
+
+
+        # sleeps for the specified amount of time that github says to retry after
+        retry_after = int(response.headers["Retry-After"])
+        logger.info(f"Abuse mechanism detected sleeping for {retry_after} seconds")
+        time.sleep(retry_after)
+
+        return GitlabApiResult.ABUSE_MECHANISM_TRIGGERED
+
+    if message == "Bad credentials":
+        logger.error("\n\n\n\n\n\n\n Bad Token Detected \n\n\n\n\n\n\n")
+        # self.update_rate_limit(response, bad_credentials=True, platform=platform)
+        return GitlabApiResult.BAD_CREDENTIALS
+
+    if errors:
+        for error in errors:
+            if "API rate limit exceeded for user" in error['message']:
+                current_epoch = int(time.time())
+                epoch_when_key_resets = int(response.headers["X-RateLimit-Reset"])
+                key_reset_time = epoch_when_key_resets - current_epoch
+
+                if key_reset_time < 0:
+                    logger.error(f"Key reset time was less than 0 setting it to 0.\nThe current epoch is {current_epoch} and the epoch that the key resets at is {epoch_when_key_resets}")
+                    key_reset_time = 0
+
+                logger.info(f"\n\n\nAPI rate limit exceeded. Sleeping until the key resets ({key_reset_time} seconds)")
+                time.sleep(key_reset_time)
+                return GitlabApiResult.RATE_LIMIT_EXCEEDED
+
+            err_type = error.get('type')
+
+            if err_type and 'NOT_FOUND' in err_type:
+                return GitlabApiResult.REPO_NOT_FOUND
+
+
+    return GitlabApiResult.NEW_RESULT
+
+class GitlabPaginator(collections.abc.Sequence):
+    """This class is a sequence that handles paginating through data on the Gitlab API.
+
+    Attributes:
+        url (str): The url that we are collecting data
+        key_manager (GitlabRandomKeyAuth): Custom httpx auth class
+                that randomizes the gitlab api key a request gets.
+                This is how the requests are getting their api keys
+        logger (logging.Logger): Logger that handles printing information to files and stdout
+    """
+
+    def __init__(self, url: str, key_manager: GitlabRandomKeyAuth, logger: logging.Logger, from_datetime=None, to_datetime=None):
+        """Initialize the class GitlabPaginator.
+
+        Args:
+            url: url that the data is being collected
+            key_manager: class that randomly selects a Gitlab API key for each request
+            logger: handles logging
+            from_datetime: collects data after this datetime (not yet implemented)
+            to_datetime: collects data before this datetime (not yet implemented)
+        """
+        remove_fields = ["per_page", "page"]
+        url = clean_url(url, remove_fields)
+
+        # we need to add query params directly to the url, instead of passing the param to the httpx.Client.request
+        # this is because gitlab will only append specified params to the links in the headers if they are a part
+        # of the url, and not the params with the request
+        params = {"per_page": 100}
+        url = add_query_params(url, params)
+
+        self.url = url
+        self.key_manager = key_manager
+        self.logger = logger
+
+        # get the logger from the key manager
+        # self.logger = key_manager.logger
+
+        self.from_datetime = from_datetime
+        self.to_datetime = to_datetime
+
+    def __getitem__(self, index: int) -> Optional[dict]:
+        """Get the value at index of the Gitlab API data returned from the url.
+
+        Args:
+            index: The index of the desired data from the Gitlab API
+
+        Returns:
+            The value at the index
+        """
+        # get the page the item is on
+        items_page = (index // 100) + 1
+
+        # create url to query
+        params = {"page": items_page}
+        url = add_query_params(self.url, params)
+
+        data, _, result = self.retrieve_data(url)
+
+        if result != GitlabApiResult.SUCCESS:
+            self.logger.debug("Unable to get item from the api")
+            return None
+
+        # get the position of data on the page
+        page_index = index % 100
+
+        try:
+            return data[page_index]
+        except KeyError as e:
+            raise KeyError("Data does not exist for that index") from e
+
+    def __len__(self):
+        """Get the length of the Gitlab API data.
+
+        Returns:
+            The length of the Gitlab API data at the url.
+
+        Examples:
+            This function is called when len() is called on the GitlabPaginator class for example.
+
+            issues = GitlabPaginator(url, session.oauths, logger)
+            issue_len = len(issues)
+        """
+
+        num_pages = self.get_num_pages()
+
+        self.logger.info(f"Num pages: {num_pages}")
+
+        params = {"page": num_pages}
+        url = add_query_params(self.url, params)
+
+        # get the amount of data on last page
+        data, _, result = self.retrieve_data(url)
+
+        if result == GitlabApiResult.SUCCESS:
+            return (100 * (num_pages -1)) + len(data)
+
+        self.logger.debug("Unable to retrieve data length from api")
+        return 0
+
+    def __iter__(self) -> Generator[Optional[dict], None, None]:
+        """Provide data from Gitlab API via a generator that yields one dict at a time.
+
+        Yields:
+            A piece of data from the gitlab api at the specified url
+        """
+        data_list, response, result = self.retrieve_data(self.url)
+
+        if result != GitlabApiResult.SUCCESS:
+            self.logger.debug("Failed to retrieve the data even though 10 attempts were given")
+            yield None
+            return
+
+        # yield the first page data
+        for data in data_list:
+            yield data
+
+        while 'next' in response.links.keys():
+            next_page = response.links['next']['url']
+
+            # Here we don't need to pass in params with the page, or the default params because the url from the headers already has those values
+            data_list, response, result = self.retrieve_data(next_page)
+
+            if result != GitlabApiResult.SUCCESS:
+                self.logger.debug("Failed to retrieve the data even though 10 attempts were given")
+                return
+
+            for data in data_list:
+                yield data
+
+    def iter_pages(self) -> Generator[Tuple[Optional[List[dict]], int], None, None]:
+        """Provide data from Gitlab API via a generator that yields a page of dicts at a time.
+
+        Returns:
+            A page of data from the Gitlab API at the specified url
+        """
+        # retrieves the data for the given url
+        data_list, response, result = self.retrieve_data(self.url)
+
+        if result != GitlabApiResult.SUCCESS:
+            self.logger.debug("Failed to retrieve the data even though 10 attempts were given")
+            yield None, None
+            return
+
+        # this retrieves the page for the given url
+        page_number = get_url_page_number(self.url)
+
+        # yields the first page of data and its page number
+        yield data_list, page_number
+
+        while 'next' in response.links.keys():
+
+            # gets the next page from the last responses header
+            next_page = response.links['next']['url']
+
+            # Here we don't need to pass in params with the page, or the default params because the url from the headers already has those values
+            data_list, response, result = self.retrieve_data(next_page)
+
+            if result != GitlabApiResult.SUCCESS:
+                self.logger.debug(f"Failed to retrieve the data even though 10 attempts were given. Url: {next_page}")
+                return
+
+            page_number = get_url_page_number(next_page)
+
+            # if either the data or response is None then yield None and return
+            if data_list is None or response is None:
+                return
+
+            # yield the data from the page and its number
+            yield data_list, page_number
+
+    def retrieve_data(self, url: str) -> Tuple[Optional[List[dict]], Optional[httpx.Response], GitlabApiResult]:
+        """Attempt to retrieve data at given url.
+
+        Args:
+            url: The url to retrieve the data from
+
+        Returns:
+            The response object from hitting the url and the data on the page
+        """
+        timeout = 30
+        timeout_count = 0
+        num_attempts = 1
+        while num_attempts <= 10:
+
+            response = hit_api(self.key_manager, url, self.logger, timeout)
+
+            if response is None:
+                if timeout_count == 10:
+                    self.logger.error(f"Request timed out 10 times for {url}")
+                    return None, None, GitlabApiResult.TIMEOUT
+
+                timeout = timeout * 1.1
+                num_attempts += 1
+                continue
+
+            # if api returns a status of 204 No Content then return empty list
+            if response.status_code == 204:
+                return [], response, GitlabApiResult.SUCCESS
+
+
+            page_data = parse_json_response(self.logger, response)
+
+
+            # if the data is a list, then return it and the response
+            if isinstance(page_data, list) is True:
+                return page_data, response, GitlabApiResult.SUCCESS
+
+            # if the data is a dict then call process_dict_response, and
+            if isinstance(page_data, dict) is True:
+                dict_processing_result = process_dict_response(self.logger, response, page_data)
+
+                if dict_processing_result == GitlabApiResult.NEW_RESULT:
+                    print(f"Encountered new dict response from api on url: {url}. Response: {page_data}")
+                    return None, None, GitlabApiResult.NEW_RESULT
+
+                if dict_processing_result == GitlabApiResult.REPO_NOT_FOUND:
+                    return None, response, GitlabApiResult.REPO_NOT_FOUND
+
+                if dict_processing_result in (GitlabApiResult.SECONDARY_RATE_LIMIT, GitlabApiResult.ABUSE_MECHANISM_TRIGGERED):
+                    continue
+
+                if dict_processing_result == GitlabApiResult.RATE_LIMIT_EXCEEDED:
+                    num_attempts = 0
+                    continue
+
+            if isinstance(page_data, str) is True:
+                str_processing_result: Union[str, List[dict]] = self.process_str_response(page_data)
+
+                if isinstance(str_processing_result, list):
+                    return str_processing_result, response, GitlabApiResult.SUCCESS
+
+            num_attempts += 1
+
+        self.logger.error("Unable to collect data in 10 attempts")
+        return None, None, GitlabApiResult.NO_MORE_ATTEMPTS
+
+    def get_num_pages(self) -> Optional[int]:
+        """Get the number of pages of data that a url can paginate through.
+
+        Returns:
+            The number of pages a url can access
+        """
+        timeout: float = 5
+        num_attempts = 0
+        while num_attempts < 10:
+            r = hit_api(self.key_manager, self.url, self.logger, timeout=timeout, method="HEAD")
+
+            if r:
+                break
+
+            timeout = timeout * 1.2
+        else:
+            raise RuntimeError("Unable to get the number of pages of data in 10 attempts")
+
+        if 'last' not in r.links.keys():
+            return 1
+
+        # get the last url from header
+        last_page_url = r.links['last']['url']
+
+        parsed_url = urlparse(last_page_url)
+        try:
+            num_pages = int(parse_qs(parsed_url.query)['page'][0])
+        except (KeyError, ValueError):
+            return None
+
+        return num_pages
+
+    def hit_api(self, url, timeout):
+
+        return hit_api(self.key_manager, url, self.logger, timeout)
+
+
+###################################################
+
+    def process_str_response(self, page_data: str) -> Union[str, List[dict]]:
+        """Process an api response of type string.
+
+        Args:
+            page_data: the string response from the api that is being processed
+
+        Returns:
+            html_response, empty_string, and failed_to_parse_json if the data is not processable.
+            Or a list of dicts if the json was parsable
+        """
+        self.logger.info(f"Warning! page_data was string: {page_data}\n")
+
+        if "<!DOCTYPE html>" in page_data:
+            self.logger.info("HTML was returned, trying again...\n")
+            return GitlabApiResult.HTML
+
+        if not page_data:
+            self.logger.info("Empty string, trying again...\n")
+            return GitlabApiResult.EMPTY_STRING
+
+        try:
+            list_of_dict_page_data = json.loads(page_data)
+            return list_of_dict_page_data
+        except TypeError:
+            return "failed_to_parse_json"
+
+
+################################################################################
+
+# Url Helper Method to remove query parameters from the url
+def clean_url(url: str, keys: List[str]) -> str:
+    """Remove query params from url.
+
+    Args:
+        url: the url that is being modified
+        keys: the query params that are being removed
+
+    Returns:
+        A url with the params in keys removed
+    """
+    u = urlparse(url)
+    query = parse_qs(u.query, keep_blank_values=True)
+
+    for key in keys:
+        query.pop(key, None)
+
+    u = u._replace(query=urlencode(query, True))
+
+    return urlunparse(u)
+
+
+def add_query_params(url: str, additional_params: dict) -> str:
+    """Add query params to a url.
+
+    Args:
+        url: the url that is being modified
+        additional_params: key value pairs specifying the parameters to be added
+
+    Returns:
+        The url with the key value pairs in additional_params added as query params
+    """
+    url_components = urlparse(url)
+    original_params = parse_qs(url_components.query)
+    # Before Python 3.5 you could update original_params with
+    # additional_params, but here all the variables are immutable.
+    merged_params = {**original_params, **additional_params}
+    updated_query = urlencode(merged_params, doseq=True)
+    # _replace() is how you can create a new NamedTuple with a changed field
+    return url_components._replace(query=updated_query).geturl()
+
+
+
+################################################################################
+
+
+def get_url_page_number(url: str) -> int:
+    """Parse the page number from the url.
+ + Note: + If the url does not contain a page number the function returns 1 + + Args: + url: url to get the page number from + + Returns: + The page number that the url contains + """ + parsed_url = urlparse(url) + try: + # if page is not a url query param then this is page 1 + page_number = int(parse_qs(parsed_url.query)['page'][0]) + + except KeyError: + return 1 + + return page_number + + +def retrieve_dict_from_endpoint(logger, key_auth, url, timeout_wait=10) -> Tuple[Optional[dict], GitlabApiResult]: + timeout = timeout_wait + timeout_count = 0 + num_attempts = 1 + + while num_attempts <= 10: + + response = hit_api(key_auth, url, logger, timeout) + + if response is None: + if timeout_count == 10: + logger.error(f"Request timed out 10 times for {url}") + return None, GitlabApiResult.TIMEOUT + + timeout = timeout * 1.1 + num_attempts += 1 + continue + + + page_data = parse_json_response(logger, response) + + if isinstance(page_data, str): + str_processing_result: Union[str, List[dict]] = process_str_response(logger,page_data) + + if isinstance(str_processing_result, dict): + #return str_processing_result, response, GitlabApiResult.SUCCESS + page_data = str_processing_result + else: + num_attempts += 1 + continue + + # if the data is a list, then return it and the response + if isinstance(page_data, list): + logger.warning("Wrong type returned, trying again...") + logger.info(f"Returned list: {page_data}") + + # if the data is a dict then call process_dict_response, and + elif isinstance(page_data, dict): + dict_processing_result = process_dict_response(logger, response, page_data) + + if dict_processing_result == GitlabApiResult.SUCCESS: + return page_data, dict_processing_result + if dict_processing_result == GitlabApiResult.NEW_RESULT: + logger.info(f"Encountered new dict response from api on url: {url}. 
Response: {page_data}") + return None, GitlabApiResult.NEW_RESULT + + if dict_processing_result == GitlabApiResult.REPO_NOT_FOUND: + return None, GitlabApiResult.REPO_NOT_FOUND + + if dict_processing_result in (GitlabApiResult.SECONDARY_RATE_LIMIT, GitlabApiResult.ABUSE_MECHANISM_TRIGGERED): + continue + + if dict_processing_result == GitlabApiResult.RATE_LIMIT_EXCEEDED: + num_attempts = 0 + continue + + + + num_attempts += 1 + + logger.error("Unable to collect data in 10 attempts") + return None, GitlabApiResult.NO_MORE_ATTEMPTS + + diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py new file mode 100644 index 0000000000..a2f338f53b --- /dev/null +++ b/augur/tasks/gitlab/issues_task.py @@ -0,0 +1,33 @@ +import logging + +from augur.tasks.init.celery_app import celery_app as celery +from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask +from augur.tasks.gitlab.gitlab_paginator import GitlabPaginator +from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest +from augur.application.db.data_parse import extract_needed_issue_data_from_gitlab_issue +from augur.tasks.github.util.util import get_owner_repo + + + +@celery.task(base=AugurCoreRepoCollectionTask) +def collect_gitlab_issues(repo_git: str) -> int: + + logger = logging.getLogger(collect_gitlab_issues.__name__) + + owner, repo = get_owner_repo(repo_git) + url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/issues" + + with GitlabTaskManifest(logger) as manifest: + + issues = GitlabPaginator(url, manifest.key_auth, logger) + for page_data, page_number in issues.iter_pages(): + + if page_data == None: + logger.info("Page was null") + logger.info(f"Page number: {page_number}") + break + + #logger.info(f"Page {page_number} data len: {len(page_data)}") + data = extract_needed_issue_data_from_gitlab_issue(page_data[0], 1, "tool source", "tool version", "Gitlab API") + #logger.info(f"Issue data: {data}") + break \ No newline at end of file diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py new file mode 100644 index 0000000000..6897351ebb --- /dev/null +++ b/augur/tasks/gitlab/merge_request_task.py @@ -0,0 +1,87 @@ +import logging + +from augur.tasks.init.celery_app import celery_app as celery +from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask +from augur.tasks.gitlab.gitlab_paginator import GitlabPaginator +from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest +from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request +from augur.tasks.github.util.util import get_owner_repo +from augur.application.db.models import PullRequest, Repo + + + + +@celery.task(base=AugurCoreRepoCollectionTask) +def collect_gitlab_merge_requests(repo_git: str) -> int: + + + logger = logging.getLogger(collect_gitlab_merge_requests.__name__) + + with GitlabTaskManifest(logger) as manifest: + + augur_db = manifest.augur_db + + repo_id = augur_db.session.query(Repo).filter( + Repo.repo_git == repo_git).one().repo_id + + owner, repo = get_owner_repo(repo_git) + mr_data = retrieve_all_mr_data(repo_git, logger, manifest.key_auth) + + if mr_data: + process_merge_requests(mr_data, f"{owner}/{repo}: Mr task", repo_id, logger, augur_db) + + return len(mr_data) + else: + logger.info(f"{owner}/{repo} has no merge requests") + return 0 + + +def retrieve_all_mr_data(repo_git: str, logger, key_auth) -> None: + + owner, repo = get_owner_repo(repo_git) + + logger.info(f"Collecting pull requests for 
{owner}/{repo}") + + url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests" + mrs = GitlabPaginator(url, key_auth, logger) + + all_data = [] + num_pages = mrs.get_num_pages() + for page_data, page in mrs.iter_pages(): + + if page_data is None: + return all_data + + if len(page_data) == 0: + logger.debug( + f"{owner}/{repo} Mrs Page {page} contains no data...returning") + logger.info(f"{owner}/{repo} Prs Page {page} of {num_pages}") + return all_data + + logger.info(f"{owner}/{repo} Mrs Page {page} of {num_pages}") + + all_data += page_data + + return all_data + + +def process_merge_requests(data, task_name, repo_id, logger, augur_db): + + tool_source = "Mr Task" + tool_version = "2.0" + merge_requests = extract_needed_mr_data(data, repo_id, tool_source, tool_version) + + logger.info(f"{task_name}: Inserting mrs of length: {len(merge_requests)}") + pr_natural_keys = ["repo_id", "pr_src_id"] + pr_string_fields = ["pr_src_title", "pr_body"] + pr_return_data = augur_db.insert_data(merge_requests, PullRequest, pr_natural_keys, string_fields=pr_string_fields) + + +def extract_needed_mr_data(mrs, repo_id, tool_source, tool_version): + + data = [] + for mr in mrs: + data.append(extract_needed_pr_data_from_gitlab_merge_request(mr, repo_id, tool_source, tool_version)) + + return data + From 2d4d149e59f13a7091051e190550236df25c2207 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Fri, 1 Dec 2023 15:04:10 -0600 Subject: [PATCH 036/122] Add gitlab issue collection' Signed-off-by: Andrew Brain --- augur/tasks/gitlab/issues_task.py | 110 ++++++++++++++++++++++++++---- 1 file changed, 97 insertions(+), 13 deletions(-) diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py index a2f338f53b..c0b615b563 100644 --- a/augur/tasks/gitlab/issues_task.py +++ b/augur/tasks/gitlab/issues_task.py @@ -8,26 +8,110 @@ from augur.tasks.github.util.util import get_owner_repo +import time +import logging +import traceback +import re + +from sqlalchemy.exc import IntegrityError + +from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler + +from augur.tasks.init.celery_app import celery_app as celery +from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask +from augur.application.db.data_parse import * +from augur.tasks.github.util.github_paginator import GithubPaginator, hit_api +from augur.tasks.github.util.github_task_session import GithubTaskManifest +from augur.application.db.session import DatabaseSession +from augur.tasks.github.util.util import add_key_value_pair_to_dicts, get_owner_repo +from augur.tasks.util.worker_util import remove_duplicate_dicts +from augur.application.db.models import Issue, Repo +from augur.application.config import get_development_flag +from augur.application.db.util import execute_session_query + +development = get_development_flag() @celery.task(base=AugurCoreRepoCollectionTask) -def collect_gitlab_issues(repo_git: str) -> int: +def collect_gitlab_issues(repo_git : str) -> int: + + + logger = logging.getLogger(collect_gitlab_issues.__name__) + with GitlabTaskManifest(logger) as manifest: + + augur_db = manifest.augur_db + + try: + + query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git) + repo_obj = execute_session_query(query, 'one') + repo_id = repo_obj.repo_id + + owner, repo = get_owner_repo(repo_git) + + issue_data = retrieve_all_gitlab_issue_data(repo_git, logger, manifest.key_auth) - logger = logging.getLogger(collect_gitlab_issues.__name__) + if issue_data: + total_issues = 
len(issue_data) + process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + + return total_issues + else: + logger.info(f"{owner}/{repo} has no issues") + return 0 + except Exception as e: + logger.error(f"Could not collect gitlab issues for repo {repo_git}\n Reason: {e} \n Traceback: {''.join(traceback.format_exception(None, e, e.__traceback__))}") + return -1 + + + +def retrieve_all_gitlab_issue_data(repo_git, logger, key_auth) -> None: owner, repo = get_owner_repo(repo_git) + + logger.info(f"Collecting gitlab issues for {owner}/{repo}") + url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/issues" + issues = GitlabPaginator(url, key_auth, logger) - with GitlabTaskManifest(logger) as manifest: + all_data = [] + num_pages = issues.get_num_pages() + for page_data, page in issues.iter_pages(): + + if page_data is None: + return all_data + + if len(page_data) == 0: + logger.debug( + f"{owner}/{repo}: Gitlab Issues Page {page} contains no data...returning") + logger.info(f"{owner}/{repo}: Issues Page {page} of {num_pages}") + return all_data + + logger.info(f"{owner}/{repo}: Gitlab Issues Page {page} of {num_pages}") + + all_data += page_data + + return all_data + +def process_issues(issues, task_name, repo_id, logger, augur_db) -> None: + + # get repo_id or have it passed + tool_source = "Gitlab Issue Task" + tool_version = "2.0" + data_source = "Gitlab API" + + issue_dicts = [] + for issue in issues: - issues = GitlabPaginator(url, manifest.key_auth, logger) - for page_data, page_number in issues.iter_pages(): + issue_dicts.append( + extract_needed_issue_data_from_gitlab_issue(issue, repo_id, tool_source, tool_version, data_source) + ) - if page_data == None: - logger.info("Page was null") - logger.info(f"Page number: {page_number}") - break + if len(issue_dicts) == 0: + print("No gitlab issues found while processing") + return + + logger.info(f"{task_name}: Inserting {len(issue_dicts)} gitlab issues") + issue_natural_keys = ["repo_id", "gh_issue_id"] + issue_string_columns = ["issue_title", "issue_body"] - #logger.info(f"Page {page_number} data len: {len(page_data)}") - data = extract_needed_issue_data_from_gitlab_issue(page_data[0], 1, "tool source", "tool version", "Gitlab API") - #logger.info(f"Issue data: {data}") - break \ No newline at end of file + augur_db.insert_data(issue_dicts, Issue, issue_natural_keys, string_fields=issue_string_columns) \ No newline at end of file From 4e104c539eebdf4b7ada60ba094df4c0e0bfe819 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Fri, 1 Dec 2023 15:12:12 -0600 Subject: [PATCH 037/122] Comment out github error handling logic from gitlab paginator Signed-off-by: Andrew Brain --- augur/tasks/gitlab/gitlab_paginator.py | 108 ++++++++++++------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/augur/tasks/gitlab/gitlab_paginator.py b/augur/tasks/gitlab/gitlab_paginator.py index 99638e9007..6092532c19 100644 --- a/augur/tasks/gitlab/gitlab_paginator.py +++ b/augur/tasks/gitlab/gitlab_paginator.py @@ -94,80 +94,80 @@ def process_dict_response(logger: logging.Logger, response: httpx.Response, page return GitlabApiResult.RATE_LIMIT_EXCEEDED - message = page_data.get('message') - errors = page_data.get('errors') + # message = page_data.get('message') + # errors = page_data.get('errors') - if not message and not errors: - return GitlabApiResult.SUCCESS + # if not message and not errors: + # return GitlabApiResult.SUCCESS - if message == "Not Found": - logger.error( - "Gitlab repo was not found 
or does not exist for endpoint: " - f"{response.url}" - ) - return GitlabApiResult.REPO_NOT_FOUND + # if message == "Not Found": + # logger.error( + # "Gitlab repo was not found or does not exist for endpoint: " + # f"{response.url}" + # ) + # return GitlabApiResult.REPO_NOT_FOUND - if message and "You have exceeded a secondary rate limit. Please wait a few minutes before you try again" in message: + # if message and "You have exceeded a secondary rate limit. Please wait a few minutes before you try again" in message: - # sleeps for the specified amount of time that github says to retry after - retry_after = int(response.headers["Retry-After"]) - logger.info( - f'\n\n\n\nSleeping for {retry_after} seconds due to secondary rate limit issue.\n\n\n\n') - time.sleep(retry_after) + # # sleeps for the specified amount of time that github says to retry after + # retry_after = int(response.headers["Retry-After"]) + # logger.info( + # f'\n\n\n\nSleeping for {retry_after} seconds due to secondary rate limit issue.\n\n\n\n') + # time.sleep(retry_after) - return GitlabApiResult.SECONDARY_RATE_LIMIT - # return "do_not_increase_attempts" + # return GitlabApiResult.SECONDARY_RATE_LIMIT + # # return "do_not_increase_attempts" - if message and "API rate limit exceeded for user" in message: + # if message and "API rate limit exceeded for user" in message: - current_epoch = int(time.time()) - epoch_when_key_resets = int(response.headers["X-RateLimit-Reset"]) - key_reset_time = epoch_when_key_resets - current_epoch + # current_epoch = int(time.time()) + # epoch_when_key_resets = int(response.headers["X-RateLimit-Reset"]) + # key_reset_time = epoch_when_key_resets - current_epoch - if key_reset_time < 0: - logger.error(f"Key reset time was less than 0 setting it to 0.\nThe current epoch is {current_epoch} and the epoch that the key resets at is {epoch_when_key_resets}") - key_reset_time = 0 + # if key_reset_time < 0: + # logger.error(f"Key reset time was less than 0 setting it to 0.\nThe current epoch is {current_epoch} and the epoch that the key resets at is {epoch_when_key_resets}") + # key_reset_time = 0 - logger.info(f"\n\n\nAPI rate limit exceeded. Sleeping until the key resets ({key_reset_time} seconds)") - time.sleep(key_reset_time) + # logger.info(f"\n\n\nAPI rate limit exceeded. Sleeping until the key resets ({key_reset_time} seconds)") + # time.sleep(key_reset_time) - return GitlabApiResult.RATE_LIMIT_EXCEEDED + # return GitlabApiResult.RATE_LIMIT_EXCEEDED - if message and "You have triggered an abuse detection mechanism." in message: - # self.update_rate_limit(response, temporarily_disable=True,platform=platform) + # if message and "You have triggered an abuse detection mechanism." 
in message: + # # self.update_rate_limit(response, temporarily_disable=True,platform=platform) - # sleeps for the specified amount of time that github says to retry after - retry_after = int(response.headers["Retry-After"]) - logger.info(f"Abuse mechanism detected sleeping for {retry_after} seconds") - time.sleep(retry_after) + # # sleeps for the specified amount of time that github says to retry after + # retry_after = int(response.headers["Retry-After"]) + # logger.info(f"Abuse mechanism detected sleeping for {retry_after} seconds") + # time.sleep(retry_after) - return GitlabApiResult.ABUSE_MECHANISM_TRIGGERED + # return GitlabApiResult.ABUSE_MECHANISM_TRIGGERED - if message == "Bad credentials": - logger.error("\n\n\n\n\n\n\n Bad Token Detected \n\n\n\n\n\n\n") - # self.update_rate_limit(response, bad_credentials=True, platform=platform) - return GitlabApiResult.BAD_CREDENTIALS + # if message == "Bad credentials": + # logger.error("\n\n\n\n\n\n\n Bad Token Detected \n\n\n\n\n\n\n") + # # self.update_rate_limit(response, bad_credentials=True, platform=platform) + # return GitlabApiResult.BAD_CREDENTIALS - if errors: - for error in errors: - if "API rate limit exceeded for user" in error['message']: - current_epoch = int(time.time()) - epoch_when_key_resets = int(response.headers["X-RateLimit-Reset"]) - key_reset_time = epoch_when_key_resets - current_epoch + # if errors: + # for error in errors: + # if "API rate limit exceeded for user" in error['message']: + # current_epoch = int(time.time()) + # epoch_when_key_resets = int(response.headers["X-RateLimit-Reset"]) + # key_reset_time = epoch_when_key_resets - current_epoch - if key_reset_time < 0: - logger.error(f"Key reset time was less than 0 setting it to 0.\nThe current epoch is {current_epoch} and the epoch that the key resets at is {epoch_when_key_resets}") - key_reset_time = 0 + # if key_reset_time < 0: + # logger.error(f"Key reset time was less than 0 setting it to 0.\nThe current epoch is {current_epoch} and the epoch that the key resets at is {epoch_when_key_resets}") + # key_reset_time = 0 - logger.info(f"\n\n\nAPI rate limit exceeded. Sleeping until the key resets ({key_reset_time} seconds)") - time.sleep(key_reset_time) - return GitlabApiResult.RATE_LIMIT_EXCEEDED + # logger.info(f"\n\n\nAPI rate limit exceeded. Sleeping until the key resets ({key_reset_time} seconds)") + # time.sleep(key_reset_time) + # return GitlabApiResult.RATE_LIMIT_EXCEEDED - err_type = error.get('type') + # err_type = error.get('type') - if err_type and 'NOT_FOUND' in err_type: - return GitlabApiResult.REPO_NOT_FOUND + # if err_type and 'NOT_FOUND' in err_type: + # return GitlabApiResult.REPO_NOT_FOUND return GitlabApiResult.NEW_RESULT From 9729bceec18277d074814ff2e833b5b5c769d860 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Fri, 1 Dec 2023 15:19:14 -0600 Subject: [PATCH 038/122] Remove commented code Signed-off-by: Andrew Brain --- augur/tasks/gitlab/gitlab_paginator.py | 106 ------------------------- 1 file changed, 106 deletions(-) diff --git a/augur/tasks/gitlab/gitlab_paginator.py b/augur/tasks/gitlab/gitlab_paginator.py index 6092532c19..e7dd36b9e5 100644 --- a/augur/tasks/gitlab/gitlab_paginator.py +++ b/augur/tasks/gitlab/gitlab_paginator.py @@ -76,7 +76,6 @@ def process_dict_response(logger: logging.Logger, response: httpx.Response, page Returns: A string explaining what happened is returned if what happened is determined, otherwise None is returned. 
""" - #logger.info("Request returned a dict: {}\n".format(page_data)) status_code = response.status_code if status_code == 429: @@ -94,81 +93,6 @@ def process_dict_response(logger: logging.Logger, response: httpx.Response, page return GitlabApiResult.RATE_LIMIT_EXCEEDED - # message = page_data.get('message') - # errors = page_data.get('errors') - - # if not message and not errors: - # return GitlabApiResult.SUCCESS - - # if message == "Not Found": - # logger.error( - # "Gitlab repo was not found or does not exist for endpoint: " - # f"{response.url}" - # ) - # return GitlabApiResult.REPO_NOT_FOUND - - # if message and "You have exceeded a secondary rate limit. Please wait a few minutes before you try again" in message: - - # # sleeps for the specified amount of time that github says to retry after - # retry_after = int(response.headers["Retry-After"]) - # logger.info( - # f'\n\n\n\nSleeping for {retry_after} seconds due to secondary rate limit issue.\n\n\n\n') - # time.sleep(retry_after) - - # return GitlabApiResult.SECONDARY_RATE_LIMIT - # # return "do_not_increase_attempts" - - # if message and "API rate limit exceeded for user" in message: - - # current_epoch = int(time.time()) - # epoch_when_key_resets = int(response.headers["X-RateLimit-Reset"]) - # key_reset_time = epoch_when_key_resets - current_epoch - - # if key_reset_time < 0: - # logger.error(f"Key reset time was less than 0 setting it to 0.\nThe current epoch is {current_epoch} and the epoch that the key resets at is {epoch_when_key_resets}") - # key_reset_time = 0 - - # logger.info(f"\n\n\nAPI rate limit exceeded. Sleeping until the key resets ({key_reset_time} seconds)") - # time.sleep(key_reset_time) - - # return GitlabApiResult.RATE_LIMIT_EXCEEDED - - # if message and "You have triggered an abuse detection mechanism." in message: - # # self.update_rate_limit(response, temporarily_disable=True,platform=platform) - - - # # sleeps for the specified amount of time that github says to retry after - # retry_after = int(response.headers["Retry-After"]) - # logger.info(f"Abuse mechanism detected sleeping for {retry_after} seconds") - # time.sleep(retry_after) - - # return GitlabApiResult.ABUSE_MECHANISM_TRIGGERED - - # if message == "Bad credentials": - # logger.error("\n\n\n\n\n\n\n Bad Token Detected \n\n\n\n\n\n\n") - # # self.update_rate_limit(response, bad_credentials=True, platform=platform) - # return GitlabApiResult.BAD_CREDENTIALS - - # if errors: - # for error in errors: - # if "API rate limit exceeded for user" in error['message']: - # current_epoch = int(time.time()) - # epoch_when_key_resets = int(response.headers["X-RateLimit-Reset"]) - # key_reset_time = epoch_when_key_resets - current_epoch - - # if key_reset_time < 0: - # logger.error(f"Key reset time was less than 0 setting it to 0.\nThe current epoch is {current_epoch} and the epoch that the key resets at is {epoch_when_key_resets}") - # key_reset_time = 0 - - # logger.info(f"\n\n\nAPI rate limit exceeded. 
Sleeping until the key resets ({key_reset_time} seconds)") - # time.sleep(key_reset_time) - # return GitlabApiResult.RATE_LIMIT_EXCEEDED - - # err_type = error.get('type') - - # if err_type and 'NOT_FOUND' in err_type: - # return GitlabApiResult.REPO_NOT_FOUND - return GitlabApiResult.NEW_RESULT @@ -221,36 +145,6 @@ def __getitem__(self, index: int) -> Optional[dict]: Returns: The value at the index """ - # if isinstance(index, slice) is True: - - # data_slice = index - # start = data_slice.start - # stop = data_slice.stop - # step = data_slice.step - - # first_item_page = (start // 100) + 1 - # end_item_page = (stop // 100) + 1 - - # all_data: List[dict] = [] - - # for page_number in range(first_item_page, end_item_page+1): - - # # create url to query - # params = {"page": items_page} - # url = add_query_params(self.url, params) - - # data, _ = self.retrieve_data(url) - - # all_data += data - - # first_page_index = start % 100 - - # needed_data = [] - # for index in range(start, stop, step): - # needed_data.append(all_data[index]) - - # return needed_data - # get the page the item is on items_page = (index // 100) + 1 From f10fb2984873f2cf65f9d1fdde6fcba658991a23 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Fri, 1 Dec 2023 15:22:37 -0600 Subject: [PATCH 039/122] fix up imports Signed-off-by: Andrew Brain --- augur/tasks/github/pull_requests/tasks.py | 2 +- augur/tasks/gitlab/issues_task.py | 22 +--------------------- augur/tasks/gitlab/merge_request_task.py | 2 -- 3 files changed, 2 insertions(+), 24 deletions(-) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 3af6e39e08..478260bcd7 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -74,7 +74,7 @@ def retrieve_all_pr_data(repo_git: str, logger, key_auth) -> None: return all_data - + def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db): tool_source = "Pr Task" diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py index c0b615b563..a5a55f353a 100644 --- a/augur/tasks/gitlab/issues_task.py +++ b/augur/tasks/gitlab/issues_task.py @@ -1,4 +1,5 @@ import logging +import traceback from augur.tasks.init.celery_app import celery_app as celery from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask @@ -6,30 +7,9 @@ from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest from augur.application.db.data_parse import extract_needed_issue_data_from_gitlab_issue from augur.tasks.github.util.util import get_owner_repo - - -import time -import logging -import traceback -import re - -from sqlalchemy.exc import IntegrityError - -from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler - -from augur.tasks.init.celery_app import celery_app as celery -from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask -from augur.application.db.data_parse import * -from augur.tasks.github.util.github_paginator import GithubPaginator, hit_api -from augur.tasks.github.util.github_task_session import GithubTaskManifest -from augur.application.db.session import DatabaseSession -from augur.tasks.github.util.util import add_key_value_pair_to_dicts, get_owner_repo -from augur.tasks.util.worker_util import remove_duplicate_dicts from augur.application.db.models import Issue, Repo -from augur.application.config import get_development_flag from augur.application.db.util import execute_session_query -development = get_development_flag() 
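#note: the AugurCoreRepoCollectionTask base class appears to handle the collection-status bookkeeping
#when a task fails, so the task body below only has to collect data and return a count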
@celery.task(base=AugurCoreRepoCollectionTask) def collect_gitlab_issues(repo_git : str) -> int: diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index 6897351ebb..cfe849ba8a 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -9,8 +9,6 @@ from augur.application.db.models import PullRequest, Repo - - @celery.task(base=AugurCoreRepoCollectionTask) def collect_gitlab_merge_requests(repo_git: str) -> int: From 20bd29d592dda65946bee89ceca0d81167215c0d Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 4 Dec 2023 14:36:31 -0600 Subject: [PATCH 040/122] Implement gitlab issues labels and assignees Signed-off-by: Andrew Brain --- augur/application/db/data_parse.py | 49 ++++++++++++++++++++++++++ augur/tasks/github/issues/tasks.py | 2 +- augur/tasks/gitlab/issues_task.py | 56 +++++++++++++++++++++++++++--- 3 files changed, 102 insertions(+), 5 deletions(-) diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index 4c618860ba..cf11f4341b 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -247,6 +247,28 @@ def extract_needed_issue_assignee_data(assignees: List[dict], repo_id: int, tool return assignee_dicts +def extract_needed_gitlab_issue_assignee_data(assignees: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + + if len(assignees) == 0: + return [] + + assignee_dicts = [] + for assignee in assignees: + + assignee_dict = { + "cntrb_id": None, + "tool_source": tool_source, + "tool_version": tool_version, + "data_source": data_source, + "issue_assignee_src_id": assignee['id'], + "issue_assignee_src_node": None, + "repo_id": repo_id + } + + assignee_dicts.append(assignee_dict) + + return assignee_dicts + # retrieve only the needed data for pr labels from the api response @@ -277,6 +299,33 @@ def extract_needed_issue_label_data(labels: List[dict], repo_id: int, tool_sourc return label_dicts +def extract_needed_gitlab_issue_label_data(labels: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + + if len(labels) == 0: + return [] + + label_dicts = [] + for label in labels: + + print(f"Processing repo id for issue label: {repo_id}") + + label_dict = { + "label_text": label["name"], + "label_description": label.get("description", None), + "label_color": label['color'], + "tool_source": tool_source, + "tool_version": tool_version, + "data_source": data_source, + "label_src_id": label['id'], + "label_src_node_id": None, + "repo_id": repo_id + } + + label_dicts.append(label_dict) + + return label_dicts + + # retrieve only the needed data for pr labels from the api response def extract_needed_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues/tasks.py index 5380b8bf10..0ba793470e 100644 --- a/augur/tasks/github/issues/tasks.py +++ b/augur/tasks/github/issues/tasks.py @@ -195,7 +195,7 @@ def process_issues(issues, task_name, repo_id, logger, augur_db) -> None: issue_assignee_dicts += add_key_value_pair_to_dicts(other_issue_data["assignees"], "issue_id", issue_id) - logger.info(f"{task_name}: Inserting other issue data of lengths: Labels: {len(issue_label_dicts)} - Assignees: {len(issue_assignee_dicts)}") + logger.info(f"{task_name}: Inserting other github issue data of lengths: Labels: 
{len(issue_label_dicts)} - Assignees: {len(issue_assignee_dicts)}")
 
     # inserting issue labels
     # we are using label_src_id and issue_id to determine if the label is already in the database.
diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py
index a5a55f353a..612a5d36c4 100644
--- a/augur/tasks/gitlab/issues_task.py
+++ b/augur/tasks/gitlab/issues_task.py
@@ -5,9 +5,9 @@
 from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask
 from augur.tasks.gitlab.gitlab_paginator import GitlabPaginator
 from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest
-from augur.application.db.data_parse import extract_needed_issue_data_from_gitlab_issue
-from augur.tasks.github.util.util import get_owner_repo
-from augur.application.db.models import Issue, Repo
+from augur.application.db.data_parse import extract_needed_issue_data_from_gitlab_issue, extract_needed_gitlab_issue_label_data, extract_needed_gitlab_issue_assignee_data
+from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts
+from augur.application.db.models import Issue, IssueLabel, IssueAssignee, Repo
 from augur.application.db.util import execute_session_query
 
 
@@ -80,12 +80,26 @@ def process_issues(issues, task_name, repo_id, logger, augur_db) -> None:
     data_source = "Gitlab API"
 
     issue_dicts = []
+    issue_mapping_data = {}
     for issue in issues:
 
         issue_dicts.append(
             extract_needed_issue_data_from_gitlab_issue(issue, repo_id, tool_source, tool_version, data_source)
         )
 
+        issue_labels = extract_needed_gitlab_issue_label_data(issue["labels"], repo_id,
+                                                              tool_source, tool_version, data_source)
+
+        issue_assignees = extract_needed_gitlab_issue_assignee_data(issue["assignees"], repo_id,
+                                                                    tool_source, tool_version, data_source)
+
+        mapping_data_key = issue["id"]
+        issue_mapping_data[mapping_data_key] = {
+                                            "labels": issue_labels,
+                                            "assignees": issue_assignees,
+                                            }
+
+
     if len(issue_dicts) == 0:
         print("No gitlab issues found while processing")
         return
@@ -93,5 +107,39 @@ def process_issues(issues, task_name, repo_id, logger, augur_db) -> None:
     logger.info(f"{task_name}: Inserting {len(issue_dicts)} gitlab issues")
     issue_natural_keys = ["repo_id", "gh_issue_id"]
     issue_string_columns = ["issue_title", "issue_body"]
+    issue_return_columns = ["gh_issue_id", "issue_id"]
+
+    issue_return_data = augur_db.insert_data(issue_dicts, Issue, issue_natural_keys, return_columns=issue_return_columns, string_fields=issue_string_columns)
+
+    issue_label_dicts = []
+    issue_assignee_dicts = []
+    for data in issue_return_data:
+
+        gh_issue_id = data["gh_issue_id"]
+        issue_id = data["issue_id"]
+
+        try:
+            other_issue_data = issue_mapping_data[gh_issue_id]
+        except KeyError as e:
+            logger.info(f"{task_name}: Could not find other gitlab issue data. This should never happen. Error: {e}")
+
+
+        # add the issue id to the labels and assignees, then add them to a list of dicts that will be inserted soon
+        dict_key = "issue_id"
+        issue_label_dicts += add_key_value_pair_to_dicts(other_issue_data["labels"], dict_key, issue_id)
+        issue_assignee_dicts += add_key_value_pair_to_dicts(other_issue_data["assignees"], dict_key, issue_id)
+
+
+    logger.info(f"{task_name}: Inserting other gitlab issue data of lengths: Labels: {len(issue_label_dicts)} - Assignees: {len(issue_assignee_dicts)}")
+
+    # inserting issue labels
+    # we are using label_src_id and issue_id to determine if the label is already in the database. 
+ issue_label_natural_keys = ['label_src_id', 'issue_id'] + issue_label_string_fields = ["label_text", "label_description"] + augur_db.insert_data(issue_label_dicts, IssueLabel, + issue_label_natural_keys, string_fields=issue_label_string_fields) - augur_db.insert_data(issue_dicts, Issue, issue_natural_keys, string_fields=issue_string_columns) \ No newline at end of file + # inserting issue assignees + # we are using issue_assignee_src_id and issue_id to determine if the label is already in the database. + issue_assignee_natural_keys = ['issue_assignee_src_id', 'issue_id'] + augur_db.insert_data(issue_assignee_dicts, IssueAssignee, issue_assignee_natural_keys) \ No newline at end of file From 0dfcf9bd2c154f93607ca19f6e105c93cb0542b4 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Mon, 4 Dec 2023 14:37:18 -0600 Subject: [PATCH 041/122] comment out variable reassignment overriding pr_issue_count Signed-off-by: Isaac Milarsky --- augur/application/db/models/augur_operations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index cf9da75af6..0b9a4ee4c4 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -1247,7 +1247,7 @@ def insert(session, repo_id): ''.join(traceback.format_exception(None, e, e.__traceback__))) try: - pr_issue_count = 0 + #pr_issue_count = 0 github_weight = pr_issue_count - calculate_date_weight_from_timestamps(repo.repo_added, None) except Exception as e: pr_issue_count = None From 11340d4985bf0634f0b10573cedef381b8e9791b Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 4 Dec 2023 14:42:16 -0600 Subject: [PATCH 042/122] Missed in last commit Signed-off-by: Andrew Brain --- augur/tasks/gitlab/issues_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py index 612a5d36c4..8e40e65f13 100644 --- a/augur/tasks/gitlab/issues_task.py +++ b/augur/tasks/gitlab/issues_task.py @@ -50,7 +50,7 @@ def retrieve_all_gitlab_issue_data(repo_git, logger, key_auth) -> None: logger.info(f"Collecting gitlab issues for {owner}/{repo}") - url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/issues" + url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/issues?with_labels_details=True" issues = GitlabPaginator(url, key_auth, logger) all_data = [] From 286aa0c238607655321e972d26aaed4e077333ce Mon Sep 17 00:00:00 2001 From: Oleks Date: Tue, 5 Dec 2023 10:09:04 +0100 Subject: [PATCH 043/122] Update firefox version to match requirement of firefox-geckodriver --- docs/source/getting-started/new-install.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/getting-started/new-install.rst b/docs/source/getting-started/new-install.rst index 14e276fc51..ba38fb7758 100644 --- a/docs/source/getting-started/new-install.rst +++ b/docs/source/getting-started/new-install.rst @@ -44,7 +44,7 @@ Executable sudo snap install go --classic && #required: Go Needs to be version 1.19.x or higher. Snap is the package manager that gets you to the right version. Classic enables it to actually be installed at the correct version. 
sudo apt install nginx && # required for hosting
         sudo add-apt-repository ppa:mozillateam/firefox-next &&
-        sudo apt install firefox=115.0~b2+build1-0ubuntu0.22.04.1 &&
+        sudo apt install firefox=121.0~b7+build1-0ubuntu0.22.04.1 &&
         sudo apt install firefox-geckodriver
 
     # You will almost certainly need to reboot after this.

From de4217e4c13e515a72276e3ab950bb1bd112d675 Mon Sep 17 00:00:00 2001
From: Andrew Brain
Date: Fri, 8 Dec 2023 13:24:57 -0600
Subject: [PATCH 044/122] Fix small bugs in assignees and labels for mrs

---
 augur/application/db/data_parse.py            | 57 ++++++++++++++++++++-
 .../application/db/models/augur_operations.py | 18 +++---
 augur/tasks/gitlab/merge_request_task.py      | 58 +++++++++++++++----
 augur/tasks/start_tasks.py                    |  6 +-
 4 files changed, 112 insertions(+), 27 deletions(-)

diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py
index cf11f4341b..6cc94494ae 100644
--- a/augur/application/db/data_parse.py
+++ b/augur/application/db/data_parse.py
@@ -37,6 +37,34 @@ def extract_needed_pr_label_data(labels: List[dict], repo_id: int, tool_source:
 
     return label_dicts
 
+
+def extract_needed_mr_label_data(labels: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+
+    if len(labels) == 0:
+        return []
+
+    label_dicts = []
+    for label in labels:
+
+        label_dict = {
+            'pr_src_id': label['id'],
+            'pr_src_node_id': None,
+            'pr_src_url': None,
+            'pr_src_description': label['name'],
+            'pr_src_color': label['color'],
+            # TODO: Populate this by making an api call for each label
+            'pr_src_default_bool': None,
+            'tool_source': tool_source,
+            'tool_version': tool_version,
+            'data_source': data_source,
+            'repo_id': repo_id
+        }
+
+        label_dicts.append(label_dict)
+
+    return label_dicts
+
+
 # retrieve only the needed data for pr assignees from the api response
 def extract_needed_pr_assignee_data(assignees: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
 
@@ -48,7 +76,6 @@ def extract_needed_pr_assignee_data(assignees: List[dict], repo_id: int, tool_so
     for assignee in assignees:
 
         assignee_dict = {
-            # store the pr_url data on in the pr assignee data for now so we can relate it back to a pr later
            'contrib_id': assignee["cntrb_id"],
            'pr_assignee_src_id': int(assignee['id']),
            'tool_source': tool_source,
@@ -61,6 +88,30 @@ def extract_needed_pr_assignee_data(assignees: List[dict], repo_id: int, tool_so
 
     return assignee_dicts
 
+def extract_needed_merge_request_assignee_data(assignees: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+
+    if len(assignees) == 0:
+        return []
+
+    assignee_dicts = []
+    for assignee in assignees:
+
+        assignee_dict = {
+            'contrib_id': None,
+            'repo_id': repo_id,
+            # TODO: Temporarily setting this to id, which is the id of the contributor, until we can get the contrib_id set and create a unique on the contrib_id and the pull_request_id
+            'pr_assignee_src_id': assignee["id"],
+            'tool_source': tool_source,
+            'tool_version': tool_version,
+            'data_source': data_source
+        }
+
+        assignee_dicts.append(assignee_dict)
+
+    return assignee_dicts
+
+
+
 # retrieve only the needed data for pr reviewers from the api response
 def extract_needed_pr_reviewer_data(reviewers: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
 
@@ -307,8 +358,6 @@ def extract_needed_gitlab_issue_label_data(labels: List[dict], repo_id: int, too
 
     label_dicts = []
    for label in labels:
 
-        print(f"Processing repo id for issue 
label: {repo_id}") - label_dict = { "label_text": label["name"], "label_description": label.get("description", None), @@ -640,3 +689,5 @@ def extract_needed_issue_data_from_gitlab_issue(issue: dict, repo_id: int, tool_ return issue_dict + + diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index 0b9a4ee4c4..47f28b12f2 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -1245,15 +1245,15 @@ def insert(session, repo_id): github_weight = None session.logger.error( ''.join(traceback.format_exception(None, e, e.__traceback__))) - - try: - #pr_issue_count = 0 - github_weight = pr_issue_count - calculate_date_weight_from_timestamps(repo.repo_added, None) - except Exception as e: - pr_issue_count = None - github_weight = None - session.logger.error( - ''.join(traceback.format_exception(None, e, e.__traceback__))) + else: + try: + pr_issue_count = 0 + github_weight = pr_issue_count - calculate_date_weight_from_timestamps(repo.repo_added, None) + except Exception as e: + pr_issue_count = None + github_weight = None + session.logger.error( + ''.join(traceback.format_exception(None, e, e.__traceback__))) record = { diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index cfe849ba8a..5e6597a678 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -4,9 +4,9 @@ from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask from augur.tasks.gitlab.gitlab_paginator import GitlabPaginator from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest -from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request -from augur.tasks.github.util.util import get_owner_repo -from augur.application.db.models import PullRequest, Repo +from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data +from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts +from augur.application.db.models import PullRequest, PullRequestAssignee, PullRequestLabel, Repo @celery.task(base=AugurCoreRepoCollectionTask) @@ -40,7 +40,7 @@ def retrieve_all_mr_data(repo_git: str, logger, key_auth) -> None: logger.info(f"Collecting pull requests for {owner}/{repo}") - url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests" + url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests?with_labels_details=True" mrs = GitlabPaginator(url, key_auth, logger) all_data = [] @@ -67,19 +67,53 @@ def process_merge_requests(data, task_name, repo_id, logger, augur_db): tool_source = "Mr Task" tool_version = "2.0" - merge_requests = extract_needed_mr_data(data, repo_id, tool_source, tool_version) + data_source = "Gitlab API" + + merge_requests = [] + mr_mapping_data = {} + for mr in data: + merge_requests.append(extract_needed_pr_data_from_gitlab_merge_request(mr, repo_id, tool_source, tool_version)) + + assignees = extract_needed_merge_request_assignee_data(mr["assignees"], repo_id, tool_source, tool_version, data_source) + + labels = extract_needed_mr_label_data(mr["labels"], repo_id, tool_source, tool_version, data_source) + + mapping_data_key = mr["id"] + mr_mapping_data[mapping_data_key] = { + "assignees": assignees, + "labels": labels + } logger.info(f"{task_name}: Inserting mrs of length: {len(merge_requests)}") 
pr_natural_keys = ["repo_id", "pr_src_id"]
     pr_string_fields = ["pr_src_title", "pr_body"]
-    pr_return_data = augur_db.insert_data(merge_requests, PullRequest, pr_natural_keys, string_fields=pr_string_fields)
+    pr_return_columns = ["pull_request_id", "pr_src_id"]
+    pr_return_data = augur_db.insert_data(merge_requests, PullRequest, pr_natural_keys, return_columns=pr_return_columns, string_fields=pr_string_fields)
+
+
+    mr_assignee_dicts = []
+    mr_label_dicts = []
+    for data in pr_return_data:
+
+        mr_src_id = data["pr_src_id"]
+        pull_request_id = data["pull_request_id"]
+
+        try:
+            other_mr_data = mr_mapping_data[mr_src_id]
+        except KeyError as e:
+            logger.info(f"Could not find other pr data. This should never happen. Error: {e}")
+
+        dict_key = "pull_request_id"
+        mr_assignee_dicts += add_key_value_pair_to_dicts(other_mr_data["assignees"], dict_key, pull_request_id)
+        mr_label_dicts += add_key_value_pair_to_dicts(other_mr_data["labels"], dict_key, pull_request_id)
 
+    logger.info(f"{task_name}: Inserting other pr data of lengths: Labels: {len(mr_label_dicts)} - Assignees: {len(mr_assignee_dicts)}")
 
-def extract_needed_mr_data(mrs, repo_id, tool_source, tool_version):
-    
-    data = []
-    for mr in mrs:
-        data.append(extract_needed_pr_data_from_gitlab_merge_request(mr, repo_id, tool_source, tool_version))
+    # TODO: Setup unique key on assignees with a value of ('cntrb_id', 'pull_request_id') and add 'cntrb_id' to assignee data
+    mr_assignee_natural_keys = ['pr_assignee_src_id', 'pull_request_id']
+    augur_db.insert_data(mr_assignee_dicts, PullRequestAssignee, mr_assignee_natural_keys)
 
-    return data
+    pr_label_natural_keys = ['pr_src_id', 'pull_request_id']
+    pr_label_string_fields = ["pr_src_description"]
+    augur_db.insert_data(mr_label_dicts, PullRequestLabel, pr_label_natural_keys, string_fields=pr_label_string_fields)
diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py
index b99032bb82..e837a65f6b 100644
--- a/augur/tasks/start_tasks.py
+++ b/augur/tasks/start_tasks.py
@@ -95,9 +95,9 @@ def primary_repo_collect_phase(repo_git):
 
     return repo_task_group
 
-def primary_gitlab_repo_collect_phase(repo_git):
+def primary_repo_collect_phase_gitlab(repo_git):
 
-    logger = logging.getLogger(primary_gitlab_repo_collect_phase.__name__)
+    logger = logging.getLogger(primary_repo_collect_phase_gitlab.__name__)
 
     jobs = group(
         collect_gitlab_merge_requests.si(repo_git),
@@ -166,7 +166,7 @@ def build_primary_repo_collect_request(session,enabled_phase_names, days_until_c
     primary_enabled_phases.append(prelim_phase)
 
     primary_enabled_phases.append(primary_repo_collect_phase)
-    primary_gitlab_enabled_phases.append(primary_gitlab_repo_collect_phase)
+    primary_gitlab_enabled_phases.append(primary_repo_collect_phase_gitlab)
 
     #task success is scheduled no matter what the config says. 
def core_task_success_util_gen(repo_git): From 924a7adb50aa8e3c6c07cd4a2294012597fc6948 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Fri, 8 Dec 2023 15:02:01 -0600 Subject: [PATCH 045/122] Setup passing of mr ids to next tasks Signed-off-by: Andrew Brain --- augur/tasks/gitlab/merge_request_task.py | 57 ++++++++++++++++++++++-- augur/tasks/start_tasks.py | 11 ++++- 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index 5e6597a678..ae3b95e41f 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -26,12 +26,12 @@ def collect_gitlab_merge_requests(repo_git: str) -> int: mr_data = retrieve_all_mr_data(repo_git, logger, manifest.key_auth) if mr_data: - process_merge_requests(mr_data, f"{owner}/{repo}: Mr task", repo_id, logger, augur_db) + mr_ids = process_merge_requests(mr_data, f"{owner}/{repo}: Mr task", repo_id, logger, augur_db) - return len(mr_data) + return mr_ids else: logger.info(f"{owner}/{repo} has no merge requests") - return 0 + return [] def retrieve_all_mr_data(repo_git: str, logger, key_auth) -> None: @@ -70,8 +70,12 @@ def process_merge_requests(data, task_name, repo_id, logger, augur_db): data_source = "Gitlab API" merge_requests = [] + mr_ids = [] mr_mapping_data = {} for mr in data: + + mr_ids.append(mr["iid"]) + merge_requests.append(extract_needed_pr_data_from_gitlab_merge_request(mr, repo_id, tool_source, tool_version)) assignees = extract_needed_merge_request_assignee_data(mr["assignees"], repo_id, tool_source, tool_version, data_source) @@ -117,3 +121,50 @@ def process_merge_requests(data, task_name, repo_id, logger, augur_db): pr_label_string_fields = ["pr_src_description"] augur_db.insert_data(mr_label_dicts, PullRequestLabel, pr_label_natural_keys, string_fields=pr_label_string_fields) + return mr_ids + + + +@celery.task(base=AugurCoreRepoCollectionTask) +def collect_merge_request_comments(mr_ids, repo_git) -> int: + + print("Collect merge request comments") + # print(f"Repo git: {repo_git}. Len ids: {mr_ids}") + + +@celery.task(base=AugurCoreRepoCollectionTask) +def collect_merge_request_events(mr_ids, repo_git) -> int: + + print("Collect merge request events") + # print(f"Repo git: {repo_git}. Len ids: {mr_ids}") + + +@celery.task(base=AugurCoreRepoCollectionTask) +def collect_merge_request_metadata(mr_ids, repo_git) -> int: + + print("Collect merge request metadata") + # print(f"Repo git: {repo_git}. Len ids: {mr_ids}") + + +@celery.task(base=AugurCoreRepoCollectionTask) +def collect_merge_request_reviewers(mr_ids, repo_git) -> int: + + print("Collect merge request reviewers") + # print(f"Repo git: {repo_git}. Len ids: {mr_ids}") + + +@celery.task(base=AugurCoreRepoCollectionTask) +def collect_merge_request_commits(mr_ids, repo_git) -> int: + + print("Collect merge request commits") + # print(f"Repo git: {repo_git}. Len ids: {mr_ids}") + + +@celery.task(base=AugurCoreRepoCollectionTask) +def collect_merge_request_files(mr_ids, repo_git) -> int: + + print("Collect merge request files") + # print(f"Repo git: {repo_git}. 
Len ids: {mr_ids}") + + + \ No newline at end of file diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index e837a65f6b..251da00f2a 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -24,7 +24,7 @@ from augur.tasks.github.pull_requests.commits_model.tasks import process_pull_request_commits from augur.tasks.git.dependency_tasks.tasks import process_ossf_dependency_metrics from augur.tasks.github.traffic.tasks import collect_github_repo_clones_data -from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests +from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests, collect_merge_request_comments, collect_merge_request_events, collect_merge_request_metadata, collect_merge_request_reviewers, collect_merge_request_commits, collect_merge_request_files from augur.tasks.gitlab.issues_task import collect_gitlab_issues from augur.tasks.git.facade_tasks import * from augur.tasks.db.refresh_materialized_views import * @@ -100,7 +100,14 @@ def primary_repo_collect_phase_gitlab(repo_git): logger = logging.getLogger(primary_repo_collect_phase_gitlab.__name__) jobs = group( - collect_gitlab_merge_requests.si(repo_git), + chain(collect_gitlab_merge_requests.si(repo_git), group( + collect_merge_request_comments.s(repo_git), + collect_merge_request_events.s(repo_git), + collect_merge_request_reviewers.s(repo_git), + collect_merge_request_metadata.s(repo_git), + collect_merge_request_commits.s(repo_git), + collect_merge_request_files.s(repo_git) + )), collect_gitlab_issues.si(repo_git) ) From 5a80cf269667a133c12a1554adb2dcda98ad3fdd Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Fri, 8 Dec 2023 15:14:28 -0600 Subject: [PATCH 046/122] Setup issues tasks to pass ids to next tasks Signed-off-by: Andrew Brain --- augur/tasks/gitlab/issues_task.py | 27 ++++++++++++++++++++++----- augur/tasks/start_tasks.py | 7 +++++-- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py index 8e40e65f13..75635eecd1 100644 --- a/augur/tasks/gitlab/issues_task.py +++ b/augur/tasks/gitlab/issues_task.py @@ -31,13 +31,12 @@ def collect_gitlab_issues(repo_git : str) -> int: issue_data = retrieve_all_gitlab_issue_data(repo_git, logger, manifest.key_auth) if issue_data: - total_issues = len(issue_data) - process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) - return total_issues + return issue_ids else: logger.info(f"{owner}/{repo} has no issues") - return 0 + return [] except Exception as e: logger.error(f"Could not collect gitlab issues for repo {repo_git}\n Reason: {e} \n Traceback: {''.join(traceback.format_exception(None, e, e.__traceback__))}") return -1 @@ -80,9 +79,12 @@ def process_issues(issues, task_name, repo_id, logger, augur_db) -> None: data_source = "Gitlab API" issue_dicts = [] + issue_ids = [] issue_mapping_data = {} for issue in issues: + issue_ids.append(issue["iid"]) + issue_dicts.append( extract_needed_issue_data_from_gitlab_issue(issue, repo_id, tool_source, tool_version, data_source) ) @@ -142,4 +144,19 @@ def process_issues(issues, task_name, repo_id, logger, augur_db) -> None: # inserting issue assignees # we are using issue_assignee_src_id and issue_id to determine if the label is already in the database. 
issue_assignee_natural_keys = ['issue_assignee_src_id', 'issue_id'] - augur_db.insert_data(issue_assignee_dicts, IssueAssignee, issue_assignee_natural_keys) \ No newline at end of file + augur_db.insert_data(issue_assignee_dicts, IssueAssignee, issue_assignee_natural_keys) + + return issue_ids + + + +@celery.task(base=AugurCoreRepoCollectionTask) +def collect_issue_comments(issue_ids, repo_git) -> int: + + print(f"Collect issue comments. Repo git: {repo_git}. Len ids: {issue_ids}") + + +@celery.task(base=AugurCoreRepoCollectionTask) +def collect_issue_events(issue_ids, repo_git) -> int: + + print(f"Collect issue events. Repo git: {repo_git}. Len ids: {issue_ids}") \ No newline at end of file diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 251da00f2a..86216c94ee 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -25,7 +25,7 @@ from augur.tasks.git.dependency_tasks.tasks import process_ossf_dependency_metrics from augur.tasks.github.traffic.tasks import collect_github_repo_clones_data from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests, collect_merge_request_comments, collect_merge_request_events, collect_merge_request_metadata, collect_merge_request_reviewers, collect_merge_request_commits, collect_merge_request_files -from augur.tasks.gitlab.issues_task import collect_gitlab_issues +from augur.tasks.gitlab.issues_task import collect_gitlab_issues, collect_issue_comments, collect_issue_events from augur.tasks.git.facade_tasks import * from augur.tasks.db.refresh_materialized_views import * # from augur.tasks.data_analysis import * @@ -108,7 +108,10 @@ def primary_repo_collect_phase_gitlab(repo_git): collect_merge_request_commits.s(repo_git), collect_merge_request_files.s(repo_git) )), - collect_gitlab_issues.si(repo_git) + chain(collect_gitlab_issues.si(repo_git), group( + collect_issue_comments.s(repo_git), + collect_issue_events.s(repo_git) + )) ) return jobs From 57ce5deb422663c1df426adc7af54b77e298468e Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Fri, 8 Dec 2023 16:33:42 -0600 Subject: [PATCH 047/122] Add collection of gitlab events & comments Signed-off-by: Andrew Brain --- augur/tasks/gitlab/issues_task.py | 87 ++++++++++++++++++++++-- augur/tasks/gitlab/merge_request_task.py | 20 +++--- augur/tasks/start_tasks.py | 8 +-- 3 files changed, 97 insertions(+), 18 deletions(-) diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py index 75635eecd1..5cd16ea4d0 100644 --- a/augur/tasks/gitlab/issues_task.py +++ b/augur/tasks/gitlab/issues_task.py @@ -151,12 +151,91 @@ def process_issues(issues, task_name, repo_id, logger, augur_db) -> None: @celery.task(base=AugurCoreRepoCollectionTask) -def collect_issue_comments(issue_ids, repo_git) -> int: +def collect_gitlab_issue_comments(issue_ids, repo_git) -> int: + + owner, repo = get_owner_repo(repo_git) + + logger = logging.getLogger(collect_gitlab_issues.__name__) + with GitlabTaskManifest(logger) as manifest: + + comments = retrieve_all_gitlab_issue_comments(manifest.key_auth, logger, issue_ids, repo_git) + + if comments: + logger.info(f"Length of comments: {len(comments)}") + #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + else: + logger.info(f"{owner}/{repo} has no gitlab issue comments") + + +def retrieve_all_gitlab_issue_comments(key_auth, logger, issue_ids, repo_git): + + owner, repo = get_owner_repo(repo_git) + + all_comments = [] + issue_count = len(issue_ids) + index = 
1 + for id in issue_ids: + + print(f"Collecting {owner}/{repo} gitlab issue comments for issue {index} of {issue_count}") + + url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/issues/{id}/notes" + comments = GitlabPaginator(url, key_auth, logger) + + for page_data, page in comments.iter_pages(): + + if page_data is None or len(page_data) == 0: + break + + all_comments += page_data + + index += 1 + + return all_comments - print(f"Collect issue comments. Repo git: {repo_git}. Len ids: {issue_ids}") @celery.task(base=AugurCoreRepoCollectionTask) -def collect_issue_events(issue_ids, repo_git) -> int: +def collect_gitlab_issue_events(repo_git) -> int: + + owner, repo = get_owner_repo(repo_git) + + logger = logging.getLogger(collect_gitlab_issues.__name__) + with GitlabTaskManifest(logger) as manifest: + + events = retrieve_all_gitlab_issue_event_data(repo_git, logger, manifest.key_auth) + + if events: + logger.info(f"Length of events: {len(events)}") + #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + else: + logger.info(f"{owner}/{repo} has no gitlab issue events") + + + +def retrieve_all_gitlab_issue_event_data(repo_git, logger, key_auth) -> None: + + owner, repo = get_owner_repo(repo_git) + + logger.info(f"Collecting gitlab issue events for {owner}/{repo}") + + url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/events?target_type=issue&per_page=100" + events = GitlabPaginator(url, key_auth, logger) + + all_data = [] + num_pages = events.get_num_pages() + for page_data, page in events.iter_pages(): + + if page_data is None: + return all_data + + if len(page_data) == 0: + logger.debug( + f"{owner}/{repo}: Gitlab Issue Events Page {page} contains no data...returning") + logger.info(f"{owner}/{repo}: Issue Events Page {page} of {num_pages}") + return all_data + + logger.info(f"{owner}/{repo}: Gitlab Issue Events Page {page} of {num_pages}") + + all_data += page_data - print(f"Collect issue events. Repo git: {repo_git}. Len ids: {issue_ids}") \ No newline at end of file + return all_data \ No newline at end of file diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index ae3b95e41f..e0875b4194 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -134,36 +134,36 @@ def collect_merge_request_comments(mr_ids, repo_git) -> int: @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_events(mr_ids, repo_git) -> int: - - print("Collect merge request events") + pass + #print("Collect merge request events") # print(f"Repo git: {repo_git}. Len ids: {mr_ids}") @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_metadata(mr_ids, repo_git) -> int: - - print("Collect merge request metadata") + pass + #print("Collect merge request metadata") # print(f"Repo git: {repo_git}. Len ids: {mr_ids}") @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_reviewers(mr_ids, repo_git) -> int: - - print("Collect merge request reviewers") + pass + #print("Collect merge request reviewers") # print(f"Repo git: {repo_git}. Len ids: {mr_ids}") @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_commits(mr_ids, repo_git) -> int: - - print("Collect merge request commits") + pass + #print("Collect merge request commits") # print(f"Repo git: {repo_git}. 
Len ids: {mr_ids}") @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_files(mr_ids, repo_git) -> int: - - print("Collect merge request files") + pass + #print("Collect merge request files") # print(f"Repo git: {repo_git}. Len ids: {mr_ids}") diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 86216c94ee..dca7f3518b 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -25,7 +25,7 @@ from augur.tasks.git.dependency_tasks.tasks import process_ossf_dependency_metrics from augur.tasks.github.traffic.tasks import collect_github_repo_clones_data from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests, collect_merge_request_comments, collect_merge_request_events, collect_merge_request_metadata, collect_merge_request_reviewers, collect_merge_request_commits, collect_merge_request_files -from augur.tasks.gitlab.issues_task import collect_gitlab_issues, collect_issue_comments, collect_issue_events +from augur.tasks.gitlab.issues_task import collect_gitlab_issues, collect_gitlab_issue_comments, collect_gitlab_issue_events from augur.tasks.git.facade_tasks import * from augur.tasks.db.refresh_materialized_views import * # from augur.tasks.data_analysis import * @@ -109,9 +109,9 @@ def primary_repo_collect_phase_gitlab(repo_git): collect_merge_request_files.s(repo_git) )), chain(collect_gitlab_issues.si(repo_git), group( - collect_issue_comments.s(repo_git), - collect_issue_events.s(repo_git) - )) + collect_gitlab_issue_comments.s(repo_git) + )), + collect_gitlab_issue_events.si(repo_git) ) return jobs From ca6544d3534bb8ef76e3e62fc0a6faf6f92eaa26 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Fri, 8 Dec 2023 17:01:30 -0600 Subject: [PATCH 048/122] Setup urls for merge request collections Signed-off-by: Andrew Brain --- augur/tasks/gitlab/issues_task.py | 2 +- augur/tasks/gitlab/merge_request_task.py | 36 +++++++++++++++++++----- augur/tasks/start_tasks.py | 4 +-- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py index 5cd16ea4d0..6504615f8b 100644 --- a/augur/tasks/gitlab/issues_task.py +++ b/augur/tasks/gitlab/issues_task.py @@ -218,7 +218,7 @@ def retrieve_all_gitlab_issue_event_data(repo_git, logger, key_auth) -> None: logger.info(f"Collecting gitlab issue events for {owner}/{repo}") - url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/events?target_type=issue&per_page=100" + url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/events?target_type=issue" events = GitlabPaginator(url, key_auth, logger) all_data = [] diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index e0875b4194..b1ecbb481b 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -128,41 +128,63 @@ def process_merge_requests(data, task_name, repo_id, logger, augur_db): @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_comments(mr_ids, repo_git) -> int: - print("Collect merge request comments") + owner, repo = get_owner_repo(repo_git) + id = mr_ids[0] + + url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/notes" # print(f"Repo git: {repo_git}. 
Len ids: {mr_ids}")
 
 
 @celery.task(base=AugurCoreRepoCollectionTask)
-def collect_merge_request_events(mr_ids, repo_git) -> int:
-    pass
+def collect_merge_request_events(repo_git) -> int:
+
+    owner, repo = get_owner_repo(repo_git)
+
+    url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/events?target_type=merge_request"
     #print("Collect merge request events")
     # print(f"Repo git: {repo_git}. Len ids: {mr_ids}")
 
 
 @celery.task(base=AugurCoreRepoCollectionTask)
 def collect_merge_request_metadata(mr_ids, repo_git) -> int:
-    pass
+
+    owner, repo = get_owner_repo(repo_git)
+    id = mr_ids[0]
+
+    url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}"
     #print("Collect merge request metadata")
     # print(f"Repo git: {repo_git}. Len ids: {mr_ids}")
 
 
 @celery.task(base=AugurCoreRepoCollectionTask)
 def collect_merge_request_reviewers(mr_ids, repo_git) -> int:
-    pass
+
+    owner, repo = get_owner_repo(repo_git)
+    id = mr_ids[0]
+
+    url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/approvals"
     #print("Collect merge request reviewers")
     # print(f"Repo git: {repo_git}. Len ids: {mr_ids}")
 
 
 @celery.task(base=AugurCoreRepoCollectionTask)
 def collect_merge_request_commits(mr_ids, repo_git) -> int:
-    pass
+
+    owner, repo = get_owner_repo(repo_git)
+    id = mr_ids[0]
+
+    url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/commits"
     #print("Collect merge request commits")
     # print(f"Repo git: {repo_git}. Len ids: {mr_ids}")
 
 
 @celery.task(base=AugurCoreRepoCollectionTask)
 def collect_merge_request_files(mr_ids, repo_git) -> int:
-    pass
+
+    owner, repo = get_owner_repo(repo_git)
+    id = mr_ids[0]
+
+    url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/changes"
     #print("Collect merge request files")
     # print(f"Repo git: {repo_git}. 
Len ids: {mr_ids}")
diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py
index dca7f3518b..ae819f2030 100644
--- a/augur/tasks/start_tasks.py
+++ b/augur/tasks/start_tasks.py
@@ -102,7 +102,6 @@ def primary_repo_collect_phase_gitlab(repo_git):
 
     jobs = group(
         chain(collect_gitlab_merge_requests.si(repo_git), group(
                 collect_merge_request_comments.s(repo_git),
-                collect_merge_request_events.s(repo_git),
                 collect_merge_request_reviewers.s(repo_git),
                 collect_merge_request_metadata.s(repo_git),
                 collect_merge_request_commits.s(repo_git),
@@ -111,7 +110,8 @@ def primary_repo_collect_phase_gitlab(repo_git):
         chain(collect_gitlab_issues.si(repo_git), group(
                 collect_gitlab_issue_comments.s(repo_git)
         )),
-        collect_gitlab_issue_events.si(repo_git)
+        collect_gitlab_issue_events.si(repo_git),
+        collect_merge_request_events.si(repo_git),
     )
 
     return jobs

From daa4106fdd9dc4b2fc63255d4f210bb8ce728b51 Mon Sep 17 00:00:00 2001
From: Andrew Brain
Date: Sat, 9 Dec 2023 10:13:46 -0600
Subject: [PATCH 049/122] Refactor gitlab paginator

Signed-off-by: Andrew Brain
---
 augur/tasks/github/util/github_paginator.py   |   2 +
 ...lab_paginator.py => gitlab_api_handler.py} | 347 +++++-------------
 augur/tasks/gitlab/issues_task.py             |  18 +-
 augur/tasks/gitlab/merge_request_task.py      | 140 +++++--
 4 files changed, 212 insertions(+), 295 deletions(-)
 rename augur/tasks/gitlab/{gitlab_paginator.py => gitlab_api_handler.py} (56%)

diff --git a/augur/tasks/github/util/github_paginator.py b/augur/tasks/github/util/github_paginator.py
index 548d25b0f9..31c14565df 100644
--- a/augur/tasks/github/util/github_paginator.py
+++ b/augur/tasks/github/util/github_paginator.py
@@ -154,6 +154,8 @@ class GithubApiResult(Enum):
     SECONDARY_RATE_LIMIT = 4
     RATE_LIMIT_EXCEEDED = 5
     ABUSE_MECHANISM_TRIGGERED = 6
+    # TODO: Add bad credentials detection that removes key
+    # from redis if bad credentials are detected
     BAD_CREDENTIALS = 7
     HTML = 8
     EMPTY_STRING = 9
diff --git a/augur/tasks/gitlab/gitlab_paginator.py b/augur/tasks/gitlab/gitlab_api_handler.py
similarity index 56%
rename from augur/tasks/gitlab/gitlab_paginator.py
rename to augur/tasks/gitlab/gitlab_api_handler.py
index e7dd36b9e5..97b1690006 100644
--- a/augur/tasks/gitlab/gitlab_paginator.py
+++ b/augur/tasks/gitlab/gitlab_api_handler.py
@@ -1,25 +1,18 @@
-import collections
 import httpx
 import time
-import json
-import asyncio
-import datetime
 import logging
-
 from typing import List, Optional, Union, Generator, Tuple
 from urllib.parse import urlencode, urlparse, parse_qs, urlunparse
 from enum import Enum
 
-
 from augur.tasks.gitlab.gitlab_random_key_auth import GitlabRandomKeyAuth
 from augur.tasks.github.util.util import parse_json_response
 
 class GitlabApiResult(Enum):
     """All the different results of querying the Gitlab API."""
-    NEW_RESULT = -1
     SUCCESS = 0
     TIMEOUT = 1
     NO_MORE_ATTEMPTS = 2
     REPO_NOT_FOUND = 3
     SECONDARY_RATE_LIMIT = 4
     RATE_LIMIT_EXCEEDED = 5
     ABUSE_MECHANISM_TRIGGERED = 6
+    # TODO: Add bad credentials detection that removes key
+    # from redis if bad credentials are detected
     BAD_CREDENTIALS = 7
-    HTML = 8
-    EMPTY_STRING = 9
-
-def hit_api(key_manager, url: str, logger: logging.Logger, timeout: float = 10, method: str = 'GET', ) -> Optional[httpx.Response]:
-    """Ping the api and get the data back for the page.
-
-    Returns:
-        A httpx response that contains the data. 
None if a timeout occurs - """ - # self.logger.info(f"Hitting endpoint with {method} request: {url}...\n") - - with httpx.Client() as client: - - try: - response = client.request( - method=method, url=url, auth=key_manager, timeout=timeout, follow_redirects=True) - - except TimeoutError: - logger.info(f"Request timed out. Sleeping {round(timeout)} seconds and trying again...\n") - time.sleep(round(timeout)) - return None - except httpx.TimeoutException: - logger.info(f"Request timed out. Sleeping {round(timeout)} seconds and trying again...\n") - time.sleep(round(timeout)) - return None - except httpx.NetworkError: - logger.info(f"Network Error. Sleeping {round(timeout)} seconds and trying again...\n") - time.sleep(round(timeout)) - return None - except httpx.ProtocolError: - logger.info(f"Protocol Error. Sleeping {round(timeout*1.5)} seconds and trying again...\n") - time.sleep(round(timeout*1.5)) - return None - - return response - - -def process_dict_response(logger: logging.Logger, response: httpx.Response, page_data: dict) -> Optional[str]: - """Process dict response from the api and return the status. - - Args: - logger: handles logging - response: used to access the url of the request and the headers - page_data: dict response from the api - - Returns: - A string explaining what happened is returned if what happened is determined, otherwise None is returned. - """ - - status_code = response.status_code - if status_code == 429: - current_epoch = int(time.time()) - epoch_when_key_resets = int(response.headers["X-RateLimit-Reset"]) - key_reset_time = epoch_when_key_resets - current_epoch - - if key_reset_time < 0: - logger.error(f"Key reset time was less than 0 setting it to 0.\nThe current epoch is {current_epoch} and the epoch that the key resets at is {epoch_when_key_resets}") - key_reset_time = 0 - - logger.info(f"\n\n\nGitlab API rate limit exceeded. Sleeping until the key resets ({key_reset_time} seconds)") - time.sleep(key_reset_time) - - return GitlabApiResult.RATE_LIMIT_EXCEEDED - - - return GitlabApiResult.NEW_RESULT - -class GitlabPaginator(collections.abc.Sequence): - """This class is a sequence that handles paginating through data on the Gitlab API. +class GitlabApiHandler(): + """This class is a sequence that handles retrieving data from the Gitlab API. Attributes: url (str): The url that we are collecting data @@ -107,7 +35,7 @@ class GitlabPaginator(collections.abc.Sequence): logger (logging.Logger): Logger that handler printing information to files and stdout """ - def __init__(self, url: str, key_manager: GitlabRandomKeyAuth, logger: logging.Logger, from_datetime=None, to_datetime=None): + def __init__(self, key_manager: GitlabRandomKeyAuth, logger: logging.Logger): """Initialize the class GitlabPaginator. 
Args: @@ -117,57 +45,10 @@ def __init__(self, url: str, key_manager: GitlabRandomKeyAuth, logger: logging.L from_datetime: collects data after this datatime (not yet implemented) to_datetime: collects data before this datatime (not yet implemented) """ - remove_fields = ["per_page", "page"] - url = clean_url(url, remove_fields) - - # we need to add query params directly to the url, instead of passing the param to the httpx.Client.request - # this is because github will only append specified params to the links in the headers if they are a part - # of the url, and not the params with the request - params = {"per_page": 100} - url = add_query_params(url, params) - - self.url = url self.key_manager = key_manager self.logger = logger - # get the logger from the key manager - # self.logger = key_manager.logger - - self.from_datetime = from_datetime - self.to_datetime = to_datetime - - def __getitem__(self, index: int) -> Optional[dict]: - """Get the value at index of the Gitlab API data returned from the url. - - Args: - index: The index of the desired data from the Gitlab API - - Returns: - The value at the index - """ - - # get the page the item is on - items_page = (index // 100) + 1 - - # create url to query - params = {"page": items_page} - url = add_query_params(self.url, params) - - data, _, result = self.retrieve_data(url) - - if result != GitlabApiResult.SUCCESS: - self.logger.debug("Unable to get item from the api") - return None - - # get the position of data on the page - page_index = index % 100 - - try: - return data[page_index] - except KeyError as e: - raise KeyError("Data does not exists for that index") from e - - def __len__(self): + def get_length(self, url): """Get the length of the Gitlab API data. Returns: @@ -185,7 +66,7 @@ def __len__(self): self.logger.info(f"Num pages: {num_pages}") params = {"page": num_pages} - url = add_query_params(self.url, params) + url = add_query_params(url, params) # get the amount of data on last page data, _, result = self.retrieve_data(url) @@ -196,13 +77,16 @@ def __len__(self): self.logger.debug("Unable to retrieve data length from api") return 0 - def __iter__(self) -> Generator[Optional[dict], None, None]: + def iter(self, url) -> Generator[Optional[dict], None, None]: """Provide data from Gitlab API via a generator that yields one dict at a time. Yields: A piece of data from the github api as the specified url """ - data_list, response, result = self.retrieve_data(self.url) + + url = self._set_paginaton_query_params(url) + + data_list, response, result = self.retrieve_data(url) if result != GitlabApiResult.SUCCESS: self.logger.debug("Failed to retrieve the data even though 10 attempts were given") @@ -226,14 +110,18 @@ def __iter__(self) -> Generator[Optional[dict], None, None]: for data in data_list: yield data - def iter_pages(self) -> Generator[Tuple[Optional[List[dict]], int], None, None]: + def iter_pages(self, url) -> Generator[Tuple[Optional[List[dict]], int], None, None]: """Provide data from Gitlab API via a generator that yields a page of dicts at a time. 
Returns: A page of data from the Gitlab API at the specified url """ + + url = self._set_paginaton_query_params(url) + print(f"Iter pages url: {url}") + # retrieves the data for the given url - data_list, response, result = self.retrieve_data(self.url) + data_list, response, result = self.retrieve_data(url) if result != GitlabApiResult.SUCCESS: self.logger.debug("Failed to retrieve the data even though 10 attempts were given") @@ -241,7 +129,8 @@ def iter_pages(self) -> Generator[Tuple[Optional[List[dict]], int], None, None]: return # this retrieves the page for the given url - page_number = get_url_page_number(self.url) + page_number = get_url_page_number(url) + print(f"iter pages first page number: {page_number}") # yields the first page of data and its page number yield data_list, page_number @@ -250,6 +139,7 @@ def iter_pages(self) -> Generator[Tuple[Optional[List[dict]], int], None, None]: # gets the next page from the last responses header next_page = response.links['next']['url'] + print(f"next page url: {next_page}") # Here we don't need to pass in params with the page, or the default params because the url from the headers already has those values data_list, response, result = self.retrieve_data(next_page) @@ -260,6 +150,8 @@ def iter_pages(self) -> Generator[Tuple[Optional[List[dict]], int], None, None]: page_number = get_url_page_number(next_page) + print(f"iter pages page number: {page_number}") + # if either the data or response is None then yield None and return if data_list is None or response is None: return @@ -276,6 +168,7 @@ def retrieve_data(self, url: str) -> Tuple[Optional[List[dict]], Optional[httpx. Returns The response object from hitting the url and the data on the page """ + timeout = 30 timeout_count = 0 num_attempts = 1 @@ -292,57 +185,51 @@ def retrieve_data(self, url: str) -> Tuple[Optional[List[dict]], Optional[httpx. num_attempts += 1 continue - # if api returns a status of 204 No Content then return empty list + if response.status_code == 500: + self.logger.error(f"Gitlab returned {response.status_code} error when fetching {url}. Message: {response.json()}") + continue + + if response.status_code == 429: + + current_epoch = int(time.time()) + epoch_when_key_resets = int(response.headers["ratelimit-reset"]) + key_reset_time = epoch_when_key_resets - current_epoch + + if key_reset_time < 0: + self.logger.error(f"Key reset time was less than 0 setting it to 0.\nThe current epoch is {current_epoch} and the epoch that the key resets at is {epoch_when_key_resets}") + key_reset_time = 0 + + self.logger.info(f"\n\n\nGitlab API rate limit exceeded. Sleeping until the key resets ({key_reset_time} seconds)") + time.sleep(key_reset_time) + continue + if response.status_code == 204: return [], response, GitlabApiResult.SUCCESS - - page_data = parse_json_response(self.logger, response) - + if response.status_code >= 200 and response.status_code <=299: - # if the data is a list, then return it and the response - if isinstance(page_data, list) is True: + page_data = parse_json_response(self.logger, response) return page_data, response, GitlabApiResult.SUCCESS - - # if the data is a dict then call process_dict_response, and - if isinstance(page_data, dict) is True: - dict_processing_result = process_dict_response(self.logger, response, page_data) - - if dict_processing_result == GitlabApiResult.NEW_RESULT: - print(f"Encountered new dict response from api on url: {url}. 
Response: {page_data}") - return None, None, GitlabApiResult.NEW_RESULT - - if dict_processing_result == GitlabApiResult.REPO_NOT_FOUND: - return None, response, GitlabApiResult.REPO_NOT_FOUND - - if dict_processing_result in (GitlabApiResult.SECONDARY_RATE_LIMIT, GitlabApiResult.ABUSE_MECHANISM_TRIGGERED): - continue - - if dict_processing_result == GitlabApiResult.RATE_LIMIT_EXCEEDED: - num_attempts = 0 - continue - - if isinstance(page_data, str) is True: - str_processing_result: Union[str, List[dict]] = self.process_str_response(page_data) - - if isinstance(str_processing_result, list): - return str_processing_result, response, GitlabApiResult.SUCCESS - - num_attempts += 1 + + self.logger.warning(f"Unhandled gitlab response. Status code: {response.status_code}. Body: {response.json()}") + self.logger.error("Unable to collect data in 10 attempts") return None, None, GitlabApiResult.NO_MORE_ATTEMPTS - def get_num_pages(self) -> Optional[int]: + def get_num_pages(self, url) -> Optional[int]: """Get the number of pages of data that a url can paginate through. Returns: The number of pages a url can access """ + + url = self._set_paginaton_query_params(url) + timeout: float = 5 num_attempts = 0 while num_attempts < 10: - r = hit_api(self.key_manager, self.url, self.logger, timeout=timeout, method="HEAD") + r = self.hit_api(url=url, timeout=timeout, method="HEAD") if r: break @@ -365,39 +252,22 @@ def get_num_pages(self) -> Optional[int]: return num_pages - def hit_api(self, url, timeout): - - return hit_api(self.key_manager, url, self.logger, timeout) - - -################################################### + def hit_api(self, url, timeout, method): - def process_str_response(self, page_data: str) -> Union[str, List[dict]]: - """Process an api response of type string. + return hit_api(self.key_manager, url, self.logger, timeout, method=method) - Args: - page_data: the string response from the api that is being processed - - Returns: - html_response, empty_string, and failed_to_parse_jsonif the data is not processable. - Or a list of dicts if the json was parasable - """ - self.logger.info(f"Warning! page_data was string: {page_data}\n") - - if "" in page_data: - self.logger.info("HTML was returned, trying again...\n") - return GitlabApiResult.HTML + def _set_paginaton_query_params(self, url): - if not page_data: - self.logger.info("Empty string, trying again...\n") - return GitlabApiResult.EMPTY_STRING + remove_fields = ["per_page", "page"] + url = clean_url(url, remove_fields) - try: - list_of_dict_page_data = json.loads(page_data) - return list_of_dict_page_data - except TypeError: - return "failed_to_parse_json" + # we need to add query params directly to the url, instead of passing the param to the httpx.Client.request + # this is because github will only append specified params to the links in the headers if they are a part + # of the url, and not the params with the request + params = {"per_page": 100} + url = add_query_params(url, params) + return url ################################################################################ @@ -443,10 +313,6 @@ def add_query_params(url: str, additional_params: dict) -> str: return url_components._replace(query=updated_query).geturl() - -################################################################################ - - def get_url_page_number(url: str) -> int: """Parse the page number from the url. 
@@ -469,68 +335,37 @@ def get_url_page_number(url: str) -> int: return page_number +################################################################################ -def retrieve_dict_from_endpoint(logger, key_auth, url, timeout_wait=10) -> Tuple[Optional[dict], GitlabApiResult]: - timeout = timeout_wait - timeout_count = 0 - num_attempts = 1 - - while num_attempts <= 10: - - response = hit_api(key_auth, url, logger, timeout) - - if response is None: - if timeout_count == 10: - logger.error(f"Request timed out 10 times for {url}") - return None, GitlabApiResult.TIMEOUT - - timeout = timeout * 1.1 - num_attempts += 1 - continue - - - page_data = parse_json_response(logger, response) - - if isinstance(page_data, str): - str_processing_result: Union[str, List[dict]] = process_str_response(logger,page_data) - - if isinstance(str_processing_result, dict): - #return str_processing_result, response, GitlabApiResult.SUCCESS - page_data = str_processing_result - else: - num_attempts += 1 - continue - - # if the data is a list, then return it and the response - if isinstance(page_data, list): - logger.warning("Wrong type returned, trying again...") - logger.info(f"Returned list: {page_data}") - - # if the data is a dict then call process_dict_response, and - elif isinstance(page_data, dict): - dict_processing_result = process_dict_response(logger, response, page_data) - - if dict_processing_result == GitlabApiResult.SUCCESS: - return page_data, dict_processing_result - if dict_processing_result == GitlabApiResult.NEW_RESULT: - logger.info(f"Encountered new dict response from api on url: {url}. Response: {page_data}") - return None, GitlabApiResult.NEW_RESULT - - if dict_processing_result == GitlabApiResult.REPO_NOT_FOUND: - return None, GitlabApiResult.REPO_NOT_FOUND - - if dict_processing_result in (GitlabApiResult.SECONDARY_RATE_LIMIT, GitlabApiResult.ABUSE_MECHANISM_TRIGGERED): - continue - - if dict_processing_result == GitlabApiResult.RATE_LIMIT_EXCEEDED: - num_attempts = 0 - continue +def hit_api(key_manager, url: str, logger: logging.Logger, timeout: float = 10, method: str = 'GET', ) -> Optional[httpx.Response]: + """Ping the api and get the data back for the page. - + Returns: + A httpx response that contains the data. None if a timeout occurs + """ + # self.logger.info(f"Hitting endpoint with {method} request: {url}...\n") - num_attempts += 1 + with httpx.Client() as client: - logger.error("Unable to collect data in 10 attempts") - return None, GitlabApiResult.NO_MORE_ATTEMPTS + try: + response = client.request( + method=method, url=url, auth=key_manager, timeout=timeout, follow_redirects=True) + except TimeoutError: + logger.info(f"Request timed out. Sleeping {round(timeout)} seconds and trying again...\n") + time.sleep(round(timeout)) + return None + except httpx.TimeoutException: + logger.info(f"Request timed out. Sleeping {round(timeout)} seconds and trying again...\n") + time.sleep(round(timeout)) + return None + except httpx.NetworkError: + logger.info(f"Network Error. Sleeping {round(timeout)} seconds and trying again...\n") + time.sleep(round(timeout)) + return None + except httpx.ProtocolError: + logger.info(f"Protocol Error. 
Sleeping {round(timeout*1.5)} seconds and trying again...\n") + time.sleep(round(timeout*1.5)) + return None + return response \ No newline at end of file diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py index 6504615f8b..fa4427f79c 100644 --- a/augur/tasks/gitlab/issues_task.py +++ b/augur/tasks/gitlab/issues_task.py @@ -3,7 +3,7 @@ from augur.tasks.init.celery_app import celery_app as celery from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask -from augur.tasks.gitlab.gitlab_paginator import GitlabPaginator +from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest from augur.application.db.data_parse import extract_needed_issue_data_from_gitlab_issue, extract_needed_gitlab_issue_label_data, extract_needed_gitlab_issue_assignee_data from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts @@ -50,11 +50,11 @@ def retrieve_all_gitlab_issue_data(repo_git, logger, key_auth) -> None: logger.info(f"Collecting gitlab issues for {owner}/{repo}") url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/issues?with_labels_details=True" - issues = GitlabPaginator(url, key_auth, logger) + issues = GitlabApiHandler(key_auth, logger) all_data = [] - num_pages = issues.get_num_pages() - for page_data, page in issues.iter_pages(): + num_pages = issues.get_num_pages(url) + for page_data, page in issues.iter_pages(url): if page_data is None: return all_data @@ -179,9 +179,9 @@ def retrieve_all_gitlab_issue_comments(key_auth, logger, issue_ids, repo_git): print(f"Collecting {owner}/{repo} gitlab issue comments for issue {index} of {issue_count}") url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/issues/{id}/notes" - comments = GitlabPaginator(url, key_auth, logger) + comments = GitlabApiHandler(key_auth, logger) - for page_data, page in comments.iter_pages(): + for page_data, page in comments.iter_pages(url): if page_data is None or len(page_data) == 0: break @@ -219,11 +219,11 @@ def retrieve_all_gitlab_issue_event_data(repo_git, logger, key_auth) -> None: logger.info(f"Collecting gitlab issue events for {owner}/{repo}") url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/events?target_type=issue" - events = GitlabPaginator(url, key_auth, logger) + events = GitlabApiHandler(key_auth, logger) all_data = [] - num_pages = events.get_num_pages() - for page_data, page in events.iter_pages(): + num_pages = events.get_num_pages(url) + for page_data, page in events.iter_pages(url): if page_data is None: return all_data diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index b1ecbb481b..eda51ea61f 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -2,7 +2,7 @@ from augur.tasks.init.celery_app import celery_app as celery from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask -from augur.tasks.gitlab.gitlab_paginator import GitlabPaginator +from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts @@ -41,11 +41,11 @@ def retrieve_all_mr_data(repo_git: str, logger, key_auth) -> None: logger.info(f"Collecting 
pull requests for {owner}/{repo}") url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests?with_labels_details=True" - mrs = GitlabPaginator(url, key_auth, logger) + mrs = GitlabApiHandler(key_auth, logger) all_data = [] - num_pages = mrs.get_num_pages() - for page_data, page in mrs.iter_pages(): + num_pages = mrs.get_num_pages(url) + for page_data, page in mrs.iter_pages(url): if page_data is None: return all_data @@ -53,7 +53,7 @@ def retrieve_all_mr_data(repo_git: str, logger, key_auth) -> None: if len(page_data) == 0: logger.debug( f"{owner}/{repo} Mrs Page {page} contains no data...returning") - logger.info(f"{owner}/{repo} Prs Page {page} of {num_pages}") + logger.info(f"{owner}/{repo} Mrs Page {page} of {num_pages}") return all_data logger.info(f"{owner}/{repo} Mrs Page {page} of {num_pages}") @@ -129,10 +129,20 @@ def process_merge_requests(data, task_name, repo_id, logger, augur_db): def collect_merge_request_comments(mr_ids, repo_git) -> int: owner, repo = get_owner_repo(repo_git) - id = mr_ids[0] - url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/notes" - # print(f"Repo git: {repo_git}. Len ids: {mr_ids}") + logger = logging.getLogger(collect_merge_request_comments.__name__) + with GitlabTaskManifest(logger) as manifest: + + url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/notes".format(owner=owner, repo=repo, id="{id}") + comments = retrieve_merge_request_data(mr_ids, url, "comments", owner, repo, manifest.key_auth, logger) + + if comments: + logger.info(f"Length of merge request comments: {len(comments)}") + logger.info(f"Mr comment: {comments[0]}") + #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + else: + logger.info(f"{owner}/{repo} has no gitlab merge request comments") + @celery.task(base=AugurCoreRepoCollectionTask) @@ -147,46 +157,116 @@ def collect_merge_request_events(repo_git) -> int: @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_metadata(mr_ids, repo_git) -> int: - - owner, repo = get_owner_repo(repo_git) - id = mr_ids[0] - url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}" - #print("Collect merge request metadata") - # print(f"Repo git: {repo_git}. 
Len ids: {mr_ids}") + pass + + # TODO: Figure out how to handle new response it gitlab paginator + # owner, repo = get_owner_repo(repo_git) + + # logger = logging.getLogger(collect_merge_request_metadata.__name__) + # with GitlabTaskManifest(logger) as manifest: + + # url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}".format(owner=owner, repo=repo, id="{id}") + # metadata = retrieve_merge_request_data(mr_ids, url, "metadata", owner, repo, manifest.key_auth, logger) + + # if metadata: + # logger.info(f"Length of merge request metadata: {len(metadata)}") + # logger.info(f"Mr metadata: {metadata[0]}") + # #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + # else: + # logger.info(f"{owner}/{repo} has no gitlab merge request metadata") + @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_reviewers(mr_ids, repo_git) -> int: - - owner, repo = get_owner_repo(repo_git) - id = mr_ids[0] - url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/approvals" + pass + + # TODO: Figure out how to handle new response it gitlab paginator + # owner, repo = get_owner_repo(repo_git) + + # logger = logging.getLogger(collect_merge_request_reviewers.__name__) + # with GitlabTaskManifest(logger) as manifest: + + # url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/approvals".format(owner=owner, repo=repo, id="{id}") + # reviewers = retrieve_merge_request_data(mr_ids, url, "reviewers", owner, repo, manifest.key_auth, logger) + + # if reviewers: + # logger.info(f"Length of merge request reviewers: {len(reviewers)}") + # logger.info(f"Mr reviewer: {reviewers[0]}") + # #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + # else: + # logger.info(f"{owner}/{repo} has no gitlab merge request reviewers") + #print("Collect merge request reviewers") # print(f"Repo git: {repo_git}. Len ids: {mr_ids}") @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_commits(mr_ids, repo_git) -> int: - - owner, repo = get_owner_repo(repo_git) - id = mr_ids[0] - url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/commits" - #print("Collect merge request commits") - # print(f"Repo git: {repo_git}. 
Len ids: {mr_ids}") + pass + + # TODO: Figure out how to handle new response it gitlab paginator + # owner, repo = get_owner_repo(repo_git) + + # logger = logging.getLogger(collect_merge_request_comments.__name__) + # with GitlabTaskManifest(logger) as manifest: + + # url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/commits".format(owner=owner, repo=repo, id="{id}") + # commits = retrieve_merge_request_data(mr_ids, url, "commits", owner, repo, manifest.key_auth, logger) + + # if commits: + # logger.info(f"Length of merge request commits: {len(commits)}") + # logger.info(f"Mr commit: {commits[0]}") + # #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + # else: + # logger.info(f"{owner}/{repo} has no gitlab merge request commits") @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_files(mr_ids, repo_git) -> int: + + pass + + # TODO: Figure out how to handle new response it gitlab paginator + # owner, repo = get_owner_repo(repo_git) + + # logger = logging.getLogger(collect_merge_request_comments.__name__) + # with GitlabTaskManifest(logger) as manifest: + + # url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/changes".format(owner=owner, repo=repo, id="{id}") + # files = retrieve_merge_request_data(mr_ids, url, "files", owner, repo, manifest.key_auth, logger) + + # if files: + # logger.info(f"Length of merge request files: {len(files)}") + # logger.info(f"Mr comment: {files[0]}") + # #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + # else: + # logger.info(f"{owner}/{repo} has no gitlab merge request files") - owner, repo = get_owner_repo(repo_git) - id = mr_ids[0] - url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/cahnges" - #print("Collect merge request files") - # print(f"Repo git: {repo_git}. 
Len ids: {mr_ids}") +def retrieve_merge_request_data(ids, url, name, owner, repo, key_auth, logger): + + all_data = [] + issue_count = len(ids) + index = 1 + + data = GitlabApiHandler(key_auth, logger) + for id in ids: + if len(all_data) > 40: + return all_data + + print(f"Collecting {owner}/{repo} gitlab merge request {name} for merge request {index} of {issue_count}") + for page_data, _ in data.iter_pages(url): + + if page_data is None or len(page_data) == 0: + break - \ No newline at end of file + all_data += page_data + + index += 1 + + return all_data From 4d5b4b2098e9ab2f6d1e9170543c33602aa3bce0 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 11:06:06 -0600 Subject: [PATCH 050/122] Add collection for all the mr data Signed-off-by: Andrew Brain --- augur/tasks/gitlab/gitlab_api_handler.py | 13 +-- augur/tasks/gitlab/merge_request_task.py | 133 +++++++++++------------ 2 files changed, 71 insertions(+), 75 deletions(-) diff --git a/augur/tasks/gitlab/gitlab_api_handler.py b/augur/tasks/gitlab/gitlab_api_handler.py index 97b1690006..3a463127bb 100644 --- a/augur/tasks/gitlab/gitlab_api_handler.py +++ b/augur/tasks/gitlab/gitlab_api_handler.py @@ -16,7 +16,7 @@ class GitlabApiResult(Enum): SUCCESS = 0 TIMEOUT = 1 NO_MORE_ATTEMPTS = 2 - REPO_NOT_FOUND = 3 + NOT_FOUND = 3 SECONDARY_RATE_LIMIT = 4 RATE_LIMIT_EXCEEDED = 5 ABUSE_MECHANISM_TRIGGERED = 6 @@ -118,7 +118,6 @@ def iter_pages(self, url) -> Generator[Tuple[Optional[List[dict]], int], None, N """ url = self._set_paginaton_query_params(url) - print(f"Iter pages url: {url}") # retrieves the data for the given url data_list, response, result = self.retrieve_data(url) @@ -130,7 +129,6 @@ def iter_pages(self, url) -> Generator[Tuple[Optional[List[dict]], int], None, N # this retrieves the page for the given url page_number = get_url_page_number(url) - print(f"iter pages first page number: {page_number}") # yields the first page of data and its page number yield data_list, page_number @@ -139,7 +137,6 @@ def iter_pages(self, url) -> Generator[Tuple[Optional[List[dict]], int], None, N # gets the next page from the last responses header next_page = response.links['next']['url'] - print(f"next page url: {next_page}") # Here we don't need to pass in params with the page, or the default params because the url from the headers already has those values data_list, response, result = self.retrieve_data(next_page) @@ -150,8 +147,6 @@ def iter_pages(self, url) -> Generator[Tuple[Optional[List[dict]], int], None, N page_number = get_url_page_number(next_page) - print(f"iter pages page number: {page_number}") - # if either the data or response is None then yield None and return if data_list is None or response is None: return @@ -203,9 +198,13 @@ def retrieve_data(self, url: str) -> Tuple[Optional[List[dict]], Optional[httpx. 
time.sleep(key_reset_time) continue + if response.status_code == 404: + self.logger.info(f"ERROR: 404 not found for {url}") + return [], response, GitlabApiResult.NOT_FOUND + if response.status_code == 204: return [], response, GitlabApiResult.SUCCESS - + if response.status_code >= 200 and response.status_code <=299: page_data = parse_json_response(self.logger, response) diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index eda51ea61f..b3dd5151dd 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -134,7 +134,7 @@ def collect_merge_request_comments(mr_ids, repo_git) -> int: with GitlabTaskManifest(logger) as manifest: url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/notes".format(owner=owner, repo=repo, id="{id}") - comments = retrieve_merge_request_data(mr_ids, url, "comments", owner, repo, manifest.key_auth, logger) + comments = retrieve_merge_request_data(mr_ids, url, "comments", owner, repo, manifest.key_auth, logger, response_type="list") if comments: logger.info(f"Length of merge request comments: {len(comments)}") @@ -158,114 +158,111 @@ def collect_merge_request_events(repo_git) -> int: @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_metadata(mr_ids, repo_git) -> int: - pass - - # TODO: Figure out how to handle new response it gitlab paginator - # owner, repo = get_owner_repo(repo_git) + owner, repo = get_owner_repo(repo_git) - # logger = logging.getLogger(collect_merge_request_metadata.__name__) - # with GitlabTaskManifest(logger) as manifest: + logger = logging.getLogger(collect_merge_request_metadata.__name__) + with GitlabTaskManifest(logger) as manifest: - # url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}".format(owner=owner, repo=repo, id="{id}") - # metadata = retrieve_merge_request_data(mr_ids, url, "metadata", owner, repo, manifest.key_auth, logger) + url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}".format(owner=owner, repo=repo, id="{id}") + metadata_list = retrieve_merge_request_data(mr_ids, url, "metadata", owner, repo, manifest.key_auth, logger, response_type="dict") - # if metadata: - # logger.info(f"Length of merge request metadata: {len(metadata)}") - # logger.info(f"Mr metadata: {metadata[0]}") - # #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) - # else: - # logger.info(f"{owner}/{repo} has no gitlab merge request metadata") + if metadata_list: + logger.info(f"Length of merge request metadata: {len(metadata_list)}") + logger.info(f"Mr metadata: {metadata_list[0]}") + #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + else: + logger.info(f"{owner}/{repo} has no gitlab merge request metadata") @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_reviewers(mr_ids, repo_git) -> int: - pass - - # TODO: Figure out how to handle new response it gitlab paginator - # owner, repo = get_owner_repo(repo_git) + owner, repo = get_owner_repo(repo_git) - # logger = logging.getLogger(collect_merge_request_reviewers.__name__) - # with GitlabTaskManifest(logger) as manifest: + logger = logging.getLogger(collect_merge_request_reviewers.__name__) + with GitlabTaskManifest(logger) as manifest: - # url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/approvals".format(owner=owner, repo=repo, id="{id}") - # reviewers 
= retrieve_merge_request_data(mr_ids, url, "reviewers", owner, repo, manifest.key_auth, logger) + url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/approvals".format(owner=owner, repo=repo, id="{id}") + reviewers = retrieve_merge_request_data(mr_ids, url, "reviewers", owner, repo, manifest.key_auth, logger, response_type="dict") - # if reviewers: - # logger.info(f"Length of merge request reviewers: {len(reviewers)}") - # logger.info(f"Mr reviewer: {reviewers[0]}") - # #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) - # else: - # logger.info(f"{owner}/{repo} has no gitlab merge request reviewers") + if reviewers: + logger.info(f"Length of merge request reviewers: {len(reviewers)}") + logger.info(f"Mr reviewer: {reviewers[0]}") + #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + else: + logger.info(f"{owner}/{repo} has no gitlab merge request reviewers") - #print("Collect merge request reviewers") - # print(f"Repo git: {repo_git}. Len ids: {mr_ids}") - @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_commits(mr_ids, repo_git) -> int: - pass - - # TODO: Figure out how to handle new response it gitlab paginator - # owner, repo = get_owner_repo(repo_git) + owner, repo = get_owner_repo(repo_git) - # logger = logging.getLogger(collect_merge_request_comments.__name__) - # with GitlabTaskManifest(logger) as manifest: + logger = logging.getLogger(collect_merge_request_comments.__name__) + with GitlabTaskManifest(logger) as manifest: - # url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/commits".format(owner=owner, repo=repo, id="{id}") - # commits = retrieve_merge_request_data(mr_ids, url, "commits", owner, repo, manifest.key_auth, logger) + url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/commits".format(owner=owner, repo=repo, id="{id}") + commits = retrieve_merge_request_data(mr_ids, url, "commits", owner, repo, manifest.key_auth, logger, response_type="list") - # if commits: - # logger.info(f"Length of merge request commits: {len(commits)}") - # logger.info(f"Mr commit: {commits[0]}") - # #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) - # else: - # logger.info(f"{owner}/{repo} has no gitlab merge request commits") + if commits: + logger.info(f"Length of merge request commits: {len(commits)}") + logger.info(f"Mr commit: {commits[0]}") + #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + else: + logger.info(f"{owner}/{repo} has no gitlab merge request commits") @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_files(mr_ids, repo_git) -> int: - pass - - # TODO: Figure out how to handle new response it gitlab paginator - # owner, repo = get_owner_repo(repo_git) + owner, repo = get_owner_repo(repo_git) - # logger = logging.getLogger(collect_merge_request_comments.__name__) - # with GitlabTaskManifest(logger) as manifest: + logger = logging.getLogger(collect_merge_request_comments.__name__) + with GitlabTaskManifest(logger) as manifest: - # url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/changes".format(owner=owner, repo=repo, id="{id}") - # files = retrieve_merge_request_data(mr_ids, url, "files", owner, repo, manifest.key_auth, logger) + url = 
"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/changes".format(owner=owner, repo=repo, id="{id}") + files = retrieve_merge_request_data(mr_ids, url, "files", owner, repo, manifest.key_auth, logger, response_type="dict") - # if files: - # logger.info(f"Length of merge request files: {len(files)}") - # logger.info(f"Mr comment: {files[0]}") - # #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) - # else: - # logger.info(f"{owner}/{repo} has no gitlab merge request files") + if files: + logger.info(f"Length of merge request files: {len(files)}") + logger.info(f"Mr file: {files[0]}") + #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + else: + logger.info(f"{owner}/{repo} has no gitlab merge request files") -def retrieve_merge_request_data(ids, url, name, owner, repo, key_auth, logger): +def retrieve_merge_request_data(ids, url, name, owner, repo, key_auth, logger, response_type): all_data = [] issue_count = len(ids) index = 1 - data = GitlabApiHandler(key_auth, logger) + api_handler = GitlabApiHandler(key_auth, logger) for id in ids: - if len(all_data) > 40: + if len(all_data) > 10: return all_data - + print(f"Collecting {owner}/{repo} gitlab merge request {name} for merge request {index} of {issue_count}") - for page_data, _ in data.iter_pages(url): + formatted_url = url.format(id=id) + + if response_type == "dict": + page_data, _, _ = api_handler.retrieve_data(formatted_url) + if page_data: + all_data.append(page_data) + + elif response_type == "list": - if page_data is None or len(page_data) == 0: - break + for page_data, _ in api_handler.iter_pages(formatted_url): - all_data += page_data + if page_data is None or len(page_data) == 0: + break + + all_data += page_data + + else: + raise Exception(f"Unexpected reponse type: {response_type}") index += 1 From 9793d4455669b7fcce081a50522048b34d8c8351 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 11:12:37 -0600 Subject: [PATCH 051/122] Add merge request events collection Signed-off-by: Andrew Brain --- augur/tasks/gitlab/issues_task.py | 45 ------------------------ augur/tasks/gitlab/merge_request_task.py | 8 ----- augur/tasks/init/celery_app.py | 3 +- augur/tasks/start_tasks.py | 3 +- 4 files changed, 4 insertions(+), 55 deletions(-) diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py index fa4427f79c..4f035a6024 100644 --- a/augur/tasks/gitlab/issues_task.py +++ b/augur/tasks/gitlab/issues_task.py @@ -194,48 +194,3 @@ def retrieve_all_gitlab_issue_comments(key_auth, logger, issue_ids, repo_git): -@celery.task(base=AugurCoreRepoCollectionTask) -def collect_gitlab_issue_events(repo_git) -> int: - - owner, repo = get_owner_repo(repo_git) - - logger = logging.getLogger(collect_gitlab_issues.__name__) - with GitlabTaskManifest(logger) as manifest: - - events = retrieve_all_gitlab_issue_event_data(repo_git, logger, manifest.key_auth) - - if events: - logger.info(f"Length of events: {len(events)}") - #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) - else: - logger.info(f"{owner}/{repo} has no gitlab issue events") - - - -def retrieve_all_gitlab_issue_event_data(repo_git, logger, key_auth) -> None: - - owner, repo = get_owner_repo(repo_git) - - logger.info(f"Collecting gitlab issue events for {owner}/{repo}") - - url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/events?target_type=issue" - events = 
GitlabApiHandler(key_auth, logger) - - all_data = [] - num_pages = events.get_num_pages(url) - for page_data, page in events.iter_pages(url): - - if page_data is None: - return all_data - - if len(page_data) == 0: - logger.debug( - f"{owner}/{repo}: Gitlab Issue Events Page {page} contains no data...returning") - logger.info(f"{owner}/{repo}: Issue Events Page {page} of {num_pages}") - return all_data - - logger.info(f"{owner}/{repo}: Gitlab Issue Events Page {page} of {num_pages}") - - all_data += page_data - - return all_data \ No newline at end of file diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index b3dd5151dd..bf9e55be4d 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -145,14 +145,6 @@ def collect_merge_request_comments(mr_ids, repo_git) -> int: -@celery.task(base=AugurCoreRepoCollectionTask) -def collect_merge_request_events(repo_git) -> int: - - owner, repo = get_owner_repo(repo_git) - - url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/events?target_type=merge_request" - #print("Collect merge request events") - # print(f"Repo git: {repo_git}. Len ids: {mr_ids}") @celery.task(base=AugurCoreRepoCollectionTask) diff --git a/augur/tasks/init/celery_app.py b/augur/tasks/init/celery_app.py index a2b06a22a6..ee6eaeccdf 100644 --- a/augur/tasks/init/celery_app.py +++ b/augur/tasks/init/celery_app.py @@ -51,7 +51,8 @@ class CollectionState(Enum): 'augur.tasks.github.traffic.tasks'] gitlab_tasks = ['augur.tasks.gitlab.merge_request_task', - 'augur.tasks.gitlab.issues_task'] + 'augur.tasks.gitlab.issues_task', + 'augur.tasks.gitlab.events_task'] git_tasks = ['augur.tasks.git.facade_tasks', 'augur.tasks.git.dependency_tasks.tasks', diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index ae819f2030..b9cb5dd8b6 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -25,7 +25,8 @@ from augur.tasks.git.dependency_tasks.tasks import process_ossf_dependency_metrics from augur.tasks.github.traffic.tasks import collect_github_repo_clones_data from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests, collect_merge_request_comments, collect_merge_request_events, collect_merge_request_metadata, collect_merge_request_reviewers, collect_merge_request_commits, collect_merge_request_files -from augur.tasks.gitlab.issues_task import collect_gitlab_issues, collect_gitlab_issue_comments, collect_gitlab_issue_events +from augur.tasks.gitlab.issues_task import collect_gitlab_issues, collect_gitlab_issue_comments +from augur.tasks.gitlab.events_task import collect_gitlab_issue_events, collect_gitlab_merge_request_events from augur.tasks.git.facade_tasks import * from augur.tasks.db.refresh_materialized_views import * # from augur.tasks.data_analysis import * From 29c31d5a8012ee50effc35c1719f82c90359e1ff Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 11:18:49 -0600 Subject: [PATCH 052/122] Add gitlab event data mappers Signed-off-by: Andrew Brain --- augur/application/db/data_parse.py | 44 ++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index 6cc94494ae..40733923e8 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -691,3 +691,47 @@ def extract_needed_issue_data_from_gitlab_issue(issue: dict, repo_id: int, tool_ +def extract_gitlab_mr_event_data(event: dict, pr_id: int, 
platform_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict: + + mr_event = { + 'pull_request_id': pr_id, + 'cntrb_id': None, + 'action': event['action_name'], + 'action_commit_hash': None, + 'created_at': event['created_at'], + 'issue_event_src_id': event['target_id'], + 'repo_id': repo_id, + 'platform_id': platform_id, + 'node_id': None, + 'node_url': None, + 'tool_source': tool_source, + 'tool_version': tool_version, + 'data_source': data_source + } + + return mr_event + +def extract_gitlab_issue_event_data(event: dict, issue_id: int, platform_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict: + + issue_event = { + "issue_event_src_id": event['target_id'], + "issue_id": issue_id, + "node_id": None, + "node_url": None, + "cntrb_id": None, + "created_at": event['created_at'], + "action": event["action_name"], + "action_commit_hash": None, + "platform_id": platform_id, + "repo_id" : repo_id, + "tool_source": tool_source, + "tool_version": tool_version, + "data_source": data_source + } + + return issue_event + + + + + From 0d0efb410385d3673474abdf4f87c1536f4eb018 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 11:37:51 -0600 Subject: [PATCH 053/122] Add issue event db inserts Signed-off-by: Andrew Brain --- augur/tasks/gitlab/events_task.py | 117 ++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 augur/tasks/gitlab/events_task.py diff --git a/augur/tasks/gitlab/events_task.py b/augur/tasks/gitlab/events_task.py new file mode 100644 index 0000000000..a93bb4edbc --- /dev/null +++ b/augur/tasks/gitlab/events_task.py @@ -0,0 +1,117 @@ +import logging + +from augur.tasks.init.celery_app import celery_app as celery +from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask +from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler +from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest +from augur.application.db.data_parse import extract_gitlab_mr_event_data, extract_gitlab_issue_event_data +from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts +from augur.application.db.models import Repo, Issue, IssueEvent, PullRequest +from augur.application.db.util import execute_session_query + +platform_id = 2 + +@celery.task(base=AugurCoreRepoCollectionTask) +def collect_gitlab_issue_events(repo_git) -> int: + + owner, repo = get_owner_repo(repo_git) + + logger = logging.getLogger(collect_gitlab_issue_events.__name__) + with GitlabTaskManifest(logger) as manifest: + + augur_db = manifest.augur_db + + query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git) + repo_obj = execute_session_query(query, 'one') + repo_id = repo_obj.repo_id + + events = retrieve_all_gitlab_event_data("issue", repo_git, logger, manifest.key_auth) + + if events: + logger.info(f"Length of gitlab issue events: {len(events)}") + process_issue_events(events, f"{owner}/{repo}: Gitlab Issue Events task", repo_id, logger, augur_db) + else: + logger.info(f"{owner}/{repo} has no gitlab issue events") + + +@celery.task(base=AugurCoreRepoCollectionTask) +def collect_gitlab_merge_request_events(repo_git) -> int: + + owner, repo = get_owner_repo(repo_git) + + logger = logging.getLogger(collect_gitlab_issue_events.__name__) + with GitlabTaskManifest(logger) as manifest: + + events = retrieve_all_gitlab_event_data("merge_request", repo_git, logger, manifest.key_auth) + + if events: + logger.info(f"Length of gitlab merge request events: 
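The two mappers above flatten a raw GitLab event into rows shaped for Augur's event tables, leaving the fields GitLab does not supply (cntrb_id, node_id, node_url, action_commit_hash) as None. A quick usage sketch with a made-up event carrying only the three fields the mapper reads:

sample_event = {
    "action_name": "closed",
    "target_id": 987654,                      # becomes issue_event_src_id
    "created_at": "2023-12-09T11:00:00.000Z",
}

row = extract_gitlab_issue_event_data(
    sample_event, issue_id=1, platform_id=2, repo_id=25551,
    tool_source="Gitlab events task", tool_version="2.0", data_source="Gitlab API")

assert row["action"] == "closed" and row["node_id"] is None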
{len(events)}") + #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + else: + logger.info(f"{owner}/{repo} has no gitlab merge request events") + + +def retrieve_all_gitlab_event_data(type, repo_git, logger, key_auth) -> None: + + owner, repo = get_owner_repo(repo_git) + + logger.info(f"Collecting gitlab issue events for {owner}/{repo}") + + url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/events?target_type={type}" + events = GitlabApiHandler(key_auth, logger) + + all_data = [] + num_pages = events.get_num_pages(url) + for page_data, page in events.iter_pages(url): + + if page_data is None: + return all_data + + if len(page_data) == 0: + logger.debug( + f"{owner}/{repo}: Gitlab {type} Events Page {page} contains no data...returning") + logger.info(f"{owner}/{repo}: {type} Events Page {page} of {num_pages}") + return all_data + + logger.info(f"{owner}/{repo}: Gitlab {type} Events Page {page} of {num_pages}") + + all_data += page_data + + return all_data + +def process_issue_events(events, task_name, repo_id, logger, augur_db): + + tool_source = "Gitlab events task" + tool_version = "2.0" + data_source = "Gitlab API" + + issue_event_dicts = [] + + # create mapping from issue number to issue id of current issues + issue_url_to_id_map = {} + issues = augur_db.session.query(Issue).filter(Issue.repo_id == repo_id).all() + for issue in issues: + issue_url_to_id_map[issue.gh_issue_number] = issue.issue_id + + for event in events: + + issue_number = event["target_iid"] + + try: + issue_id = issue_url_to_id_map[issue_number] + except KeyError: + logger.info(f"{task_name}: Could not find related issue") + logger.info(f"{task_name}: We were searching for an issue with number {issue_number} in repo {repo_id}") + logger.info(f"{task_name}: Skipping") + continue + + issue_event_dicts.append( + extract_gitlab_issue_event_data(event, issue_id, platform_id, repo_id, + tool_source, tool_version, data_source) + ) + + logger.info(f"{task_name}: Inserting {len(issue_event_dicts)} gitlab issue events") + issue_event_natural_keys = ["issue_id", "issue_event_src_id"] + augur_db.insert_data(issue_event_dicts, IssueEvent, issue_event_natural_keys) + + From 0e1dde5956f13abf946193074729d4073f0db19c Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 11:49:12 -0600 Subject: [PATCH 054/122] Add mr event processing Signed-off-by: Andrew Brain --- augur/tasks/gitlab/events_task.py | 49 +++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/augur/tasks/gitlab/events_task.py b/augur/tasks/gitlab/events_task.py index a93bb4edbc..4224988b9f 100644 --- a/augur/tasks/gitlab/events_task.py +++ b/augur/tasks/gitlab/events_task.py @@ -6,7 +6,7 @@ from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest from augur.application.db.data_parse import extract_gitlab_mr_event_data, extract_gitlab_issue_event_data from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts -from augur.application.db.models import Repo, Issue, IssueEvent, PullRequest +from augur.application.db.models import Repo, Issue, IssueEvent, PullRequest, PullRequestEvent from augur.application.db.util import execute_session_query platform_id = 2 @@ -42,11 +42,17 @@ def collect_gitlab_merge_request_events(repo_git) -> int: logger = logging.getLogger(collect_gitlab_issue_events.__name__) with GitlabTaskManifest(logger) as manifest: + augur_db = manifest.augur_db + + query = 
augur_db.session.query(Repo).filter(Repo.repo_git == repo_git) + repo_obj = execute_session_query(query, 'one') + repo_id = repo_obj.repo_id + events = retrieve_all_gitlab_event_data("merge_request", repo_git, logger, manifest.key_auth) if events: logger.info(f"Length of gitlab merge request events: {len(events)}") - #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + process_mr_events(events, f"{owner}/{repo}: Gitlab MR Events task", repo_id, logger, augur_db) else: logger.info(f"{owner}/{repo} has no gitlab merge request events") @@ -81,7 +87,7 @@ def retrieve_all_gitlab_event_data(type, repo_git, logger, key_auth) -> None: def process_issue_events(events, task_name, repo_id, logger, augur_db): - tool_source = "Gitlab events task" + tool_source = "Gitlab issue events task" tool_version = "2.0" data_source = "Gitlab API" @@ -115,3 +121,40 @@ def process_issue_events(events, task_name, repo_id, logger, augur_db): augur_db.insert_data(issue_event_dicts, IssueEvent, issue_event_natural_keys) +def process_mr_events(events, task_name, repo_id, logger, augur_db): + + tool_source = "Gitlab mr events task" + tool_version = "2.0" + data_source = "Gitlab API" + + mr_event_dicts = [] + + # create mapping from mr number to pull request id of current mrs + mr_number_to_id_map = {} + mrs = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).all() + for mr in mrs: + mr_number_to_id_map[mr.pr_src_number] = mr.pull_request_id + + for event in events: + + mr_number = event["target_iid"] + + try: + issue_id = mr_number_to_id_map[mr_number] + except KeyError: + logger.info(f"{task_name}: Could not find related mr") + logger.info(f"{task_name}: We were searching for an mr with number {mr_number} in repo {repo_id}") + logger.info(f"{task_name}: Skipping") + continue + + mr_event_dicts.append( + extract_gitlab_mr_event_data(event, issue_id, platform_id, repo_id, + tool_source, tool_version, data_source) + ) + + # TODO: Add unique key for this + logger.info(f"{task_name}: Inserting {len(mr_event_dicts)} gitlab mr events") + mr_event_natural_keys = ["pull_request_id", "issue_event_src_id"] + augur_db.insert_data(mr_event_dicts, PullRequestEvent, mr_event_natural_keys) + + From dbe280ae985eadd129c98ac389e195c1b34ca7ab Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 12:26:18 -0600 Subject: [PATCH 055/122] Start on mr reviewers Signed-off-by: Andrew Brain --- augur/application/db/data_parse.py | 23 ++++++++++++++++ augur/tasks/gitlab/merge_request_task.py | 35 +++++++++++++++++++----- 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index 40733923e8..9b764770f2 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -732,6 +732,29 @@ def extract_gitlab_issue_event_data(event: dict, issue_id: int, platform_id: int return issue_event +# retrieve only the needed data for pr reviewers from the api response +def extract_needed_pr_reviewer_data(reviewers: List[dict], pull_request_id, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + + if len(reviewers) == 0: + return [] + + reviewer_dicts = [] + for reviewer in reviewers: + + reviewer_dict = { + 'pull_request_id': pull_request_id, + 'cntrb_id': None, + 'tool_source': tool_source, + 'tool_version': tool_version, + 'data_source': data_source + } + + reviewer_dicts.append(reviewer_dict) + + return reviewer_dicts + + + diff 
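insert_data with a natural-key list, as used for the event rows above, behaves like an upsert: duplicates collapse on the listed columns rather than on the serial primary key, which is why the TODO notes a unique key must exist before the insert is safe everywhere. In rough SQL terms the equivalent is the following (table and column names are inferred from the model names, not verified schema, and a matching unique index is assumed):

import sqlalchemy as s

# Hypothetical SQL analogue of insert_data(dicts, PullRequestEvent,
# ["pull_request_id", "issue_event_src_id"]).
upsert = s.sql.text("""
    INSERT INTO pull_request_events (pull_request_id, issue_event_src_id, action, platform_id)
    VALUES (:pull_request_id, :issue_event_src_id, :action, :platform_id)
    ON CONFLICT (pull_request_id, issue_event_src_id)
    DO UPDATE SET action = EXCLUDED.action
""")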
--git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index bf9e55be4d..be97382eb9 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -4,7 +4,7 @@ from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest -from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data +from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_pr_reviewer_data from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts from augur.application.db.models import PullRequest, PullRequestAssignee, PullRequestLabel, Repo @@ -180,11 +180,30 @@ def collect_merge_request_reviewers(mr_ids, repo_git) -> int: if reviewers: logger.info(f"Length of merge request reviewers: {len(reviewers)}") - logger.info(f"Mr reviewer: {reviewers[0]}") #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) else: logger.info(f"{owner}/{repo} has no gitlab merge request reviewers") - + +def process_mr_reviewers(data, task_name, repo_id, logger, augur_db): + + tool_source = "Mr Reviewr Task" + tool_version = "2.0" + data_source = "Gitlab API" + + # create mapping from mr number to pull request id of current mrs + mr_number_to_id_map = {} + mrs = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).all() + for mr in mrs: + mr_number_to_id_map[mr.pr_src_number] = mr.pull_request_id + + for id, values in data.items(): + + pull_request_id = mr_number_to_id_map[id] + + reviewers = extract_needed_pr_reviewer_data(values, pull_request_id, repo_id, tool_source, tool_version, data_source) + + for review in reviewers: + print(review["pull_request_id"]) @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_commits(mr_ids, repo_git) -> int: @@ -226,7 +245,7 @@ def collect_merge_request_files(mr_ids, repo_git) -> int: def retrieve_merge_request_data(ids, url, name, owner, repo, key_auth, logger, response_type): - all_data = [] + all_data = {} issue_count = len(ids) index = 1 @@ -242,7 +261,7 @@ def retrieve_merge_request_data(ids, url, name, owner, repo, key_auth, logger, r if response_type == "dict": page_data, _, _ = api_handler.retrieve_data(formatted_url) if page_data: - all_data.append(page_data) + all_data[id] = page_data elif response_type == "list": @@ -251,8 +270,10 @@ def retrieve_merge_request_data(ids, url, name, owner, repo, key_auth, logger, r if page_data is None or len(page_data) == 0: break - all_data += page_data - + if id in all_data: + all_data[id].extend(page_data) + else: + all_data[id] = page_data else: raise Exception(f"Unexpected reponse type: {response_type}") From b1a2ea6d1735b7b013b40fa25c7481fe1ce66d50 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 12:49:47 -0600 Subject: [PATCH 056/122] Add start of mr reviewer processing Signed-off-by: Andrew Brain --- augur/application/db/data_parse.py | 26 +++++++++++++----------- augur/tasks/gitlab/merge_request_task.py | 21 +++++++++++++++---- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index 
9b764770f2..6e0e2ea840 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -733,23 +733,25 @@ def extract_gitlab_issue_event_data(event: dict, issue_id: int, platform_id: int # retrieve only the needed data for pr reviewers from the api response -def extract_needed_pr_reviewer_data(reviewers: List[dict], pull_request_id, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: +def extract_needed_pr_reviewer_data(data: List[dict], pull_request_id, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: - if len(reviewers) == 0: + if len(data) == 0: return [] - + reviewer_dicts = [] - for reviewer in reviewers: + for x in data: - reviewer_dict = { - 'pull_request_id': pull_request_id, - 'cntrb_id': None, - 'tool_source': tool_source, - 'tool_version': tool_version, - 'data_source': data_source - } + for reviewer in x["suggested_approvers"]: - reviewer_dicts.append(reviewer_dict) + reviewer_dict = { + 'pull_request_id': pull_request_id, + 'cntrb_id': None, + 'tool_source': tool_source, + 'tool_version': tool_version, + 'data_source': data_source + } + + reviewer_dicts.append(reviewer_dict) return reviewer_dicts diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index be97382eb9..c8116b33a3 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -6,7 +6,8 @@ from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_pr_reviewer_data from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts -from augur.application.db.models import PullRequest, PullRequestAssignee, PullRequestLabel, Repo +from augur.application.db.models import PullRequest, PullRequestAssignee, PullRequestLabel, PullRequestReviewer, Repo +from augur.application.db.util import execute_session_query @celery.task(base=AugurCoreRepoCollectionTask) @@ -175,12 +176,18 @@ def collect_merge_request_reviewers(mr_ids, repo_git) -> int: logger = logging.getLogger(collect_merge_request_reviewers.__name__) with GitlabTaskManifest(logger) as manifest: + augur_db = manifest.augur_db + + query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git) + repo_obj = execute_session_query(query, 'one') + repo_id = repo_obj.repo_id + url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/approvals".format(owner=owner, repo=repo, id="{id}") reviewers = retrieve_merge_request_data(mr_ids, url, "reviewers", owner, repo, manifest.key_auth, logger, response_type="dict") if reviewers: logger.info(f"Length of merge request reviewers: {len(reviewers)}") - #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + process_mr_reviewers(reviewers, f"{owner}/{repo}: Mr reviewer task", repo_id, logger, augur_db) else: logger.info(f"{owner}/{repo} has no gitlab merge request reviewers") @@ -196,14 +203,20 @@ def process_mr_reviewers(data, task_name, repo_id, logger, augur_db): for mr in mrs: mr_number_to_id_map[mr.pr_src_number] = mr.pull_request_id + all_reviewers = [] for id, values in data.items(): pull_request_id = mr_number_to_id_map[id] reviewers = extract_needed_pr_reviewer_data(values, pull_request_id, repo_id, tool_source, tool_version, data_source) - for review in 
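The reworked extractor above expects each approvals record to carry a suggested_approvers list and, for now, records only that a reviewer slot exists: cntrb_id stays None until contributor resolution is wired in. A usage sketch against a hypothetical payload (field names inferred from the extractor itself, not from GitLab documentation):

payload = [{"suggested_approvers": [{"username": "alice"}, {"username": "bob"}]}]

rows = extract_needed_pr_reviewer_data(
    payload, pull_request_id=1, repo_id=25551,
    tool_source="Mr Reviewer Task", tool_version="2.0", data_source="Gitlab API")

assert len(rows) == 2 and all(row["cntrb_id"] is None for row in rows)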
reviewers: - print(review["pull_request_id"]) + all_reviewers += reviewers + + # TODO: Need to add unique key with pull_request_id and cntrb_id to insert gitlab reviewers + # pr_reviewer_natural_keys = ["pull_request_id", "cntrb_id"] + # augur_db.insert_data(all_reviewers, PullRequestReviewer, pr_reviewer_natural_keys) + + @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_commits(mr_ids, repo_git) -> int: From a7c319567849545c6bffcb3218ac5790594af81e Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 13:05:59 -0600 Subject: [PATCH 057/122] Add processing for mr commits Signed-off-by: Andrew Brain --- augur/application/db/data_parse.py | 16 ++++++++++ augur/tasks/gitlab/merge_request_task.py | 40 +++++++++++++++++++++--- augur/tasks/start_tasks.py | 18 +++++------ 3 files changed, 61 insertions(+), 13 deletions(-) diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index 6e0e2ea840..ff7befeafa 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -756,6 +756,22 @@ def extract_needed_pr_reviewer_data(data: List[dict], pull_request_id, repo_id: return reviewer_dicts +def extract_needed_pr_commit_data(commit, repo_id, pull_request_id, tool_source, tool_version, data_source): + + commit = { + 'pull_request_id': pull_request_id, + 'pr_cmt_sha': commit['id'], + 'pr_cmt_node_id': None, + 'pr_cmt_message': commit['message'], + 'repo_id': repo_id, + 'tool_source': tool_source, + 'tool_version': tool_version, + 'data_source': data_source, + } + + return commit + + diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index c8116b33a3..fa590eeab0 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -4,9 +4,9 @@ from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest -from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_pr_reviewer_data +from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_pr_reviewer_data, extract_needed_pr_commit_data from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts -from augur.application.db.models import PullRequest, PullRequestAssignee, PullRequestLabel, PullRequestReviewer, Repo +from augur.application.db.models import PullRequest, PullRequestAssignee, PullRequestLabel, PullRequestReviewer, PullRequestCommit, Repo from augur.application.db.util import execute_session_query @@ -226,17 +226,49 @@ def collect_merge_request_commits(mr_ids, repo_git) -> int: logger = logging.getLogger(collect_merge_request_comments.__name__) with GitlabTaskManifest(logger) as manifest: + augur_db = manifest.augur_db + + query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git) + repo_obj = execute_session_query(query, 'one') + repo_id = repo_obj.repo_id + url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/commits".format(owner=owner, repo=repo, id="{id}") commits = retrieve_merge_request_data(mr_ids, url, "commits", owner, repo, manifest.key_auth, logger, response_type="list") if commits: logger.info(f"Length of merge request 
commits: {len(commits)}") - logger.info(f"Mr commit: {commits[0]}") - #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + process_mr_commits(commits, f"{owner}/{repo}: Mr commit task", repo_id, logger, augur_db) else: logger.info(f"{owner}/{repo} has no gitlab merge request commits") +def process_mr_commits(data, task_name, repo_id, logger, augur_db): + + tool_source = "Mr Commit Task" + tool_version = "2.0" + data_source = "Gitlab API" + + # create mapping from mr number to pull request id of current mrs + mr_number_to_id_map = {} + mrs = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).all() + for mr in mrs: + mr_number_to_id_map[mr.pr_src_number] = mr.pull_request_id + + all_commits = [] + for id, values in data.items(): + + pull_request_id = mr_number_to_id_map[id] + + for commit in values: + + all_commits.append(extract_needed_pr_commit_data(commit, repo_id, pull_request_id, tool_source, tool_version, data_source)) + + + pr_commits_natural_keys = ["pull_request_id", "repo_id", "pr_cmt_sha"] + augur_db.insert_data(all_commits,PullRequestCommit,pr_commits_natural_keys) + + + @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_files(mr_ids, repo_git) -> int: diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index b9cb5dd8b6..1ad4769d90 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -102,17 +102,17 @@ def primary_repo_collect_phase_gitlab(repo_git): jobs = group( chain(collect_gitlab_merge_requests.si(repo_git), group( - collect_merge_request_comments.s(repo_git), - collect_merge_request_reviewers.s(repo_git), - collect_merge_request_metadata.s(repo_git), + #collect_merge_request_comments.s(repo_git), + #collect_merge_request_reviewers.s(repo_git), + #collect_merge_request_metadata.s(repo_git), collect_merge_request_commits.s(repo_git), - collect_merge_request_files.s(repo_git) + #collect_merge_request_files.s(repo_git), + # collect_merge_request_events.si(repo_git), )), - chain(collect_gitlab_issues.si(repo_git), group( - collect_gitlab_issue_comments.s(repo_git) - )), - collect_gitlab_issue_events.si(repo_git), - collect_merge_request_events.si(repo_git), + # chain(collect_gitlab_issues.si(repo_git), group( + # collect_gitlab_issue_comments.s(repo_git), + # collect_gitlab_issue_events.si(repo_git), + # )), ) return jobs From 1ca0855be599d31336bfb83c0586cd74462abd06 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 13:20:27 -0600 Subject: [PATCH 058/122] Add processing for mr files Signed-off-by: Andrew Brain --- augur/application/db/data_parse.py | 29 +++++++++++++++++++ augur/tasks/gitlab/merge_request_task.py | 37 +++++++++++++++++++++--- 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index ff7befeafa..ba58eeed3e 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -772,6 +772,35 @@ def extract_needed_pr_commit_data(commit, repo_id, pull_request_id, tool_source, return commit +def extract_needed_mr_file_data(gitlab_file_data, repo_id, pull_request_id, tool_source, tool_version, data_source): + + files = [] + + changes = gitlab_file_data["changes"] + for file_changes in changes: + try: + deletes = int(file_changes['diff'].split('@@')[1].strip().split(' ')[0].split(',')[1]) + adds = int(file_changes['diff'].split('@@')[1].strip().split(' ')[1].split(',')[1]) + except: + deletes = 0 + adds = 0 + + 
file_dict = { + 'pull_request_id': pull_request_id, + 'repo_id': repo_id, + 'pr_file_additions': adds, + 'pr_file_deletions': deletes, + 'pr_file_path': file_changes['old_path'], + 'tool_source': tool_source, + 'tool_version': tool_version, + 'data_source': data_source, + } + + files.append(file_dict) + + return files + + diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index fa590eeab0..759e82d083 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -4,9 +4,9 @@ from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest -from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_pr_reviewer_data, extract_needed_pr_commit_data +from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_pr_reviewer_data, extract_needed_pr_commit_data, extract_needed_mr_file_data from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts -from augur.application.db.models import PullRequest, PullRequestAssignee, PullRequestLabel, PullRequestReviewer, PullRequestCommit, Repo +from augur.application.db.models import PullRequest, PullRequestAssignee, PullRequestLabel, PullRequestReviewer, PullRequestCommit, PullRequestFile, Repo from augur.application.db.util import execute_session_query @@ -264,6 +264,7 @@ def process_mr_commits(data, task_name, repo_id, logger, augur_db): all_commits.append(extract_needed_pr_commit_data(commit, repo_id, pull_request_id, tool_source, tool_version, data_source)) + logger.info(f"{task_name}: Inserting {len(all_commits)} merge request commits") pr_commits_natural_keys = ["pull_request_id", "repo_id", "pr_cmt_sha"] augur_db.insert_data(all_commits,PullRequestCommit,pr_commits_natural_keys) @@ -277,15 +278,43 @@ def collect_merge_request_files(mr_ids, repo_git) -> int: logger = logging.getLogger(collect_merge_request_comments.__name__) with GitlabTaskManifest(logger) as manifest: + augur_db = manifest.augur_db + + query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git) + repo_obj = execute_session_query(query, 'one') + repo_id = repo_obj.repo_id + url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/changes".format(owner=owner, repo=repo, id="{id}") files = retrieve_merge_request_data(mr_ids, url, "files", owner, repo, manifest.key_auth, logger, response_type="dict") if files: logger.info(f"Length of merge request files: {len(files)}") - logger.info(f"Mr file: {files[0]}") - #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + process_mr_files(files, f"{owner}/{repo}: Mr files task", repo_id, logger, augur_db) else: logger.info(f"{owner}/{repo} has no gitlab merge request files") + +def process_mr_files(data, task_name, repo_id, logger, augur_db): + + tool_source = "Mr files Task" + tool_version = "2.0" + data_source = "Gitlab API" + + # create mapping from mr number to pull request id of current mrs + mr_number_to_id_map = {} + mrs = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).all() + for mr in mrs: + mr_number_to_id_map[mr.pr_src_number] = 
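The adds/deletes scrape in extract_needed_mr_file_data above reads the first hunk header of each change's unified diff; the bare except covers single-line hunks such as @@ -1 +1 @@, where there is no comma to split on. Worked through by hand (note these are the old and new span lengths, so they approximate rather than exactly equal the lines deleted and added):

diff_text = "@@ -1,5 +1,7 @@ def handler():\n-old line\n+new line\n"

header = diff_text.split("@@")[1].strip()   # "-1,5 +1,7"
old_span, new_span = header.split(" ")      # "-1,5" and "+1,7"
deletes = int(old_span.split(",")[1])       # 5
adds = int(new_span.split(",")[1])          # 7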
mr.pull_request_id + + all_files = [] + for id, gitlab_file_data in data.items(): + + pull_request_id = mr_number_to_id_map[id] + + all_files.extend(extract_needed_mr_file_data(gitlab_file_data, repo_id, pull_request_id, tool_source, tool_version, data_source)) + + logger.info(f"{task_name}: Inserting {len(all_files)} merge request files") + pr_file_natural_keys = ["pull_request_id", "repo_id", "pr_file_path"] + augur_db.insert_data(all_files, PullRequestFile, pr_file_natural_keys) def retrieve_merge_request_data(ids, url, name, owner, repo, key_auth, logger, response_type): From b45db1ef56cd90a8dc2fbe8966a7d07bb4ee4bbd Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 13:37:47 -0600 Subject: [PATCH 059/122] Add processing for mr metadata Signed-off-by: Andrew Brain --- augur/application/db/data_parse.py | 42 +++++++++++++++++++++++- augur/tasks/gitlab/merge_request_task.py | 39 ++++++++++++++++++---- augur/tasks/start_tasks.py | 2 +- 3 files changed, 75 insertions(+), 8 deletions(-) diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index ba58eeed3e..6fa1bce16e 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -756,7 +756,7 @@ def extract_needed_pr_reviewer_data(data: List[dict], pull_request_id, repo_id: return reviewer_dicts -def extract_needed_pr_commit_data(commit, repo_id, pull_request_id, tool_source, tool_version, data_source): +def extract_needed_mr_commit_data(commit, repo_id, pull_request_id, tool_source, tool_version, data_source): commit = { 'pull_request_id': pull_request_id, @@ -801,6 +801,46 @@ def extract_needed_mr_file_data(gitlab_file_data, repo_id, pull_request_id, tool return files +def extract_needed_mr_metadata(mr_dict, repo_id, pull_request_id, tool_source, tool_version, data_source): + + head = {'sha': mr_dict['diff_refs']['head_sha'], + 'ref': mr_dict['target_branch'], + 'label': str(mr_dict['target_project_id']) + ':' + mr_dict['target_branch'], + 'author': mr_dict['author']['username'], + 'repo': str(mr_dict['target_project_id']) + } + + base = {'sha': mr_dict['diff_refs']['base_sha'], + 'ref': mr_dict['source_branch'], + 'label': str(mr_dict['source_project_id']) + ':' + mr_dict['source_branch'], + 'author': mr_dict['author']['username'], + 'repo': str(mr_dict['source_project_id']) + } + + pr_meta_dict = { + 'head': head, + 'base': base + } + all_meta = [] + for pr_side, pr_meta_data in pr_meta_dict.items(): + pr_meta = { + 'pull_request_id': pull_request_id, + 'repo_id': repo_id, + 'pr_head_or_base': pr_side, + 'pr_src_meta_label': pr_meta_data['label'], + 'pr_src_meta_ref': pr_meta_data['ref'], + 'pr_sha': pr_meta_data['sha'], + 'cntrb_id': None, + 'tool_source': tool_source, + 'tool_version': tool_version, + 'data_source': data_source + } + all_meta.append(pr_meta) + + return all_meta + + + diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index 759e82d083..4f92385871 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -4,9 +4,9 @@ from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest -from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_pr_reviewer_data, extract_needed_pr_commit_data, 
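extract_needed_mr_metadata above splits one merge request into two rows, head and base, mirroring how pull request metadata is stored for GitHub repos. A usage sketch with a minimal made-up payload containing only the keys the function touches:

mr = {
    "diff_refs": {"head_sha": "abc123", "base_sha": "def456"},
    "target_branch": "main", "target_project_id": 1,
    "source_branch": "feature", "source_project_id": 2,
    "author": {"username": "alice"},
}

rows = extract_needed_mr_metadata(
    mr, repo_id=25551, pull_request_id=1,
    tool_source="Mr Metadata Task", tool_version="2.0", data_source="Gitlab API")

assert [row["pr_head_or_base"] for row in rows] == ["head", "base"]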
extract_needed_mr_file_data +from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_pr_reviewer_data, extract_needed_mr_commit_data, extract_needed_mr_file_data, extract_needed_mr_metadata from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts -from augur.application.db.models import PullRequest, PullRequestAssignee, PullRequestLabel, PullRequestReviewer, PullRequestCommit, PullRequestFile, Repo +from augur.application.db.models import PullRequest, PullRequestAssignee, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestCommit, PullRequestFile, Repo from augur.application.db.util import execute_session_query @@ -156,17 +156,44 @@ def collect_merge_request_metadata(mr_ids, repo_git) -> int: logger = logging.getLogger(collect_merge_request_metadata.__name__) with GitlabTaskManifest(logger) as manifest: + augur_db = manifest.augur_db + + query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git) + repo_obj = execute_session_query(query, 'one') + repo_id = repo_obj.repo_id + url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}".format(owner=owner, repo=repo, id="{id}") metadata_list = retrieve_merge_request_data(mr_ids, url, "metadata", owner, repo, manifest.key_auth, logger, response_type="dict") if metadata_list: logger.info(f"Length of merge request metadata: {len(metadata_list)}") - logger.info(f"Mr metadata: {metadata_list[0]}") - #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + process_mr_metadata(metadata_list, f"{owner}/{repo}: Mr metadata task", repo_id, logger, augur_db) else: logger.info(f"{owner}/{repo} has no gitlab merge request metadata") - +def process_mr_metadata(data, task_name, repo_id, logger, augur_db): + + tool_source = "Mr Metadata Task" + tool_version = "2.0" + data_source = "Gitlab API" + + # create mapping from mr number to pull request id of current mrs + mr_number_to_id_map = {} + mrs = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).all() + for mr in mrs: + mr_number_to_id_map[mr.pr_src_number] = mr.pull_request_id + + all_metadata = [] + for id, metadata in data.items(): + + pull_request_id = mr_number_to_id_map[id] + + all_metadata.extend(extract_needed_mr_metadata(metadata, repo_id, pull_request_id, tool_source, tool_version, data_source)) + + logger.info(f"{task_name}: Inserting {len(all_metadata)} merge request metadata") + pr_metadata_natural_keys = ['pull_request_id', 'pr_head_or_base', 'pr_sha'] + augur_db.insert_data(all_metadata, PullRequestMeta, pr_metadata_natural_keys) + @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_reviewers(mr_ids, repo_git) -> int: @@ -261,7 +288,7 @@ def process_mr_commits(data, task_name, repo_id, logger, augur_db): for commit in values: - all_commits.append(extract_needed_pr_commit_data(commit, repo_id, pull_request_id, tool_source, tool_version, data_source)) + all_commits.append(extract_needed_mr_commit_data(commit, repo_id, pull_request_id, tool_source, tool_version, data_source)) logger.info(f"{task_name}: Inserting {len(all_commits)} merge request commits") diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 1ad4769d90..6c01e42b98 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -24,7 +24,7 @@ from augur.tasks.github.pull_requests.commits_model.tasks import 
process_pull_request_commits from augur.tasks.git.dependency_tasks.tasks import process_ossf_dependency_metrics from augur.tasks.github.traffic.tasks import collect_github_repo_clones_data -from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests, collect_merge_request_comments, collect_merge_request_events, collect_merge_request_metadata, collect_merge_request_reviewers, collect_merge_request_commits, collect_merge_request_files +from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests, collect_merge_request_comments, collect_merge_request_metadata, collect_merge_request_reviewers, collect_merge_request_commits, collect_merge_request_files from augur.tasks.gitlab.issues_task import collect_gitlab_issues, collect_gitlab_issue_comments from augur.tasks.gitlab.events_task import collect_gitlab_issue_events, collect_gitlab_merge_request_events from augur.tasks.git.facade_tasks import * From f43a9fd144c39f3260136b45a6802370fd2ec236 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 14:14:15 -0600 Subject: [PATCH 060/122] Add processing for issues messages Signed-off-by: Andrew Brain --- augur/application/db/data_parse.py | 24 ++++++++ augur/tasks/gitlab/issues_task.py | 89 +++++++++++++++++++++++++++--- 2 files changed, 106 insertions(+), 7 deletions(-) diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index 6fa1bce16e..503fd58b60 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -840,8 +840,32 @@ def extract_needed_mr_metadata(mr_dict, repo_id, pull_request_id, tool_source, t return all_meta +def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + message_ref_dict = { + 'issue_id': issue_id, + 'tool_source': tool_source, + 'tool_version': tool_version, + 'data_source': data_source, + 'issue_msg_ref_src_comment_id': int(message['id']), + 'issue_msg_ref_src_node_id': None, + 'repo_id': repo_id + } + + return message_ref_dict +def extract_needed_gitlab_message_data(comment: dict, platform_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str): + comment_dict = { + "pltfrm_id": platform_id, + "msg_text": comment['body'], + "msg_timestamp": comment['created_at'], + "cntrb_id": None, + "platform_msg_id": int(comment['id']), + "tool_source": tool_source, + "tool_version": tool_version, + "data_source": data_source + } + return comment_dict diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py index 4f035a6024..eeb0f4ac34 100644 --- a/augur/tasks/gitlab/issues_task.py +++ b/augur/tasks/gitlab/issues_task.py @@ -5,11 +5,12 @@ from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest -from augur.application.db.data_parse import extract_needed_issue_data_from_gitlab_issue, extract_needed_gitlab_issue_label_data, extract_needed_gitlab_issue_assignee_data +from augur.application.db.data_parse import extract_needed_issue_data_from_gitlab_issue, extract_needed_gitlab_issue_label_data, extract_needed_gitlab_issue_assignee_data, extract_needed_gitlab_issue_message_ref_data, extract_needed_gitlab_message_data from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts -from augur.application.db.models import Issue, IssueLabel, IssueAssignee, Repo 
+from augur.application.db.models import Issue, IssueLabel, IssueAssignee, IssueMessageRef, Message, Repo from augur.application.db.util import execute_session_query +platform_id = 2 @celery.task(base=AugurCoreRepoCollectionTask) def collect_gitlab_issues(repo_git : str) -> int: @@ -158,11 +159,17 @@ def collect_gitlab_issue_comments(issue_ids, repo_git) -> int: logger = logging.getLogger(collect_gitlab_issues.__name__) with GitlabTaskManifest(logger) as manifest: + augur_db = manifest.augur_db + + query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git) + repo_obj = execute_session_query(query, 'one') + repo_id = repo_obj.repo_id + comments = retrieve_all_gitlab_issue_comments(manifest.key_auth, logger, issue_ids, repo_git) if comments: logger.info(f"Length of comments: {len(comments)}") - #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + process_gitlab_issue_messages(comments, f"{owner}/{repo}: Gitlab issue messages task", repo_id, logger, augur_db) else: logger.info(f"{owner}/{repo} has no gitlab issue comments") @@ -171,26 +178,94 @@ def retrieve_all_gitlab_issue_comments(key_auth, logger, issue_ids, repo_git): owner, repo = get_owner_repo(repo_git) - all_comments = [] + all_comments = {} issue_count = len(issue_ids) index = 1 + + comments = GitlabApiHandler(key_auth, logger) + for id in issue_ids: + if len(all_comments) > 10: + return all_comments + print(f"Collecting {owner}/{repo} gitlab issue comments for issue {index} of {issue_count}") url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/issues/{id}/notes" - comments = GitlabApiHandler(key_auth, logger) - + for page_data, page in comments.iter_pages(url): if page_data is None or len(page_data) == 0: break - all_comments += page_data + if id in all_comments: + all_comments[id].extend(page_data) + else: + all_comments[id] = page_data index += 1 return all_comments +def process_gitlab_issue_messages(data, task_name, repo_id, logger, augur_db): + + tool_source = "Gitlab issue comments" + tool_version = "2.0" + data_source = "Gitlab API" + + # create mapping from mr number to pull request id of current mrs + issue_number_to_id_map = {} + issues = augur_db.session.query(Issue).filter(Issue.repo_id == repo_id).all() + for issue in issues: + issue_number_to_id_map[issue.gh_issue_number] = issue.issue_id + + message_dicts = [] + message_ref_mapping_data = {} + for id, messages in data.items(): + + try: + issue_id = issue_number_to_id_map[id] + except KeyError: + logger.info(f"{task_name}: Could not find related issue") + logger.info(f"{task_name}: We were searching for issue number {id} in repo {repo_id}") + logger.info(f"{task_name}: Skipping") + continue + + for message in messages: + + issue_message_ref_data = extract_needed_gitlab_issue_message_ref_data(message, issue_id, repo_id, tool_source, tool_version, data_source) + + message_ref_mapping_data[message["id"]] = { + "msg_ref_data": issue_message_ref_data + } + + message_dicts.append( + extract_needed_gitlab_message_data(message, platform_id, repo_id, tool_source, tool_version, data_source) + ) + + + logger.info(f"{task_name}: Inserting {len(message_dicts)} messages") + message_natural_keys = ["platform_msg_id"] + message_return_columns = ["msg_id", "platform_msg_id"] + message_string_fields = ["msg_text"] + message_return_data = augur_db.insert_data(message_dicts, Message, message_natural_keys, + return_columns=message_return_columns, string_fields=message_string_fields) + + issue_message_ref_dicts = [] + 
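+# Message rows hold the comment bodies; IssueMessageRef rows tie each message back to its issue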
for data in message_return_data: + + augur_msg_id = data["msg_id"] + platform_message_id = data["platform_msg_id"] + + ref = message_ref_mapping_data[platform_message_id] + message_ref_data = ref["msg_ref_data"] + message_ref_data["msg_id"] = augur_msg_id + + issue_message_ref_dicts.append(message_ref_data) + + logger.info(f"{task_name}: Inserting {len(issue_message_ref_dicts)} gitlab issue messages ref rows") + issue_message_ref_natural_keys = ["issue_id", "issue_msg_ref_src_comment_id"] + augur_db.insert_data(issue_message_ref_dicts, IssueMessageRef, issue_message_ref_natural_keys) + From 3c096d72538dc8a663626872bca171a00e9ff938 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 14:26:44 -0600 Subject: [PATCH 061/122] Add processing for mr messages Signed-off-by: Andrew Brain --- augur/application/db/data_parse.py | 16 +++++ augur/tasks/gitlab/merge_request_task.py | 76 ++++++++++++++++++++++-- 2 files changed, 86 insertions(+), 6 deletions(-) diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index 503fd58b60..3127086d4f 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -869,3 +869,19 @@ def extract_needed_gitlab_message_data(comment: dict, platform_id: int, repo_id: } return comment_dict + +# retrieve only the needed data for pr labels from the api response +def extract_needed_gitlab_mr_message_ref_data(comment: dict, pull_request_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + + pr_msg_ref = { + 'pull_request_id': pull_request_id, + 'pr_message_ref_src_comment_id': comment['id'], + 'repo_id': repo_id, + 'pr_message_ref_src_node_id': None, + 'tool_source': tool_source, + 'tool_version': tool_version, + 'data_source': data_source + } + + return pr_msg_ref + diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index 4f92385871..cab648fdb3 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -4,11 +4,12 @@ from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest -from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_pr_reviewer_data, extract_needed_mr_commit_data, extract_needed_mr_file_data, extract_needed_mr_metadata +from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_pr_reviewer_data, extract_needed_mr_commit_data, extract_needed_mr_file_data, extract_needed_mr_metadata, extract_needed_gitlab_mr_message_ref_data, extract_needed_gitlab_message_data from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts -from augur.application.db.models import PullRequest, PullRequestAssignee, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestCommit, PullRequestFile, Repo +from augur.application.db.models import PullRequest, PullRequestAssignee, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestCommit, PullRequestFile, PullRequestMessageRef, Repo, Message from augur.application.db.util import execute_session_query +platform_id = 2 @celery.task(base=AugurCoreRepoCollectionTask) def 
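+        # insert_data returned one dict per message ("msg_id", "platform_msg_id");
+        # use the platform comment id to find the staged ref row and stamp it with the new msg_id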
collect_gitlab_merge_requests(repo_git: str) -> int: @@ -134,18 +135,81 @@ def collect_merge_request_comments(mr_ids, repo_git) -> int: logger = logging.getLogger(collect_merge_request_comments.__name__) with GitlabTaskManifest(logger) as manifest: + augur_db = manifest.augur_db + + query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git) + repo_obj = execute_session_query(query, 'one') + repo_id = repo_obj.repo_id + url = "https://gitlab.com/api/v4/projects/{owner}%2f{repo}/merge_requests/{id}/notes".format(owner=owner, repo=repo, id="{id}") comments = retrieve_merge_request_data(mr_ids, url, "comments", owner, repo, manifest.key_auth, logger, response_type="list") if comments: logger.info(f"Length of merge request comments: {len(comments)}") - logger.info(f"Mr comment: {comments[0]}") - #issue_ids = process_issues(issue_data, f"{owner}/{repo}: Gitlab Issue task", repo_id, logger, augur_db) + process_gitlab_mr_messages(comments, f"{owner}/{repo}: Gitlab mr messages task", repo_id, logger, augur_db) else: logger.info(f"{owner}/{repo} has no gitlab merge request comments") +def process_gitlab_mr_messages(data, task_name, repo_id, logger, augur_db): + + tool_source = "Gitlab mr comments" + tool_version = "2.0" + data_source = "Gitlab API" + + # create mapping from mr number to pull request id of current mrs + mr_number_to_id_map = {} + mrs = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).all() + for mr in mrs: + mr_number_to_id_map[mr.pr_src_number] = mr.pull_request_id + + message_dicts = [] + message_ref_mapping_data = {} + for id, messages in data.items(): + + try: + pull_request_id = mr_number_to_id_map[id] + except KeyError: + logger.info(f"{task_name}: Could not find related mr") + logger.info(f"{task_name}: We were searching for mr number {id} in repo {repo_id}") + logger.info(f"{task_name}: Skipping") + continue + + for message in messages: + + mr_message_ref_data = extract_needed_gitlab_mr_message_ref_data(message, pull_request_id, repo_id, tool_source, tool_version, data_source) + + message_ref_mapping_data[message["id"]] = { + "msg_ref_data": mr_message_ref_data + } + + message_dicts.append( + extract_needed_gitlab_message_data(message, platform_id, repo_id, tool_source, tool_version, data_source) + ) + + + logger.info(f"{task_name}: Inserting {len(message_dicts)} messages") + message_natural_keys = ["platform_msg_id"] + message_return_columns = ["msg_id", "platform_msg_id"] + message_string_fields = ["msg_text"] + message_return_data = augur_db.insert_data(message_dicts, Message, message_natural_keys, + return_columns=message_return_columns, string_fields=message_string_fields) + + mr_message_ref_dicts = [] + for data in message_return_data: + + augur_msg_id = data["msg_id"] + platform_message_id = data["platform_msg_id"] + + ref = message_ref_mapping_data[platform_message_id] + message_ref_data = ref["msg_ref_data"] + message_ref_data["msg_id"] = augur_msg_id + + mr_message_ref_dicts.append(message_ref_data) + logger.info(f"{task_name}: Inserting {len(mr_message_ref_dicts)} mr messages ref rows") + mr_message_ref_natural_keys = ["pull_request_id", "pr_message_ref_src_comment_id"] + augur_db.insert_data(mr_message_ref_dicts, PullRequestMessageRef, mr_message_ref_natural_keys) @celery.task(base=AugurCoreRepoCollectionTask) @@ -347,7 +411,7 @@ def process_mr_files(data, task_name, repo_id, logger, augur_db): def retrieve_merge_request_data(ids, url, name, owner, repo, key_auth, logger, response_type): all_data = {} - issue_count = len(ids) 
+ mr_count = len(ids) index = 1 api_handler = GitlabApiHandler(key_auth, logger) @@ -356,7 +420,7 @@ def retrieve_merge_request_data(ids, url, name, owner, repo, key_auth, logger, r if len(all_data) > 10: return all_data - print(f"Collecting {owner}/{repo} gitlab merge request {name} for merge request {index} of {issue_count}") + print(f"Collecting {owner}/{repo} gitlab merge request {name} for merge request {index} of {mr_count}") formatted_url = url.format(id=id) if response_type == "dict": From 99e94c91622d803d6053d5b3495a0e859ae8d6a6 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 14:41:08 -0600 Subject: [PATCH 062/122] Comment out assignee inserts and update tasks that are running Signed-off-by: Andrew Brain --- augur/tasks/gitlab/issues_task.py | 4 ++-- augur/tasks/gitlab/merge_request_task.py | 4 ++-- augur/tasks/start_tasks.py | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py index eeb0f4ac34..5ffc633d0e 100644 --- a/augur/tasks/gitlab/issues_task.py +++ b/augur/tasks/gitlab/issues_task.py @@ -144,8 +144,8 @@ def process_issues(issues, task_name, repo_id, logger, augur_db) -> None: # inserting issue assignees # we are using issue_assignee_src_id and issue_id to determine if the label is already in the database. - issue_assignee_natural_keys = ['issue_assignee_src_id', 'issue_id'] - augur_db.insert_data(issue_assignee_dicts, IssueAssignee, issue_assignee_natural_keys) + # issue_assignee_natural_keys = ['issue_assignee_src_id', 'issue_id'] + # augur_db.insert_data(issue_assignee_dicts, IssueAssignee, issue_assignee_natural_keys) return issue_ids diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index cab648fdb3..adce8f60c7 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -116,8 +116,8 @@ def process_merge_requests(data, task_name, repo_id, logger, augur_db): logger.info(f"{task_name}: Inserting other pr data of lengths: Labels: {len(mr_label_dicts)} - Assignees: {len(mr_assignee_dicts)}") # TODO: Setup unique key on asignees with a value of ('cntrb_id', 'pull_request_id') and add 'cntrb_id' to assingee data - mr_assignee_natural_keys = ['pr_assignee_src_id', 'pull_request_id'] - augur_db.insert_data(mr_assignee_dicts, PullRequestAssignee, mr_assignee_natural_keys) + # mr_assignee_natural_keys = ['pr_assignee_src_id', 'pull_request_id'] + # augur_db.insert_data(mr_assignee_dicts, PullRequestAssignee, mr_assignee_natural_keys) pr_label_natural_keys = ['pr_src_id', 'pull_request_id'] pr_label_string_fields = ["pr_src_description"] diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 6c01e42b98..c11ea76d53 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -102,17 +102,17 @@ def primary_repo_collect_phase_gitlab(repo_git): jobs = group( chain(collect_gitlab_merge_requests.si(repo_git), group( - #collect_merge_request_comments.s(repo_git), + collect_merge_request_comments.s(repo_git), #collect_merge_request_reviewers.s(repo_git), - #collect_merge_request_metadata.s(repo_git), + collect_merge_request_metadata.s(repo_git), collect_merge_request_commits.s(repo_git), - #collect_merge_request_files.s(repo_git), - # collect_merge_request_events.si(repo_git), + collect_merge_request_files.s(repo_git), + collect_gitlab_merge_request_events.si(repo_git), )), - # chain(collect_gitlab_issues.si(repo_git), group( - # 
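+    # mr_count only feeds the progress message printed for each merge request below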
collect_gitlab_issue_comments.s(repo_git), - # collect_gitlab_issue_events.si(repo_git), - # )), + chain(collect_gitlab_issues.si(repo_git), group( + #collect_gitlab_issue_comments.s(repo_git), + collect_gitlab_issue_events.si(repo_git), + # )), ) return jobs From a0234084745cfbf44336c9838e1d919fc24e4d72 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 14:42:07 -0600 Subject: [PATCH 063/122] comment out message collection Signed-off-by: Andrew Brain --- augur/tasks/start_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index c11ea76d53..c09d89dbba 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -102,7 +102,7 @@ def primary_repo_collect_phase_gitlab(repo_git): jobs = group( chain(collect_gitlab_merge_requests.si(repo_git), group( - collect_merge_request_comments.s(repo_git), + #collect_merge_request_comments.s(repo_git), #collect_merge_request_reviewers.s(repo_git), collect_merge_request_metadata.s(repo_git), collect_merge_request_commits.s(repo_git), From ba24d20dc362ced5698ea48d282d56c1e2664d38 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 14:45:06 -0600 Subject: [PATCH 064/122] Remove logic that stopped the collection early Signed-off-by: Andrew Brain --- augur/tasks/gitlab/issues_task.py | 3 --- augur/tasks/gitlab/merge_request_task.py | 3 --- 2 files changed, 6 deletions(-) diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py index 5ffc633d0e..7f0c7787ee 100644 --- a/augur/tasks/gitlab/issues_task.py +++ b/augur/tasks/gitlab/issues_task.py @@ -186,9 +186,6 @@ def retrieve_all_gitlab_issue_comments(key_auth, logger, issue_ids, repo_git): for id in issue_ids: - if len(all_comments) > 10: - return all_comments - print(f"Collecting {owner}/{repo} gitlab issue comments for issue {index} of {issue_count}") url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/issues/{id}/notes" diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index adce8f60c7..6d69def093 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -416,9 +416,6 @@ def retrieve_merge_request_data(ids, url, name, owner, repo, key_auth, logger, r api_handler = GitlabApiHandler(key_auth, logger) for id in ids: - - if len(all_data) > 10: - return all_data print(f"Collecting {owner}/{repo} gitlab merge request {name} for merge request {index} of {mr_count}") formatted_url = url.format(id=id) From 13eb4498b31f386754133c2f3718231e1fec3582 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 15:21:42 -0600 Subject: [PATCH 065/122] Fix small bugs Signed-off-by: Andrew Brain --- augur/tasks/gitlab/merge_request_task.py | 4 ++-- augur/tasks/start_tasks.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index 6d69def093..bb27ecdafb 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -314,7 +314,7 @@ def collect_merge_request_commits(mr_ids, repo_git) -> int: owner, repo = get_owner_repo(repo_git) - logger = logging.getLogger(collect_merge_request_comments.__name__) + logger = logging.getLogger(collect_merge_request_commits.__name__) with GitlabTaskManifest(logger) as manifest: augur_db = manifest.augur_db @@ -366,7 +366,7 @@ def collect_merge_request_files(mr_ids, repo_git) -> int: owner, repo = 
get_owner_repo(repo_git) - logger = logging.getLogger(collect_merge_request_comments.__name__) + logger = logging.getLogger(collect_merge_request_files.__name__) with GitlabTaskManifest(logger) as manifest: augur_db = manifest.augur_db diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index c09d89dbba..e13a2a9b83 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -106,13 +106,13 @@ def primary_repo_collect_phase_gitlab(repo_git): #collect_merge_request_reviewers.s(repo_git), collect_merge_request_metadata.s(repo_git), collect_merge_request_commits.s(repo_git), - collect_merge_request_files.s(repo_git), + #collect_merge_request_files.s(repo_git), collect_gitlab_merge_request_events.si(repo_git), )), chain(collect_gitlab_issues.si(repo_git), group( #collect_gitlab_issue_comments.s(repo_git), collect_gitlab_issue_events.si(repo_git), - # )), + )), ) return jobs From 05b53ecc1ac78efe88a7e8c586f3fb5c158168c4 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 9 Dec 2023 15:40:07 -0600 Subject: [PATCH 066/122] Fix bug in gitlab api handler Signed-off-by: Andrew Brain --- augur/tasks/gitlab/gitlab_api_handler.py | 3 +++ augur/tasks/start_tasks.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/augur/tasks/gitlab/gitlab_api_handler.py b/augur/tasks/gitlab/gitlab_api_handler.py index 3a463127bb..8f111d3c48 100644 --- a/augur/tasks/gitlab/gitlab_api_handler.py +++ b/augur/tasks/gitlab/gitlab_api_handler.py @@ -171,6 +171,8 @@ def retrieve_data(self, url: str) -> Tuple[Optional[List[dict]], Optional[httpx. response = hit_api(self.key_manager, url, self.logger, timeout) + num_attempts += 1 + if response is None: if timeout_count == 10: self.logger.error(f"Request timed out 10 times for {url}") @@ -211,6 +213,7 @@ def retrieve_data(self, url: str) -> Tuple[Optional[List[dict]], Optional[httpx. return page_data, response, GitlabApiResult.SUCCESS self.logger.warning(f"Unhandled gitlab response. Status code: {response.status_code}. Body: {response.json()}") + self.logger.error("Unable to collect data in 10 attempts") diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index e13a2a9b83..10f04e40b7 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -106,7 +106,7 @@ def primary_repo_collect_phase_gitlab(repo_git): #collect_merge_request_reviewers.s(repo_git), collect_merge_request_metadata.s(repo_git), collect_merge_request_commits.s(repo_git), - #collect_merge_request_files.s(repo_git), + collect_merge_request_files.s(repo_git), collect_gitlab_merge_request_events.si(repo_git), )), chain(collect_gitlab_issues.si(repo_git), group( From 90add5e226d962f098a7e647a8443539e1b5557e Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Tue, 2 Jan 2024 22:17:18 +0000 Subject: [PATCH 067/122] updated readme with public instance link Signed-off-by: Sean P. Goggins --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 556f8c3611..ad08374738 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # Augur NEW Release v0.60.5 +CHECK out Augur's Public Instance at https://ai.chaoss.io + [![first-timers-only](https://img.shields.io/badge/first--timers--only-friendly-blue.svg?style=flat-square)](https://www.firsttimersonly.com/) We follow the [First Timers Only](https://www.firsttimersonly.com/) philosophy of tagging issues for first timers only, and walking one newcomer through the resolution process weekly. 
[You can find these issues tagged with "first timers only" on our issues list.](https://github.com/chaoss/augur/labels/first-timers-only). [![standard-readme compliant](https://img.shields.io/badge/standard--readme-OK-green.svg?style=flat-square)](https://github.com/RichardLitt/standard-readme) [![Build Docker images](https://github.com/chaoss/augur/actions/workflows/build_docker.yml/badge.svg)](https://github.com/chaoss/augur/actions/workflows/build_docker.yml) [![Hits-of-Code](https://hitsofcode.com/github/chaoss/augur?branch=main)](https://hitsofcode.com/github/chaoss/augur/view?branch=main) [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/2788/badge)](https://bestpractices.coreinfrastructure.org/projects/2788) From f31308a903157ec69579c6c3d1dfb82cd9afb62f Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Thu, 11 Jan 2024 21:26:28 -0600 Subject: [PATCH 068/122] Fix collection issue Signed-off-by: Andrew Brain --- augur/application/db/data_parse.py | 2 +- augur/tasks/gitlab/merge_request_task.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index 3127086d4f..2d5c51a899 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -733,7 +733,7 @@ def extract_gitlab_issue_event_data(event: dict, issue_id: int, platform_id: int # retrieve only the needed data for pr reviewers from the api response -def extract_needed_pr_reviewer_data(data: List[dict], pull_request_id, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: +def extract_needed_mr_reviewer_data(data: List[dict], pull_request_id, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: if len(data) == 0: return [] diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index bb27ecdafb..5672a79895 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -4,7 +4,7 @@ from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest -from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_pr_reviewer_data, extract_needed_mr_commit_data, extract_needed_mr_file_data, extract_needed_mr_metadata, extract_needed_gitlab_mr_message_ref_data, extract_needed_gitlab_message_data +from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_mr_reviewer_data, extract_needed_mr_commit_data, extract_needed_mr_file_data, extract_needed_mr_metadata, extract_needed_gitlab_mr_message_ref_data, extract_needed_gitlab_message_data from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts from augur.application.db.models import PullRequest, PullRequestAssignee, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestCommit, PullRequestFile, PullRequestMessageRef, Repo, Message from augur.application.db.util import execute_session_query @@ -299,7 +299,7 @@ def process_mr_reviewers(data, task_name, repo_id, logger, augur_db): pull_request_id = mr_number_to_id_map[id] - reviewers = extract_needed_pr_reviewer_data(values, pull_request_id, 
repo_id, tool_source, tool_version, data_source) + reviewers = extract_needed_mr_reviewer_data(values, pull_request_id, repo_id, tool_source, tool_version, data_source) all_reviewers += reviewers From 7370873fd2e8fc18e8332c1963c214efac499f13 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 13 Jan 2024 11:09:06 -0600 Subject: [PATCH 069/122] Fix syntax error in json parsing method --- augur/tasks/util/worker_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/util/worker_util.py b/augur/tasks/util/worker_util.py index 6380ed22b0..84c177724b 100644 --- a/augur/tasks/util/worker_util.py +++ b/augur/tasks/util/worker_util.py @@ -138,7 +138,7 @@ def parse_json_from_subprocess_call(logger, subprocess_arr, cwd=None): try: required_output = json.loads(output) except json.decoder.JSONDecodeError as e: - session.logger.error(f"Could not parse required output! \n output: {output} \n Error: {e}") + logger.error(f"Could not parse required output! \n output: {output} \n Error: {e}") raise e return required_output From f9717c9059fff9a6983c3469f1f60e67760cf348 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Tue, 16 Jan 2024 18:13:02 -0600 Subject: [PATCH 070/122] Update metadata.py --- metadata.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metadata.py b/metadata.py index 3a08e2ba55..c3c1597b49 100644 --- a/metadata.py +++ b/metadata.py @@ -5,8 +5,8 @@ __short_description__ = "Python 3 package for free/libre and open-source software community metrics, models & data collection" -__version__ = "0.60.2" -__release__ = "v0.60.2 (Swifty Kelce)" +__version__ = "0.62.0" +__release__ = "v0.62.0 (AI for Pets)" __license__ = "MIT" -__copyright__ = "University of Missouri, University of Nebraska-Omaha, CHAOSS, Brian Warner & Augurlabs 2023" +__copyright__ = "University of Missouri, University of Nebraska-Omaha, CHAOSS, Brian Warner & Augurlabs 2024" From db449ca993f6e4e77219992dfd8e897f981a8376 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Tue, 16 Jan 2024 18:13:51 -0600 Subject: [PATCH 071/122] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ad08374738..3b4d7f4ddc 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Augur NEW Release v0.60.5 +# Augur NEW Release v0.62.0 CHECK out Augur's Public Instance at https://ai.chaoss.io @@ -9,7 +9,7 @@ CHECK out Augur's Public Instance at https://ai.chaoss.io ## NEW RELEASE ALERT! ### [If you want to jump right in, updated docker build/compose and bare metal installation instructions are available here](docs/new-install.md) -Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.60.5 +Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.62.0 - The `main` branch is a stable version of our new architecture, which features: - Dramatic improvement in the speed of large scale data collection (100,000+ repos). All data is obtained for 100k+ repos within 2 weeks. - A new job management architecture that uses Celery and Redis to manage queues, and enables users to run a Flower job monitoring dashboard From ad0985a3af37c5377aac53faf5acbbb00313014d Mon Sep 17 00:00:00 2001 From: "Sean P. 
Goggins" Date: Tue, 16 Jan 2024 18:14:12 -0600 Subject: [PATCH 072/122] Update Dockerfile --- docker/backend/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index b16deb4b34..a957609265 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -2,7 +2,7 @@ FROM python:3.8.11-slim-buster LABEL maintainer="outdoors@acm.org" -LABEL version="0.53.1" +LABEL version="0.62.0" ENV DEBIAN_FRONTEND=noninteractive From 0a43a506e7ab57af35b1379638737639ef83394c Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Tue, 16 Jan 2024 18:14:28 -0600 Subject: [PATCH 073/122] Update Dockerfile --- docker/database/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/database/Dockerfile b/docker/database/Dockerfile index 5a91307297..610df5b278 100644 --- a/docker/database/Dockerfile +++ b/docker/database/Dockerfile @@ -2,7 +2,7 @@ FROM postgres:12 LABEL maintainer="outdoors@acm.org" -LABEL version="0.53.1" +LABEL version="0.62.0" ENV POSTGRES_DB "test" ENV POSTGRES_USER "augur" From 6164e1cd1b4e87afb1d4c0f90e995662e6a7d8c3 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Wed, 17 Jan 2024 11:10:41 -0600 Subject: [PATCH 074/122] add GitHub workflow to run linting checks, namely Pylint for now Signed-off-by: Isaac Milarsky --- .github/workflows/checks.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/workflows/checks.yml diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml new file mode 100644 index 0000000000..cad5ee8065 --- /dev/null +++ b/.github/workflows/checks.yml @@ -0,0 +1,19 @@ +name: "run-linting-checks" +on: + pull_request: + branches: [main, dev] + +jobs: + run-pylint: + name: runner / pylint + permissions: write-all + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: dciborow/action-pylint@0.1.0 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + reporter: github-pr-review + level: warning + glob_pattern: "**/*.py" + filter_mode: "file" \ No newline at end of file From eaeb4e9418bff890193ed6ec12adbcae1fc9bb3f Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Wed, 17 Jan 2024 15:40:54 -0600 Subject: [PATCH 075/122] also add misspell check Signed-off-by: Isaac Milarsky --- .github/workflows/checks.yml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index cad5ee8065..c23bfd7bb3 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -16,4 +16,16 @@ jobs: reporter: github-pr-review level: warning glob_pattern: "**/*.py" - filter_mode: "file" \ No newline at end of file + filter_mode: "file" + + misspell: + name: runner / misspell + runs-on: ubuntu-latest + steps: + - name: Highlight any misspellings in changes. 
+ uses: actions/checkout@v4 + - name: misspell + uses: reviewdog/action-misspell@v1 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + locale: "US" \ No newline at end of file From ae75bef83e5f04c91b16157f09151d31d9792c12 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 20 Jan 2024 14:37:57 -0600 Subject: [PATCH 076/122] Fix null string in releases insert --- augur/tasks/github/releases/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/augur/tasks/github/releases/core.py b/augur/tasks/github/releases/core.py index f3050fc1b3..5957d4cb57 100644 --- a/augur/tasks/github/releases/core.py +++ b/augur/tasks/github/releases/core.py @@ -84,7 +84,8 @@ def insert_release(augur_db, logger, repo_id, owner, release, tag_only = False): release_inf = get_release_inf(repo_id, release, tag_only) #Do an upsert - augur_db.insert_data(release_inf,Release,['release_id']) + string_fields = ["release_name", "release_description", "release_author", "release_tag_name"] + augur_db.insert_data(release_inf,Release,['release_id'], string_fields=string_fields) logger.info(f"Inserted info for {owner}/{repo_id}/{release['name']}\n") From 80d0c9d39ce9423d31a8f32ef5de029b0fd59dc2 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 20 Jan 2024 14:40:06 -0600 Subject: [PATCH 077/122] Fix messages NoneType error --- augur/tasks/github/messages/tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/augur/tasks/github/messages/tasks.py b/augur/tasks/github/messages/tasks.py index 6e23434bae..4dfd3a634b 100644 --- a/augur/tasks/github/messages/tasks.py +++ b/augur/tasks/github/messages/tasks.py @@ -187,7 +187,8 @@ def process_messages(messages, task_name, repo_id, logger, augur_db): message_string_fields = ["msg_text"] message_return_data = augur_db.insert_data(message_dicts, Message, message_natural_keys, return_columns=message_return_columns, string_fields=message_string_fields) - + if message_return_data is None: + return pr_message_ref_dicts = [] issue_message_ref_dicts = [] From c2d97ab8f11d491333c88328d02437c6d99e2011 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 22 Jan 2024 19:25:14 -0600 Subject: [PATCH 078/122] Fix issue where table isn't passed into recursive call Signed-off-by: Andrew Brain --- augur/application/db/session.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/augur/application/db/session.py b/augur/application/db/session.py index f1d1e64dd0..a943f92fe8 100644 --- a/augur/application/db/session.py +++ b/augur/application/db/session.py @@ -240,6 +240,9 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s self.insert_data(first_half, natural_keys, return_columns, string_fields, on_conflict_update) self.insert_data(second_half, natural_keys, return_columns, string_fields, on_conflict_update) + self.insert_data(first_half, table, natural_keys, return_columns, string_fields, on_conflict_update) + self.insert_data(second_half, table, natural_keys, return_columns, string_fields, on_conflict_update) + else: self.logger.error("Unable to insert and return data in 10 attempts") return None From ef169bec07bf055b418ace9b75d06ced76110dd3 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 22 Jan 2024 19:42:36 -0600 Subject: [PATCH 079/122] Add 5 second delay before we retry Signed-off-by: Andrew Brain --- augur/application/db/session.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/augur/application/db/session.py b/augur/application/db/session.py index a943f92fe8..e4a358f346 100644 --- 
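+      # checkout runs first so the misspell action below has repository files to scan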
a/augur/application/db/session.py +++ b/augur/application/db/session.py @@ -197,6 +197,7 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s if(len(data) == 1): raise e else: + time.sleep(5) first_half = data[:len(data)//2] second_half = data[len(data)//2:] @@ -234,6 +235,7 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s if(len(data) == 1): raise e else: + time.sp first_half = data[:len(data)//2] second_half = data[len(data)//2:] From 888dd23350817bf985b6829d61ecd04cb17e3eb2 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 22 Jan 2024 19:43:07 -0600 Subject: [PATCH 080/122] Change to 3 second delay Signed-off-by: Andrew Brain --- augur/application/db/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/application/db/session.py b/augur/application/db/session.py index e4a358f346..0d98999de2 100644 --- a/augur/application/db/session.py +++ b/augur/application/db/session.py @@ -197,7 +197,7 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s if(len(data) == 1): raise e else: - time.sleep(5) + time.sleep(3) first_half = data[:len(data)//2] second_half = data[len(data)//2:] From eb3887feb2dbf7076571064583d3b1d2091ac21a Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 22 Jan 2024 19:57:33 -0600 Subject: [PATCH 081/122] Fix issues suggested by linter Signed-off-by: Andrew Brain --- augur/application/db/session.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/augur/application/db/session.py b/augur/application/db/session.py index 0d98999de2..5d11b3b11c 100644 --- a/augur/application/db/session.py +++ b/augur/application/db/session.py @@ -196,13 +196,13 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s #self.logger.info(e) if(len(data) == 1): raise e - else: - time.sleep(3) - first_half = data[:len(data)//2] - second_half = data[len(data)//2:] + + time.sleep(3) + first_half = data[:len(data)//2] + second_half = data[len(data)//2:] - self.insert_data(first_half, table,natural_keys, return_columns, string_fields, on_conflict_update) - self.insert_data(second_half,table, natural_keys, return_columns, string_fields, on_conflict_update) + self.insert_data(first_half, table, natural_keys, return_columns, string_fields, on_conflict_update) + self.insert_data(second_half,table, natural_keys, return_columns, string_fields, on_conflict_update) else: self.logger.error("Unable to insert data in 10 attempts") @@ -232,18 +232,15 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s raise e except Exception as e: - if(len(data) == 1): + if len(data) == 1: raise e - else: - time.sp - first_half = data[:len(data)//2] - second_half = data[len(data)//2:] - - self.insert_data(first_half, natural_keys, return_columns, string_fields, on_conflict_update) - self.insert_data(second_half, natural_keys, return_columns, string_fields, on_conflict_update) + + time.sleep(3) + first_half = data[:len(data)//2] + second_half = data[len(data)//2:] - self.insert_data(first_half, table, natural_keys, return_columns, string_fields, on_conflict_update) - self.insert_data(second_half, table, natural_keys, return_columns, string_fields, on_conflict_update) + self.insert_data(first_half, table, natural_keys, return_columns, string_fields, on_conflict_update) + self.insert_data(second_half, table, natural_keys, return_columns, string_fields, on_conflict_update) else: 
self.logger.error("Unable to insert and return data in 10 attempts") From 51c58e6ef280814ab7efe0363841d3279fd11ee8 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 22 Jan 2024 19:58:58 -0600 Subject: [PATCH 082/122] Fix other issues suggested by linter Signed-off-by: Andrew Brain --- augur/application/db/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/application/db/session.py b/augur/application/db/session.py index 5d11b3b11c..22379ad050 100644 --- a/augur/application/db/session.py +++ b/augur/application/db/session.py @@ -194,7 +194,7 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s except Exception as e: #self.logger.info(e) - if(len(data) == 1): + if len(data) == 1: raise e time.sleep(3) From c770c36c9d1ac2353134c853890eecd8a246cbb7 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Tue, 23 Jan 2024 09:01:58 -0600 Subject: [PATCH 083/122] Update Dockerfile Python 3.10 Dockerfile --- docker/backend/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index a957609265..a1bfc37540 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -1,5 +1,5 @@ #SPDX-License-Identifier: MIT -FROM python:3.8.11-slim-buster +FROM python:3.10-slim-bullseye LABEL maintainer="outdoors@acm.org" LABEL version="0.62.0" From 25ebabb5fcd72a48fa3f64b94d0ff36ab7155b1c Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Tue, 23 Jan 2024 09:02:30 -0600 Subject: [PATCH 084/122] Update Dockerfile Postgresql 14 Dockerfile --- docker/database/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/database/Dockerfile b/docker/database/Dockerfile index 610df5b278..1670599754 100644 --- a/docker/database/Dockerfile +++ b/docker/database/Dockerfile @@ -1,5 +1,5 @@ #SPDX-License-Identifier: MIT -FROM postgres:12 +FROM postgres:14 LABEL maintainer="outdoors@acm.org" LABEL version="0.62.0" From 9ab76e756e4db1ef45808b34e01983467c56393a Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 23 Jan 2024 10:16:56 -0600 Subject: [PATCH 085/122] address linting errors in data_parse.py Signed-off-by: Isaac Milarsky --- .pylintrc | 2 +- augur/api/view/routes.py | 1 + augur/api/view/utils.py | 1 + augur/application/db/data_parse.py | 301 ++++++++++++++++++++++- augur/tasks/gitlab/issues_task.py | 2 +- augur/tasks/gitlab/merge_request_task.py | 4 +- 6 files changed, 298 insertions(+), 13 deletions(-) diff --git a/.pylintrc b/.pylintrc index 0b1b7d2049..0056af873b 100644 --- a/.pylintrc +++ b/.pylintrc @@ -12,7 +12,7 @@ #refactoring checker #enable=R -disable=E0611,E1101,W1203,R0801,W0614,W0611,C0411,C0103,C0301,C0303,C0304,C0305,W0311 +disable=E0611,E1101,W1203,R0801,W0614,W0611,C0411,C0103,C0301,C0303,C0304,C0305,W0311,E0401 # Analyse import fallback blocks. 
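
Taken together, the session.py fixes above (patches 078 through 081) converge on a halve-and-retry strategy for bulk inserts: when a batch fails, pause briefly, then retry each half recursively until a single failing row surfaces the real error. A minimal sketch of that pattern, assuming only a bulk `insert` callable that raises on failure (the function and parameter names here are illustrative, not Augur's actual API):

    import time

    def insert_with_split_retry(rows, insert, delay=3):
        """Bulk-insert rows; on failure, wait briefly and retry each half recursively."""
        try:
            insert(rows)
        except Exception:
            if len(rows) == 1:
                raise  # a single failing row is a genuine error; let it surface
            time.sleep(delay)  # give a transient failure (lock contention, deadlock) time to clear
            mid = len(rows) // 2
            insert_with_split_retry(rows[:mid], insert, delay)
            insert_with_split_retry(rows[mid:], insert, delay)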
This can be used to support both Python 2 and
diff --git a/augur/api/view/routes.py b/augur/api/view/routes.py
index bf6e8fc056..0c47afbc2e 100644
--- a/augur/api/view/routes.py
+++ b/augur/api/view/routes.py
@@ -1,4 +1,5 @@
 import logging
+import math
 from flask import Flask, render_template, render_template_string, request, abort, jsonify, redirect, url_for, session, flash
 from sqlalchemy.orm.exc import NoResultFound
 from .utils import *
diff --git a/augur/api/view/utils.py b/augur/api/view/utils.py
index 7712873b55..043dd44831 100644
--- a/augur/api/view/utils.py
+++ b/augur/api/view/utils.py
@@ -1,6 +1,7 @@
 from pathlib import Path
 from concurrent.futures import ThreadPoolExecutor
 from flask import render_template, flash, url_for, Flask
+from .init import init_logging
 from .init import *
 from ..server import app, db_session
 from augur.application.config import AugurConfig
diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py
index 2d5c51a899..088c482988 100644
--- a/augur/application/db/data_parse.py
+++ b/augur/application/db/data_parse.py
@@ -39,6 +39,20 @@ def extract_needed_pr_label_data(labels: List[dict], repo_id: int, tool_source:
 
 
 def extract_needed_mr_label_data(labels: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+    """
+    Retrieve only the needed data for mr labels from the api response
+
+    Arguments:
+        labels: List of dictionaries of label data
+        repo_id: augur id of the repository
+        tool_source: The part of augur that processed the data
+        tool_version: The version of the augur task that processed the data
+        data_source: The source of the data
+
+
+    Returns:
+        List of parsed label dicts
+    """
 
     if len(labels) == 0:
         return []
@@ -65,8 +79,21 @@ def extract_needed_mr_label_data(labels: List[dict], repo_id: int, tool_source:
 
     return label_dicts
 
-# retrieve only the needed data for pr assignees from the api response
 def extract_needed_pr_assignee_data(assignees: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+    """
+    Retrieve only the needed data for pr assignees from the api response
+
+    Arguments:
+        assignees: List of dictionaries of assignee data
+        repo_id: augur id of the repository
+        tool_source: The part of augur that processed the data
+        tool_version: The version of the augur task that processed the data
+        data_source: The source of the data
+
+
+    Returns:
+        List of parsed assignee dicts
+    """
 
     if len(assignees) == 0:
         return []
@@ -89,6 +116,20 @@ def extract_needed_pr_assignee_data(assignees: List[dict], repo_id: int, tool_so
     return assignee_dicts
 
 def extract_needed_merge_request_assignee_data(assignees: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+    """
+    Retrieve only the needed data for merge request assignees from the api response
+
+    Arguments:
+        assignees: List of dictionaries of assignee data
+        repo_id: augur id of the repository
+        tool_source: The part of augur that processed the data
+        tool_version: The version of the augur task that processed the data
+        data_source: The source of the data
+
+
+    Returns:
+        List of parsed assignee dicts
+    """
 
     if len(assignees) == 0:
         return []
@@ -112,8 +153,21 @@ def extract_needed_merge_request_assignee_data(assignees: List[dict], repo_id: i
 
 
-# retrieve only the needed data for pr reviewers from the api response
 def extract_needed_pr_reviewer_data(reviewers: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+    """
+    Retrieve only the needed data for pr reviewers from the api response
+
+    Arguments:
+        reviewers: List of dictionaries of reviewer data
+        repo_id: augur id of the repository
+        tool_source: The part of augur that processed the data
+        tool_version: The version of the augur task that processed the data
+        data_source: The source of the data
+
+
+    Returns:
+        List of parsed reviewer dicts
+    """
 
     if len(reviewers) == 0:
         return []
@@ -299,6 +353,20 @@ def extract_needed_issue_assignee_data(assignees: List[dict], repo_id: int, tool
     return assignee_dicts
 
 def extract_needed_gitlab_issue_assignee_data(assignees: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+    """
+    Retrieve only the needed data for gitlab issue assignees from the api response
+
+    Arguments:
+        assignees: List of dictionaries of gitlab assignee data
+        repo_id: augur id of the repository
+        tool_source: The part of augur that processed the data
+        tool_version: The version of the augur task that processed the data
+        data_source: The source of the data
+
+
+    Returns:
+        List of parsed assignee dicts
+    """
 
     if len(assignees) == 0:
         return []
@@ -351,6 +419,20 @@ def extract_needed_issue_label_data(labels: List[dict], repo_id: int, tool_sourc
 
 
 def extract_needed_gitlab_issue_label_data(labels: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+    """
+    Retrieve only the needed data for gitlab issue labels from the api response
+
+    Arguments:
+        labels: List of dictionaries of gitlab issue label data
+        repo_id: augur id of the repository
+        tool_source: The part of augur that processed the data
+        tool_version: The version of the augur task that processed the data
+        data_source: The source of the data
+
+
+    Returns:
+        List of parsed label dicts
+    """
 
     if len(labels) == 0:
         return []
@@ -376,8 +458,22 @@ def extract_needed_gitlab_issue_label_data(labels: List[dict], repo_id: int, too
 
 
 
-# retrieve only the needed data for pr labels from the api response
 def extract_needed_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]:
+    """
+    Retrieve only the needed data for issue message refs from the api response
+
+    Arguments:
+        message: Message data dict
+        issue_id: id of the issue
+        repo_id: augur id of the repository
+        tool_source: The part of augur that processed the data
+        tool_version: The version of the augur task that processed the data
+        data_source: The source of the data
+
+
+    Returns:
+        Dict of message ref data.
+    """
tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: platform source + + + Returns: + Parsed event dict + """ issue_event = { "issue_event_src_id": event['target_id'], @@ -732,8 +913,21 @@ def extract_gitlab_issue_event_data(event: dict, issue_id: int, platform_id: int return issue_event -# retrieve only the needed data for pr reviewers from the api response -def extract_needed_mr_reviewer_data(data: List[dict], pull_request_id, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: +def extract_needed_mr_reviewer_data(data: List[dict], pull_request_id, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + """ + Retrieve only the needed data for pr reviewers from the api response + + Arguments: + data: List of dictionaries that contain mr reviewer data to parse + pull_request_id: id of the PR + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + + Returns: + List of extracted relevent data from needed mr reviwer data + """ if len(data) == 0: return [] @@ -741,7 +935,7 @@ def extract_needed_mr_reviewer_data(data: List[dict], pull_request_id, repo_id: reviewer_dicts = [] for x in data: - for reviewer in x["suggested_approvers"]: + for _ in x["suggested_approvers"]: reviewer_dict = { 'pull_request_id': pull_request_id, @@ -757,6 +951,21 @@ def extract_needed_mr_reviewer_data(data: List[dict], pull_request_id, repo_id: def extract_needed_mr_commit_data(commit, repo_id, pull_request_id, tool_source, tool_version, data_source): + """ + Retrieve only the needed data for mr commit data from the api response + + Arguments: + commit: commit data dictionary + repo_id: augur id of the repository + pull_request_id: id of the PR + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + + Returns: + Dictionary of the extracted commit data + """ commit = { 'pull_request_id': pull_request_id, @@ -773,7 +982,21 @@ def extract_needed_mr_commit_data(commit, repo_id, pull_request_id, tool_source, def extract_needed_mr_file_data(gitlab_file_data, repo_id, pull_request_id, tool_source, tool_version, data_source): + """ + Retrieve only the needed data for mr file data from the api response + + Arguments: + gitlab_file_data: file data dictionary + repo_id: augur id of the repository + pull_request_id: id of the PR + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + Returns: + List of dicts of parsed gitlab file changes + """ files = [] changes = gitlab_file_data["changes"] @@ -781,7 +1004,7 @@ def extract_needed_mr_file_data(gitlab_file_data, repo_id, pull_request_id, tool try: deletes = int(file_changes['diff'].split('@@')[1].strip().split(' ')[0].split(',')[1]) adds = int(file_changes['diff'].split('@@')[1].strip().split(' ')[1].split(',')[1]) - except: + except Exception: deletes = 0 adds = 0 @@ -802,7 +1025,21 @@ def extract_needed_mr_file_data(gitlab_file_data, repo_id, pull_request_id, tool def extract_needed_mr_metadata(mr_dict, repo_id, pull_request_id, tool_source, tool_version, data_source): + """ + Retrieve only the needed data for mr metadata from the api response + + Arguments: + mr_dict: mr data dictionary + repo_id: augur id of the 
repository + pull_request_id: id of the PR + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + Returns: + List of dicts of parsed mr metadata + """ head = {'sha': mr_dict['diff_refs']['head_sha'], 'ref': mr_dict['target_branch'], 'label': str(mr_dict['target_project_id']) + ':' + mr_dict['target_branch'], @@ -841,6 +1078,22 @@ def extract_needed_mr_metadata(mr_dict, repo_id, pull_request_id, tool_source, t def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + """ + Extract the message id for a given message on an issue from an api response + and connect it to the relevent repo id. + + Arguments: + message: message data dict + issue_id: id of the issue + repo_id: augur id of the repository + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + + Returns: + Dict containing the message ref id as well as the repo id. + """ message_ref_dict = { 'issue_id': issue_id, @@ -855,7 +1108,22 @@ def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, r return message_ref_dict -def extract_needed_gitlab_message_data(comment: dict, platform_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str): +def extract_needed_gitlab_message_data(comment: dict, platform_id: int, tool_source: str, tool_version: str, data_source: str): + """ + Extract specific metadata for a comment from an api response + and connect it to the relevent platform id. + + Arguments: + comment: comment data dict + platform_id: augur id of the relevant platform + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + + Returns: + Dict containing parsed comment text and metadata + """ comment_dict = { "pltfrm_id": platform_id, @@ -870,8 +1138,23 @@ def extract_needed_gitlab_message_data(comment: dict, platform_id: int, repo_id: return comment_dict -# retrieve only the needed data for pr labels from the api response def extract_needed_gitlab_mr_message_ref_data(comment: dict, pull_request_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + """ + Retrieve only the needed data for pr labels from the api response + + Arguments: + comment: comment data dict + pull_request_id: id of the PR + repo_id: augur id of the repository + platform_id: augur id of the relevant platform + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + + Returns: + Dict containing the comment, pr and repo id of the parsed comment data. 
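
The hunk-header arithmetic in extract_needed_mr_file_data above is easier to follow on a concrete value. A minimal sketch, using a hypothetical GitLab diff string: the two numbers pulled out are the spans of the old and new hunks, and single-line hunks such as "@@ -0,0 +1 @@" omit the comma entirely, raising IndexError, which is what the patch's try/except fallback to zero guards against.

    # Sketch only: sample_diff below is a hypothetical diff string, not data
    # from the patch.
    sample_diff = "@@ -10,7 +10,9 @@ def example():\n-old\n+new\n+newer"

    header = sample_diff.split('@@')[1].strip()     # "-10,7 +10,9"
    old_span = header.split(' ')[0]                 # "-10,7"
    new_span = header.split(' ')[1]                 # "+10,9"

    deletes = int(old_span.split(',')[1])           # 7, length of the old span
    adds = int(new_span.split(',')[1])              # 9, length of the new span
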
+ """ pr_msg_ref = { 'pull_request_id': pull_request_id, diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py index 7f0c7787ee..43b9c62942 100644 --- a/augur/tasks/gitlab/issues_task.py +++ b/augur/tasks/gitlab/issues_task.py @@ -238,7 +238,7 @@ def process_gitlab_issue_messages(data, task_name, repo_id, logger, augur_db): } message_dicts.append( - extract_needed_gitlab_message_data(message, platform_id, repo_id, tool_source, tool_version, data_source) + extract_needed_gitlab_message_data(message, platform_id, tool_source, tool_version, data_source) ) diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index 5672a79895..5a183d0b0b 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -184,7 +184,7 @@ def process_gitlab_mr_messages(data, task_name, repo_id, logger, augur_db): } message_dicts.append( - extract_needed_gitlab_message_data(message, platform_id, repo_id, tool_source, tool_version, data_source) + extract_needed_gitlab_message_data(message, platform_id, tool_source, tool_version, data_source) ) @@ -299,7 +299,7 @@ def process_mr_reviewers(data, task_name, repo_id, logger, augur_db): pull_request_id = mr_number_to_id_map[id] - reviewers = extract_needed_mr_reviewer_data(values, pull_request_id, repo_id, tool_source, tool_version, data_source) + reviewers = extract_needed_mr_reviewer_data(values, pull_request_id, tool_source, tool_version, data_source) all_reviewers += reviewers From b166fd6b466ad3ac94baf6eeb33de24c4db92c43 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 23 Jan 2024 10:24:22 -0600 Subject: [PATCH 086/122] fix typos and add module doc-strings Signed-off-by: Isaac Milarsky --- augur/api/view/routes.py | 3 +++ augur/api/view/utils.py | 3 +++ augur/application/db/data_parse.py | 6 +++--- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/augur/api/view/routes.py b/augur/api/view/routes.py index 0c47afbc2e..3a3fcccd03 100644 --- a/augur/api/view/routes.py +++ b/augur/api/view/routes.py @@ -1,3 +1,6 @@ +""" +Defines the api routes for the augur views +""" import logging import math from flask import Flask, render_template, render_template_string, request, abort, jsonify, redirect, url_for, session, flash diff --git a/augur/api/view/utils.py b/augur/api/view/utils.py index 043dd44831..298e9950ae 100644 --- a/augur/api/view/utils.py +++ b/augur/api/view/utils.py @@ -1,3 +1,6 @@ +""" +Defines utility functions used by the augur api views +""" from pathlib import Path from concurrent.futures import ThreadPoolExecutor from flask import render_template, flash, url_for, Flask diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index 088c482988..7562181398 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -926,7 +926,7 @@ def extract_needed_mr_reviewer_data(data: List[dict], pull_request_id, tool_sour Returns: - List of extracted relevent data from needed mr reviwer data + List of extracted relevant data from needed mr reviwer data """ if len(data) == 0: @@ -1080,7 +1080,7 @@ def extract_needed_mr_metadata(mr_dict, repo_id, pull_request_id, tool_source, t def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: """ Extract the message id for a given message on an issue from an api response - and connect it to the relevent repo id. 
+ and connect it to the relevant repo id. Arguments: message: message data dict @@ -1111,7 +1111,7 @@ def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, r def extract_needed_gitlab_message_data(comment: dict, platform_id: int, tool_source: str, tool_version: str, data_source: str): """ Extract specific metadata for a comment from an api response - and connect it to the relevent platform id. + and connect it to the relevant platform id. Arguments: comment: comment data dict From 29247b9b3fd7e2172b59b0621295f218f08c5ec1 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Tue, 23 Jan 2024 10:31:56 -0600 Subject: [PATCH 087/122] Update setup.py Working on docker build --- setup.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/setup.py b/setup.py index d2a149d93b..896813a8cc 100644 --- a/setup.py +++ b/setup.py @@ -15,6 +15,11 @@ exec(open("metadata.py").read()) +# Make sure pip and wheel are up to date + +python3 -m ensurepip --default-pip +python3 -m pip install --upgrade pip setuptools wheel + setup( name=__slug__, version=__version__, From 3aec047951721719174445f301fec1ae02c82bdf Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Tue, 23 Jan 2024 10:37:00 -0600 Subject: [PATCH 088/122] Update setup.py Setup.py updates for docker. --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 896813a8cc..ff34552519 100644 --- a/setup.py +++ b/setup.py @@ -17,8 +17,8 @@ # Make sure pip and wheel are up to date -python3 -m ensurepip --default-pip -python3 -m pip install --upgrade pip setuptools wheel +exec(python3 -m ensurepip --default-pip) +exec(python3 -m pip install --upgrade pip setuptools wheel) setup( name=__slug__, From fbcc02d0cfefc6a95bc2bd9f8ef12fb178fed3c2 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Tue, 23 Jan 2024 10:44:23 -0600 Subject: [PATCH 089/122] Update setup.py Wheel install --- setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index ff34552519..7531c523d5 100644 --- a/setup.py +++ b/setup.py @@ -15,10 +15,10 @@ exec(open("metadata.py").read()) -# Make sure pip and wheel are up to date - -exec(python3 -m ensurepip --default-pip) -exec(python3 -m pip install --upgrade pip setuptools wheel) +RUN pip install -U \ + pip \ + setuptools \ + wheel setup( name=__slug__, From 70576754fd091bfef6df9a84d2bbe54fa01b7a1d Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Tue, 23 Jan 2024 10:47:27 -0600 Subject: [PATCH 090/122] Update setup.py Wheel --- setup.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 7531c523d5..e0a8a43204 100644 --- a/setup.py +++ b/setup.py @@ -6,6 +6,7 @@ """ from setuptools import setup, find_packages from os import path +import wheel here = path.abspath(path.dirname(__file__)) @@ -15,11 +16,6 @@ exec(open("metadata.py").read()) -RUN pip install -U \ - pip \ - setuptools \ - wheel - setup( name=__slug__, version=__version__, From 8ba219590cf23f41ef4a71dbfedf6412075cbf7a Mon Sep 17 00:00:00 2001 From: "Sean P. 
Goggins" Date: Tue, 23 Jan 2024 10:50:21 -0600 Subject: [PATCH 091/122] Update setup.py --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index e0a8a43204..d2a149d93b 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,6 @@ """ from setuptools import setup, find_packages from os import path -import wheel here = path.abspath(path.dirname(__file__)) From 251e534a5f0022eff20dd263995dc56c9a904a41 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Tue, 23 Jan 2024 10:57:05 -0600 Subject: [PATCH 092/122] Update Dockerfile python 3.10 test --- docker/backend/Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index a1bfc37540..13854d9fd1 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -13,7 +13,8 @@ RUN set -x \ bash \ curl \ gcc \ - python3-pip \ + python3.10-distutils \ + python3.10 -m pip _rest_of_the_pip \ wget \ postgresql-client \ && rm -rf /var/lib/apt/lists/* @@ -29,7 +30,7 @@ COPY ./setup.py . COPY ./scripts/ scripts/ #COPY ./docker/backend/docker.config.json . -RUN python3 -m venv /opt/venv +RUN python310 -m venv /opt/venv RUN set -x \ && /opt/venv/bin/pip install . From d03dca7b35e411a7ce1726e80de28c5a179eba00 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Tue, 23 Jan 2024 10:58:34 -0600 Subject: [PATCH 093/122] Update Dockerfile wheel --- docker/backend/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 13854d9fd1..57c615d8f2 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -13,8 +13,8 @@ RUN set -x \ bash \ curl \ gcc \ - python3.10-distutils \ - python3.10 -m pip _rest_of_the_pip \ + python3-distutils \ + python3 -m pip _rest_of_the_pip \ wget \ postgresql-client \ && rm -rf /var/lib/apt/lists/* From 7f205db4869d2532d25dc1b05562477d961481db Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 23 Jan 2024 11:25:51 -0600 Subject: [PATCH 094/122] add docs to pull_requests/tasks.py Signed-off-by: Isaac Milarsky --- augur/tasks/github/pull_requests/tasks.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 478260bcd7..8db394754c 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -76,7 +76,16 @@ def retrieve_all_pr_data(repo_git: str, logger, key_auth) -> None: def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db): - + """ + Parse and insert all retrieved PR data. 
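
Every process_* function in these patches opens with the same provenance triple, which is then attached to each parsed row so the origin of the data can be audited later. A small illustrative sketch; the constants are the ones set in this hunk, while the helper name is hypothetical.

    PR_PROVENANCE = {
        "tool_source": "Pr Task",      # which augur task produced the row
        "tool_version": "2.0",
        "data_source": "Github API",
    }

    def with_provenance(row: dict) -> dict:
        # merge the provenance fields into a parsed row before insertion
        return {**row, **PR_PROVENANCE}
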
+
+    Arguments:
+        pull_requests: List of paginated pr endpoint data
+        task_name: Name of the calling task and the repo
+        repo_id: augur id of the repository
+        logger: logging object
+        augur_db: sqlalchemy db object
+    """
     tool_source = "Pr Task"
     tool_version = "2.0"
     data_source = "Github API"

From 9273baac4c842b1d9aa42ccd49d656329c326826 Mon Sep 17 00:00:00 2001
From: Isaac Milarsky
Date: Tue, 23 Jan 2024 11:43:48 -0600
Subject: [PATCH 095/122] apply linting to events_task.py for gitlab methods

Signed-off-by: Isaac Milarsky
---
 augur/tasks/github/util/util.py   | 11 ++++++
 augur/tasks/gitlab/events_task.py | 61 ++++++++++++++++++++++++++++---
 2 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/augur/tasks/github/util/util.py b/augur/tasks/github/util/util.py
index 0400b82e1a..42989dcca3 100644
--- a/augur/tasks/github/util/util.py
+++ b/augur/tasks/github/util/util.py
@@ -58,6 +58,17 @@ def parse_json_response(logger: logging.Logger, response: httpx.Response) -> dic
     return json.loads(json.dumps(response.text))
 
 def get_repo_weight_by_issue(logger,repo_git):
+    """
+    Retrieve the sum of the number of issues and prs in a repository from a graphql query.
+
+    Arguments:
+        logger: logger object
+        repo_git: repository url
+
+    Returns:
+        Sum of issues and prs for that repo
+    """
+
     from augur.tasks.github.util.gh_graphql_entities import GitHubRepo as GitHubRepoGraphql
 
     owner,name = get_owner_repo(repo_git)
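
The graphql query itself sits outside this hunk. A minimal, self-contained sketch of the quantity the docstring describes, assuming a plain httpx call against GitHub's GraphQL endpoint rather than the GitHubRepoGraphql wrapper the hunk imports:

    import httpx

    WEIGHT_QUERY = """
    query($owner: String!, $name: String!) {
      repository(owner: $owner, name: $name) {
        issues { totalCount }
        pullRequests { totalCount }
      }
    }
    """

    def repo_weight_by_issue(owner: str, name: str, token: str) -> int:
        # one round trip returns both counts; the weight is their sum
        response = httpx.post(
            "https://api.github.com/graphql",
            json={"query": WEIGHT_QUERY, "variables": {"owner": owner, "name": name}},
            headers={"Authorization": f"token {token}"},
            timeout=30,
        )
        repo = response.json()["data"]["repository"]
        return repo["issues"]["totalCount"] + repo["pullRequests"]["totalCount"]
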
diff --git a/augur/tasks/gitlab/events_task.py b/augur/tasks/gitlab/events_task.py
index 4224988b9f..8058831ba3 100644
--- a/augur/tasks/gitlab/events_task.py
+++ b/augur/tasks/gitlab/events_task.py
@@ -1,3 +1,6 @@
+"""
+Module to define the task methods to collect gitlab event data for augur
+"""
 import logging
 
 from augur.tasks.init.celery_app import celery_app as celery
@@ -13,6 +16,12 @@
 
 @celery.task(base=AugurCoreRepoCollectionTask)
 def collect_gitlab_issue_events(repo_git) -> int:
+    """
+    Retrieve and parse gitlab events for the desired repo
+
+    Arguments:
+        repo_git: the repo url string
+    """
 
     owner, repo = get_owner_repo(repo_git)
 
@@ -36,6 +45,13 @@ def collect_gitlab_issue_events(repo_git) -> int:
 
 @celery.task(base=AugurCoreRepoCollectionTask)
 def collect_gitlab_merge_request_events(repo_git) -> int:
+    """
+    Retrieve and parse gitlab merge request events for the desired repo
+
+    Arguments:
+        repo_git: the repo url string
+    """
+
 
     owner, repo = get_owner_repo(repo_git)
 
@@ -57,13 +73,22 @@ def collect_gitlab_merge_request_events(repo_git) -> int:
     logger.info(f"{owner}/{repo} has no gitlab merge request events")
 
 
-def retrieve_all_gitlab_event_data(type, repo_git, logger, key_auth) -> None:
+def retrieve_all_gitlab_event_data(gtype, repo_git, logger, key_auth) -> None:
+    """
+    Retrieve all gitlab event data of the given type for the desired repo
+
+    Arguments:
+        gtype: type of event data
+        repo_git: url of the relevant repo
+        logger: logging object
+        key_auth: key auth cache and rotator object
+    """
 
     owner, repo = get_owner_repo(repo_git)
 
     logger.info(f"Collecting gitlab issue events for {owner}/{repo}")
 
-    url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/events?target_type={type}"
+    url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/events?target_type={gtype}"
     events = GitlabApiHandler(key_auth, logger)
 
     all_data = []
@@ -75,18 +100,28 @@ def retrieve_all_gitlab_event_data(type, repo_git, logger, key_auth) -> None:
 
         if len(page_data) == 0:
             logger.debug(
-                f"{owner}/{repo}: Gitlab {type} Events Page {page} contains no data...returning")
-            logger.info(f"{owner}/{repo}: {type} Events Page {page} of {num_pages}")
+                f"{owner}/{repo}: Gitlab {gtype} Events Page {page} contains no data...returning")
+            logger.info(f"{owner}/{repo}: {gtype} Events Page {page} of {num_pages}")
             return all_data
 
-        logger.info(f"{owner}/{repo}: Gitlab {type} Events Page {page} of {num_pages}")
+        logger.info(f"{owner}/{repo}: Gitlab {gtype} Events Page {page} of {num_pages}")
 
         all_data += page_data
 
     return all_data
 
 
 def process_issue_events(events, task_name, repo_id, logger, augur_db):
-    
+    """
+    Parse issue event data from the api response and insert the events for the given repo
+
+    Arguments:
+        events: List of dictionaries of issue event data
+        task_name: name of the task as well as the repo being processed
+        repo_id: augur id of the repo
+        logger: logging object
+        augur_db: sqlalchemy db object
+    """
+    
     tool_source = "Gitlab issue events task"
     tool_version = "2.0"
     data_source = "Gitlab API"
@@ -122,7 +157,21 @@ def process_issue_events(events, task_name, repo_id, logger, augur_db):
 
 
 def process_mr_events(events, task_name, repo_id, logger, augur_db):
+    """
+    Parse merge request event data from the api response and insert the events for the given repo
+
+    Arguments:
+        events: List of dictionaries of mr event data
+        task_name: name of the task as well as the repo being processed
+        repo_id: augur id of the repo
+        logger: logging object
+        augur_db: sqlalchemy db object
+    """
+    
     tool_source = "Gitlab mr events task"
     tool_version = "2.0"
     data_source = "Gitlab API"

From 4cbfa15bcfac78bca9aac2dccae0275e589f9a5c Mon Sep 17 00:00:00 2001
From: Isaac Milarsky
Date: Tue, 23 Jan 2024 11:47:27 -0600
Subject: [PATCH 096/122] unneeded elif

Signed-off-by: Isaac Milarsky
---
 augur/api/view/routes.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/augur/api/view/routes.py b/augur/api/view/routes.py
index 3a3fcccd03..72164a9291 100644
--- a/augur/api/view/routes.py
+++ b/augur/api/view/routes.py
@@ -41,9 +41,9 @@ def root(path=""):
 def logo(brand=None):
     if brand is None:
         return redirect(url_for('static', filename='img/augur_logo.png'))
-    elif "augur" in brand:
+    if "augur" in brand:
         return logo(None)
-    elif "chaoss" in brand:
+    if "chaoss" in brand:
         return redirect(url_for('static', filename='img/Chaoss_Logo_white.png'))
     return ""

From fc3d13b29c0f154f4711d6f29452d06a1633113c Mon Sep 17 00:00:00 2001
From: Isaac Milarsky
Date: Tue, 23 Jan 2024 12:22:54 -0600
Subject: [PATCH 097/122] add doc-strings to gitlab tasks and data structures

Signed-off-by: Isaac Milarsky
---
 augur/tasks/gitlab/gitlab_api_handler.py     |  17 ++-
 augur/tasks/gitlab/gitlab_api_key_handler.py |  13 ++-
 augur/tasks/gitlab/gitlab_random_key_auth.py |   4 +-
 augur/tasks/gitlab/gitlab_task_session.py    |  14 ++-
 augur/tasks/gitlab/issues_task.py            |  56 ++++++++-
 augur/tasks/gitlab/merge_request_task.py     | 115 +++++++++++++++++++
 6 files changed, 209 insertions(+), 10 deletions(-)

diff --git a/augur/tasks/gitlab/gitlab_api_handler.py b/augur/tasks/gitlab/gitlab_api_handler.py
index 8f111d3c48..8735a8318b 100644
--- a/augur/tasks/gitlab/gitlab_api_handler.py
+++ b/augur/tasks/gitlab/gitlab_api_handler.py
@@ -1,4 +1,7 @@
-
+"""
+Defines a GitlabApiHandler class to paginate and handle interaction with GitLab's
+api through automatic use of relevant key auth and pagination tools.
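
A hedged usage sketch for the handler this docstring introduces. The constructor call and get_num_pages(url) are visible in these hunks; the iter_pages name is an assumption inferred from the (page_data, page) loops in the events task above.

    import logging

    from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler

    logger = logging.getLogger(__name__)
    handler = GitlabApiHandler(key_auth, logger)   # key_auth: a GitlabRandomKeyAuth

    url = "https://gitlab.com/api/v4/projects/owner%2frepo/issues"
    num_pages = handler.get_num_pages(url)

    all_data = []
    for page_data, page in handler.iter_pages(url):  # assumed iterator name
        if not page_data:                            # an empty page ends the walk
            break
        logger.info(f"Issues page {page} of {num_pages}")
        all_data += page_data
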
+""" import httpx import time import logging @@ -61,7 +64,7 @@ def get_length(self, url): issue_len = len(issues) """ - num_pages = self.get_num_pages() + num_pages = self.get_num_pages(url) self.logger.info(f"Num pages: {num_pages}") @@ -255,6 +258,16 @@ def get_num_pages(self, url) -> Optional[int]: return num_pages def hit_api(self, url, timeout, method): + """Attempt to retrieve data at given url. + + Args: + url: The url to retrieve the data from + timeout: time to wait until timeout + method: GET, POST, etc. + + Returns + The response object from hitting the url and the data on the page + """ return hit_api(self.key_manager, url, self.logger, timeout, method=method) diff --git a/augur/tasks/gitlab/gitlab_api_key_handler.py b/augur/tasks/gitlab/gitlab_api_key_handler.py index 50efa446f8..20bc1219ca 100644 --- a/augur/tasks/gitlab/gitlab_api_key_handler.py +++ b/augur/tasks/gitlab/gitlab_api_key_handler.py @@ -1,3 +1,8 @@ +""" +Defines the handler logic needed to effectively fetch GitLab auth keys +from either the redis cache or the database. Follows the same patterns as +the github api key handler. +""" import httpx import time import random @@ -11,7 +16,7 @@ class NoValidKeysError(Exception): - pass + """Defines an exception that is thrown when no gitlab keys are valid""" class GitlabApiKeyHandler(): @@ -102,7 +107,9 @@ def get_api_keys(self) -> List[str]: try: keys = self.get_api_keys_from_database() break - except: + except Exception as e: + self.logger.error(f"Ran into issue when fetching key from database:\n {e}\n") + self.logger.error("Sleeping for 5 seconds...") time.sleep(5) attempts += 1 @@ -135,7 +142,7 @@ def get_api_keys(self) -> List[str]: # shuffling the keys so not all processes get the same keys in the same order - valid_now = valid_keys + #valid_now = valid_keys #try: #self.logger.info(f'valid keys before shuffle: {valid_keys}') #valid_keys = random.sample(valid_keys, len(valid_keys)) diff --git a/augur/tasks/gitlab/gitlab_random_key_auth.py b/augur/tasks/gitlab/gitlab_random_key_auth.py index 86ad64b056..64ba31dd19 100644 --- a/augur/tasks/gitlab/gitlab_random_key_auth.py +++ b/augur/tasks/gitlab/gitlab_random_key_auth.py @@ -6,8 +6,8 @@ class GitlabRandomKeyAuth(RandomKeyAuth): - """Defines a github specific RandomKeyAuth class so - github collections can have a class randomly selects an api key for each request + """Defines a gitlab specific RandomKeyAuth class so + gitlab collections can have a class randomly selects an api key for each request """ def __init__(self, session: DatabaseSession, logger): diff --git a/augur/tasks/gitlab/gitlab_task_session.py b/augur/tasks/gitlab/gitlab_task_session.py index 1871e46c50..58a6e64373 100644 --- a/augur/tasks/gitlab/gitlab_task_session.py +++ b/augur/tasks/gitlab/gitlab_task_session.py @@ -1,14 +1,26 @@ +""" +Defines a GitLab-specific session and manifest object for use in GitLab tasks +""" from logging import Logger from augur.tasks.gitlab.gitlab_random_key_auth import GitlabRandomKeyAuth from augur.application.db.session import DatabaseSession class GitlabTaskManifest: + """ + Manifest object that represents the state and common elements of + the specified task. GitLab version for the GitLab tasks. 
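
A sketch of the consuming side of this manifest; the with-block mirrors collect_gitlab_issues in the issues_task hunk below, and the attribute names come from this docstring. The task body is illustrative.

    import logging

    from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest

    logger = logging.getLogger("augur.tasks")

    with GitlabTaskManifest(logger) as manifest:
        augur_db = manifest.augur_db    # DatabaseSession bound to the engine
        key_auth = manifest.key_auth    # GitlabRandomKeyAuth for API requests
        # ...collect data with key_auth, then persist it through augur_db...
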
+
+    Attributes:
+        augur_db: sqlalchemy db object
+        key_auth: GitLab specific key auth retrieval collection
+        logger: logging object
+        platform_id: GitLab specific platform id (github is 1)
+    """
 
     def __init__(self, logger):
 
         from augur.tasks.init.celery_app import engine
-        from augur.application.db.session import DatabaseSession
 
         self.augur_db = DatabaseSession(logger, engine)
         self.key_auth = GitlabRandomKeyAuth(self.augur_db.session, logger)
diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py
index 43b9c62942..cf6e5e5dab 100644
--- a/augur/tasks/gitlab/issues_task.py
+++ b/augur/tasks/gitlab/issues_task.py
@@ -1,3 +1,6 @@
+"""
+Defines the set of tasks used to retrieve GitLab issue data.
+"""
 import logging
 import traceback
 
@@ -14,7 +17,12 @@
 
 @celery.task(base=AugurCoreRepoCollectionTask)
 def collect_gitlab_issues(repo_git : str) -> int:
+    """
+    Retrieve and parse gitlab issues for the desired repo
 
+    Arguments:
+        repo_git: the repo url string
+    """
     logger = logging.getLogger(collect_gitlab_issues.__name__)
 
     with GitlabTaskManifest(logger) as manifest:
@@ -45,6 +53,14 @@ def collect_gitlab_issues(repo_git : str) -> int:
 
 
 def retrieve_all_gitlab_issue_data(repo_git, logger, key_auth) -> None:
+    """
+    Retrieve all issue data for the desired repo from the api
+
+    Arguments:
+        repo_git: url of the relevant repo
+        logger: logging object
+        key_auth: key auth cache and rotator object
+    """
 
     owner, repo = get_owner_repo(repo_git)
 
@@ -73,7 +89,17 @@ def retrieve_all_gitlab_issue_data(repo_git, logger, key_auth) -> None:
     return all_data
 
 def process_issues(issues, task_name, repo_id, logger, augur_db) -> None:
-    
+    """
+    Parse issue data from the api response and insert the issues for the given repo
+
+    Arguments:
+        issues: List of dictionaries of issue data
+        task_name: name of the task as well as the repo being processed
+        repo_id: augur id of the repo
+        logger: logging object
+        augur_db: sqlalchemy db object
+    """
+    
     # get repo_id or have it passed
     tool_source = "Gitlab Issue Task"
     tool_version = "2.0"
@@ -153,6 +179,13 @@ def process_issues(issues, task_name, repo_id, logger, augur_db) -> None:
 
 @celery.task(base=AugurCoreRepoCollectionTask)
 def collect_gitlab_issue_comments(issue_ids, repo_git) -> int:
+    """
+    Retrieve and parse gitlab issue comments for the desired repo
+
+    Arguments:
+        issue_ids: Set of issue ids to collect comments for
+        repo_git: repo url
+    """
 
     owner, repo = get_owner_repo(repo_git)
 
@@ -175,6 +208,15 @@ def collect_gitlab_issue_comments(issue_ids, repo_git) -> int:
 
 
 def retrieve_all_gitlab_issue_comments(key_auth, logger, issue_ids, repo_git):
+    """
+    Retrieve the comment data for the given issues
+
+    Arguments:
+        key_auth: key auth cache and rotator object
+        logger: logging object
+        issue_ids: ids of issues to find comments for
+        repo_git: repo url
+    """
 
     owner, repo = get_owner_repo(repo_git)
 
@@ -186,7 +228,7 @@ def retrieve_all_gitlab_issue_comments(key_auth, logger, issue_ids, repo_git):
 
     for id in issue_ids:
 
-        print(f"Collecting {owner}/{repo} gitlab issue comments for issue {index} of {issue_count}")
+        logger.info(f"Collecting {owner}/{repo} gitlab issue comments for issue {index} of {issue_count}")
 
         url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/issues/{id}/notes"
 
@@ -206,6 +248,16 @@ def retrieve_all_gitlab_issue_comments(key_auth, logger, issue_ids, repo_git):
 
 
 def process_gitlab_issue_messages(data, task_name, repo_id, logger, augur_db):
+    """
+    Parse issue message data from the api response and insert the messages for the given repo
+
+    Arguments:
+        data: List of dictionaries of issue message data
+        task_name: name of the task as well as the repo being processed
+        repo_id: augur id of the repo
+        logger: logging object
+        augur_db: sqlalchemy db object
+    """
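
The per-issue fan-out used above is worth seeing in isolation: one /notes endpoint hit per issue id, each paginated in turn. A sketch, where fetch_all_pages stands in for the GitlabApiHandler pagination and is not a real helper in the patch.

    def collect_issue_comments(owner, repo, issue_ids, fetch_all_pages, logger):
        # hypothetical helper mirroring the loop in retrieve_all_gitlab_issue_comments
        all_comments = {}
        for index, issue_id in enumerate(issue_ids, start=1):
            logger.info(f"Collecting {owner}/{repo} gitlab issue comments "
                        f"for issue {index} of {len(issue_ids)}")
            url = (f"https://gitlab.com/api/v4/projects/"
                   f"{owner}%2f{repo}/issues/{issue_id}/notes")
            all_comments[issue_id] = fetch_all_pages(url)
        return all_comments
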
 
     tool_source = "Gitlab issue comments"
     tool_version = "2.0"
     data_source = "Gitlab API"
diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py
index 5a183d0b0b..a04f055a27 100644
--- a/augur/tasks/gitlab/merge_request_task.py
+++ b/augur/tasks/gitlab/merge_request_task.py
@@ -13,6 +13,12 @@
 
 @celery.task(base=AugurCoreRepoCollectionTask)
 def collect_gitlab_merge_requests(repo_git: str) -> int:
+    """
+    Retrieve and parse gitlab MRs for the desired repo
+
+    Arguments:
+        repo_git: the repo url string
+    """
 
     logger = logging.getLogger(collect_gitlab_merge_requests.__name__)
 
@@ -37,6 +43,14 @@ def collect_gitlab_merge_requests(repo_git: str) -> int:
 
 
 def retrieve_all_mr_data(repo_git: str, logger, key_auth) -> None:
+    """
+    Retrieve all merge request data for the desired repo from the api
+
+    Arguments:
+        repo_git: url of the relevant repo
+        logger: logging object
+        key_auth: key auth cache and rotator object
+    """
 
     owner, repo = get_owner_repo(repo_git)
 
@@ -66,6 +80,19 @@ def retrieve_all_mr_data(repo_git: str, logger, key_auth) -> None:
 
 
 def process_merge_requests(data, task_name, repo_id, logger, augur_db):
+    """
+    Parse merge request data from the api response and insert the merge requests for the given repo
+
+    Arguments:
+        data: collection of mr data
+        task_name: name of the task as well as the repo being processed
+        repo_id: augur id of the repo
+        logger: logging object
+        augur_db: sqlalchemy db object
+
+    Returns:
+        List of parsed MR ids.
+    """
 
     tool_source = "Mr Task"
     tool_version = "2.0"
@@ -129,6 +156,13 @@ def process_merge_requests(data, task_name, repo_id, logger, augur_db):
 
 @celery.task(base=AugurCoreRepoCollectionTask)
 def collect_merge_request_comments(mr_ids, repo_git) -> int:
+    """
+    Retrieve and parse gitlab merge request comments for the desired repo
+
+    Arguments:
+        mr_ids: ids of MRs to paginate comments for
+        repo_git: the repo url string
+    """
 
     owner, repo = get_owner_repo(repo_git)
 
@@ -152,6 +186,16 @@ def collect_merge_request_comments(mr_ids, repo_git) -> int:
 
 
 def process_gitlab_mr_messages(data, task_name, repo_id, logger, augur_db):
+    """
+    Parse merge request message data from the api response and insert the messages for the given repo
+
+    Arguments:
+        data: List of dictionaries of mr message data
+        task_name: name of the task as well as the repo being processed
+        repo_id: augur id of the repo
+        logger: logging object
+        augur_db: sqlalchemy db object
+    """
 
     tool_source = "Gitlab mr comments"
     tool_version = "2.0"
@@ -214,6 +258,13 @@ def process_gitlab_mr_messages(data, task_name, repo_id, logger, augur_db):
 
 @celery.task(base=AugurCoreRepoCollectionTask)
 def collect_merge_request_metadata(mr_ids, repo_git) -> int:
+    """
+    Retrieve and parse gitlab merge request metadata for the desired repo
+
+    Arguments:
+        mr_ids: list of mr ids to find metadata for
+        repo_git: the repo url string
+    """
 
     owner, repo = get_owner_repo(repo_git)
 
@@ -236,6 +287,16 @@ def collect_merge_request_metadata(mr_ids, repo_git) -> int:
     logger.info(f"{owner}/{repo} has no gitlab merge request metadata")
 
 def process_mr_metadata(data, task_name, repo_id, logger, augur_db):
+    """
+    Parse merge request metadata from the api response and insert it for the given repo
+
+    Arguments:
+        data: List of dictionaries of mr metadata
+        task_name: name of the task as well as the repo being processed
+        repo_id: augur id of the repo
+        logger: logging object
+        augur_db: sqlalchemy db object
+    """
 
     tool_source = "Mr Metadata 
Task" tool_version = "2.0" @@ -261,6 +322,13 @@ def process_mr_metadata(data, task_name, repo_id, logger, augur_db): @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_reviewers(mr_ids, repo_git) -> int: + """ + Retrieve and parse mr reviewers for the desired repo + + Arguments: + mr_ids: mrs to search for reviewers for + repo_git: the repo url string + """ owner, repo = get_owner_repo(repo_git) @@ -283,6 +351,16 @@ def collect_merge_request_reviewers(mr_ids, repo_git) -> int: logger.info(f"{owner}/{repo} has no gitlab merge request reviewers") def process_mr_reviewers(data, task_name, repo_id, logger, augur_db): + """ + Retrieve only the needed data for mr reviewer data from the api response + + Arguments: + data: List of dictionaries of mr reviewer data + task_name: name of the task as well as the repo being processed + repo_id: augur id of the repo + logger: logging object + augur_db: sqlalchemy db object + """ tool_source = "Mr Reviewr Task" tool_version = "2.0" @@ -311,6 +389,13 @@ def process_mr_reviewers(data, task_name, repo_id, logger, augur_db): @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_commits(mr_ids, repo_git) -> int: + """ + Retrieve and parse mr commits for the desired repo + + Arguments: + mr_ids: ids of mrs to get commits for + repo_git: the repo url string + """ owner, repo = get_owner_repo(repo_git) @@ -334,6 +419,16 @@ def collect_merge_request_commits(mr_ids, repo_git) -> int: def process_mr_commits(data, task_name, repo_id, logger, augur_db): + """ + Retrieve only the needed data for mr commits from the api response + + Arguments: + data: List of dictionaries of mr commit data + task_name: name of the task as well as the repo being processed + repo_id: augur id of the repo + logger: logging object + augur_db: sqlalchemy db object + """ tool_source = "Mr Commit Task" tool_version = "2.0" @@ -363,6 +458,13 @@ def process_mr_commits(data, task_name, repo_id, logger, augur_db): @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_files(mr_ids, repo_git) -> int: + """ + Retrieve and parse gitlab events for the desired repo + + Arguments: + mr_ids: the ids of mrs to get files for. + repo_git: the repo url string + """ owner, repo = get_owner_repo(repo_git) @@ -409,6 +511,19 @@ def process_mr_files(data, task_name, repo_id, logger, augur_db): def retrieve_merge_request_data(ids, url, name, owner, repo, key_auth, logger, response_type): + """ + Retrieve specific mr data from the GitLab api. 
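
A hedged sketch of the response_type branching that this function's loop performs per merge request id. Only the "dict" arm (all_data[id] = page_data) and the unexpected-type error are visible in the hunks, so the "list" arm below is inferred, not quoted.

    def store_page(all_data: dict, mr_id: int, page_data, response_type: str):
        # hypothetical helper isolating the branching inside retrieve_merge_request_data
        if response_type == "dict":
            all_data[mr_id] = page_data          # one object per merge request
        elif response_type == "list":
            all_data.setdefault(mr_id, [])
            all_data[mr_id] += page_data         # many rows per merge request
        else:
            raise Exception(f"Unexpected response type: {response_type}")
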
+ + Arguments: + ids: mr ids to paginate info for + url: endpoint to paginate or hit + name: name of data to collect + owner: owner of the repo + repo: repo name + key_auth: key auth cache and rotator object + logger: loggin object + response_type: type of data to get from the api + """ all_data = {} mr_count = len(ids) From 579dec0d90cb8e38cd72fd6b2f02dc4659bdd624 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 23 Jan 2024 12:26:38 -0600 Subject: [PATCH 098/122] remove unused variable Signed-off-by: Isaac Milarsky --- augur/tasks/gitlab/merge_request_task.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index a04f055a27..60d2ececda 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -346,17 +346,16 @@ def collect_merge_request_reviewers(mr_ids, repo_git) -> int: if reviewers: logger.info(f"Length of merge request reviewers: {len(reviewers)}") - process_mr_reviewers(reviewers, f"{owner}/{repo}: Mr reviewer task", repo_id, logger, augur_db) + process_mr_reviewers(reviewers, repo_id, logger, augur_db) else: logger.info(f"{owner}/{repo} has no gitlab merge request reviewers") -def process_mr_reviewers(data, task_name, repo_id, logger, augur_db): +def process_mr_reviewers(data, repo_id, logger, augur_db): """ Retrieve only the needed data for mr reviewer data from the api response Arguments: data: List of dictionaries of mr reviewer data - task_name: name of the task as well as the repo being processed repo_id: augur id of the repo logger: logging object augur_db: sqlalchemy db object From 59afa4371f7199155d681b499493ac104bb4f90c Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 23 Jan 2024 12:28:22 -0600 Subject: [PATCH 099/122] use vars Signed-off-by: Isaac Milarsky --- augur/tasks/gitlab/merge_request_task.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index 60d2ececda..a7305acd46 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -346,11 +346,11 @@ def collect_merge_request_reviewers(mr_ids, repo_git) -> int: if reviewers: logger.info(f"Length of merge request reviewers: {len(reviewers)}") - process_mr_reviewers(reviewers, repo_id, logger, augur_db) + process_mr_reviewers(reviewers, f"{owner}/{repo}: Mr reviewer task", repo_id, logger, augur_db) else: logger.info(f"{owner}/{repo} has no gitlab merge request reviewers") -def process_mr_reviewers(data, repo_id, logger, augur_db): +def process_mr_reviewers(data, task_name, repo_id, logger, augur_db): """ Retrieve only the needed data for mr reviewer data from the api response @@ -365,6 +365,8 @@ def process_mr_reviewers(data, repo_id, logger, augur_db): tool_version = "2.0" data_source = "Gitlab API" + logger.info(f"Running {task_name}...") + # create mapping from mr number to pull request id of current mrs mr_number_to_id_map = {} mrs = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).all() From 91b84a87aad2d7bf70d1eb50b6e728c5ffb3af86 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 26 Jan 2024 11:22:48 -0600 Subject: [PATCH 100/122] Update Dockerfile Fixing Docker Error Signed-off-by: Sean P. 
Goggins --- docker/backend/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 57c615d8f2..b203081cf0 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -14,7 +14,6 @@ RUN set -x \ curl \ gcc \ python3-distutils \ - python3 -m pip _rest_of_the_pip \ wget \ postgresql-client \ && rm -rf /var/lib/apt/lists/* From 2b242effcd9e9956bca68de1946241a10a3220e9 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 26 Jan 2024 11:24:37 -0600 Subject: [PATCH 101/122] Update Dockerfile Docker file fix. Signed-off-by: Sean P. Goggins --- docker/backend/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index b203081cf0..35cb068d4f 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -29,7 +29,7 @@ COPY ./setup.py . COPY ./scripts/ scripts/ #COPY ./docker/backend/docker.config.json . -RUN python310 -m venv /opt/venv +RUN python31 -m venv /opt/venv RUN set -x \ && /opt/venv/bin/pip install . From 5da3277ed022b27b72a6e46ba551cbe591255383 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 26 Jan 2024 11:26:03 -0600 Subject: [PATCH 102/122] Update Dockerfile Python 31 is not yet released. Signed-off-by: Sean P. Goggins --- docker/backend/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 35cb068d4f..225ffe9b71 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -29,7 +29,7 @@ COPY ./setup.py . COPY ./scripts/ scripts/ #COPY ./docker/backend/docker.config.json . -RUN python31 -m venv /opt/venv +RUN python3 -m venv /opt/venv RUN set -x \ && /opt/venv/bin/pip install . From 3e792708066cd0f0cb923d92559f78779a7f7b8b Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 26 Jan 2024 11:31:37 -0600 Subject: [PATCH 103/122] Update Dockerfile Debugging docker with python 3.10 Signed-off-by: Sean P. Goggins --- docker/backend/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 225ffe9b71..bd65da8cde 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -32,6 +32,8 @@ COPY ./scripts/ scripts/ RUN python3 -m venv /opt/venv RUN set -x \ + && /opt/venv/bin/pip install --upgrade pip + && /opt/venv/bin/pip install wheel && /opt/venv/bin/pip install . RUN ./scripts/docker/install-workers-deps.sh From 21b5dd60502a4f86aa8e6298fad868a3f1f9845f Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 26 Jan 2024 11:34:00 -0600 Subject: [PATCH 104/122] Update Dockerfile Docker Docker One You need wheel for database Docker Docker blue Signed-off-by: Sean P. Goggins --- docker/backend/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index bd65da8cde..649368919b 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -32,8 +32,8 @@ COPY ./scripts/ scripts/ RUN python3 -m venv /opt/venv RUN set -x \ - && /opt/venv/bin/pip install --upgrade pip - && /opt/venv/bin/pip install wheel + && /opt/venv/bin/pip install --upgrade pip \ + && /opt/venv/bin/pip install wheel \ && /opt/venv/bin/pip install . RUN ./scripts/docker/install-workers-deps.sh From b61480b35adfb8ba63b13d6a40364d6f47d28c14 Mon Sep 17 00:00:00 2001 From: "Sean P. 
Goggins" Date: Fri, 26 Jan 2024 11:45:58 -0600 Subject: [PATCH 105/122] Update Dockerfile Docker issues Signed-off-by: Sean P. Goggins --- docker/backend/Dockerfile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 649368919b..6767dd0dec 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -31,6 +31,15 @@ COPY ./scripts/ scripts/ #COPY ./docker/backend/docker.config.json . RUN python3 -m venv /opt/venv +RUN set -x \ + && /opt/venv/bin/pip install --upgrade pip + +RUN set -x \ + && /opt/venv/bin/pip install wheel + +RUN set -x \ + && /opt/venv/bin/pip install . + RUN set -x \ && /opt/venv/bin/pip install --upgrade pip \ && /opt/venv/bin/pip install wheel \ From 79d74a195f86fd419963f53868a45fa1c380ae47 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 26 Jan 2024 11:53:14 -0600 Subject: [PATCH 106/122] Update Dockerfile Fixing PS_UTIL issue on python3.10 gcc musl-dev linux-headers python3-dev https://github.com/giampaolo/psutil/issues/2192 Signed-off-by: Sean P. Goggins --- docker/backend/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 6767dd0dec..4672c17634 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -13,6 +13,9 @@ RUN set -x \ bash \ curl \ gcc \ + musl-dev \ + linux-headers + python3-dev \ python3-distutils \ wget \ postgresql-client \ From 3a1d89a43ee7680f9d5822628a182b91d41da897 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 26 Jan 2024 11:53:29 -0600 Subject: [PATCH 107/122] Update Dockerfile Signed-off-by: Sean P. Goggins --- docker/backend/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 4672c17634..cac070ac46 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -14,7 +14,7 @@ RUN set -x \ curl \ gcc \ musl-dev \ - linux-headers + linux-headers \ python3-dev \ python3-distutils \ wget \ From 41ad59d128d57541d0014dc3cb1e779edbd6ff91 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 26 Jan 2024 11:57:14 -0600 Subject: [PATCH 108/122] Update Dockerfile Well Well Well Docker You are a beast to behold Docker Docker Why Signed-off-by: Sean P. Goggins --- docker/backend/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index cac070ac46..9c09a4a2f8 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -14,7 +14,6 @@ RUN set -x \ curl \ gcc \ musl-dev \ - linux-headers \ python3-dev \ python3-distutils \ wget \ From 8fcc4297e834138108de810fdbf43c66ed7e546d Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 26 Jan 2024 12:06:18 -0600 Subject: [PATCH 109/122] Update spec.yml Fixing spelling error Signed-off-by: Sean P. Goggins --- docs/source/rest-api/spec.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/rest-api/spec.yml b/docs/source/rest-api/spec.yml index 1764f0f378..7b969d5803 100644 --- a/docs/source/rest-api/spec.yml +++ b/docs/source/rest-api/spec.yml @@ -113,19 +113,19 @@ paths: description: 'Repository description. Example: null' type: string commits_all_time: - description: 'How many commits have been made to this respository. Example: 24' + description: 'How many commits have been made to this repository? Example: 24' type: integer issues_all_time: - description: 'How many issues have been raised on this respository. 
Example: 1' + description: 'How many issues have been raised on this repository? Example: 1' type: integer pull_requests_all_time: - description: 'How many pull requests have been made to this reqpository. Example: 7' + description: 'How many pull requests have been made to this repository? Example: 7' type: integer repo_id: description: 'Repository ID, should match provided URL parameter. Example: 25551' type: integer repo_name: - description: 'Name of the provided rpository. Example: 3scale-operator-metadata' + description: 'Name of the provided repository. Example: 3scale-operator-metadata' type: string rg_name: description: 'Name of the repository group containing this repo. Example: 3scale' From 5099e31d029e749806db872944693de9b9b5590b Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 26 Jan 2024 12:17:26 -0600 Subject: [PATCH 110/122] Fixing PyLint Error --- augur/application/db/models/augur_data.py | 1 + .../test_db/test_models/test_augur_data/test_repo.py | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index cfbcdca1d7..7f97e4bbdc 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -1088,6 +1088,7 @@ def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_ Args: url: repo url repo_group_id: group to assign repo to + repo_type: github or gitlab Note: If repo row exists then it will update the repo_group_id if param repo_group_id is not a default. If it does not exist is will simply insert the repo. diff --git a/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py b/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py index 1c32472f32..dd1ef44b79 100644 --- a/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py +++ b/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py @@ -77,20 +77,20 @@ def test_insert_repo(test_db_engine): with DatabaseSession(logger, test_db_engine) as session: - assert Repo.insert_github_repo(session, data["repo_urls"][0], data["rg_id"], data["tool_source"]) is not None - assert Repo.insert_github_repo(session, data["repo_urls"][1], data["rg_id"], data["tool_source"]) is not None + assert Repo.insert_github_repo(session, data["repo_urls"][0], data["rg_id"], data["tool_source"], None) is not None + assert Repo.insert_github_repo(session, data["repo_urls"][1], data["rg_id"], data["tool_source"], None) is not None # invalid rg_id - assert Repo.insert_github_repo(session, data["repo_urls"][0], 12, data["tool_source"]) is None + assert Repo.insert_github_repo(session, data["repo_urls"][0], 12, data["tool_source"], None) is None # invalid type for repo url - assert Repo.insert_github_repo(session, 1, data["rg_id"], data["tool_source"]) is None + assert Repo.insert_github_repo(session, 1, data["rg_id"], data["tool_source"], None) is None # invalid type for rg_id - assert Repo.insert_github_repo(session, data["repo_urls"][1], "1", data["tool_source"]) is None + assert Repo.insert_github_repo(session, data["repo_urls"][1], "1", data["tool_source"], None) is None # invalid type for tool_source - assert Repo.insert_github_repo(session, data["repo_urls"][1], data["rg_id"], 52) is None + assert Repo.insert_github_repo(session, data["repo_urls"][1], data["rg_id"], 52, None) is None with test_db_engine.connect() as connection: From f6f4f46734535e7d7989f351fa57204130ecf843 Mon Sep 17 00:00:00 2001 From: 
"Sean P. Goggins" Date: Fri, 26 Jan 2024 12:19:50 -0600 Subject: [PATCH 111/122] Update gitlab_api_handler.py Fixing PyLint Typos Signed-off-by: Sean P. Goggins --- augur/tasks/gitlab/gitlab_api_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/tasks/gitlab/gitlab_api_handler.py b/augur/tasks/gitlab/gitlab_api_handler.py index 8735a8318b..2a1751d04c 100644 --- a/augur/tasks/gitlab/gitlab_api_handler.py +++ b/augur/tasks/gitlab/gitlab_api_handler.py @@ -313,7 +313,7 @@ def add_query_params(url: str, additional_params: dict) -> str: Args: url: the url that is being modified - additional_params: key value pairs specififying the paramaters to be added + additional_params: key value pairs specifying the parameters to be added Returns: The url with the key value pairs in additional_params added as query params @@ -383,4 +383,4 @@ def hit_api(key_manager, url: str, logger: logging.Logger, timeout: float = 10, time.sleep(round(timeout*1.5)) return None - return response \ No newline at end of file + return response From 7433028d826a3557fd24bdfecfe6f9d193bd2c97 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 26 Jan 2024 12:23:27 -0600 Subject: [PATCH 112/122] Update gitlab_api_handler.py Fixing spelling Signed-off-by: Sean P. Goggins --- augur/tasks/gitlab/gitlab_api_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/gitlab/gitlab_api_handler.py b/augur/tasks/gitlab/gitlab_api_handler.py index 2a1751d04c..5303d606e9 100644 --- a/augur/tasks/gitlab/gitlab_api_handler.py +++ b/augur/tasks/gitlab/gitlab_api_handler.py @@ -286,7 +286,7 @@ def _set_paginaton_query_params(self, url): ################################################################################ -# Url Helper Method to remove query paramaters from the url +# Url Helper Method to remove query parameters from the url def clean_url(url: str, keys: List[str]) -> str: """Remove query params from url. From 99ae0f3f27405c5206a557df919ad5a231655fc0 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 26 Jan 2024 12:27:02 -0600 Subject: [PATCH 113/122] Update merge_request_task.py Spelling errors Signed-off-by: Sean P. Goggins --- augur/tasks/gitlab/merge_request_task.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index a7305acd46..157dd75075 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -352,10 +352,10 @@ def collect_merge_request_reviewers(mr_ids, repo_git) -> int: def process_mr_reviewers(data, task_name, repo_id, logger, augur_db): """ - Retrieve only the needed data for mr reviewer data from the api response + Retrieve only the needed data for mr Reviewer data from the api response Arguments: - data: List of dictionaries of mr reviewer data + data: List of dictionaries of mr Reviewer data repo_id: augur id of the repo logger: logging object augur_db: sqlalchemy db object @@ -553,7 +553,7 @@ def retrieve_merge_request_data(ids, url, name, owner, repo, key_auth, logger, r else: all_data[id] = page_data else: - raise Exception(f"Unexpected reponse type: {response_type}") + raise Exception(f"Unexpected response type: {response_type}") index += 1 From 205d0a3e04f6c8dd1f7dab584ba3dc4c1aac7650 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 26 Jan 2024 12:30:41 -0600 Subject: [PATCH 114/122] Update merge_request_task.py Typo Fix Signed-off-by: Sean P. 
Goggins --- augur/tasks/gitlab/merge_request_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index 157dd75075..ccf3c7e012 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -361,7 +361,7 @@ def process_mr_reviewers(data, task_name, repo_id, logger, augur_db): augur_db: sqlalchemy db object """ - tool_source = "Mr Reviewr Task" + tool_source = "Mr Reviewer Task" tool_version = "2.0" data_source = "Gitlab API" From 3153f15186b4ac2ae493ef590d161d332e1b5b71 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 26 Jan 2024 12:37:54 -0600 Subject: [PATCH 115/122] readthedocs.io error fix --- setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index d2a149d93b..7a99cc64d3 100644 --- a/setup.py +++ b/setup.py @@ -94,11 +94,11 @@ "pytest==6.2.5", # 7.1.2 "toml >= 0.10.2", # 0.10.2 "ipdb==0.13.9", # 0.13.9 - "sphinx==4.2.0", # 5.1.1 - "sphinx_rtd_theme==1.0.0", # 1.0.0 - "sphinxcontrib-openapi==0.7.0", # 0.7.0 + "sphinx==7.2.6", #4.2.0", # 5.1.1 + "sphinx_rtd_theme==2.0.0", # 1.0.0 + "sphinxcontrib-openapi==0.8.3", # 0.7.0 "sphinxcontrib-redoc==1.6.0", # 1.6.0 - "docutils==0.17.1" # 0.19 + "docutils==0.20.1" # 0.19 ] }, entry_points={ From 419f69a9ee177a6c19f7587e46990cada5b9c2ed Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 26 Jan 2024 12:41:12 -0600 Subject: [PATCH 116/122] Relaxing mistune version requirements to let pip resolve the correct version, as it is merely a markdown parser. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7a99cc64d3..f87214316e 100644 --- a/setup.py +++ b/setup.py @@ -72,7 +72,7 @@ "slack==0.0.2", # 0.0.2 "boto3==1.17.57", # 1.24.56 "toml", # 0.10.2 - "mistune==0.8.4", # 2.0.4 + "mistune", # 2.0.4 "pyYaml", # 6.0 "redis==4.3.3", # 4.3.4 "XlsxWriter==1.3.7", # 3.0.3 From 54c20866e8eac4f0f5d1d3326b6f8ef9d9f7b52e Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 26 Jan 2024 16:03:30 -0600 Subject: [PATCH 117/122] Updated materialized views to include the right logic. Added indexes to materialized views missing indexes. Updated materialized view refresh script. --- .../26_materialized_view_unique_updates | 244 ++++++++++++++++++ augur/tasks/db/refresh_materialized_views.py | 65 ++++- 2 files changed, 303 insertions(+), 6 deletions(-) create mode 100644 augur/application/schema/alembic/versions/26_materialized_view_unique_updates diff --git a/augur/application/schema/alembic/versions/26_materialized_view_unique_updates b/augur/application/schema/alembic/versions/26_materialized_view_unique_updates new file mode 100644 index 0000000000..00b811b977 --- /dev/null +++ b/augur/application/schema/alembic/versions/26_materialized_view_unique_updates @@ -0,0 +1,244 @@ +""" Updating materialized views and associated indices + +Revision ID: 26 +Revises: 25 +Create Date: 2023-08-23 18:17:22.651191 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql +from sqlalchemy import text + +# revision identifiers, used by Alembic. 
+revision = '26' +down_revision = '25' +branch_labels = None +depends_on = None + + +def upgrade(): + + mview_keys_26() + +def downgrade(): + + upgrade=False + + mview_keys_26(upgrade) + +def mview_keys_26(upgrade=True): + + if upgrade: + conn.execute(text(""" + drop materialized view if exists augur_data.explorer_pr_assignments; + drop materialized view if exists augur_data.explorer_user_repos; + drop materialized view if exists augur_data.explorer_pr_response_times; + drop materialized view if exists augur_data.explorer_pr_response; + drop materialized view if exists augur_data.explorer_issue_assignments;""")) + + conn.execute(text(""" + create materialized view augur_data.explorer_pr_assignments as + SELECT + pr.pull_request_id, + pr.repo_id AS ID, + pr.pr_created_at AS created, + pr.pr_closed_at AS closed, + pre.created_at AS assign_date, + pre.ACTION AS assignment_action, + pre.cntrb_id AS assignee, + pre.node_id AS node_id + FROM + ( + augur_data.pull_requests pr + LEFT JOIN augur_data.pull_request_events pre ON ( + ( + ( pr.pull_request_id = pre.pull_request_id ) + AND ( + ( pre.ACTION ) :: TEXT = ANY ( ARRAY [ ( 'unassigned' :: CHARACTER VARYING ) :: TEXT, ( 'assigned' :: CHARACTER VARYING ) :: TEXT ] ) + ) + ) + ) + );""")) + conn.execute(text(""" + create materialized view augur_data.explorer_pr_response as + SELECT pr.pull_request_id, + pr.repo_id AS id, + pr.pr_augur_contributor_id AS cntrb_id, + m.msg_timestamp, + m.msg_cntrb_id, + pr.pr_created_at, + pr.pr_closed_at + FROM (augur_data.pull_requests pr + LEFT JOIN ( SELECT prr.pull_request_id, + m_1.msg_timestamp, + m_1.cntrb_id AS msg_cntrb_id + FROM augur_data.pull_request_review_message_ref prrmr, + augur_data.pull_requests pr_1, + augur_data.message m_1, + augur_data.pull_request_reviews prr + WHERE ((prrmr.pr_review_id = prr.pr_review_id) AND (prrmr.msg_id = m_1.msg_id) AND (prr.pull_request_id = pr_1.pull_request_id)) + UNION + SELECT prmr.pull_request_id, + m_1.msg_timestamp, + m_1.cntrb_id AS msg_cntrb_id + FROM augur_data.pull_request_message_ref prmr, + augur_data.pull_requests pr_1, + augur_data.message m_1 + WHERE ((prmr.pull_request_id = pr_1.pull_request_id) AND (prmr.msg_id = m_1.msg_id))) m ON ((m.pull_request_id = pr.pull_request_id)));""")) + + + + conn.execute(text(""" + create materialized view augur_data.explorer_user_repos as + SELECT a.login_name, + a.user_id, + b.group_id, + c.repo_id + FROM augur_operations.users a, + augur_operations.user_groups b, + augur_operations.user_repos c + WHERE ((a.user_id = b.user_id) AND (b.group_id = c.group_id)) + ORDER BY a.user_id;""")) + + conn.execute(text(""" + create materialized view augur_data.explorer_pr_response_times as + SELECT repo.repo_id, + pull_requests.pr_src_id, + repo.repo_name, + pull_requests.pr_src_author_association, + repo_groups.rg_name AS repo_group, + pull_requests.pr_src_state, + pull_requests.pr_merged_at, + pull_requests.pr_created_at, + pull_requests.pr_closed_at, + date_part('year'::text, (pull_requests.pr_created_at)::date) AS created_year, + date_part('month'::text, (pull_requests.pr_created_at)::date) AS created_month, + date_part('year'::text, (pull_requests.pr_closed_at)::date) AS closed_year, + date_part('month'::text, (pull_requests.pr_closed_at)::date) AS closed_month, + base_labels.pr_src_meta_label, + base_labels.pr_head_or_base, + ((EXTRACT(epoch FROM pull_requests.pr_closed_at) - EXTRACT(epoch FROM pull_requests.pr_created_at)) / (3600)::numeric) AS hours_to_close, + ((EXTRACT(epoch FROM pull_requests.pr_closed_at) - 
EXTRACT(epoch FROM pull_requests.pr_created_at)) / (86400)::numeric) AS days_to_close, + ((EXTRACT(epoch FROM response_times.first_response_time) - EXTRACT(epoch FROM pull_requests.pr_created_at)) / (3600)::numeric) AS hours_to_first_response, + ((EXTRACT(epoch FROM response_times.first_response_time) - EXTRACT(epoch FROM pull_requests.pr_created_at)) / (86400)::numeric) AS days_to_first_response, + ((EXTRACT(epoch FROM response_times.last_response_time) - EXTRACT(epoch FROM pull_requests.pr_created_at)) / (3600)::numeric) AS hours_to_last_response, + ((EXTRACT(epoch FROM response_times.last_response_time) - EXTRACT(epoch FROM pull_requests.pr_created_at)) / (86400)::numeric) AS days_to_last_response, + response_times.first_response_time, + response_times.last_response_time, + response_times.average_time_between_responses, + response_times.assigned_count, + response_times.review_requested_count, + response_times.labeled_count, + response_times.subscribed_count, + response_times.mentioned_count, + response_times.referenced_count, + response_times.closed_count, + response_times.head_ref_force_pushed_count, + response_times.merged_count, + response_times.milestoned_count, + response_times.unlabeled_count, + response_times.head_ref_deleted_count, + response_times.comment_count, + master_merged_counts.lines_added, + master_merged_counts.lines_removed, + all_commit_counts.commit_count, + master_merged_counts.file_count + FROM augur_data.repo, + augur_data.repo_groups, + ((((augur_data.pull_requests + LEFT JOIN ( SELECT pull_requests_1.pull_request_id, + count(*) FILTER (WHERE ((pull_request_events.action)::text = 'assigned'::text)) AS assigned_count, + count(*) FILTER (WHERE ((pull_request_events.action)::text = 'review_requested'::text)) AS review_requested_count, + count(*) FILTER (WHERE ((pull_request_events.action)::text = 'labeled'::text)) AS labeled_count, + count(*) FILTER (WHERE ((pull_request_events.action)::text = 'unlabeled'::text)) AS unlabeled_count, + count(*) FILTER (WHERE ((pull_request_events.action)::text = 'subscribed'::text)) AS subscribed_count, + count(*) FILTER (WHERE ((pull_request_events.action)::text = 'mentioned'::text)) AS mentioned_count, + count(*) FILTER (WHERE ((pull_request_events.action)::text = 'referenced'::text)) AS referenced_count, + count(*) FILTER (WHERE ((pull_request_events.action)::text = 'closed'::text)) AS closed_count, + count(*) FILTER (WHERE ((pull_request_events.action)::text = 'head_ref_force_pushed'::text)) AS head_ref_force_pushed_count, + count(*) FILTER (WHERE ((pull_request_events.action)::text = 'head_ref_deleted'::text)) AS head_ref_deleted_count, + count(*) FILTER (WHERE ((pull_request_events.action)::text = 'milestoned'::text)) AS milestoned_count, + count(*) FILTER (WHERE ((pull_request_events.action)::text = 'merged'::text)) AS merged_count, + min(message.msg_timestamp) AS first_response_time, + count(DISTINCT message.msg_timestamp) AS comment_count, + max(message.msg_timestamp) AS last_response_time, + ((max(message.msg_timestamp) - min(message.msg_timestamp)) / (count(DISTINCT message.msg_timestamp))::double precision) AS average_time_between_responses + FROM augur_data.pull_request_events, + augur_data.pull_requests pull_requests_1, + augur_data.repo repo_1, + augur_data.pull_request_message_ref, + augur_data.message + WHERE ((repo_1.repo_id = pull_requests_1.repo_id) AND (pull_requests_1.pull_request_id = pull_request_events.pull_request_id) AND (pull_requests_1.pull_request_id = pull_request_message_ref.pull_request_id) AND 
(pull_request_message_ref.msg_id = message.msg_id)) + GROUP BY pull_requests_1.pull_request_id) response_times ON ((pull_requests.pull_request_id = response_times.pull_request_id))) + LEFT JOIN ( SELECT pull_request_commits.pull_request_id, + count(DISTINCT pull_request_commits.pr_cmt_sha) AS commit_count + FROM augur_data.pull_request_commits, + augur_data.pull_requests pull_requests_1, + augur_data.pull_request_meta + WHERE ((pull_requests_1.pull_request_id = pull_request_commits.pull_request_id) AND (pull_requests_1.pull_request_id = pull_request_meta.pull_request_id) AND ((pull_request_commits.pr_cmt_sha)::text <> (pull_requests_1.pr_merge_commit_sha)::text) AND ((pull_request_commits.pr_cmt_sha)::text <> (pull_request_meta.pr_sha)::text)) + GROUP BY pull_request_commits.pull_request_id) all_commit_counts ON ((pull_requests.pull_request_id = all_commit_counts.pull_request_id))) + LEFT JOIN ( SELECT max(pull_request_meta.pr_repo_meta_id) AS max, + pull_request_meta.pull_request_id, + pull_request_meta.pr_head_or_base, + pull_request_meta.pr_src_meta_label + FROM augur_data.pull_requests pull_requests_1, + augur_data.pull_request_meta + WHERE ((pull_requests_1.pull_request_id = pull_request_meta.pull_request_id) AND ((pull_request_meta.pr_head_or_base)::text = 'base'::text)) + GROUP BY pull_request_meta.pull_request_id, pull_request_meta.pr_head_or_base, pull_request_meta.pr_src_meta_label) base_labels ON ((base_labels.pull_request_id = all_commit_counts.pull_request_id))) + LEFT JOIN ( SELECT sum(commits.cmt_added) AS lines_added, + sum(commits.cmt_removed) AS lines_removed, + pull_request_commits.pull_request_id, + count(DISTINCT commits.cmt_filename) AS file_count + FROM augur_data.pull_request_commits, + augur_data.commits, + augur_data.pull_requests pull_requests_1, + augur_data.pull_request_meta + WHERE (((commits.cmt_commit_hash)::text = (pull_request_commits.pr_cmt_sha)::text) AND (pull_requests_1.pull_request_id = pull_request_commits.pull_request_id) AND (pull_requests_1.pull_request_id = pull_request_meta.pull_request_id) AND (commits.repo_id = pull_requests_1.repo_id) AND ((commits.cmt_commit_hash)::text <> (pull_requests_1.pr_merge_commit_sha)::text) AND ((commits.cmt_commit_hash)::text <> (pull_request_meta.pr_sha)::text)) + GROUP BY pull_request_commits.pull_request_id) master_merged_counts ON ((base_labels.pull_request_id = master_merged_counts.pull_request_id))) + WHERE ((repo.repo_group_id = repo_groups.repo_group_id) AND (repo.repo_id = pull_requests.repo_id)) + ORDER BY response_times.merged_count DESC;""")) + + conn.execute(text(""" + create materialized view augur_data.explorer_issue_assignments as + SELECT + i.issue_id, + i.repo_id AS ID, + i.created_at AS created, + i.closed_at AS closed, + ie.created_at AS assign_date, + ie.ACTION AS assignment_action, + ie.cntrb_id AS assignee, + ie.node_id as node_id + FROM + ( + augur_data.issues i + LEFT JOIN augur_data.issue_events ie ON ( + ( + ( i.issue_id = ie.issue_id ) + AND ( + ( ie.ACTION ) :: TEXT = ANY ( ARRAY [ ( 'unassigned' :: CHARACTER VARYING ) :: TEXT, ( 'assigned' :: CHARACTER VARYING ) :: TEXT ] ) + ) + ) + ) + );""")) + + conn = op.get_bind() + conn.execute(text("""CREATE UNIQUE INDEX ON augur_data.explorer_user_repos(login_name,user_id,group_id,repo_id);""")) + conn.execute(text("""COMMIT;""")) + + conn = op.get_bind() + conn.execute(text("""CREATE UNIQUE INDEX ON augur_data.explorer_pr_response_times(repo_id, pr_src_id, pr_src_meta_label);""")) + conn.execute(text("""COMMIT;""")) + + conn = op.get_bind() 
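+        # Unique indexes like these are what allow REFRESH MATERIALIZED VIEW ... CONCURRENTLY
+        # (used by augur/tasks/db/refresh_materialized_views.py) to run against each view:
+        # PostgreSQL refuses a concurrent refresh on a materialized view without one.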
+ conn.execute(text("""CREATE UNIQUE INDEX ON augur_data.explorer_pr_assignments(pull_request_id, id, node_id);""")) + conn.execute(text("""COMMIT;""")) + + conn = op.get_bind() + conn.execute(text("""CREATE UNIQUE INDEX ON augur_data.explorer_issue_assignments(issue_id, id, node_id);""")) + conn.execute(text("""COMMIT;""")) + + conn = op.get_bind() + conn.execute(text("""CREATE UNIQUE INDEX ON augur_data.explorer_pr_response(pull_request_id, id, cntrb_id, msg_cntrb_id, msg_timestamp);""")) + conn.execute(text("""COMMIT;""")) \ No newline at end of file diff --git a/augur/tasks/db/refresh_materialized_views.py b/augur/tasks/db/refresh_materialized_views.py index 76420c253e..f04d01552b 100644 --- a/augur/tasks/db/refresh_materialized_views.py +++ b/augur/tasks/db/refresh_materialized_views.py @@ -59,15 +59,35 @@ def refresh_materialized_views(): COMMIT; """) + mv9_refresh = s.sql.text(""" + REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_user_repos with data; + COMMIT; + """) - try: - with DatabaseSession(logger, engine) as session: - session.execute_sql(mv1_refresh) - except Exception as e: - logger.info(f"error is {e}") - pass + mv10_refresh = s.sql.text(""" + + REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_pr_response_times with data; + COMMIT; + """) + mv11_refresh = s.sql.text(""" + + REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_pr_assignments with data; + COMMIT; + """) + + mv12_refresh = s.sql.text(""" + + REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_issue_assignments with data; + COMMIT; + """) + + mv13_refresh = s.sql.text(""" + + REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_pr_response with data; + COMMIT; + """) try: with DatabaseSession(logger, engine) as session: @@ -125,7 +145,40 @@ def refresh_materialized_views(): logger.info(f"error is {e}") pass + try: + with DatabaseSession(logger, engine) as session: + session.execute_sql(mv9_refresh) + except Exception as e: + logger.info(f"error is {e}") + pass + + try: + with DatabaseSession(logger, engine) as session: + session.execute_sql(mv10_refresh) + except Exception as e: + logger.info(f"error is {e}") + pass + try: + with DatabaseSession(logger, engine) as session: + session.execute_sql(mv11_refresh) + except Exception as e: + logger.info(f"error is {e}") + pass + + try: + with DatabaseSession(logger, engine) as session: + session.execute_sql(mv12_refresh) + except Exception as e: + logger.info(f"error is {e}") + pass + + try: + with DatabaseSession(logger, engine) as session: + session.execute_sql(mv13_refresh) + except Exception as e: + logger.info(f"error is {e}") + pass From 36679429ef41cfdd43c46d4545493ad2941392d0 Mon Sep 17 00:00:00 2001 From: "Sean P. 
Goggins" Date: Mon, 29 Jan 2024 11:23:38 -0600 Subject: [PATCH 118/122] updated file name --- ...view_unique_updates => 26_materialized_view_unique_updates.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename augur/application/schema/alembic/versions/{26_materialized_view_unique_updates => 26_materialized_view_unique_updates.py} (100%) diff --git a/augur/application/schema/alembic/versions/26_materialized_view_unique_updates b/augur/application/schema/alembic/versions/26_materialized_view_unique_updates.py similarity index 100% rename from augur/application/schema/alembic/versions/26_materialized_view_unique_updates rename to augur/application/schema/alembic/versions/26_materialized_view_unique_updates.py From 9ad969b39e74cf0d6827fd1e1e2f5d81081860b1 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 30 Jan 2024 07:38:12 -0600 Subject: [PATCH 119/122] Fix complexity routes --- augur/api/routes/__init__.py | 1 + augur/api/routes/complexity.py | 432 ++++++++++++++++----------------- 2 files changed, 217 insertions(+), 216 deletions(-) diff --git a/augur/api/routes/__init__.py b/augur/api/routes/__init__.py index 5e601f54e8..03c2e2fa71 100644 --- a/augur/api/routes/__init__.py +++ b/augur/api/routes/__init__.py @@ -11,3 +11,4 @@ from .user import * from .dei import * from .util import * +from .complexity import * diff --git a/augur/api/routes/complexity.py b/augur/api/routes/complexity.py index ba82a12599..bee39eb923 100644 --- a/augur/api/routes/complexity.py +++ b/augur/api/routes/complexity.py @@ -6,32 +6,71 @@ import os import requests -AUGUR_API_VERSION = 'api/unstable' +from augur.api.routes import AUGUR_API_VERSION +from ..server import app, engine -def create_routes(server): - @server.app.route('/{}/complexity/project_languages'.format(AUGUR_API_VERSION), methods=["GET"]) - def get_project_languages(): - project_languages_sql = s.sql.text(""" - SELECT - e.repo_id, - augur_data.repo.repo_git, - augur_data.repo.repo_name, - e.programming_language, - e.code_lines, - e.files - FROM - augur_data.repo, - (SELECT +@app.route('/{}/complexity/project_languages'.format(AUGUR_API_VERSION), methods=["GET"]) +def get_project_languages(): + project_languages_sql = s.sql.text(""" + SELECT + e.repo_id, + augur_data.repo.repo_git, + augur_data.repo.repo_name, + e.programming_language, + e.code_lines, + e.files + FROM + augur_data.repo, + (SELECT + d.repo_id, + d.programming_language, + SUM(d.code_lines) AS code_lines, + COUNT(*)::int AS files + FROM + (SELECT + augur_data.repo_labor.repo_id, + augur_data.repo_labor.programming_language, + augur_data.repo_labor.code_lines + FROM + augur_data.repo_labor, + ( SELECT + augur_data.repo_labor.repo_id, + MAX ( data_collection_date ) AS last_collected + FROM + augur_data.repo_labor + GROUP BY augur_data.repo_labor.repo_id) recent + WHERE + augur_data.repo_labor.repo_id = recent.repo_id + AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d + GROUP BY d.repo_id, d.programming_language) e + WHERE augur_data.repo.repo_id = e.repo_id + ORDER BY e.repo_id + """) + + with engine.connect() as conn: + results = pd.read_sql(project_languages_sql, conn) + data = results.to_json(orient="records", date_format='iso', date_unit='ms') + return Response(response=data, + status=200, + mimetype="application/json") + +@app.route('/{}/complexity/project_files'.format(AUGUR_API_VERSION), methods=["GET"]) +def get_project_files(): + project_files_sql = s.sql.text(""" + SELECT + e.repo_id, + augur_data.repo.repo_git, + 
augur_data.repo.repo_name, + e.files + FROM + augur_data.repo, + (SELECT d.repo_id, - d.programming_language, - SUM(d.code_lines) AS code_lines, - COUNT(*)::int AS files + count(*) AS files FROM (SELECT - augur_data.repo_labor.repo_id, - augur_data.repo_labor.programming_language, - augur_data.repo_labor.code_lines + augur_data.repo_labor.repo_id FROM augur_data.repo_labor, ( SELECT @@ -43,119 +82,122 @@ def get_project_languages(): WHERE augur_data.repo_labor.repo_id = recent.repo_id AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d - GROUP BY d.repo_id, d.programming_language) e - WHERE augur_data.repo.repo_id = e.repo_id - ORDER BY e.repo_id - """) + GROUP BY d.repo_id) e + WHERE augur_data.repo.repo_id = e.repo_id + ORDER BY e.repo_id + """) - with server.engine.connect() as conn: - results = pd.read_sql(project_languages_sql, conn) - data = results.to_json(orient="records", date_format='iso', date_unit='ms') - return Response(response=data, - status=200, - mimetype="application/json") + with engine.connect() as conn: + results = pd.read_sql(project_files_sql, conn) + data = results.to_json(orient="records", date_format='iso', date_unit='ms') + return Response(response=data, + status=200, + mimetype="application/json") - @server.app.route('/{}/complexity/project_files'.format(AUGUR_API_VERSION), methods=["GET"]) - def get_project_files(): - project_files_sql = s.sql.text(""" - SELECT - e.repo_id, - augur_data.repo.repo_git, - augur_data.repo.repo_name, - e.files +@app.route('/{}/complexity/project_lines'.format(AUGUR_API_VERSION), methods=["GET"]) +def get_project_lines(): + project_lines_sql = s.sql.text(""" + SELECT + e.repo_id, + augur_data.repo.repo_git, + augur_data.repo.repo_name, + e.total_lines, + e.average_lines + FROM + augur_data.repo, + (SELECT + d.repo_id, + SUM(d.total_lines) AS total_lines, + AVG(d.total_lines)::INT AS average_lines FROM - augur_data.repo, - (SELECT - d.repo_id, - count(*) AS files - FROM - (SELECT - augur_data.repo_labor.repo_id - FROM - augur_data.repo_labor, - ( SELECT - augur_data.repo_labor.repo_id, - MAX ( data_collection_date ) AS last_collected - FROM - augur_data.repo_labor - GROUP BY augur_data.repo_labor.repo_id) recent - WHERE - augur_data.repo_labor.repo_id = recent.repo_id - AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d - GROUP BY d.repo_id) e - WHERE augur_data.repo.repo_id = e.repo_id - ORDER BY e.repo_id - """) + (SELECT + augur_data.repo_labor.repo_id, + augur_data.repo_labor.total_lines + FROM + augur_data.repo_labor, + ( SELECT + augur_data.repo_labor.repo_id, + MAX ( data_collection_date ) AS last_collected + FROM + augur_data.repo_labor + GROUP BY augur_data.repo_labor.repo_id) recent + WHERE + augur_data.repo_labor.repo_id = recent.repo_id + AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d + GROUP BY d.repo_id) e + WHERE augur_data.repo.repo_id = e.repo_id + ORDER BY e.repo_id + """) - with server.engine.connect() as conn: - results = pd.read_sql(project_files_sql, conn) - data = results.to_json(orient="records", date_format='iso', date_unit='ms') - return Response(response=data, - status=200, - mimetype="application/json") + with engine.connect() as conn: + results = pd.read_sql(project_lines_sql, conn) + data = results.to_json(orient="records", date_format='iso', date_unit='ms') + return Response(response=data, + status=200, + mimetype="application/json") - 
@server.app.route('/{}/complexity/project_lines'.format(AUGUR_API_VERSION), methods=["GET"]) - def get_project_lines(): - project_lines_sql = s.sql.text(""" - SELECT - e.repo_id, - augur_data.repo.repo_git, - augur_data.repo.repo_name, - e.total_lines, - e.average_lines +@app.route('/{}/complexity/project_comment_lines'.format(AUGUR_API_VERSION), methods=["GET"]) +def get_project_comment_lines(): + comment_lines_sql = s.sql.text(""" + SELECT + e.repo_id, + augur_data.repo.repo_git, + augur_data.repo.repo_name, + e.comment_lines, + e.avg_comment_lines + FROM + augur_data.repo, + (SELECT + d.repo_id, + SUM(d.comment_lines) AS comment_lines, + AVG(d.comment_lines)::INT AS avg_comment_lines FROM - augur_data.repo, - (SELECT - d.repo_id, - SUM(d.total_lines) AS total_lines, - AVG(d.total_lines)::INT AS average_lines - FROM - (SELECT - augur_data.repo_labor.repo_id, - augur_data.repo_labor.total_lines - FROM - augur_data.repo_labor, - ( SELECT - augur_data.repo_labor.repo_id, - MAX ( data_collection_date ) AS last_collected - FROM - augur_data.repo_labor - GROUP BY augur_data.repo_labor.repo_id) recent - WHERE - augur_data.repo_labor.repo_id = recent.repo_id - AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d - GROUP BY d.repo_id) e - WHERE augur_data.repo.repo_id = e.repo_id - ORDER BY e.repo_id - """) + (SELECT + augur_data.repo_labor.repo_id, + augur_data.repo_labor.comment_lines + FROM + augur_data.repo_labor, + ( SELECT + augur_data.repo_labor.repo_id, + MAX ( data_collection_date ) AS last_collected + FROM + augur_data.repo_labor + GROUP BY augur_data.repo_labor.repo_id) recent + WHERE + augur_data.repo_labor.repo_id = recent.repo_id + AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d + GROUP BY d.repo_id) e + WHERE augur_data.repo.repo_id = e.repo_id + ORDER BY e.repo_id + """) - with server.engine.connect() as conn: - results = pd.read_sql(project_lines_sql, conn) - data = results.to_json(orient="records", date_format='iso', date_unit='ms') - return Response(response=data, - status=200, - mimetype="application/json") + with engine.connect() as conn: + results = pd.read_sql(comment_lines_sql, conn) + data = results.to_json(orient="records", date_format='iso', date_unit='ms') + return Response(response=data, + status=200, + mimetype="application/json") - @server.app.route('/{}/complexity/project_comment_lines'.format(AUGUR_API_VERSION), methods=["GET"]) - def get_project_comment_lines(): - comment_lines_sql = s.sql.text(""" - SELECT - e.repo_id, - augur_data.repo.repo_git, - augur_data.repo.repo_name, - e.comment_lines, - e.avg_comment_lines +@app.route('/{}/complexity/project_blank_lines'.format(AUGUR_API_VERSION), methods=["GET"]) +def get_project_blank_lines(): + blank_lines_sql = s.sql.text(""" + SELECT + e.repo_id, + augur_data.repo.repo_git, + augur_data.repo.repo_name, + e.blank_lines, + e.avg_blank_lines + FROM + augur_data.repo, + (SELECT + d.repo_id, + SUM(d.blank_lines) AS blank_lines, + AVG(d.blank_lines)::int AS avg_blank_lines FROM - augur_data.repo, - (SELECT - d.repo_id, - SUM(d.comment_lines) AS comment_lines, - AVG(d.comment_lines)::INT AS avg_comment_lines - FROM - (SELECT - augur_data.repo_labor.repo_id, - augur_data.repo_labor.comment_lines - FROM + (SELECT + augur_data.repo_labor.repo_id, + augur_data.repo_labor.blank_lines + FROM augur_data.repo_labor, ( SELECT augur_data.repo_labor.repo_id, @@ -167,99 +209,57 @@ def get_project_comment_lines(): 
augur_data.repo_labor.repo_id = recent.repo_id AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d GROUP BY d.repo_id) e - WHERE augur_data.repo.repo_id = e.repo_id - ORDER BY e.repo_id + WHERE augur_data.repo.repo_id = e.repo_id + ORDER BY e.repo_id """) - with server.engine.connect() as conn: - results = pd.read_sql(comment_lines_sql, conn) - data = results.to_json(orient="records", date_format='iso', date_unit='ms') - return Response(response=data, - status=200, - mimetype="application/json") - - @server.app.route('/{}/complexity/project_blank_lines'.format(AUGUR_API_VERSION), methods=["GET"]) - def get_project_blank_lines(): - blank_lines_sql = s.sql.text(""" - SELECT - e.repo_id, - augur_data.repo.repo_git, - augur_data.repo.repo_name, - e.blank_lines, - e.avg_blank_lines - FROM - augur_data.repo, - (SELECT - d.repo_id, - SUM(d.blank_lines) AS blank_lines, - AVG(d.blank_lines)::int AS avg_blank_lines - FROM - (SELECT - augur_data.repo_labor.repo_id, - augur_data.repo_labor.blank_lines - FROM - augur_data.repo_labor, - ( SELECT - augur_data.repo_labor.repo_id, - MAX ( data_collection_date ) AS last_collected - FROM - augur_data.repo_labor - GROUP BY augur_data.repo_labor.repo_id) recent - WHERE - augur_data.repo_labor.repo_id = recent.repo_id - AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d - GROUP BY d.repo_id) e - WHERE augur_data.repo.repo_id = e.repo_id - ORDER BY e.repo_id - """) - - with server.engine.connect() as conn: - results = pd.read_sql(blank_lines_sql, conn) - data = results.to_json(orient="records", date_format='iso', date_unit='ms') - return Response(response=data, - status=200, - mimetype="application/json") - + with engine.connect() as conn: + results = pd.read_sql(blank_lines_sql, conn) + data = results.to_json(orient="records", date_format='iso', date_unit='ms') + return Response(response=data, + status=200, + mimetype="application/json") + - @server.app.route('/{}/complexity/project_file_complexity'.format(AUGUR_API_VERSION), methods=["GET"]) - def get_project_file_complexity(): - project_file_complexity_sql = s.sql.text(""" - SELECT - e.repo_id, - augur_data.repo.repo_git, - augur_data.repo.repo_name, - e.sum_code_complexity, - e.average_code_complexity +@app.route('/{}/complexity/project_file_complexity'.format(AUGUR_API_VERSION), methods=["GET"]) +def get_project_file_complexity(): + project_file_complexity_sql = s.sql.text(""" + SELECT + e.repo_id, + augur_data.repo.repo_git, + augur_data.repo.repo_name, + e.sum_code_complexity, + e.average_code_complexity + FROM + augur_data.repo, + (SELECT + d.repo_id, + SUM(d.code_complexity) AS sum_code_complexity, + AVG(d.code_complexity)::int AS average_code_complexity FROM - augur_data.repo, - (SELECT - d.repo_id, - SUM(d.code_complexity) AS sum_code_complexity, - AVG(d.code_complexity)::int AS average_code_complexity - FROM - (SELECT - augur_data.repo_labor.repo_id, - augur_data.repo_labor.code_complexity - FROM - augur_data.repo_labor, - ( SELECT - augur_data.repo_labor.repo_id, - MAX ( data_collection_date ) AS last_collected - FROM - augur_data.repo_labor - GROUP BY augur_data.repo_labor.repo_id) recent - WHERE - augur_data.repo_labor.repo_id = recent.repo_id - AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d - GROUP BY d.repo_id) e - WHERE augur_data.repo.repo_id = e.repo_id - ORDER BY e.repo_id - """) - - with server.engine.connect() as conn: - results = 
pd.read_sql(project_file_complexity_sql, conn) - data = results.to_json(orient="records", date_format='iso', date_unit='ms') - return Response(response=data, - status=200, - mimetype="application/json") + (SELECT + augur_data.repo_labor.repo_id, + augur_data.repo_labor.code_complexity + FROM + augur_data.repo_labor, + ( SELECT + augur_data.repo_labor.repo_id, + MAX ( data_collection_date ) AS last_collected + FROM + augur_data.repo_labor + GROUP BY augur_data.repo_labor.repo_id) recent + WHERE + augur_data.repo_labor.repo_id = recent.repo_id + AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d + GROUP BY d.repo_id) e + WHERE augur_data.repo.repo_id = e.repo_id + ORDER BY e.repo_id + """) + with engine.connect() as conn: + results = pd.read_sql(project_file_complexity_sql, conn) + data = results.to_json(orient="records", date_format='iso', date_unit='ms') + return Response(response=data, + status=200, + mimetype="application/json") + From b8b2e94f914a4e51f1353e010df8fd7ec858e28a Mon Sep 17 00:00:00 2001 From: Gary White Jr Date: Tue, 6 Feb 2024 10:16:21 -0500 Subject: [PATCH 120/122] add volume support, patch python dockerfile dependencies Signed-off-by: Gary White Jr --- docker-compose.yml | 5 +++++ docker/backend/Dockerfile | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index bc8186914a..75ad63ea7f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,8 +8,11 @@ services: - "POSTGRES_DB=augur" - "POSTGRES_USER=${AUGUR_DB_USER:-augur}" - "POSTGRES_PASSWORD=${AUGUR_DB_PASSWORD:-augur}" + - "PGDATA=/var/lib/postgresql/data/pgdata" ports: - "127.0.0.1:${AUGUR_DB_PORT:-5432}:5432" + volumes: + - augurpostgres:/var/lib/postgresql/data redis: image: "redis:alpine" @@ -44,5 +47,7 @@ services: volumes: facade: driver: local + augurpostgres: + driver: local diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index b16deb4b34..962544e661 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -13,7 +13,7 @@ RUN set -x \ bash \ curl \ gcc \ - python3-pip \ + python3-dev \ wget \ postgresql-client \ && rm -rf /var/lib/apt/lists/* From f3f7fbedd2217b2c5d47e2367b8e22ed753b4640 Mon Sep 17 00:00:00 2001 From: Sean Goggins Date: Thu, 8 Feb 2024 16:01:45 +0000 Subject: [PATCH 121/122] fixing flower time synchronization issue Signed-off-by: Sean Goggins --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f87214316e..1a72c87b33 100644 --- a/setup.py +++ b/setup.py @@ -79,7 +79,7 @@ "celery==5.2.7", # 5.2.7 "httpx==0.23.0", # 0.23.0 "eventlet==0.33.3", - "flower==1.2.0", + "flower==2.0.1", "tornado==6.3.3", # added because it sometimes errors when tornado is not 6.1 even though nothing we install depends on it "pylint==2.15.5", "dnspython==2.2.1", From f46ee055c9f70166fe127fb6582211d4c8658f4b Mon Sep 17 00:00:00 2001 From: Sean Goggins Date: Thu, 8 Feb 2024 18:24:56 +0000 Subject: [PATCH 122/122] adding conn = op.get_bind() Signed-off-by: Sean Goggins --- .../alembic/versions/26_materialized_view_unique_updates.py | 1 + 1 file changed, 1 insertion(+) diff --git a/augur/application/schema/alembic/versions/26_materialized_view_unique_updates.py b/augur/application/schema/alembic/versions/26_materialized_view_unique_updates.py index 00b811b977..f381ec48ef 100644 --- a/augur/application/schema/alembic/versions/26_materialized_view_unique_updates.py +++ 
b/augur/application/schema/alembic/versions/26_materialized_view_unique_updates.py @@ -30,6 +30,7 @@ def downgrade(): def mview_keys_26(upgrade=True): if upgrade: + conn = op.get_bind() conn.execute(text(""" drop materialized view if exists augur_data.explorer_pr_assignments; drop materialized view if exists augur_data.explorer_user_repos;
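
Patch 122/122 closes the loop on the migration added earlier in this series (26_materialized_view_unique_updates.py): `mview_keys_26()` called `conn.execute(...)` before `conn` was ever assigned, so the upgrade died with an `UnboundLocalError` (`conn` referenced before assignment) before it ever reached the `conn = op.get_bind()` calls further down the function. The hunk above binds the connection at the top of the `if upgrade:` block, ahead of the first statement that needs it. A minimal sketch of the corrected pattern — illustrative only, not part of the patch series; the function name is hypothetical and the view body is abbreviated from the migration above:

    from alembic import op
    from sqlalchemy import text

    def mview_upgrade_sketch():
        # Bind first: every conn.execute() below depends on this assignment.
        conn = op.get_bind()

        conn.execute(text(
            "DROP MATERIALIZED VIEW IF EXISTS augur_data.explorer_user_repos;"))

        conn.execute(text("""
            CREATE MATERIALIZED VIEW augur_data.explorer_user_repos AS
            SELECT a.login_name, a.user_id, b.group_id, c.repo_id
              FROM augur_operations.users a,
                   augur_operations.user_groups b,
                   augur_operations.user_repos c
             WHERE a.user_id = b.user_id
               AND b.group_id = c.group_id
             ORDER BY a.user_id;"""))

        # PostgreSQL requires a unique index before REFRESH MATERIALIZED VIEW
        # ... CONCURRENTLY will work, which is why the migration pairs every
        # view with a CREATE UNIQUE INDEX statement.
        conn.execute(text("""
            CREATE UNIQUE INDEX ON augur_data.explorer_user_repos
                (login_name, user_id, group_id, repo_id);"""))

With the bind in place at the top, the repeated `conn = op.get_bind()` lines later in the migration become redundant but harmless reassignments rather than the only thing keeping the index-creation block alive.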