diff --git a/README.md b/README.md index 9977fc81a9..075a397fc5 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Augur NEW Release v0.60.0 +# Augur NEW Release v0.60.2 [![first-timers-only](https://img.shields.io/badge/first--timers--only-friendly-blue.svg?style=flat-square)](https://www.firsttimersonly.com/) We follow the [First Timers Only](https://www.firsttimersonly.com/) philosophy of tagging issues for first timers only, and walking one newcomer through the resolution process weekly. [You can find these issues tagged with "first timers only" on our issues list.](https://github.com/chaoss/augur/labels/first-timers-only). @@ -7,7 +7,7 @@ ## NEW RELEASE ALERT! ### [If you want to jump right in, updated docker build/compose and bare metal installation instructions are available here](docs/new-install.md) -Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.60.0 +Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.60.2 - The `main` branch is a stable version of our new architecture, which features: - Dramatic improvement in the speed of large scale data collection (100,000+ repos). All data is obtained for 100k+ repos within 2 weeks. - A new job management architecture that uses Celery and Redis to manage queues, and enables users to run a Flower job monitoring dashboard diff --git a/augur/api/metrics/README.md b/augur/api/metrics/README.md index cabcc4475a..5990291bf1 100644 --- a/augur/api/metrics/README.md +++ b/augur/api/metrics/README.md @@ -26,7 +26,8 @@ from augur.application.db.engine import engine 4. Define any queries with the structure shown below ```py repo_sql = s.sql.text(""" SELECT repo.repo_name FROM repo WHERE repo.repo_id = :repo_id """) -with engine.connect() as conn: + results = pd.read_sql(repo_sql, conn, params={'repo_id': repo_id}) ``` 5. Return either a pandas dataframe, dict, or json.
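For context, here is a minimal sketch of how steps 1–5 and the connection-scoped `pd.read_sql` pattern applied throughout this change come together in one metric module. The imports and the `@register_metric()` decorator mirror the metric files touched below; the function name `repo_name_example` is purely illustrative and is not part of this change.

```py
# Illustrative sketch only -- not a file modified by this diff.
import sqlalchemy as s
import pandas as pd

from augur.api.util import register_metric

from ..server import engine


@register_metric()
def repo_name_example(repo_group_id, repo_id=None):
    """Example metric: return the name of a single repository."""
    repo_sql = s.sql.text("""
        SELECT repo.repo_name
        FROM repo
        WHERE repo.repo_id = :repo_id
    """)

    # Open a short-lived connection instead of handing the Engine to pandas,
    # matching the pattern this change applies across the metrics modules.
    with engine.connect() as conn:
        results = pd.read_sql(repo_sql, conn, params={'repo_id': repo_id})

    # Returning the DataFrame is enough; the API layer serializes it to JSON.
    return results
```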
- Note: If you return a pandas dataframe or dict it will be automatically converted into json diff --git a/augur/api/metrics/commit.py b/augur/api/metrics/commit.py index c143cd9f6e..41d86abbff 100644 --- a/augur/api/metrics/commit.py +++ b/augur/api/metrics/commit.py @@ -90,8 +90,9 @@ def committers(repo_group_id, repo_id=None, begin_date=None, end_date=None, peri """ ) - results = pd.read_sql(committersSQL, engine, params={'repo_id': repo_id, - 'repo_group_id': repo_group_id,'begin_date': begin_date, 'end_date': end_date, 'period':period}) + with engine.connect() as conn: + results = pd.read_sql(committersSQL, conn, params={'repo_id': repo_id, + 'repo_group_id': repo_group_id,'begin_date': begin_date, 'end_date': end_date, 'period':period}) return results @@ -167,8 +168,9 @@ def annual_commit_count_ranked_by_new_repo_in_repo_group(repo_group_id, repo_id= ORDER BY YEAR ASC """.format(table, period)) - results = pd.read_sql(cdRgNewrepRankedCommitsSQL, engine, params={'repo_id': repo_id, - 'repo_group_id': repo_group_id,'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(cdRgNewrepRankedCommitsSQL, conn, params={'repo_id': repo_id, + 'repo_group_id': repo_group_id,'begin_date': begin_date, 'end_date': end_date}) return results @register_metric() @@ -265,8 +267,9 @@ def annual_commit_count_ranked_by_repo_in_repo_group(repo_group_id, repo_id=None LIMIT 10 """) - results = pd.read_sql(cdRgTpRankedCommitsSQL, engine, params={ "repo_group_id": repo_group_id, - "repo_id": repo_id}) + with engine.connect() as conn: + results = pd.read_sql(cdRgTpRankedCommitsSQL, conn, params={ "repo_group_id": repo_group_id, + "repo_id": repo_id}) return results @register_metric() @@ -296,8 +299,9 @@ def top_committers(repo_group_id, repo_id=None, year=None, threshold=0.8): ORDER BY patches DESC) a """) - results = pd.read_sql(total_commits_SQL, engine, - params={'year': year, 'repo_group_id': repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(total_commits_SQL, conn, + params={'year': year, 'repo_group_id': repo_group_id}) else: total_commits_SQL = s.sql.text(""" SELECT SUM(patches)::int @@ -308,8 +312,9 @@ def top_committers(repo_group_id, repo_id=None, year=None, threshold=0.8): ORDER BY patches DESC) a """) - results = pd.read_sql(total_commits_SQL, engine, - params={'year': year, 'repo_id': repo_id}) + with engine.connect() as conn: + results = pd.read_sql(total_commits_SQL, conn, + params={'year': year, 'repo_id': repo_id}) if not results.iloc[0]['sum']: return pd.DataFrame() @@ -334,8 +339,9 @@ def top_committers(repo_group_id, repo_id=None, year=None, threshold=0.8): ORDER BY commits DESC """) - results = pd.read_sql(committers_SQL, engine, - params={'year': year, 'repo_group_id': repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(committers_SQL, conn, + params={'year': year, 'repo_group_id': repo_group_id}) else: committers_SQL = s.sql.text(""" SELECT @@ -353,8 +359,9 @@ def top_committers(repo_group_id, repo_id=None, year=None, threshold=0.8): ORDER BY commits DESC """) - results = pd.read_sql(committers_SQL, engine, - params={'year': year, 'repo_id': repo_id}) + with engine.connect() as conn: + results = pd.read_sql(committers_SQL, conn, + params={'year': year, 'repo_id': repo_id}) cumsum = 0 for i, row in results.iterrows(): diff --git a/augur/api/metrics/contributor.py b/augur/api/metrics/contributor.py index 7d255ecb46..3f25236d0f 100644 --- a/augur/api/metrics/contributor.py +++ 
b/augur/api/metrics/contributor.py @@ -125,8 +125,9 @@ def contributors(repo_group_id, repo_id=None, period='day', begin_date=None, end ORDER BY total DESC """) - results = pd.read_sql(contributorsSQL, engine, params={'repo_id': repo_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(contributorsSQL, conn, params={'repo_id': repo_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) else: contributorsSQL = s.sql.text(""" SELECT id::text AS user_id, @@ -211,8 +212,9 @@ def contributors(repo_group_id, repo_id=None, period='day', begin_date=None, end ORDER BY total DESC """) - results = pd.read_sql(contributorsSQL, engine, params={'repo_group_id': repo_group_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(contributorsSQL, conn, params={'repo_group_id': repo_group_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) return results @register_metric() @@ -281,8 +283,9 @@ def contributors_new(repo_group_id, repo_id=None, period='day', begin_date=None, GROUP BY date, repo.repo_id, repo_name """) - results = pd.read_sql(contributorsNewSQL, engine, params={'repo_id': repo_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(contributorsNewSQL, conn, params={'repo_id': repo_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) else: contributorsNewSQL = s.sql.text(""" SELECT date_trunc(:period, b.created_at::DATE) AS date, COUNT(id) AS new_contributors, repo.repo_id, repo_name @@ -330,8 +333,9 @@ def contributors_new(repo_group_id, repo_id=None, period='day', begin_date=None, GROUP BY date, repo.repo_id, repo_name """) - results = pd.read_sql(contributorsNewSQL, engine, params={'repo_group_id': repo_group_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(contributorsNewSQL, conn, params={'repo_group_id': repo_group_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) return results @register_metric() @@ -351,7 +355,8 @@ def lines_changed_by_author(repo_group_id, repo_id=None): GROUP BY commits.repo_id, date_trunc('week', cmt_author_date::date), cmt_author_affiliation, cmt_author_email, repo_name ORDER BY date_trunc('week', cmt_author_date::date) ASC; """) - results = pd.read_sql(linesChangedByAuthorSQL, engine, params={"repo_id": repo_id}) + with engine.connect() as conn: + results = pd.read_sql(linesChangedByAuthorSQL, conn, params={"repo_id": repo_id}) return results else: linesChangedByAuthorSQL = s.sql.text(""" @@ -362,7 +367,8 @@ def lines_changed_by_author(repo_group_id, repo_id=None): GROUP BY repo_id, date_trunc('week', cmt_author_date::date), cmt_author_affiliation, cmt_author_email ORDER BY date_trunc('week', cmt_author_date::date) ASC; """) - results = pd.read_sql(linesChangedByAuthorSQL, engine, params={"repo_group_id": repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(linesChangedByAuthorSQL, conn, params={"repo_group_id": repo_group_id}) return results @register_metric() @@ -420,8 +426,9 @@ def contributors_code_development(repo_group_id, repo_id=None, period='all', beg GROUP BY a.email, a.repo_id, repo_name """) - results = pd.read_sql(contributorsSQL, engine, params={'repo_id': repo_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with 
engine.connect() as conn: + results = pd.read_sql(contributorsSQL, conn, params={'repo_id': repo_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) else: contributorsSQL = s.sql.text(""" SELECT @@ -455,6 +462,7 @@ def contributors_code_development(repo_group_id, repo_id=None, period='all', beg ORDER BY commits desc, email """) - results = pd.read_sql(contributorsSQL, engine, params={'repo_group_id': repo_group_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(contributorsSQL, conn, params={'repo_group_id': repo_group_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) return results diff --git a/augur/api/metrics/deps.py b/augur/api/metrics/deps.py index deb5ac89fd..d92371d896 100644 --- a/augur/api/metrics/deps.py +++ b/augur/api/metrics/deps.py @@ -6,6 +6,7 @@ import sqlalchemy as s import pandas as pd from augur.api.util import register_metric +import datetime from ..server import engine @@ -45,7 +46,8 @@ def deps(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=No AND repo_dependencies.repo_id = :repo_id """) - results = pd.read_sql(depsSQL, engine) + with engine.connect() as conn: + results = pd.read_sql(depsSQL, conn) else: @@ -69,7 +71,8 @@ def deps(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=No AND repo.repo_group_id = :repo_group_id """) - results = pd.read_sql(depsSQL, engine) + with engine.connect() as conn: + results = pd.read_sql(depsSQL, conn) return results diff --git a/augur/api/metrics/insight.py b/augur/api/metrics/insight.py index 874f656f75..848161e1a8 100644 --- a/augur/api/metrics/insight.py +++ b/augur/api/metrics/insight.py @@ -29,5 +29,6 @@ def top_insights(repo_group_id, num_repos=6): LIMIT :num_repos ) """) - results = pd.read_sql(topInsightsSQL, engine, params={'repo_group_id': repo_group_id, 'num_repos': num_repos}) + with engine.connect() as conn: + results = pd.read_sql(topInsightsSQL, conn, params={'repo_group_id': repo_group_id, 'num_repos': num_repos}) return results diff --git a/augur/api/metrics/issue.py b/augur/api/metrics/issue.py index 72108bc20b..22ee2630b5 100644 --- a/augur/api/metrics/issue.py +++ b/augur/api/metrics/issue.py @@ -50,8 +50,10 @@ def issues_first_time_opened(repo_group_id, repo_id=None, period='day', begin_da GROUP BY issue_date, repo_name ORDER BY issue_date """) - results = pd.read_sql(issueNewContributor, engine, params={'repo_id': repo_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + + with engine.connect() as conn: + results = pd.read_sql(issueNewContributor, conn, params={'repo_id': repo_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) else: issueNewContributor = s.sql.text(""" SELECT @@ -76,9 +78,10 @@ def issues_first_time_opened(repo_group_id, repo_id=None, period='day', begin_da GROUP BY repo.repo_id, issue_date ORDER BY issue_date """) - results = pd.read_sql(issueNewContributor, engine, - params={'repo_group_id': repo_group_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(issueNewContributor, conn, + params={'repo_group_id': repo_group_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) return results @register_metric() @@ -119,8 +122,9 @@ def issues_first_time_closed(repo_group_id, repo_id=None, period='day', begin_da ) AS iss_close GROUP BY issue_date, repo_name """) - results = 
pd.read_sql(issuesClosedSQL, engine, params={'repo_id': repo_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(issuesClosedSQL, conn, params={'repo_id': repo_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) else: issuesClosedSQL = s.sql.text(""" SELECT date_trunc(:period, new_date::DATE) AS issue_date, @@ -141,8 +145,10 @@ def issues_first_time_closed(repo_group_id, repo_id=None, period='day', begin_da ) AS iss_close GROUP BY repo_id, repo_name,issue_date """) - results = pd.read_sql(issuesClosedSQL, engine, params={'repo_group_id': repo_group_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + + with engine.connect() as conn: + results = pd.read_sql(issuesClosedSQL, conn, params={'repo_group_id': repo_group_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) return results @@ -179,8 +185,9 @@ def issues_new(repo_group_id, repo_id=None, period='day', begin_date=None, end_d ORDER BY issues.repo_id, date """) - results = pd.read_sql(issues_new_SQL, engine, params={'repo_group_id': repo_group_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(issues_new_SQL, conn, params={'repo_group_id': repo_group_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) return results @@ -198,8 +205,9 @@ def issues_new(repo_group_id, repo_id=None, period='day', begin_date=None, end_d ORDER BY date; """) - results = pd.read_sql(issues_new_SQL, engine, params={'repo_id': repo_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(issues_new_SQL, conn, params={'repo_id': repo_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) return results @register_metric() @@ -235,8 +243,9 @@ def issues_active(repo_group_id, repo_id=None, period='day', begin_date=None, en ORDER BY issues.repo_id, date """) - results = pd.read_sql(issues_active_SQL, engine, params={'repo_group_id': repo_group_id, 'period':period, - 'begin_date': begin_date, 'end_date':end_date}) + with engine.connect() as conn: + results = pd.read_sql(issues_active_SQL, conn, params={'repo_group_id': repo_group_id, 'period':period, + 'begin_date': begin_date, 'end_date':end_date}) else: issues_active_SQL = s.sql.text(""" @@ -254,8 +263,9 @@ def issues_active(repo_group_id, repo_id=None, period='day', begin_date=None, en ORDER BY date """) - results = pd.read_sql(issues_active_SQL, engine, params={'repo_id': repo_id, 'period':period, - 'begin_date': begin_date, 'end_date':end_date}) + with engine.connect() as conn: + results = pd.read_sql(issues_active_SQL, conn, params={'repo_id': repo_id, 'period':period, + 'begin_date': begin_date, 'end_date':end_date}) return results @register_metric() @@ -290,8 +300,9 @@ def issues_closed(repo_group_id, repo_id=None, period='day', begin_date=None, en ORDER BY issues.repo_id, date """) - results = pd.read_sql(issues_closed_SQL, engine, params={'repo_group_id': repo_group_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(issues_closed_SQL, conn, params={'repo_group_id': repo_group_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) else: issues_closed_SQL = s.sql.text(""" @@ -308,8 +319,9 @@ def issues_closed(repo_group_id, repo_id=None, period='day', begin_date=None, en ORDER BY date; """) 
- results = pd.read_sql(issues_closed_SQL, engine, params={'repo_id': repo_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(issues_closed_SQL, conn, params={'repo_id': repo_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) return results @@ -347,9 +359,10 @@ def issue_duration(repo_group_id, repo_id=None, begin_date=None, end_date=None): ORDER BY repo_id, issue_id """) - results = pd.read_sql(issue_duration_SQL, engine, params={'repo_group_id': repo_group_id, - 'begin_date': begin_date, - 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(issue_duration_SQL, conn, params={'repo_group_id': repo_group_id, + 'begin_date': begin_date, + 'end_date': end_date}) results['duration'] = results['duration'].astype(str) return results @@ -371,9 +384,10 @@ def issue_duration(repo_group_id, repo_id=None, begin_date=None, end_date=None): ORDER BY issue_id; """) - results = pd.read_sql(issue_duration_SQL, engine, params={'repo_id': repo_id, - 'begin_date': begin_date, - 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(issue_duration_SQL, conn, params={'repo_id': repo_id, + 'begin_date': begin_date, + 'end_date': end_date}) results['duration'] = results['duration'].astype(str) return results @@ -417,9 +431,10 @@ def issue_participants(repo_group_id, repo_id=None, begin_date=None, end_date=No ORDER BY issues.repo_id, issues.created_at """) - result = pd.read_sql(issue_participants_SQL, engine, params={'repo_group_id': repo_group_id, - 'begin_date': begin_date, - 'end_date': end_date}) + with engine.connect() as conn: + result = pd.read_sql(issue_participants_SQL, conn, params={'repo_group_id': repo_group_id, + 'begin_date': begin_date, + 'end_date': end_date}) return result else: issue_participants_SQL = s.sql.text(""" @@ -445,9 +460,10 @@ def issue_participants(repo_group_id, repo_id=None, begin_date=None, end_date=No ORDER BY issues.created_at """) - result = pd.read_sql(issue_participants_SQL, engine, params={'repo_id': repo_id, - 'begin_date': begin_date, - 'end_date': end_date}) + with engine.connect() as conn: + result = pd.read_sql(issue_participants_SQL, conn, params={'repo_id': repo_id, + 'begin_date': begin_date, + 'end_date': end_date}) return result @register_metric() @@ -468,7 +484,9 @@ def issue_backlog(repo_group_id, repo_id=None): GROUP BY issues.repo_id, repo_name ORDER BY issues.repo_id """) - result = pd.read_sql(issue_backlog_SQL, engine, params={'repo_group_id': repo_group_id}) + + with engine.connect() as conn: + result = pd.read_sql(issue_backlog_SQL, conn, params={'repo_group_id': repo_group_id}) return result else: @@ -481,7 +499,8 @@ def issue_backlog(repo_group_id, repo_id=None): GROUP BY repo_name """) - result = pd.read_sql(issue_backlog_SQL, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + result = pd.read_sql(issue_backlog_SQL, conn, params={'repo_id': repo_id}) return result @register_metric() @@ -509,7 +528,8 @@ def issue_throughput(repo_group_id, repo_id=None): AND table1.repo_id = repo.repo_id """) - results = pd.read_sql(issue_throughput_SQL, engine, params={'repo_group_id': repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(issue_throughput_SQL, conn, params={'repo_group_id': repo_group_id}) return results else: @@ -525,7 +545,8 @@ def issue_throughput(repo_group_id, repo_id=None): WHERE table1.repo_id = repo.repo_id """) - result = pd.read_sql(issue_throughput_SQL, 
engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + result = pd.read_sql(issue_throughput_SQL, conn, params={'repo_id': repo_id}) return result @register_metric() @@ -574,9 +595,10 @@ def issues_open_age(repo_group_id, repo_id=None, period='day', begin_date=None, ORDER BY open_date DESC """) - results = pd.read_sql(openAgeSQL, engine, - params={'repo_id': repo_id, 'repo_group_id': repo_group_id, - 'period': period, 'begin_date':begin_date, 'end_date':end_date}) + with engine.connect() as conn: + results = pd.read_sql(openAgeSQL, conn, + params={'repo_id': repo_id, 'repo_group_id': repo_group_id, + 'period': period, 'begin_date':begin_date, 'end_date':end_date}) return results @@ -634,11 +656,12 @@ def issues_closed_resolution_duration(repo_group_id, repo_id=None, period='day', ORDER BY gh_issue_number """) - results = pd.read_sql(issueSQL, engine, - params={'repo_id': repo_id, - 'repo_group_id': repo_group_id, - 'period': period, 'begin_date':begin_date, - 'end_date':end_date}) + with engine.connect() as conn: + results = pd.read_sql(issueSQL, conn, + params={'repo_id': repo_id, + 'repo_group_id': repo_group_id, + 'period': period, 'begin_date':begin_date, + 'end_date':end_date}) return results @@ -667,8 +690,9 @@ def average_issue_resolution_time(repo_group_id, repo_id=None): """) - results = pd.read_sql(avg_issue_resolution_SQL, engine, - params={'repo_group_id': repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(avg_issue_resolution_SQL, conn, + params={'repo_group_id': repo_group_id}) return results else: @@ -683,8 +707,9 @@ def average_issue_resolution_time(repo_group_id, repo_id=None): GROUP BY repo.repo_name """) - results = pd.read_sql(avg_issue_resolution_SQL, engine, - params={'repo_id': repo_id}) + with engine.connect() as conn: + results = pd.read_sql(avg_issue_resolution_SQL, conn, + params={'repo_id': repo_id}) return results @register_metric() @@ -757,7 +782,8 @@ def issues_maintainer_response_duration(repo_group_id, repo_id=None, begin_date= group by repo_id, repo_name """) - results = pd.read_sql(issuesSQL, engine, params={'repo_id': repo_id, 'repo_group_id': repo_group_id,'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(issuesSQL, conn, params={'repo_id': repo_id, 'repo_group_id': repo_group_id,'begin_date': begin_date, 'end_date': end_date}) return results @@ -780,7 +806,8 @@ def open_issues_count(repo_group_id, repo_id=None): GROUP BY date, repo_groups.rg_name ORDER BY date """) - results = pd.read_sql(openIssueCountSQL, engine, params={'repo_group_id': repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(openIssueCountSQL, conn, params={'repo_group_id': repo_group_id}) return results else: openIssueCountSQL = s.sql.text(""" @@ -794,7 +821,8 @@ def open_issues_count(repo_group_id, repo_id=None): GROUP BY date, repo.repo_id ORDER BY date """) - results = pd.read_sql(openIssueCountSQL, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + results = pd.read_sql(openIssueCountSQL, conn, params={'repo_id': repo_id}) return results @@ -817,7 +845,8 @@ def closed_issues_count(repo_group_id, repo_id=None): GROUP BY date, repo_groups.rg_name ORDER BY date """) - results = pd.read_sql(closedIssueCountSQL, engine, params={'repo_group_id': repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(closedIssueCountSQL, conn, params={'repo_group_id': repo_group_id}) return results else: closedIssueCountSQL = s.sql.text(""" @@ -831,7 
+860,8 @@ def closed_issues_count(repo_group_id, repo_id=None): GROUP BY date, repo.repo_id ORDER BY date """) - results = pd.read_sql(closedIssueCountSQL, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + results = pd.read_sql(closedIssueCountSQL, conn, params={'repo_id': repo_id}) return results @register_metric() @@ -893,8 +923,9 @@ def issue_comments_mean(repo_group_id, repo_id=None, group_by='week'): else: raise ValueError("Incorrect value for 'group_by'") - results = pd.read_sql(issue_comments_mean_std_SQL, engine, - params={'repo_group_id': repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(issue_comments_mean_std_SQL, conn, + params={'repo_group_id': repo_group_id}) return results else: @@ -946,8 +977,9 @@ def issue_comments_mean(repo_group_id, repo_id=None, group_by='week'): else: raise ValueError("Incorrect value for 'group_by'") - results = pd.read_sql(issue_comments_mean_std_SQL, engine, - params={'repo_id': repo_id}) + with engine.connect() as conn: + results = pd.read_sql(issue_comments_mean_std_SQL, conn, + params={'repo_id': repo_id}) return results @register_metric() @@ -978,9 +1010,10 @@ def issue_comments_mean_std(repo_group_id, repo_id=None, group_by='week'): """) - results = pd.read_sql(issue_comments_mean_std_SQL, engine, - params={'repo_group_id': repo_group_id, - 'group_by': group_by}) + with engine.connect() as conn: + results = pd.read_sql(issue_comments_mean_std_SQL, conn, + params={'repo_group_id': repo_group_id, + 'group_by': group_by}) return results else: @@ -1006,8 +1039,9 @@ def issue_comments_mean_std(repo_group_id, repo_id=None, group_by='week'): ORDER BY date """) - results = pd.read_sql(issue_comments_mean_std_SQL, engine, - params={'repo_id': repo_id, 'group_by': group_by}) + with engine.connect() as conn: + results = pd.read_sql(issue_comments_mean_std_SQL, conn, + params={'repo_id': repo_id, 'group_by': group_by}) return results @register_metric() @@ -1057,6 +1091,7 @@ def abandoned_issues(repo_group_id, repo_id=None, period='day', begin_date=None, ''' ) - results = pd.read_sql(abandonedSQL, engine, params={'repo_id': repo_id, 'repo_group_id': repo_group_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(abandonedSQL, conn, params={'repo_id': repo_id, 'repo_group_id': repo_group_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) return results diff --git a/augur/api/metrics/message.py b/augur/api/metrics/message.py index 8c36c3a4c2..9988f5a0d5 100644 --- a/augur/api/metrics/message.py +++ b/augur/api/metrics/message.py @@ -56,9 +56,9 @@ def repo_messages(repo_group_id, repo_id=None, period='day', begin_date=None, en """) - - results = pd.read_sql(repomessagesSQL, engine, params={'repo_id': repo_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(repomessagesSQL, conn, params={'repo_id': repo_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) else: repomessagesSQL = s.sql.text(""" @@ -85,10 +85,11 @@ def repo_messages(repo_group_id, repo_id=None, period='day', begin_date=None, en rg_name, message_date """) - - results = pd.read_sql(repomessagesSQL, engine, - params={'repo_group_id': repo_group_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + + with engine.connect() as conn: + results = pd.read_sql(repomessagesSQL, conn, + params={'repo_group_id': repo_group_id, 'period': period, + 
'begin_date': begin_date, 'end_date': end_date}) return results diff --git a/augur/api/metrics/pull_request.py b/augur/api/metrics/pull_request.py index 9fbcc61757..3b1798ec01 100644 --- a/augur/api/metrics/pull_request.py +++ b/augur/api/metrics/pull_request.py @@ -10,6 +10,53 @@ from ..server import engine +@register_metric() +def pull_requests_new(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None): + """ + Returns a time series of the number of new Pull Requests opened during a certain period. + + :param repo_id: The repository's id + :param repo_group_id: The repository's group id + :param period: To set the periodicity to 'day', 'week', 'month' or 'year', defaults to 'day' + :param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:01' + :param end_date: Specifies the end date, defaults to datetime.now() + :return: DataFrame of new Pull Requests/period + """ + if not begin_date: + begin_date = '1970-1-1 00:00:01' + if not end_date: + end_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + + if repo_id: + new_pull_requests_query = s.sql.text(""" + SELECT DATE_TRUNC(:period, pr_created_at) AS created_date, + COUNT(pr_id) AS new_pull_requests + FROM pull_requests + WHERE repo_id = :repo_id + AND pr_created_at BETWEEN :begin_date AND :end_date + GROUP BY created_date + """) + + results = pd.read_sql(new_pull_requests_query, engine, params={'repo_id': repo_id, 'period': period, + 'begin_date': begin_date, + 'end_date': end_date}) + else: + new_pull_requests_query = s.sql.text(""" + SELECT DATE_TRUNC(:period, pr_created_at) AS created_date, + COUNT(pr_id) AS new_pull_requests + FROM pull_requests + WHERE repo_id IN (SELECT repo_id FROM repo WHERE repo_group_id = :repo_group_id) + AND pr_created_at BETWEEN :begin_date AND :end_date + GROUP BY created_date + """) + + results = pd.read_sql(new_pull_requests_query, engine, + params={'repo_group_id': repo_group_id, 'period': period, + 'begin_date': begin_date, + 'end_date': end_date}) + + return results + @register_metric() def pull_requests_merge_contributor_new(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None): """ @@ -40,9 +87,10 @@ def pull_requests_merge_contributor_new(repo_group_id, repo_id=None, period='day """) - results = pd.read_sql(commitNewContributor, engine, params={'repo_id': repo_id, 'period': period, - 'begin_date': begin_date, - 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(commitNewContributor, conn, params={'repo_id': repo_id, 'period': period, + 'begin_date': begin_date, + 'end_date': end_date}) else: commitNewContributor = s.sql.text(""" SELECT abc.repo_id, repo_name ,date_trunc(:period, new_date::DATE) as commit_date, @@ -58,11 +106,11 @@ def pull_requests_merge_contributor_new(repo_group_id, repo_id=None, period='day GROUP BY abc.repo_id, repo_name, commit_date """) - - results = pd.read_sql(commitNewContributor, engine, - params={'repo_group_id': repo_group_id, 'period': period, - 'begin_date': begin_date, - 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(commitNewContributor, conn, + params={'repo_group_id': repo_group_id, 'period': period, + 'begin_date': begin_date, + 'end_date': end_date}) return results @register_metric() @@ -96,9 +144,10 @@ def pull_requests_closed_no_merge(repo_group_id, repo_id=None, period='day', beg - results = pd.read_sql(closedNoMerge, engine, params={'repo_id': repo_id, 'period': period, - 'begin_date': begin_date, - 'end_date': end_date}) + with 
engine.connect() as conn: + results = pd.read_sql(closedNoMerge, conn, params={'repo_id': repo_id, 'period': period, + 'begin_date': begin_date, + 'end_date': end_date}) else: closedNoMerge = s.sql.text(""" @@ -110,11 +159,11 @@ def pull_requests_closed_no_merge(repo_group_id, repo_id=None, period='day', beg ORDER BY closed_date """) - - results = pd.read_sql(closedNoMerge, engine, - params={'repo_group_id': repo_group_id, 'period': period, - 'begin_date': begin_date, - 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(closedNoMerge, conn, + params={'repo_group_id': repo_group_id, 'period': period, + 'begin_date': begin_date, + 'end_date': end_date}) return results @register_metric() @@ -151,9 +200,10 @@ def reviews(repo_group_id, repo_id=None, period='day', begin_date=None, end_date """) - results = pd.read_sql(reviews_SQL, engine, - params={'period': period, 'repo_group_id': repo_group_id, - 'begin_date': begin_date, 'end_date': end_date }) + with engine.connect() as conn: + results = pd.read_sql(reviews_SQL, conn, + params={'period': period, 'repo_group_id': repo_group_id, + 'begin_date': begin_date, 'end_date': end_date }) return results else: @@ -171,10 +221,10 @@ def reviews(repo_group_id, repo_id=None, period='day', begin_date=None, end_date ORDER BY date """) - - results = pd.read_sql(reviews_SQL, engine, - params={'period': period, 'repo_id': repo_id, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(reviews_SQL, conn, + params={'period': period, 'repo_id': repo_id, + 'begin_date': begin_date, 'end_date': end_date}) return results @register_metric() @@ -211,10 +261,10 @@ def reviews_accepted(repo_group_id, repo_id=None, period='day', begin_date=None, ORDER BY pull_requests.repo_id, date """) - - results = pd.read_sql(reviews_accepted_SQL, engine, - params={'period': period, 'repo_group_id': repo_group_id, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(reviews_accepted_SQL, conn, + params={'period': period, 'repo_group_id': repo_group_id, + 'begin_date': begin_date, 'end_date': end_date}) return results else: reviews_accepted_SQL = s.sql.text(""" @@ -232,9 +282,10 @@ def reviews_accepted(repo_group_id, repo_id=None, period='day', begin_date=None, ORDER BY date """) - results = pd.read_sql(reviews_accepted_SQL, engine, - params={'period': period, 'repo_id': repo_id, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(reviews_accepted_SQL, conn, + params={'period': period, 'repo_id': repo_id, + 'begin_date': begin_date, 'end_date': end_date}) return results @register_metric() @@ -271,10 +322,10 @@ def reviews_declined(repo_group_id, repo_id=None, period='day', begin_date=None, ORDER BY pull_requests.repo_id, date """) - - results = pd.read_sql(reviews_declined_SQL, engine, - params={'period': period, 'repo_group_id': repo_group_id, - 'begin_date': begin_date, 'end_date': end_date }) + with engine.connect() as conn: + results = pd.read_sql(reviews_declined_SQL, conn, + params={'period': period, 'repo_group_id': repo_group_id, + 'begin_date': begin_date, 'end_date': end_date }) return results else: reviews_declined_SQL = s.sql.text(""" @@ -292,9 +343,10 @@ def reviews_declined(repo_group_id, repo_id=None, period='day', begin_date=None, ORDER BY date """) - results = pd.read_sql(reviews_declined_SQL, engine, - params={'period': period, 'repo_id': repo_id, - 'begin_date': begin_date, 
'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(reviews_declined_SQL, conn, + params={'period': period, 'repo_id': repo_id, + 'begin_date': begin_date, 'end_date': end_date}) return results @register_metric() @@ -331,11 +383,11 @@ def review_duration(repo_group_id, repo_id=None, begin_date=None, end_date=None) ORDER BY pull_requests.repo_id, pull_requests.pull_request_id """) - - results = pd.read_sql(review_duration_SQL, engine, - params={'repo_group_id': repo_group_id, - 'begin_date': begin_date, - 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(review_duration_SQL, conn, + params={'repo_group_id': repo_group_id, + 'begin_date': begin_date, + 'end_date': end_date}) results['duration'] = results['duration'].astype(str) return results else: @@ -355,10 +407,11 @@ def review_duration(repo_group_id, repo_id=None, begin_date=None, end_date=None) ORDER BY pull_requests.repo_id, pull_request_id """) - results = pd.read_sql(review_duration_SQL, engine, - params={'repo_id': repo_id, - 'begin_date': begin_date, - 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(review_duration_SQL, conn, + params={'repo_id': repo_id, + 'begin_date': begin_date, + 'end_date': end_date}) results['duration'] = results['duration'].astype(str) return results @@ -408,8 +461,9 @@ def pull_request_acceptance_rate(repo_group_id, repo_id=None, begin_date=None, e ON opened.date_created = accepted.accepted_on """) - results = pd.read_sql(prAccRateSQL, engine, params={'repo_group_id': repo_group_id, 'group_by': group_by, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(prAccRateSQL, conn, params={'repo_group_id': repo_group_id, 'group_by': group_by, + 'begin_date': begin_date, 'end_date': end_date}) return results else: prAccRateSQL = s.sql.text(""" @@ -441,8 +495,9 @@ def pull_request_acceptance_rate(repo_group_id, repo_id=None, begin_date=None, e ON opened.date_created = accepted.accepted_on """) - results = pd.read_sql(prAccRateSQL, engine, params={'repo_id': repo_id, 'group_by': group_by, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(prAccRateSQL, conn, params={'repo_id': repo_id, 'group_by': group_by, + 'begin_date': begin_date, 'end_date': end_date}) return results @register_metric() @@ -546,9 +601,10 @@ def pull_request_average_time_to_close(repo_group_id, repo_id=None, group_by='mo - pr_all = pd.read_sql(pr_all_SQL, engine, - params={'repo_id': repo_id, 'repo_group_id':repo_group_id, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + pr_all = pd.read_sql(pr_all_SQL, conn, + params={'repo_id': repo_id, 'repo_group_id':repo_group_id, + 'begin_date': begin_date, 'end_date': end_date}) if not repo_id: pr_avg_time_to_close = pr_all.groupby(['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys).mean().reset_index()[['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys + ['average_{}_to_close'.format(time_unit)]] else: @@ -657,10 +713,11 @@ def pull_request_merged_status_counts(repo_group_id, repo_id=None, begin_date='1 GROUP BY closed_year, closed_month, merged_status, time_between_responses.pr_closed_at, time_between_responses.average_time_between_responses """) - - pr_all = pd.read_sql(pr_all_SQL, engine, - params={'repo_id': repo_id, 'repo_group_id':repo_group_id, - 'begin_date': begin_date, 
'end_date': end_date}) + + with engine.connect() as conn: + pr_all = pd.read_sql(pr_all_SQL, conn, + params={'repo_id': repo_id, 'repo_group_id':repo_group_id, + 'begin_date': begin_date, 'end_date': end_date}) if not repo_id: pr_avg_time_between_responses = pr_all.groupby(['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys).mean().reset_index()[['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys + ['average_{}_between_responses'.format(time_unit)]] else: @@ -767,10 +824,11 @@ def pull_request_average_commit_counts(repo_group_id, repo_id=None, group_by='mo GROUP BY closed_year, merged_status, data.pr_closed_at, data.commit_count """) - - pr_all = pd.read_sql(pr_all_SQL, engine, - params={'repo_id': repo_id, 'repo_group_id':repo_group_id, - 'begin_date': begin_date, 'end_date': end_date}) + + with engine.connect() as conn: + pr_all = pd.read_sql(pr_all_SQL, conn, + params={'repo_id': repo_id, 'repo_group_id':repo_group_id, + 'begin_date': begin_date, 'end_date': end_date}) if not repo_id: pr_avg_commit_counts = pr_all.groupby(['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys).mean().reset_index()[['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys + ['average_commits_per_pull_request']] else: @@ -926,10 +984,11 @@ def pull_request_average_event_counts(repo_group_id, repo_id=None, group_by='mon ORDER BY merged_status, closed_year, closed_week, closed_day """) - - pr_all = pd.read_sql(pr_all_SQL, engine, - params={'repo_id': repo_id, 'repo_group_id':repo_group_id, - 'begin_date': begin_date, 'end_date': end_date}) + + with engine.connect() as conn: + pr_all = pd.read_sql(pr_all_SQL, conn, + params={'repo_id': repo_id, 'repo_group_id':repo_group_id, + 'begin_date': begin_date, 'end_date': end_date}) count_names = ['assigned_count', 'review_requested_count', 'labeled_count', 'unlabeled_count', 'subscribed_count', 'mentioned_count', 'referenced_count', 'closed_count', 'head_ref_force_pushed_count', 'head_ref_deleted_count', 'milestoned_count', 'merged_count', 'comment_count'] average_count_names = [] @@ -1050,9 +1109,10 @@ def pull_request_average_time_to_responses_and_close(repo_group_id, repo_id=None GROUP BY closed_year, merged_status, response_times.first_response_time, response_times.last_response_time, response_times.pr_created_at, response_times.pr_closed_at """) - pr_all = pd.read_sql(pr_all_SQL, engine, - params={'repo_id': repo_id, 'repo_group_id':repo_group_id, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + pr_all = pd.read_sql(pr_all_SQL, conn, + params={'repo_id': repo_id, 'repo_group_id':repo_group_id, + 'begin_date': begin_date, 'end_date': end_date}) if not repo_id: avg_pr_time_to_responses_and_close = pr_all.groupby(['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys).mean().reset_index()[['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys + ['average_{}_to_first_response'.format(time_unit), 'average_{}_to_last_response'.format(time_unit), 'average_{}_to_close'.format(time_unit)]] @@ -1132,9 +1192,10 @@ def pull_request_merged_status_counts(repo_group_id, repo_id=None, begin_date='1 AND pr_closed_at::date <= :end_date ::date """) - - pr_all = pd.read_sql(pr_all_sql, engine, params={'repo_group_id': repo_group_id, - 'repo_id': repo_id, 'begin_date': begin_date, 'end_date': end_date}) + 
+ with engine.connect() as conn: + pr_all = pd.read_sql(pr_all_sql, conn, params={'repo_group_id': repo_group_id, + 'repo_id': repo_id, 'begin_date': begin_date, 'end_date': end_date}) if not repo_id: pr_merged_counts = pr_all.groupby(['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys).count().reset_index()[['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys + ['pull_request_count']] diff --git a/augur/api/metrics/release.py b/augur/api/metrics/release.py index 60f7793652..5594f7ef08 100644 --- a/augur/api/metrics/release.py +++ b/augur/api/metrics/release.py @@ -50,10 +50,10 @@ def releases(repo_group_id, repo_id=None, period='day', begin_date=None, end_dat ORDER BY releases.release_published_at DESC """) - - results = pd.read_sql(releases_SQL, engine, - params={'period': period, 'repo_group_id': repo_group_id, - 'begin_date': begin_date, 'end_date': end_date }) + with engine.connect() as conn: + results = pd.read_sql(releases_SQL, conn, + params={'period': period, 'repo_group_id': repo_group_id, + 'begin_date': begin_date, 'end_date': end_date }) return results else: @@ -80,10 +80,10 @@ def releases(repo_group_id, repo_id=None, period='day', begin_date=None, end_dat ORDER BY releases.release_published_at DESC """) - - results = pd.read_sql(releases_SQL, engine, - params={'period': period, 'repo_id': repo_id, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(releases_SQL, conn, + params={'period': period, 'repo_id': repo_id, + 'begin_date': begin_date, 'end_date': end_date}) return results @register_metric() @@ -127,10 +127,10 @@ def tag_only_releases(repo_group_id, repo_id=None, period='day', begin_date=None ORDER BY releases.release_published_at DESC """) - - results = pd.read_sql(releases_SQL, engine, - params={'period': period, 'repo_group_id': repo_group_id, - 'begin_date': begin_date, 'end_date': end_date }) + with engine.connect() as conn: + results = pd.read_sql(releases_SQL, conn, + params={'period': period, 'repo_group_id': repo_group_id, + 'begin_date': begin_date, 'end_date': end_date }) return results else: @@ -150,10 +150,11 @@ def tag_only_releases(repo_group_id, repo_id=None, period='day', begin_date=None ORDER BY releases.release_published_at DESC """) - results = pd.read_sql(releases_SQL, engine, - params={'period': period, 'repo_id': repo_id, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(releases_SQL, conn, + params={'period': period, 'repo_id': repo_id, + 'begin_date': begin_date, 'end_date': end_date}) return results -def create_release_metrics(metrics): - add_metrics(metrics, __name__) +#def create_release_metrics(metrics): +# add_metrics(metrics, __name__) diff --git a/augur/api/metrics/repo_meta.py b/augur/api/metrics/repo_meta.py index ca4d9668e2..c5d8e1138d 100644 --- a/augur/api/metrics/repo_meta.py +++ b/augur/api/metrics/repo_meta.py @@ -46,8 +46,8 @@ def code_changes(repo_group_id, repo_id=None, period='week', begin_date=None, en ORDER BY week """) - - results = pd.read_sql(code_changes_SQL, engine, params={'repo_group_id': repo_group_id, 'period': period, + with engine.connect() as conn: + results = pd.read_sql(code_changes_SQL, conn, params={'repo_group_id': repo_group_id, 'period': period, 'begin_date': begin_date, 'end_date': end_date}) results['week'] = results['week'].apply(lambda x: x - 1) results['date'] = results['year'].astype(str) + ' ' + 
results['week'].astype(str) + ' 0' @@ -68,9 +68,9 @@ def code_changes(repo_group_id, repo_id=None, period='week', begin_date=None, en ORDER BY week """) - - results = pd.read_sql(code_changes_SQL, engine, params={'repo_id': repo_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(code_changes_SQL, conn, params={'repo_id': repo_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) results['week'] = results['week'].apply(lambda x: x - 1) results['date'] = results['year'].astype(str) + ' ' + results['week'].astype(str) + ' 0' @@ -111,8 +111,9 @@ def code_changes_lines(repo_group_id, repo_id=None, period='day', begin_date=Non ORDER BY commits.repo_id, date """) - results = pd.read_sql(code_changes_lines_SQL, engine, params={'repo_group_id': repo_group_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(code_changes_lines_SQL, conn, params={'repo_group_id': repo_group_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) return results @@ -130,9 +131,9 @@ def code_changes_lines(repo_group_id, repo_id=None, period='day', begin_date=Non ORDER BY date; """) - - results = pd.read_sql(code_changes_lines_SQL, engine, params={'repo_id': repo_id, 'period': period, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(code_changes_lines_SQL, conn, params={'repo_id': repo_id, 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) return results @@ -163,8 +164,9 @@ def sub_projects(repo_group_id, repo_id=None, begin_date=None, end_date=None): AND repo_added BETWEEN :begin_date AND :end_date """) - results = pd.read_sql(sub_projectsSQL, engine, params={'repo_id': repo_id, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(sub_projectsSQL, conn, params={'repo_id': repo_id, + 'begin_date': begin_date, 'end_date': end_date}) else: sub_projectsSQL = s.sql.text(""" SELECT COUNT(*) AS sub_project_count @@ -173,8 +175,9 @@ def sub_projects(repo_group_id, repo_id=None, begin_date=None, end_date=None): AND repo_added BETWEEN :begin_date AND :end_date """) - results = pd.read_sql(sub_projectsSQL, engine, params={'repo_group_id': repo_group_id, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(sub_projectsSQL, conn, params={'repo_group_id': repo_group_id, + 'begin_date': begin_date, 'end_date': end_date}) return results @@ -194,8 +197,8 @@ def sbom_download(repo_group_id, repo_id=None): logger.debug(dosocs_SQL) params = {'repo_id': repo_id} - - return pd.read_sql(dosocs_SQL, engine, params=params) + with engine.connect() as conn: + return pd.read_sql(dosocs_SQL, conn, params=params) #return [json.dumps(license_information)] @register_metric() @@ -223,7 +226,8 @@ def cii_best_practices_badge(repo_group_id, repo_id=None): LIMIT 1 """) - raw_df = pd.read_sql(cii_best_practices_badge_SQL, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + raw_df = pd.read_sql(cii_best_practices_badge_SQL, conn, params={'repo_id': repo_id}) if len(raw_df) == 0: return [] @@ -263,8 +267,8 @@ def forks(repo_group_id, repo_id=None): ORDER BY repo_info.repo_id, date """) - - results = pd.read_sql(forks_SQL, engine, params={'repo_group_id': repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(forks_SQL, conn, params={'repo_group_id': 
repo_group_id}) return results else: @@ -278,8 +282,8 @@ def forks(repo_group_id, repo_id=None): ORDER BY date """) - - results = pd.read_sql(forks_SQL, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + results = pd.read_sql(forks_SQL, conn, params={'repo_id': repo_id}) return results @register_metric() @@ -303,8 +307,8 @@ def fork_count(repo_group_id, repo_id=None): WHERE repo_group_id = :repo_group_id) """) - - results = pd.read_sql(fork_count_SQL, engine, params={'repo_group_id': repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(fork_count_SQL, conn, params={'repo_group_id': repo_group_id}) return results else: fork_count_SQL = s.sql.text(""" @@ -315,8 +319,8 @@ def fork_count(repo_group_id, repo_id=None): LIMIT 1 """) - - results = pd.read_sql(fork_count_SQL, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + results = pd.read_sql(fork_count_SQL, conn, params={'repo_id': repo_id}) return results @register_metric() @@ -334,7 +338,8 @@ def languages(repo_group_id, repo_id=None): WHERE repo_id IN (SELECT repo_id FROM repo WHERE repo_group_id = :repo_group_id) """) - results = pd.read_sql(languages_SQL, engine, params={'repo_group_id': repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(languages_SQL, conn, params={'repo_group_id': repo_group_id}) return results else: @@ -344,8 +349,8 @@ def languages(repo_group_id, repo_id=None): WHERE repo_id = :repo_id """) - - results = pd.read_sql(languages_SQL, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + results = pd.read_sql(languages_SQL, conn, params={'repo_id': repo_id}) return results @register_metric(type="license") @@ -381,7 +386,8 @@ def license_files(license_id, spdx_binary, repo_group_id, repo_id=None,): b.license_id in ( 369,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482)); """) - results = pd.read_sql(license_data_SQL, engine, params={'repo_id': repo_id, 'spdx_binary': spdx_binary, 'license_id': license_id}) + with engine.connect() as conn: + results = pd.read_sql(license_data_SQL, conn, params={'repo_id': repo_id, 'spdx_binary': spdx_binary, 'license_id': license_id}) return results @register_metric() @@ -450,7 +456,8 @@ def license_declared(repo_group_id, repo_id=None): short_name; """) - results = pd.read_sql(license_declared_SQL, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + results = pd.read_sql(license_declared_SQL, conn, params={'repo_id': repo_id}) return results @register_metric() @@ -534,7 +541,8 @@ def license_coverage(repo_group_id, repo_id=None): GROUP BY a.name, a.licensed, a.licensed, b.total """) - results = pd.read_sql(license_declared_SQL, engine, params={'repo_id': repo_id, 'repo_group_id':repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(license_declared_SQL, conn, params={'repo_id': repo_id, 'repo_group_id':repo_group_id}) return results @@ -595,8 +603,8 @@ def 
license_count(repo_group_id, repo_id=None): GROUP BY a.name, a.number_of_license, a.licensed, b.total """) - - results = pd.read_sql(license_declared_SQL, engine, params={'repo_id': repo_id, 'repo_group_id':repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(license_declared_SQL, conn, params={'repo_id': repo_id, 'repo_group_id':repo_group_id}) return results @@ -624,8 +632,8 @@ def stars(repo_group_id, repo_id=None): ORDER BY repo_info.repo_id, date """) - - results = pd.read_sql(stars_SQL, engine, params={'repo_group_id': repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(stars_SQL, conn, params={'repo_group_id': repo_group_id}) return results else: @@ -639,7 +647,8 @@ def stars(repo_group_id, repo_id=None): ORDER BY date """) - results = pd.read_sql(stars_SQL, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + results = pd.read_sql(stars_SQL, conn, params={'repo_id': repo_id}) return results @register_metric() @@ -663,8 +672,8 @@ def stars_count(repo_group_id, repo_id=None): WHERE repo_group_id = :repo_group_id) """) - - results = pd.read_sql(stars_count_SQL, engine, params={'repo_group_id': repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(stars_count_SQL, conn, params={'repo_group_id': repo_group_id}) return results else: stars_count_SQL = s.sql.text(""" @@ -675,7 +684,8 @@ def stars_count(repo_group_id, repo_id=None): LIMIT 1 """) - results = pd.read_sql(stars_count_SQL, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + results = pd.read_sql(stars_count_SQL, conn, params={'repo_id': repo_id}) return results @register_metric() @@ -701,8 +711,8 @@ def watchers(repo_group_id, repo_id=None): ORDER BY repo_info.repo_id, date """) - - results = pd.read_sql(watchers_SQL, engine, params={'repo_group_id': repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(watchers_SQL, conn, params={'repo_group_id': repo_group_id}) return results else: @@ -716,8 +726,8 @@ def watchers(repo_group_id, repo_id=None): ORDER BY date """) - - results = pd.read_sql(watchers_SQL, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + results = pd.read_sql(watchers_SQL, conn, params={'repo_id': repo_id}) return results @register_metric() @@ -741,8 +751,8 @@ def watchers_count(repo_group_id, repo_id=None): WHERE repo_group_id = :repo_group_id) """) - - results = pd.read_sql(watchers_count_SQL, engine, params={'repo_group_id': repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(watchers_count_SQL, conn, params={'repo_group_id': repo_group_id}) return results else: watchers_count_SQL = s.sql.text(""" @@ -753,8 +763,8 @@ def watchers_count(repo_group_id, repo_id=None): LIMIT 1 """) - - results = pd.read_sql(watchers_count_SQL, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + results = pd.read_sql(watchers_count_SQL, conn, params={'repo_id': repo_id}) return results @register_metric() @@ -798,8 +808,9 @@ def annual_lines_of_code_count_ranked_by_new_repo_in_repo_group(repo_group_id, r LIMIT 10 """) - results = pd.read_sql(cdRgNewrepRankedCommitsSQL, engine, params={ "repo_group_id": repo_group_id, - "repo_id": repo_id, "calendar_year": calendar_year}) + with engine.connect() as conn: + results = pd.read_sql(cdRgNewrepRankedCommitsSQL, conn, params={ "repo_group_id": repo_group_id, + "repo_id": repo_id, "calendar_year": calendar_year}) return results @register_metric() @@ -894,9 +905,9 @@ def 
annual_lines_of_code_count_ranked_by_repo_in_repo_group(repo_group_id, repo_ """) - - results = pd.read_sql(cdRgTpRankedCommitsSQL, engine, params={ "repo_group_id": repo_group_id, - "repo_id": repo_id}) + with engine.connect() as conn: + results = pd.read_sql(cdRgTpRankedCommitsSQL, conn, params={ "repo_group_id": repo_group_id, + "repo_id": repo_id}) return results @register_metric() @@ -948,8 +959,8 @@ def lines_of_code_commit_counts_by_calendar_year_grouped(repo_url, calendar_year GROUP BY week """) - - results = pd.read_sql(cdRepTpIntervalLocCommitsSQL, engine, params={"repourl": '%{}%'.format(repo_url), 'calendar_year': calendar_year}) + with engine.connect() as conn: + results = pd.read_sql(cdRepTpIntervalLocCommitsSQL, conn, params={"repourl": '%{}%'.format(repo_url), 'calendar_year': calendar_year}) return results @register_metric() @@ -969,9 +980,9 @@ def average_weekly_commits(repo_group_id=None, repo_id=None, calendar_year=None) ORDER BY repo_name """.format(extra_and)) - - results = pd.read_sql(average_weekly_commits_sql, engine, params={"repo_group_id": repo_group_id, - "repo_id": repo_id, "calendar_year": calendar_year}) + with engine.connect() as conn: + results = pd.read_sql(average_weekly_commits_sql, conn, params={"repo_group_id": repo_group_id, + "repo_id": repo_id, "calendar_year": calendar_year}) return results @register_metric() @@ -1054,8 +1065,9 @@ def aggregate_summary(repo_group_id, repo_id=None, begin_date=None, end_date=Non ) commit_data """) - results = pd.read_sql(summarySQL, engine, params={'repo_group_id': repo_group_id, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(summarySQL, conn, params={'repo_group_id': repo_group_id, + 'begin_date': begin_date, 'end_date': end_date}) return results else: summarySQL = s.sql.text(""" @@ -1123,6 +1135,7 @@ def aggregate_summary(repo_group_id, repo_id=None, begin_date=None, end_date=Non ) commit_data """) - results = pd.read_sql(summarySQL, engine, params={'repo_id': repo_id, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(summarySQL, conn, params={'repo_id': repo_id, + 'begin_date': begin_date, 'end_date': end_date}) return results diff --git a/augur/api/metrics/toss.py b/augur/api/metrics/toss.py index 122cb35679..d3e91ad405 100644 --- a/augur/api/metrics/toss.py +++ b/augur/api/metrics/toss.py @@ -57,8 +57,9 @@ def toss_pull_request_acceptance_rate(repo_id, begin_date=None, end_date=None, g ) opened ON merged.repo_id = opened.repo_id """) - results = pd.read_sql(pr_acceptance_rate_sql, engine, params={'repo_id': repo_id, 'group_by': group_by, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(pr_acceptance_rate_sql, conn, params={'repo_id': repo_id, 'group_by': group_by, + 'begin_date': begin_date, 'end_date': end_date}) return results @@ -89,8 +90,9 @@ def toss_review_duration(repo_id, begin_date=None, end_date=None): AND :end_date """) - results = pd.read_sql(pr_acceptance_rate_sql, engine, params={'repo_id': repo_id, - 'begin_date': begin_date, 'end_date': end_date}) + with engine.connect() as conn: + results = pd.read_sql(pr_acceptance_rate_sql, conn, params={'repo_id': repo_id, + 'begin_date': begin_date, 'end_date': end_date}) if results.iloc[0]['duration'] is None: results.iloc[0]['duration'] = -1 else: @@ -120,5 +122,6 @@ def toss_repo_info(repo_id): LIMIT 1; """) - results = pd.read_sql(license_file_sql, engine, params={'repo_id': 
repo_id}) + with engine.connect() as conn: + results = pd.read_sql(license_file_sql, conn, params={'repo_id': repo_id}) return results diff --git a/augur/api/routes/collection_status.py b/augur/api/routes/collection_status.py index 58e17311fe..8afd8eb2da 100644 --- a/augur/api/routes/collection_status.py +++ b/augur/api/routes/collection_status.py @@ -25,7 +25,9 @@ def commit_collection_status(): # TODO: make this name automatic - wrapper? AND c.facade_status = 'Success'; """) - results = pd.read_sql(commit_collection_sql, engine) + + with engine.connect() as conn: + results = pd.read_sql(commit_collection_sql, conn) data = results.to_json( orient="records", date_format='iso', date_unit='ms') return Response(response=data, @@ -86,7 +88,9 @@ def issue_collection_status(): # TODO: make this name automatic - wrapper? ) D WHERE d.issues_enabled = 'true'; """) - results = pd.read_sql(issue_collection_sql, engine) + + with engine.connect() as conn: + results = pd.read_sql(issue_collection_sql, conn) data = results.to_json( orient="records", date_format='iso', date_unit='ms') parsed_data = json.loads(data) @@ -156,7 +160,9 @@ def pull_request_collection_status(): # TODO: make this name automatic - wrappe ORDER BY ratio_abs; """) - results = pd.read_sql(pull_request_collection_sql, engine) + + with engine.connect() as conn: + results = pd.read_sql(pull_request_collection_sql, conn) data = results.to_json( orient="records", date_format='iso', date_unit='ms') parsed_data = json.loads(data) diff --git a/augur/api/routes/complexity.py b/augur/api/routes/complexity.py index 81045720a1..ba82a12599 100644 --- a/augur/api/routes/complexity.py +++ b/augur/api/routes/complexity.py @@ -47,7 +47,9 @@ def get_project_languages(): WHERE augur_data.repo.repo_id = e.repo_id ORDER BY e.repo_id """) - results = pd.read_sql(project_languages_sql, server.engine) + + with server.engine.connect() as conn: + results = pd.read_sql(project_languages_sql, conn) data = results.to_json(orient="records", date_format='iso', date_unit='ms') return Response(response=data, status=200, @@ -84,7 +86,9 @@ def get_project_files(): WHERE augur_data.repo.repo_id = e.repo_id ORDER BY e.repo_id """) - results = pd.read_sql(project_files_sql, server.engine) + + with server.engine.connect() as conn: + results = pd.read_sql(project_files_sql, conn) data = results.to_json(orient="records", date_format='iso', date_unit='ms') return Response(response=data, status=200, @@ -124,7 +128,9 @@ def get_project_lines(): WHERE augur_data.repo.repo_id = e.repo_id ORDER BY e.repo_id """) - results = pd.read_sql(project_lines_sql, server.engine) + + with server.engine.connect() as conn: + results = pd.read_sql(project_lines_sql, conn) data = results.to_json(orient="records", date_format='iso', date_unit='ms') return Response(response=data, status=200, @@ -164,7 +170,9 @@ def get_project_comment_lines(): WHERE augur_data.repo.repo_id = e.repo_id ORDER BY e.repo_id """) - results = pd.read_sql(comment_lines_sql, server.engine) + + with server.engine.connect() as conn: + results = pd.read_sql(comment_lines_sql, conn) data = results.to_json(orient="records", date_format='iso', date_unit='ms') return Response(response=data, status=200, @@ -204,7 +212,9 @@ def get_project_blank_lines(): WHERE augur_data.repo.repo_id = e.repo_id ORDER BY e.repo_id """) - results = pd.read_sql(blank_lines_sql, server.engine) + + with server.engine.connect() as conn: + results = pd.read_sql(blank_lines_sql, conn) data = results.to_json(orient="records", date_format='iso', 
date_unit='ms') return Response(response=data, status=200, @@ -245,7 +255,9 @@ def get_project_file_complexity(): WHERE augur_data.repo.repo_id = e.repo_id ORDER BY e.repo_id """) - results = pd.read_sql(project_file_complexity_sql, server.engine) + + with server.engine.connect() as conn: + results = pd.read_sql(project_file_complexity_sql, conn) data = results.to_json(orient="records", date_format='iso', date_unit='ms') return Response(response=data, status=200, diff --git a/augur/api/routes/contributor_reports.py b/augur/api/routes/contributor_reports.py index 896e00fc0e..c600e81416 100644 --- a/augur/api/routes/contributor_reports.py +++ b/augur/api/routes/contributor_reports.py @@ -293,7 +293,9 @@ def new_contributor_data_collection(repo_id, required_contributions): WHERE RANK IN {rank_tuple} """) - df = pd.read_sql(contributor_query, engine) + + with engine.connect() as conn: + df = pd.read_sql(contributor_query, conn) df = df.loc[~df['full_name'].str.contains('bot', na=False)] df = df.loc[~df['login'].str.contains('bot', na=False)] @@ -334,7 +336,9 @@ def months_data_collection(start_date, end_date): FROM generate_series (TIMESTAMP '{start_date}', TIMESTAMP '{end_date}', INTERVAL '1 month' ) created_month ) d ) x ) y """) - months_df = pd.read_sql(months_query, engine) + + with engine.connect() as conn: + months_df = pd.read_sql(months_query, conn) # add yearmonths to months_df months_df[['year', 'month']] = months_df[['year', 'month']].astype(float).astype(int).astype(str) diff --git a/augur/api/routes/metadata.py b/augur/api/routes/metadata.py index 389a3d9d18..f49dbb88f8 100644 --- a/augur/api/routes/metadata.py +++ b/augur/api/routes/metadata.py @@ -47,7 +47,9 @@ def get_repo_info(): ORDER BY repo.repo_name; """) - results = pd.read_sql(repo_info_sql, engine) + + with engine.connect() as conn: + results = pd.read_sql(repo_info_sql, conn) data = results.to_json(orient="records", date_format='iso', date_unit='ms') parsed_data = json.loads(data) return Response(response=data, @@ -61,7 +63,9 @@ def contributions_count(): group by repo_git order by contributions desc; """) - results = pd.read_sql(repo_info_sql, engine) + + with engine.connect() as conn: + results = pd.read_sql(repo_info_sql, conn) data = results.to_json(orient="records", date_format='iso', date_unit='ms') parsed_data = json.loads(data) return Response(response=data, @@ -75,7 +79,9 @@ def contributors_count(): group by repo_git order by contributors desc; """) - results = pd.read_sql(repo_info_sql, engine) + + with engine.connect() as conn: + results = pd.read_sql(repo_info_sql, conn) data = results.to_json(orient="records", date_format='iso', date_unit='ms') parsed_data = json.loads(data) return Response(response=data, diff --git a/augur/api/routes/pull_request_reports.py b/augur/api/routes/pull_request_reports.py index 02f6e235cd..72b7a2c44e 100644 --- a/augur/api/routes/pull_request_reports.py +++ b/augur/api/routes/pull_request_reports.py @@ -140,7 +140,9 @@ def pull_request_data_collection(repo_id, start_date, end_date): ORDER BY merged_count DESC """) - pr_all = pd.read_sql(pr_query, engine) + + with engine.connect() as conn: + pr_all = pd.read_sql(pr_query, conn) pr_all[['assigned_count', 'review_requested_count', diff --git a/augur/api/routes/util.py b/augur/api/routes/util.py index cd6a8ad3bc..1f95b9b7a2 100644 --- a/augur/api/routes/util.py +++ b/augur/api/routes/util.py @@ -23,7 +23,9 @@ def get_all_repo_groups(): #TODO: make this name automatic - wrapper? 
FROM repo_groups ORDER BY rg_name """) - results = pd.read_sql(repoGroupsSQL, engine) + + with engine.connect() as conn: + results = pd.read_sql(repoGroupsSQL, conn) data = results.to_json(orient="records", date_format='iso', date_unit='ms') return Response(response=data, status=200, @@ -58,7 +60,9 @@ def get_all_repos(): JOIN repo_groups ON repo_groups.repo_group_id = repo.repo_group_id order by repo_name """) - results = pd.read_sql(get_all_repos_sql, engine) + + with engine.connect() as conn: + results = pd.read_sql(get_all_repos_sql, conn) results['url'] = results['url'].apply(lambda datum: datum.split('//')[1]) b64_urls = [] @@ -100,7 +104,8 @@ def get_repos_in_repo_group(repo_group_id): ORDER BY repo.repo_git """) - results = pd.read_sql(repos_in_repo_groups_SQL, engine, params={'repo_group_id': repo_group_id}) + with engine.connect() as conn: + results = pd.read_sql(repos_in_repo_groups_SQL, conn, params={'repo_group_id': repo_group_id}) data = results.to_json(orient="records", date_format='iso', date_unit='ms') return Response(response=data, status=200, @@ -116,7 +121,8 @@ def get_repo_by_git_name(owner, repo): GROUP BY repo_id, rg_name """) - results = pd.read_sql(get_repo_by_git_name_sql, engine, params={'owner': '%{}_'.format(owner), 'repo': repo,}) + with engine.connect() as conn: + results = pd.read_sql(get_repo_by_git_name_sql, conn, params={'owner': '%{}%'.format(owner), 'repo': repo,}) data = results.to_json(orient="records", date_format='iso', date_unit='ms') return Response(response=data, status=200, @@ -132,7 +138,9 @@ def get_repo_by_name(rg_name, repo_name): AND LOWER(rg_name) = LOWER(:rg_name) AND LOWER(repo_name) = LOWER(:repo_name) """) - results = pd.read_sql(get_repo_by_name_sql, engine, params={'rg_name': rg_name, 'repo_name': repo_name}) + + with engine.connect() as conn: + results = pd.read_sql(get_repo_by_name_sql, conn, params={'rg_name': rg_name, 'repo_name': repo_name}) results['url'] = results['url'].apply(lambda datum: datum.split('//')[1]) data = results.to_json(orient="records", date_format='iso', date_unit='ms') return Response(response=data, @@ -146,7 +154,9 @@ def get_group_by_name(rg_name): FROM repo_groups WHERE lower(rg_name) = lower(:rg_name) """) - results = pd.read_sql(groupSQL, engine, params={'rg_name': rg_name}) + + with engine.connect() as conn: + results = pd.read_sql(groupSQL, conn, params={'rg_name': rg_name}) data = results.to_json(orient="records", date_format='iso', date_unit='ms') return Response(response=data, status=200, @@ -160,7 +170,8 @@ def get_repos_for_dosocs(): WHERE a.setting='repo_directory' """) - results = pd.read_sql(get_repos_for_dosocs_SQL, engine) + with engine.connect() as conn: + results = pd.read_sql(get_repos_for_dosocs_SQL, conn) data = results.to_json(orient="records", date_format='iso', date_unit='ms') return Response(response=data, status=200, @@ -188,7 +199,9 @@ def get_issues(repo_group_id, repo_id=None): GROUP BY issues.issue_id ORDER by OPEN_DAY DESC """) - results = pd.read_sql(get_issues_sql, engine, params={'repo_group_id': repo_group_id}) + + with engine.connect() as conn: + results = pd.read_sql(get_issues_sql, conn, params={'repo_group_id': repo_group_id}) else: get_issues_sql = s.sql.text(""" SELECT issue_title, @@ -208,7 +221,9 @@ def get_issues(repo_group_id, repo_id=None): GROUP BY issues.issue_id, repo_name ORDER by OPEN_DAY DESC """) - results = pd.read_sql(get_issues_sql, engine, params={'repo_id': repo_id}) + + with engine.connect() as conn: + results = pd.read_sql(get_issues_sql, conn, 
params={'repo_id': repo_id}) data = results.to_json(orient="records", date_format='iso', date_unit='ms') return Response(response=data, status=200, diff --git a/augur/api/view/routes.py b/augur/api/view/routes.py index 8a9fc0597c..ed617cd36c 100644 --- a/augur/api/view/routes.py +++ b/augur/api/view/routes.py @@ -74,11 +74,19 @@ def repo_table_view(): pagination_offset = config.get_value("frontend", "pagination_offset") if current_user.is_authenticated: - data = current_user.get_repos(page = page, sort = sorting, direction = direction, search=query)[0] - page_count = (current_user.get_repo_count(search = query)[0] or 0) // pagination_offset + data = load_repos_test(user = current_user, search = query, page = page, sort = sorting, direction = direction, source = "user") + page_count = load_repos_test(user = current_user, search = query, count = True, source = "user") + # data = current_user.get_repos(page = page, sort = sorting, direction = direction, search=query)[0] + # page_count = (current_user.get_repo_count(search = query)[0] or 0) // pagination_offset else: - data = get_all_repos(page = page, sort = sorting, direction = direction, search=query)[0] - page_count = (get_all_repos_count(search = query)[0] or 0) // pagination_offset + data = load_repos_test(search = query, page = page, sort = sorting, direction = direction) + page_count = load_repos_test(search = query, count = True) + # data = get_all_repos(page = page, sort = sorting, direction = direction, search=query)[0] + # page_count = (get_all_repos_count(search = query)[0] or 0) // pagination_offset + + if not data.count(): + data = None + return render_module("repos-table", title="Repos", repos=data, query_key=query, activePage=page, pages=page_count, offset=pagination_offset, PS="repo_table_view", reverse = rev, sorting = sorting) diff --git a/augur/api/view/utils.py b/augur/api/view/utils.py index 2289355743..76551a6ab9 100644 --- a/augur/api/view/utils.py +++ b/augur/api/view/utils.py @@ -4,7 +4,15 @@ from .init import * from ..server import app, db_session from augur.application.config import AugurConfig -import urllib.request, urllib.error, json, os, math, yaml, urllib3, time, logging, re +import urllib.request, urllib.error, json, os, math, yaml, urllib3, time, logging, re + +from augur.application.db.session import DatabaseSession +from augur.application.db.engine import DatabaseEngine +from augur.application.db.models import User, Repo, RepoGroup, UserGroup, UserRepo +from sqlalchemy import Column, Table, Integer, MetaData, or_, Label +from sqlalchemy.sql.operators import ilike_op, distinct_op +from sqlalchemy.sql.functions import coalesce +from augur.application.db.models.base import Base init_logging() @@ -66,6 +74,8 @@ def getSetting(key, section = "View"): loadSettings() +#version_check(settings) + """ ---------------------------------------------------------------- """ def loadReports(): @@ -298,3 +308,68 @@ def render_message(messageTitle, messageBody = None, title = None, redirect = No def render_module(module, **args): args.setdefault("body", module) return render_template('index.j2', **args) + +""" ---------------------------------------------------------------- + No longer used +""" +# My attempt at a loading page +def renderLoading(dest, query, request): + cache_files_requested.append(request) + return render_template('index.j2', body="loading", title="Loading", d=dest, query_key=query, api_url=getSetting('serving')) + +with DatabaseEngine() as engine: + augur_data_schema = MetaData(schema =
"augur_data") + augur_data_schema.reflect(bind = engine, views = True) + + commits_materialized_view: Table = augur_data_schema.tables["augur_data.api_get_all_repos_commits"] + issues_materialized_view: Table = augur_data_schema.tables["augur_data.api_get_all_repos_issues"] + +""" ---------------------------------------------------------------- +""" +def load_repos_test(count = False, source = None, **kwargs): + columns: list[Label] = [ + Repo.repo_id.distinct().label("repo_id"), + Repo.description.label("description"), + Repo.repo_git.label("url"), + coalesce(commits_materialized_view.columns.commits_all_time, 0).label("commits_all_time"), + coalesce(issues_materialized_view.columns.issues_all_time, 0).label("issues_all_time"), + RepoGroup.rg_name.label("rg_name"), + Repo.repo_git.regexp_replace('.*github\.com\/[A-Za-z0-9 \- _]+\/([A-Za-z0-9 \- _ .]+)$', "\\1").label("repo_name"), + Repo.repo_git.regexp_replace('.*github\.com\/([A-Za-z0-9 \- _]+)\/[A-Za-z0-9 \- _ .]+$', "\\1").label("repo_owner"), + RepoGroup.repo_group_id.label("repo_group_id") + ] + + def get_colum_by_label(label: str)-> Label: + for column in columns: + if column.name == label: + return column + + repos = db_session.query(*columns)\ + .outerjoin(commits_materialized_view, Repo.repo_id == commits_materialized_view.columns.repo_id)\ + .outerjoin(issues_materialized_view, Repo.repo_id == issues_materialized_view.columns.repo_id)\ + .join(RepoGroup, Repo.repo_group_id == RepoGroup.repo_group_id) + + user: User = kwargs.get("user") + if user: + repos = repos.join(UserRepo, Repo.repo_id == UserRepo.repo_id)\ + .join(UserGroup, UserGroup.group_id == UserRepo.group_id)\ + .filter(UserGroup.user_id == user.user_id) + + search = kwargs.get("search") + qkey = kwargs.get("query_key") or ["repo_name", "repo_owner"] + if search: + if isinstance(qkey, list) and len(qkey) > 0: + repos = repos.filter(or_(ilike_op(get_colum_by_label(filter_column), f"%{search}%") for filter_column in qkey)) + else: + repos = repos.filter(ilike_op(get_colum_by_label(qkey), f"%{search}%")) + + page_size: int = kwargs.get("page_size") or 25 + if count: + c = repos.count() + return math.ceil(c / page_size) - 1 + + page: int = kwargs.get("page") or 0 + offset = page * page_size + + return repos.slice(offset, offset + page_size) + diff --git a/augur/application/cli/db.py b/augur/application/cli/db.py index f09aaabbd2..42d57ecc6b 100644 --- a/augur/application/cli/db.py +++ b/augur/application/cli/db.py @@ -99,7 +99,7 @@ def add_repo_groups(filename): """ Create new repo groups in Augur's database """ - with DatabaseEngine() as engine, engine.connect() as connection: + with DatabaseEngine() as engine, engine.begin() as connection: df = pd.read_sql( s.sql.text("SELECT repo_group_id FROM augur_data.repo_groups"), @@ -248,7 +248,7 @@ def update_api_key(api_key): """ ) - with DatabaseEngine() as engine, engine.connect() as connection: + with DatabaseEngine() as engine, engine.begin() as connection: connection.execute(update_api_key_sql, api_key=api_key) logger.info(f"Updated Augur API key to: {api_key}") diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 95cb0725d7..3ed584ee14 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -267,6 +267,7 @@ class Contributor(Base): @classmethod def from_github(cls, contributor, tool_source, tool_version, data_source): + from augur.tasks.util.AugurUUID import GithubUUID cntrb_id = GithubUUID() cntrb_id["user"] = 
contributor["id"] diff --git a/augur/application/db/session.py b/augur/application/db/session.py index 2212c1fdc1..f1d1e64dd0 100644 --- a/augur/application/db/session.py +++ b/augur/application/db/session.py @@ -85,7 +85,7 @@ def __del__(self): def execute_sql(self, sql_text): - with self.engine.connect() as connection: + with self.engine.begin() as connection: return_data = connection.execute(sql_text) @@ -93,10 +93,10 @@ def execute_sql(self, sql_text): def fetchall_data_from_sql_text(self,sql_text): - with self.engine.connect() as connection: + with self.engine.begin() as connection: - result = connection.execute(sql_text).fetchall() - return [dict(zip(row.keys(), row)) for row in result] + result = connection.execute(sql_text) + return [dict(row) for row in result.mappings()] def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]: @@ -174,7 +174,9 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s while attempts < 10: try: - with EngineConnection(self.engine) as connection: + #begin keyword is needed for sqlalchemy 2.x + #this is because autocommit support was removed in 2.0 + with self.engine.begin() as connection: connection.execute(stmnt) break except OperationalError as e: @@ -191,14 +193,15 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s raise e except Exception as e: + #self.logger.info(e) if(len(data) == 1): raise e else: first_half = data[:len(data)//2] second_half = data[len(data)//2:] - self.insert_data(first_half, natural_keys, return_columns, string_fields, on_conflict_update) - self.insert_data(second_half, natural_keys, return_columns, string_fields, on_conflict_update) + self.insert_data(first_half, table,natural_keys, return_columns, string_fields, on_conflict_update) + self.insert_data(second_half,table, natural_keys, return_columns, string_fields, on_conflict_update) else: self.logger.error("Unable to insert data in 10 attempts") @@ -213,8 +216,8 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s # othewise it gets the requested return columns and returns them as a list of dicts while attempts < 10: try: - with EngineConnection(self.engine) as connection: - return_data_tuples = connection.execute(stmnt).fetchall() + with self.engine.begin() as connection: + return_data_tuples = connection.execute(stmnt) break except OperationalError as e: if isinstance(e.orig, DeadlockDetected): @@ -244,9 +247,11 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s if deadlock_detected is True: self.logger.error("Made it through even though Deadlock was detected") - return_data = [] - for data_tuple in return_data_tuples: - return_data.append(dict(data_tuple)) + return_data = [dict(row) for row in return_data_tuples.mappings()] + + #no longer working in sqlalchemy 2.x + #for data_tuple in return_data_tuples: + # return_data.append(dict(data_tuple)) # using on confilict do nothing does not return the # present values so this does gets the return values diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index d170ef243f..94127a43be 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -5,7 +5,9 @@ from alembic import context from augur.application.db.models.base import Base -from 
augur.application.db.engine import DatabaseEngine +from augur.application.db.engine import DatabaseEngine, get_database_string +from sqlalchemy import create_engine, event +from sqlalchemy.pool import NullPool # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -59,8 +61,20 @@ def run_migrations_online(): and associate a connection with the context. """ + url = get_database_string() + engine = create_engine(url) - with DatabaseEngine() as connectable, connectable.connect() as connection: + @event.listens_for(engine, "connect", insert=True) + def set_search_path(dbapi_connection, connection_record): + existing_autocommit = dbapi_connection.autocommit + dbapi_connection.autocommit = True + cursor = dbapi_connection.cursor() + cursor.execute("SET SESSION search_path=public,augur_data,augur_operations,spdx") + cursor.close() + dbapi_connection.autocommit = existing_autocommit + + + with engine.connect() as connection: context.configure( connection=connection, target_metadata=target_metadata, diff --git a/augur/application/schema/alembic/versions/1_augur_new_changes.py b/augur/application/schema/alembic/versions/1_augur_new_changes.py index 0be3780a36..2e8440294f 100644 --- a/augur/application/schema/alembic/versions/1_augur_new_changes.py +++ b/augur/application/schema/alembic/versions/1_augur_new_changes.py @@ -300,8 +300,9 @@ def change_cntrb_id_to_uuid_5(upgrade=True): """ INSERT INTO "augur_data"."contributors"("cntrb_id", "cntrb_login", "cntrb_email", "cntrb_company", "cntrb_created_at", "cntrb_type", "cntrb_fake", "cntrb_deleted", "cntrb_long", "cntrb_lat", "cntrb_country_code", "cntrb_state", "cntrb_city", "cntrb_location", "cntrb_canonical", "gh_user_id", "gh_login", "gh_url", "gh_html_url", "gh_node_id", "gh_avatar_url", "gh_gravatar_id", "gh_followers_url", "gh_following_url", "gh_gists_url", "gh_starred_url", "gh_subscriptions_url", "gh_organizations_url", "gh_repos_url", "gh_events_url", "gh_received_events_url", "gh_type", "gh_site_admin", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:cntrb_uuid, 'not-provided', NULL, NULL, '2019-06-13 11:33:39', NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1, 'nobody', 'http://fake.me', 'http://fake.me', 'x', 'http://fake.me', NULL, 'http://fake.me', 'http://fake.me', 'http://fake.me', 'http://fake.me', 'http://fake.me', 'http://fake.me', 'http://fake.me', 'http://fake.me', NULL, NULL, NULL, NULL, NULL, NULL, '2019-06-13 16:35:25'); """ - ), - cntrb_uuid=UnresolvableUUID().to_UUID() + ).bindparams( + cntrb_uuid=UnresolvableUUID().to_UUID() + ) ) conn.execute( @@ -309,8 +310,9 @@ def change_cntrb_id_to_uuid_5(upgrade=True): """ INSERT INTO "augur_data"."contributors" ("cntrb_id", "cntrb_login", "cntrb_email", "cntrb_full_name", "cntrb_company", "cntrb_created_at", "cntrb_type", "cntrb_fake", "cntrb_deleted", "cntrb_long", "cntrb_lat", "cntrb_country_code", "cntrb_state", "cntrb_city", "cntrb_location", "cntrb_canonical", "cntrb_last_used", "gh_user_id", "gh_login", "gh_url", "gh_html_url", "gh_node_id", "gh_avatar_url", "gh_gravatar_id", "gh_followers_url", "gh_following_url", "gh_gists_url", "gh_starred_url", "gh_subscriptions_url", "gh_organizations_url", "gh_repos_url", "gh_events_url", "gh_received_events_url", "gh_type", "gh_site_admin", "gl_web_url", "gl_avatar_url", "gl_state", "gl_username", "gl_full_name", "gl_id", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:cntrb_uuid, 'nan', 'kannayoshihiro@gmail.com', 
'KANNA Yoshihiro', 'UTMC', '2009-04-17 12:43:58', NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 'kannayoshihiro@gmail.com', '2021-01-28 21:56:10-06', 74832, 'nan', 'https://api.github.com/users/nan', 'https://github.com/nan', 'MDQ6VXNlcjc0ODMy', 'https://avatars.githubusercontent.com/u/74832?v=4', '', 'https://api.github.com/users/nan/followers', 'https://api.github.com/users/nan/following{/other_user}', 'https://api.github.com/users/nan/gists{/gist_id}', 'https://api.github.com/users/nan/starred{/owner}{/repo}', 'https://api.github.com/users/nan/subscriptions', 'https://api.github.com/users/nan/orgs', 'https://api.github.com/users/nan/repos', 'https://api.github.com/users/nan/events{/privacy}', 'https://api.github.com/users/nan/received_events', 'User', 'false', NULL, NULL, NULL, NULL, NULL, NULL, 'GitHub API Worker', '1.0.0', 'GitHub API', '2021-10-28 15:23:46'); """ - ), - cntrb_uuid=GithubUUID().to_UUID() + ).bindparams( + cntrb_uuid=GithubUUID().to_UUID() + ) ) else: diff --git a/augur/application/schema/alembic/versions/3_oauth_and_user_groups.py b/augur/application/schema/alembic/versions/3_oauth_and_user_groups.py index 8d75b7a709..0d9c6d744a 100644 --- a/augur/application/schema/alembic/versions/3_oauth_and_user_groups.py +++ b/augur/application/schema/alembic/versions/3_oauth_and_user_groups.py @@ -85,9 +85,9 @@ def upgrade(): table_changes = """ - ALTER TABLE user_repos + ALTER TABLE augur_operations.user_repos ADD COLUMN group_id BIGINT, - ADD CONSTRAINT user_repos_group_id_fkey FOREIGN KEY (group_id) REFERENCES user_groups(group_id), + ADD CONSTRAINT user_repos_group_id_fkey FOREIGN KEY (group_id) REFERENCES augur_operations.user_groups(group_id), DROP COLUMN user_id, ADD PRIMARY KEY (group_id, repo_id); """ diff --git a/augur/application/schema/alembic/versions/9_add_repo_group_for_frontend_repos.py b/augur/application/schema/alembic/versions/9_add_repo_group_for_frontend_repos.py index 288f584cf5..52a6e017db 100644 --- a/augur/application/schema/alembic/versions/9_add_repo_group_for_frontend_repos.py +++ b/augur/application/schema/alembic/versions/9_add_repo_group_for_frontend_repos.py @@ -25,7 +25,7 @@ def upgrade(): conn = op.get_bind() result = conn.execute(text(f"""SELECT * FROM "augur_data"."repo_groups" WHERE rg_name='{repo_group_name}';""")).fetchall() if len(result) == 0: - conn.execute(f"""INSERT INTO "augur_data"."repo_groups" ("rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES ('{repo_group_name}', 'DO NOT DELETE OR FRONTEND REPOS WILL BREAK', '', 0, '2023-02-17 15:00:00', NULL, NULL, NULL, NULL, NULL);""") + conn.execute(text(f"""INSERT INTO "augur_data"."repo_groups" ("rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES ('{repo_group_name}', 'DO NOT DELETE OR FRONTEND REPOS WILL BREAK', '', 0, '2023-02-17 15:00:00', NULL, NULL, NULL, NULL, NULL);""")) # ### end Alembic commands ### diff --git a/augur/application/util.py b/augur/application/util.py index 1915abdeb0..03e591df98 100644 --- a/augur/application/util.py +++ b/augur/application/util.py @@ -25,6 +25,3 @@ def get_all_repos_count(**kwargs): result = controller.get_repo_count(source="all", **kwargs) return result - - - diff --git a/augur/tasks/data_analysis/clustering_worker/setup.py b/augur/tasks/data_analysis/clustering_worker/setup.py index 9a1b425f90..56e01e52f3 100644 --- 
a/augur/tasks/data_analysis/clustering_worker/setup.py +++ b/augur/tasks/data_analysis/clustering_worker/setup.py @@ -32,7 +32,7 @@ def read(filename): 'numpy==1.22.0', 'nltk==3.6.6', 'seaborn==0.11.1', - 'pandas==1.3.5', + 'pandas==1.5.3', 'matplotlib==3.5.1' ], classifiers=[ diff --git a/augur/tasks/data_analysis/clustering_worker/tasks.py b/augur/tasks/data_analysis/clustering_worker/tasks.py index 2d4f4973de..c102e6c227 100644 --- a/augur/tasks/data_analysis/clustering_worker/tasks.py +++ b/augur/tasks/data_analysis/clustering_worker/tasks.py @@ -116,7 +116,9 @@ def clustering_model(repo_git: str,logger,engine, session) -> None: """ ) # result = db.execute(delete_points_SQL, repo_id=repo_id, min_date=min_date) - msg_df_cur_repo = pd.read_sql(get_messages_for_repo_sql, engine, params={"repo_id": repo_id}) + + with engine.connect() as conn: + msg_df_cur_repo = pd.read_sql(get_messages_for_repo_sql, conn, params={"repo_id": repo_id}) logger.info(msg_df_cur_repo.head()) logger.debug(f"Repo message df size: {len(msg_df_cur_repo.index)}") @@ -303,7 +305,9 @@ def visualize_labels_PCA(features, labels, annotations, num_components, title): AND prmr.msg_id=m.msg_id """ ) - msg_df_all = pd.read_sql(get_messages_sql, engine, params={}) + + with engine.connect() as conn: + msg_df_all = pd.read_sql(get_messages_sql, conn, params={}) # select only highly active repos logger.debug("Selecting highly active repos") diff --git a/augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py b/augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py index 8034112ad6..4521a722e2 100644 --- a/augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py +++ b/augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py @@ -56,9 +56,10 @@ def contributor_breadth_model() -> None: ) b """) - result = engine.execute(cntrb_login_query) + with engine.connect() as connection: + result = connection.execute(cntrb_login_query) - current_cntrb_logins = [dict(row) for row in result] + current_cntrb_logins = [dict(row) for row in result.mappings()] cntrb_newest_events_query = s.sql.text(""" @@ -68,8 +69,10 @@ def contributor_breadth_model() -> None: GROUP BY c.gh_login; """) - cntrb_newest_events_list = engine.execute(cntrb_newest_events_query) - cntrb_newest_events_list = [dict(row) for row in cntrb_newest_events_list] + with engine.connect() as connection: + cntrb_newest_events_list = connection.execute(cntrb_newest_events_query) + + cntrb_newest_events_list = [dict(row) for row in cntrb_newest_events_list.mappings()] cntrb_newest_events_map = {} for cntrb_event in cntrb_newest_events_list: diff --git a/augur/tasks/data_analysis/discourse_analysis/setup.py b/augur/tasks/data_analysis/discourse_analysis/setup.py index 9a4e91c018..f109164ffd 100644 --- a/augur/tasks/data_analysis/discourse_analysis/setup.py +++ b/augur/tasks/data_analysis/discourse_analysis/setup.py @@ -30,7 +30,7 @@ def read(filename): 'click==8.0.3', 'scipy==1.7.3', 'nltk==3.6.6', - 'pandas==1.3.5', + 'pandas==1.5.3', 'scikit-learn==1.1.3', 'textblob==0.15.3', 'python-crfsuite==0.9.8', diff --git a/augur/tasks/data_analysis/discourse_analysis/tasks.py b/augur/tasks/data_analysis/discourse_analysis/tasks.py index 2febe86360..5a9941679c 100644 --- a/augur/tasks/data_analysis/discourse_analysis/tasks.py +++ b/augur/tasks/data_analysis/discourse_analysis/tasks.py @@ -72,7 +72,9 @@ def discourse_analysis_model(repo_git: str,logger,engine) -> None: """) # result = 
db.execute(delete_points_SQL, repo_id=repo_id, min_date=min_date) - msg_df_cur_repo = pd.read_sql(get_messages_for_repo_sql, engine, params={"repo_id": repo_id}) + + with engine.connect() as conn: + msg_df_cur_repo = pd.read_sql(get_messages_for_repo_sql, conn, params={"repo_id": repo_id}) msg_df_cur_repo = msg_df_cur_repo.sort_values(by=['thread_id']).reset_index(drop=True) logger.info(msg_df_cur_repo.head()) diff --git a/augur/tasks/data_analysis/insight_worker/tasks.py b/augur/tasks/data_analysis/insight_worker/tasks.py index 7f506c8d12..37ae5f484c 100644 --- a/augur/tasks/data_analysis/insight_worker/tasks.py +++ b/augur/tasks/data_analysis/insight_worker/tasks.py @@ -134,13 +134,16 @@ def insight_model(repo_git: str,logger,engine,session) -> None: WHERE repo_insights.ri_metric = to_delete.ri_metric AND repo_insights.ri_field = to_delete.ri_field """) - result = engine.execute(delete_points_SQL, repo_id=repo_id, min_date=min_date) + + with engine.connect() as conn: + result = conn.execute(delete_points_SQL, repo_id=repo_id, min_date=min_date) # get table values to check for dupes later on table_values_sql = s.sql.text("""SELECT * FROM repo_insights_records WHERE repo_id={}""".format(repo_id)) - insight_table_values = pd.read_sql(table_values_sql, engine, params={}) + with engine.connect() as conn: + insight_table_values = pd.read_sql(table_values_sql, conn, params={}) to_model_columns = df.columns[0:len(metrics) + 1] @@ -257,7 +260,7 @@ def classify_anomalies(df, metric): repo_insight_record_obj.ri_id)) # Send insight to Jonah for slack bot - send_insight(record, abs(next_recent_anomaly.iloc[0][metric] - mean), logger) + send_insight(record, abs(next_recent_anomaly.iloc[0][metric] - mean), logger, engine) insight_count += 1 else: @@ -526,8 +529,8 @@ def send_insight(insight, units_from_mean, logger, engine): FROM repo, repo_groups WHERE repo_id = {} """.format(insight['repo_id'])) - - repo = pd.read_sql(repoSQL, engine, params={}).iloc[0] + with engine.connect() as conn: + repo = pd.read_sql(repoSQL, conn, params={}).iloc[0] begin_date = datetime.datetime.now() - datetime.timedelta(days=anomaly_days) dict_date = insight['ri_date'].strftime("%Y-%m-%d %H:%M:%S") @@ -565,7 +568,8 @@ def clear_insights(repo_id, new_endpoint, new_field, logger): AND ri_field = '{}' """.format(repo_id, new_endpoint, new_field) try: - result = engine.execute(deleteSQL) + with engine.connect() as conn: + result = conn.execute(deleteSQL) except Exception as e: logger.info("Error occured deleting insight slot: {}".format(e)) @@ -582,7 +586,8 @@ def clear_insights(repo_id, new_endpoint, new_field, logger): AND ri_field = '{}' """.format(repo_id, new_endpoint, new_field) try: - result = engine.execute(deleteSQL) + with engine.connect() as conn: + result = conn.execute(deleteSQL) except Exception as e: logger.info("Error occured deleting insight slot: {}".format(e)) @@ -602,7 +607,8 @@ def clear_insight(repo_id, new_score, new_metric, new_field, logger): AND ri_field = '{}' ORDER BY ri_score DESC """.format(repo_id, new_metric, new_field)) - rec = json.loads(pd.read_sql(recordSQL, engine, params={}).to_json(orient='records')) + with engine.connect() as conn: + rec = json.loads(pd.read_sql(recordSQL, conn, params={}).to_json(orient='records')) logger.info("recordsql: {}, \n{}".format(recordSQL, rec)) # If new score is higher, continue with deletion if len(rec) > 0: @@ -623,7 +629,8 @@ def clear_insight(repo_id, new_score, new_metric, new_field, logger): AND ri_field = '{}' """.format(record['repo_id'],
record['ri_metric'], record['ri_field']) try: - result = engine.execute(deleteSQL) + with engine.connect() as conn: + result = conn.execute(deleteSQL) except Exception as e: logger.info("Error occured deleting insight slot: {}".format(e)) else: @@ -637,7 +644,8 @@ def clear_insight(repo_id, new_score, new_metric, new_field, logger): WHERE repo_id = {} ORDER BY ri_score ASC """.format(repo_id)) - ins = json.loads(pd.read_sql(insightSQL, engine, params={}).to_json(orient='records')) + with engine.connect() as conn: + ins = json.loads(pd.read_sql(insightSQL, conn, params={}).to_json(orient='records')) logger.info("This repos insights: {}".format(ins)) # Determine if inisghts need to be deleted based on if there are more insights than we want stored, @@ -675,7 +683,8 @@ def clear_insight(repo_id, new_score, new_metric, new_field, logger): AND ri_metric = '{}' """.format(insight['repo_id'], insight['ri_metric']) try: - result = engine.execute(deleteSQL) + with engine.connect() as conn: + result = conn.execute(deleteSQL) except Exception as e: logger.info("Error occured deleting insight slot: {}".format(e)) @@ -744,7 +753,9 @@ def filter_duplicates(cols, tables, og_data, logger, engine): colSQL = s.sql.text(""" SELECT {} FROM {} """.format(col, table_str)) - values = pd.read_sql(colSQL, engine, params={}) + + with engine.connect() as conn: + values = pd.read_sql(colSQL, conn, params={}) for obj in og_data: if values.isin([obj[cols[col]]]).any().any(): diff --git a/augur/tasks/data_analysis/message_insights/setup.py b/augur/tasks/data_analysis/message_insights/setup.py index 311eb9b6f9..e3dedb4191 100644 --- a/augur/tasks/data_analysis/message_insights/setup.py +++ b/augur/tasks/data_analysis/message_insights/setup.py @@ -34,12 +34,12 @@ def read(filename): 'scikit-learn==1.1.3', #0.24.2', 'numpy==1.22.0', 'nltk==3.6.6', - 'pandas==1.3.5', + 'pandas==1.5.3', 'emoji==1.2.0', - 'Keras<2.9.0rc0', - 'Keras-Preprocessing==1.1.2', - 'tensorflow==2.8.0', - 'h5py~=3.6.0', + 'Keras==2.13.1', + 'Keras-Preprocessing', + 'tensorflow==2.13.1', + 'h5py==3.10.0', 'scikit-image==0.19.1', 'joblib==1.0.1', 'xgboost', diff --git a/augur/tasks/data_analysis/message_insights/tasks.py b/augur/tasks/data_analysis/message_insights/tasks.py index 1acec976c3..4727d3def7 100644 --- a/augur/tasks/data_analysis/message_insights/tasks.py +++ b/augur/tasks/data_analysis/message_insights/tasks.py @@ -59,7 +59,8 @@ def message_insight_model(repo_git: str,logger,engine, session) -> None: repo_exists_SQL = s.sql.text(""" SELECT exists (SELECT 1 FROM augur_data.message_analysis_summary WHERE repo_id = :repo_id LIMIT 1)""") - df_rep = pd.read_sql_query(repo_exists_SQL, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + df_rep = pd.read_sql_query(repo_exists_SQL, conn, params={'repo_id': repo_id}) #full_train = not(df_rep['exists'].iloc[0]) logger.info(f'Full Train: {full_train}') @@ -84,7 +85,8 @@ def message_insight_model(repo_git: str,logger,engine, session) -> None: where message.repo_id = :repo_id """) - df_past = pd.read_sql_query(past_SQL, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + df_past = pd.read_sql_query(past_SQL, conn, params={'repo_id': repo_id}) df_past['msg_timestamp'] = pd.to_datetime(df_past['msg_timestamp']) df_past = df_past.sort_values(by='msg_timestamp') @@ -124,7 +126,8 @@ def message_insight_model(repo_git: str,logger,engine, session) -> None: left outer join augur_data.issues on issue_message_ref.issue_id = issues.issue_id where message.repo_id = :repo_id""") 
- df_message = pd.read_sql_query(join_SQL, engine, params={'repo_id': repo_id, 'begin_date': begin_date}) + with engine.connect() as conn: + df_message = pd.read_sql_query(join_SQL, conn, params={'repo_id': repo_id, 'begin_date': begin_date}) logger.info(f'Messages dataframe dim: {df_message.shape}') logger.info(f'Value 1: {df_message.shape[0]}') @@ -159,7 +162,8 @@ def message_insight_model(repo_git: str,logger,engine, session) -> None: left outer join augur_data.issues on issue_message_ref.issue_id = issues.issue_id where issue_message_ref.repo_id = :repo_id""") - df_past = pd.read_sql_query(merge_SQL, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + df_past = pd.read_sql_query(merge_SQL, conn, params={'repo_id': repo_id}) df_past = df_past.loc[df_past['novelty_flag'] == 0] rec_errors = df_past['reconstruction_error'].tolist() threshold = threshold_otsu(np.array(rec_errors)) @@ -345,7 +349,8 @@ def message_insight_model(repo_git: str,logger,engine, session) -> None: FROM message_analysis_summary WHERE repo_id=:repo_id""") - df_past = pd.read_sql_query(message_analysis_query, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + df_past = pd.read_sql_query(message_analysis_query, conn, params={'repo_id': repo_id}) # df_past = get_table_values(cols=['period', 'positive_ratio', 'negative_ratio', 'novel_count'], # tables=['message_analysis_summary'], @@ -414,12 +419,13 @@ def send_insight(repo_id, insights, logger, engine): WHERE repo_id = {} """.format(repo_id)) - repo = pd.read_sql(repoSQL, engine, params={}).iloc[0] + with engine.connect() as conn: + repo = pd.read_sql(repoSQL, conn, params={}).iloc[0] to_send = { 'message_insight': True, 'repo_git': repo['repo_git'], - 'insight_begin_date': begin_date.strftime("%Y-%m-%d %H:%M:%S"), + 'insight_begin_date': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), # date from when insights are calculated 'sentiment': insights[0], # sentiment insight dict 'novelty': insights[1], # novelty insight dict @@ -449,13 +455,14 @@ def get_max_id(table, column, logger, engine, default=25150): SELECT max({0}.{1}) AS {1} FROM {0} """.format(table, column)) - rs = pd.read_sql(max_id_sql, engine, params={}) + + with engine.connect() as conn: + rs = pd.read_sql(max_id_sql, conn, params={}) if rs.iloc[0][column] is not None: max_id = int(rs.iloc[0][column]) + 1 logger.info("Found max id for {} column in the {} table: {}\n".format(column, table, max_id)) else: max_id = default - logger.warning("Could not find max id for {} column in the {} table... " + - "using default set to: {}\n".format(column, table, max_id)) + logger.warning(f"Could not find max id for {column} column in the {table} table... 
using default set to: {max_id}\n") return max_id diff --git a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py index dc13c94bf9..5132f29d2e 100644 --- a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py +++ b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py @@ -30,7 +30,7 @@ def read(filename): 'sklearn==0.0', 'nltk==3.6.6', 'numpy==1.22.0', - 'pandas==1.3.5', + 'pandas==1.5.3', 'emoji==1.2.0', 'joblib==1.0.1', 'xgboost==1.4.2', diff --git a/augur/tasks/data_analysis/pull_request_analysis_worker/tasks.py b/augur/tasks/data_analysis/pull_request_analysis_worker/tasks.py index c2816bed8c..9d6d5be78e 100644 --- a/augur/tasks/data_analysis/pull_request_analysis_worker/tasks.py +++ b/augur/tasks/data_analysis/pull_request_analysis_worker/tasks.py @@ -74,8 +74,8 @@ def pull_request_analysis_model(repo_git: str,logger,engine) -> None: and pull_requests.repo_id = :repo_id and pr_src_state like 'open' """) - - df_pr = pd.read_sql_query(pr_SQL, engine, params={'begin_date': begin_date, 'repo_id': repo_id}) + with engine.connect() as conn: + df_pr = pd.read_sql_query(pr_SQL, conn, params={'begin_date': begin_date, 'repo_id': repo_id}) logger.info(f'PR Dataframe dim: {df_pr.shape}\n') @@ -106,15 +106,16 @@ def pull_request_analysis_model(repo_git: str,logger,engine) -> None: select message.msg_id, msg_timestamp, msg_text, message.cntrb_id from augur_data.message left outer join augur_data.issue_message_ref on message.msg_id = issue_message_ref.msg_id left outer join augur_data.issues on issue_message_ref.issue_id = issues.issue_id where issue_message_ref.repo_id = :repo_id""") - - df_message = pd.read_sql_query(messages_SQL, engine, params={'repo_id': repo_id}) + with engine.connect() as conn: + df_message = pd.read_sql_query(messages_SQL, conn, params={'repo_id': repo_id}) logger.info(f'Mapping messages to PR, find comment & participants counts') # Map PR to its corresponding messages pr_ref_sql = s.sql.text("select * from augur_data.pull_request_message_ref") - df_pr_ref = pd.read_sql_query(pr_ref_sql, engine) + with engine.connect() as conn: + df_pr_ref = pd.read_sql_query(pr_ref_sql, conn) df_merge = pd.merge(df_pr, df_pr_ref, on='pull_request_id', how='left') df_merge = pd.merge(df_merge, df_message, on='msg_id', how='left') df_merge = df_merge.dropna(subset=['msg_id'], axis=0) @@ -167,7 +168,9 @@ def pull_request_analysis_model(repo_git: str,logger,engine) -> None: SELECT repo_id, pull_requests_merged, pull_request_count,watchers_count, last_updated FROM augur_data.repo_info where repo_id = :repo_id """) - df_repo = pd.read_sql_query(repo_sql, engine, params={'repo_id': repo_id}) + + with engine.connect() as conn: + df_repo = pd.read_sql_query(repo_sql, conn, params={'repo_id': repo_id}) df_repo = df_repo.loc[df_repo.groupby('repo_id').last_updated.idxmax(), :] df_repo = df_repo.drop(['last_updated'], axis=1) diff --git a/augur/tasks/github/events/tasks.py b/augur/tasks/github/events/tasks.py index 129afd0de5..640079d852 100644 --- a/augur/tasks/github/events/tasks.py +++ b/augur/tasks/github/events/tasks.py @@ -210,9 +210,11 @@ def update_issue_closed_cntrbs_from_events(engine, repo_id): SELECT issue_id, cntrb_id from RankedIssues where rn=1 and repo_id={repo_id} and cntrb_id is not NULL """) - result = engine.execute(get_ranked_issues).fetchall() - update_data = [{'issue_id': row['issue_id'], 'cntrb_id': row['cntrb_id'], 'repo_id': repo_id} for row in result] + with engine.connect() as 
conn: + result = conn.execute(get_ranked_issues).fetchall() + + update_data = [{'issue_id': row[0], 'cntrb_id': row[1], 'repo_id': repo_id} for row in result] with engine.connect() as connection: update_stmt = s.text(""" UPDATE issues diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 577f17c32b..26d1027538 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -252,8 +252,8 @@ def insert_facade_contributors(repo_id): """).bindparams(repo_id=repo_id) #Execute statement with session. - result = manifest.augur_db.execute_sql(new_contrib_sql).fetchall() - new_contribs = [dict(zip(row.keys(), row)) for row in result] + result = manifest.augur_db.execute_sql(new_contrib_sql) + new_contribs = [dict(row) for row in result.mappings()] #print(new_contribs) @@ -303,8 +303,8 @@ def insert_facade_contributors(repo_id): #existing_cntrb_emails = json.loads(pd.read_sql(resolve_email_to_cntrb_id_sql, self.db, params={ # 'repo_id': repo_id}).to_json(orient="records")) - result = session.execute_sql(resolve_email_to_cntrb_id_sql).fetchall() - existing_cntrb_emails = [dict(zip(row.keys(), row)) for row in result] + result = session.execute_sql(resolve_email_to_cntrb_id_sql) + existing_cntrb_emails = [dict(row) for row in result.mappings()] print(existing_cntrb_emails) link_commits_to_contributor(session,list(existing_cntrb_emails)) diff --git a/augur/tasks/github/pull_requests/files_model/core.py b/augur/tasks/github/pull_requests/files_model/core.py index e7ebcd9457..81b4c4397a 100644 --- a/augur/tasks/github/pull_requests/files_model/core.py +++ b/augur/tasks/github/pull_requests/files_model/core.py @@ -20,8 +20,8 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth): pr_numbers = [] #pd.read_sql(pr_number_sql, self.db, params={}) - result = augur_db.execute_sql(pr_number_sql).fetchall() - pr_numbers = [dict(zip(row.keys(), row)) for row in result] + result = augur_db.execute_sql(pr_number_sql)#.fetchall() + pr_numbers = [dict(row) for row in result.mappings()] query = augur_db.session.query(Repo).filter(Repo.repo_id == repo_id) repo = execute_session_query(query, 'one') diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 54068d30a0..225f78ffde 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -102,8 +102,8 @@ def secondary_repo_collect_phase(repo_git): repo_task_group = group( process_pull_request_files.si(repo_git), process_pull_request_commits.si(repo_git), - process_ossf_dependency_metrics.si(repo_git), - chain(collect_pull_request_reviews.si(repo_git), collect_pull_request_review_comments.si(repo_git)) + chain(collect_pull_request_reviews.si(repo_git), collect_pull_request_review_comments.si(repo_git)), + process_ossf_dependency_metrics.si(repo_git) ) return repo_task_group diff --git a/augur/templates/login.j2 b/augur/templates/login.j2 index c71d02d50f..faaab620ea 100644 --- a/augur/templates/login.j2 +++ b/augur/templates/login.j2 @@ -108,7 +108,7 @@
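The recurring change across these hunks is the move to SQLAlchemy 2.x idioms: explicit `engine.begin()` transactions in place of the removed engine-level execution, `Result.mappings()` instead of building dicts from raw row tuples, and `.bindparams()` attached to `text()` clauses rather than keyword arguments passed to `execute()`. A minimal sketch of those idioms, assuming only SQLAlchemy 2.x and an in-memory SQLite placeholder database (not Augur's PostgreSQL schema):

```py
# Sketch of the SQLAlchemy 2.x patterns applied throughout this patch.
# The SQLite URL and the "example" table are illustrative placeholders;
# Augur itself runs against PostgreSQL and its own schema.
import sqlalchemy as s
from sqlalchemy.pool import StaticPool

# StaticPool keeps a single shared connection so the in-memory database
# persists across the two context managers below.
engine = s.create_engine("sqlite:///:memory:", poolclass=StaticPool)

# engine.begin() opens a transaction and commits on exit, replacing the
# engine-level autocommit (engine.execute) removed in SQLAlchemy 2.0.
with engine.begin() as connection:
    connection.execute(s.text("CREATE TABLE example (id INTEGER, name TEXT)"))

    # Bind values are attached to the text() clause with .bindparams(),
    # rather than passed as keyword arguments to execute().
    insert_stmt = s.text(
        "INSERT INTO example (id, name) VALUES (:id, :name)"
    ).bindparams(id=1, name="augur")
    connection.execute(insert_stmt)

# Read path: rows are plain tuples in 2.x, so dict(row) no longer works;
# .mappings() yields dict-like rows keyed by column name.
with engine.connect() as connection:
    result = connection.execute(s.text("SELECT id, name FROM example"))
    records = [dict(row) for row in result.mappings()]

print(records)  # [{'id': 1, 'name': 'augur'}]
```

The Postgres-backed calls in the patch follow the same shape; only the connection URL and the statements differ.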