Merge pull request #2283 from chaoss/dev
Release 0.50.1
sgoggins committed Apr 4, 2023
2 parents 1a3b137 + 15d6e12 commit 696099b
Showing 20 changed files with 125,944 additions and 45 deletions.
5 changes: 2 additions & 3 deletions README.md
@@ -1,5 +1,4 @@

-# Augur NEW Release v0.50.0
+# Augur NEW Release v0.50.1

[![first-timers-only](https://img.shields.io/badge/first--timers--only-friendly-blue.svg?style=flat-square)](https://www.firsttimersonly.com/) We follow the [First Timers Only](https://www.firsttimersonly.com/) philosophy of tagging issues for first timers only, and walking one newcomer through the resolution process weekly. [You can find these issues tagged with "first timers only" on our issues list.](https://github.com/chaoss/augur/labels/first-timers-only).

@@ -9,7 +8,7 @@
### [If you want to jump right in, updated docker build/compose and bare metal installation instructions are available here](docs/new-install.md)


-Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.50.0
+Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.50.1
- The `main` branch is a stable version of our new architecture, which features:
- Dramatic improvement in the speed of large scale data collection (100,000+ repos). All data is obtained for 100k+ repos within 2 weeks.
- A new job management architecture that uses Celery and Redis to manage queues, and enables users to run a Flower job monitoring dashboard
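A quick way to try the Flower dashboard mentioned above is to point it at the same Celery app the workers use. A minimal sketch, not part of this diff, assuming Flower is installed (pip install flower) and listening on its default port:

import subprocess

# illustrative only: launch a Flower monitor for the Augur Celery app
flower_cmd = "celery -A augur.tasks.init.celery_app.celery_app flower --port=5555"
flower_process = subprocess.Popen(flower_cmd.split(" "))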
1 change: 1 addition & 0 deletions SECURITY.md
@@ -6,6 +6,7 @@ These versions of Augur are currently supported with security updates.

| Version | Supported |
| ------- | ------------------ |
| 0.50.1 | :white_check_mark: |
| 0.50.0 | :white_check_mark: |
| 0.44.5 | :white_check_mark: |
| 0.44.3 | :white_check_mark: |
11 changes: 5 additions & 6 deletions augur/api/view/routes.py
@@ -72,14 +72,13 @@ def repo_table_view():
config = AugurConfig(logger, db_session)

pagination_offset = config.get_value("frontend", "pagination_offset")


if current_user.is_authenticated:
-data = current_user.get_repos(page = page, sort = sorting, direction = direction)[0]
-page_count = (current_user.get_repo_count()[0] or 0) // pagination_offset
+data = current_user.get_repos(page = page, sort = sorting, direction = direction, search=query)[0]
+page_count = (current_user.get_repo_count(search = query)[0] or 0) // pagination_offset
else:
-data = get_all_repos(page = page, sort = sorting, direction = direction)[0]
-page_count = (get_all_repos_count()[0] or 0) // pagination_offset
+data = get_all_repos(page = page, sort = sorting, direction = direction, search=query)[0]
+page_count = (get_all_repos_count(search = query)[0] or 0) // pagination_offset

#if not cacheFileExists("repos.json"):
# return renderLoading("repos/views/table", query, "repos.json")
@@ -133,7 +132,7 @@ def repo_card_view():
This route returns the status view, which displays information
about the current status of collection in the backend
"""
-@app.route('/status')
+@app.route('/collection/status')
def status_view():
return render_module("status", title="Status")

4 changes: 2 additions & 2 deletions augur/application/cli/backend.py
@@ -107,8 +107,8 @@ def start(disable_collection, development, port):
os.remove("celerybeat-schedule.db")

scheduling_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=1 -n scheduling:{uuid.uuid4().hex}@%h -Q scheduling"
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=14 -n core:{uuid.uuid4().hex}@%h"
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=5 -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=60 -n core:{uuid.uuid4().hex}@%h"
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=10 -n secondary:{uuid.uuid4().hex}@%h -Q secondary"

scheduling_worker_process = subprocess.Popen(scheduling_worker.split(" "))
core_worker_process = subprocess.Popen(core_worker.split(" "))
8 changes: 4 additions & 4 deletions augur/application/db/models/augur_operations.py
@@ -449,20 +449,20 @@ def get_group_names(self):
return group_names, {"status": "success"}


-def get_repos(self, page=0, page_size=25, sort="repo_id", direction="ASC"):
+def get_repos(self, page=0, page_size=25, sort="repo_id", direction="ASC", search=None):

from augur.util.repo_load_controller import RepoLoadController

with DatabaseSession(logger) as session:
result = RepoLoadController(session).paginate_repos("user", page, page_size, sort, direction, user=self)
result = RepoLoadController(session).paginate_repos("user", page, page_size, sort, direction, user=self, search=search)

return result

-def get_repo_count(self):
+def get_repo_count(self, search = None):
from augur.util.repo_load_controller import RepoLoadController

with DatabaseSession(logger) as session:
result = RepoLoadController(session).get_repo_count(source="user", user=self)
result = RepoLoadController(session).get_repo_count(source="user", user=self, search = search)

return result

@@ -0,0 +1,36 @@
"""No null author affiliation and committer affiliation in commits table
Revision ID: 13
Revises: 12
Create Date: 2023-02-23 10:14:08.787528
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.sql import text
import re


# revision identifiers, used by Alembic.
revision = '13'
down_revision = '12'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
conn = op.get_bind()
conn.execute(text(f"""
ALTER TABLE "augur_data"."commits"
ALTER COLUMN "cmt_author_affiliation" SET DEFAULT NULL,
ALTER COLUMN "cmt_committer_affiliation" SET DEFAULT NULL;
"""))
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###
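
Note that SET DEFAULT NULL only changes the default applied to future inserts; existing affiliation values are left untouched, which is why the downgrade can safely be a no-op.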
@@ -0,0 +1,34 @@
"""Frequent vaccuming on high update commits table
Revision ID: 14
Revises: 13
Create Date: 2023-02-23 10:14:08.787528
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.sql import text
import re


# revision identifiers, used by Alembic.
revision = '14'
down_revision = '13'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
conn = op.get_bind()
conn.execute(text(f"""
ALTER TABLE augur_data.commits SET (autovacuum_vacuum_scale_factor = 0, autovacuum_vacuum_threshold = 1000);
"""))
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###
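
With autovacuum_vacuum_scale_factor at 0, the usual size-proportional trigger drops out and autovacuum fires once roughly autovacuum_vacuum_threshold (1,000) rows are dead, no matter how large the commits table grows. A verification sketch, not part of this migration, with a hypothetical connection string:

from sqlalchemy import create_engine, text

# hypothetical connection string; substitute your own
engine = create_engine("postgresql://augur:password@localhost:5432/augur")
with engine.connect() as conn:
    opts = conn.execute(text(
        "SELECT reloptions FROM pg_class WHERE relname = 'commits'"
    )).scalar()
    print(opts)  # e.g. ['autovacuum_vacuum_scale_factor=0', 'autovacuum_vacuum_threshold=1000']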
@@ -0,0 +1,42 @@
"""Commit performance update
Revision ID: 15
Revises: 14
Create Date: 2023-02-23 10:14:08.787528
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.sql import text
import re


# revision identifiers, used by Alembic.
revision = '15'
down_revision = '14'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
conn = op.get_bind()
conn.execute(text(f"""
ALTER TABLE "augur_data"."commits"
DROP CONSTRAINT "cmt_ght_author_cntrb_id_fk",
ADD CONSTRAINT "cmt_ght_author_cntrb_id_fk" FOREIGN KEY ("cmt_ght_author_id") REFERENCES "augur_data"."contributors" ("cntrb_id") ON DELETE RESTRICT ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED;
"""))
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
conn = op.get_bind()
conn.execute(text(f"""
ALTER TABLE "augur_data"."commits"
DROP CONSTRAINT "cmt_ght_author_cntrb_id_fk",
ADD CONSTRAINT "cmt_ght_author_cntrb_id_fk" FOREIGN KEY ("cmt_ght_author_id") REFERENCES "augur_data"."contributors" ("cntrb_id");
"""))
pass
# ### end Alembic commands ###
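
Re-creating the foreign key as DEFERRABLE INITIALLY DEFERRED postpones the constraint check until transaction commit, presumably so bulk commit inserts can reference contributor rows created later in the same transaction; the downgrade restores the default immediate check.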
8 changes: 4 additions & 4 deletions augur/application/util.py
@@ -6,23 +6,23 @@

logger = logging.getLogger(__name__)

-def get_all_repos(page=0, page_size=25, sort="repo_id", direction="ASC"):
+def get_all_repos(page=0, page_size=25, sort="repo_id", direction="ASC", **kwargs):

with DatabaseEngine() as engine, DatabaseSession(logger, engine) as session:

controller = RepoLoadController(session)

result = controller.paginate_repos("all", page, page_size, sort, direction)
result = controller.paginate_repos("all", page, page_size, sort, direction, **kwargs)

return result

-def get_all_repos_count():
+def get_all_repos_count(**kwargs):

with DatabaseEngine() as engine, DatabaseSession(logger, engine) as session:

controller = RepoLoadController(session)

result = controller.get_repo_count(source="all")
result = controller.get_repo_count(source="all", **kwargs)

return result
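
A usage sketch of the new keyword passthrough, mirroring the call sites in repo_table_view above (the literal search term and page size are illustrative; the route derives its page size from the configured pagination_offset):

# illustrative call using the new search passthrough
data = get_all_repos(page=0, page_size=25, sort="repo_id", direction="ASC", search="augur")[0]
page_count = (get_all_repos_count(search="augur")[0] or 0) // 25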

@@ -38,7 +38,7 @@
import configparser
import traceback
import sqlalchemy as s
-from sqlalchemy.exc import IntegrityError
+from sqlalchemy.exc import IntegrityError, DataError

def analyze_commit(session, repo_id, repo_loc, commit):

@@ -159,6 +159,28 @@ def store_commit(repos_id,commit,filename,

try:
session.execute_sql(store)
except DataError as e:
session.logger.error(f"Ran into bad data when trying to insert commit with values: \n {commit_record} \n Error: {e}")

#Check for improper utc timezone offset
#UTC timezone offset should be between -14:00 and +14:00

if "time zone displacement" in f"{e}":
commit_record['author_timestamp'] = placeholder_date
commit_record['committer_timestamp'] = placeholder_date

store = s.sql.text("""INSERT INTO commits (repo_id,cmt_commit_hash,cmt_filename,
cmt_author_name,cmt_author_raw_email,cmt_author_email,cmt_author_date,cmt_author_timestamp,
cmt_committer_name,cmt_committer_raw_email,cmt_committer_email,cmt_committer_date,cmt_committer_timestamp,
cmt_added,cmt_removed,cmt_whitespace, cmt_date_attempted, tool_source, tool_version, data_source)
VALUES (:repo_id,:commit,:filename,:author_name,:author_email_raw,:author_email,:author_date,:author_timestamp,
:committer_name,:committer_email_raw,:committer_email,:committer_date,:committer_timestamp,
:added,:removed,:whitespace,:committer_date,:tool_source,:tool_version,:data_source)
""").bindparams(**commit_record)

session.execute_sql(store)
else:
raise e
except Exception as e:

session.logger.error(f"Ran into issue when trying to insert commit with values: \n {commit_record} \n Error: {e}")
12 changes: 8 additions & 4 deletions augur/tasks/github/facade_github/tasks.py
@@ -154,14 +154,17 @@ def process_commit_metadata(session,contributorQueue,repo_id):
return



#Replace each instance of a single or double quote with escape characters
#for postgres
escapedEmail = email.replace('"',r'\"')
escapedEmail = escapedEmail.replace("'",r'\'')
# Resolve any unresolved emails if we get to this point.
# They will get added to the alias table later
# Do this last to absolutely make sure that the email was resolved before we remove it from the unresolved table.
query = s.sql.text("""
DELETE FROM unresolved_commit_emails
WHERE email='{}'
""".format(email))
""".format(escapedEmail))

session.logger.info(f"Updating now resolved email {email}")

@@ -190,8 +193,9 @@ def link_commits_to_contributor(session,contributorQueue):
UPDATE commits
SET cmt_ght_author_id=:cntrb_id
WHERE
-cmt_author_raw_email=:cntrb_email
-OR cmt_author_email=:cntrb_email
+(cmt_author_raw_email=:cntrb_email
+OR cmt_author_email=:cntrb_email)
+AND cmt_ght_author_id is NULL
""").bindparams(cntrb_id=cntrb["cntrb_id"],cntrb_email=cntrb["email"])

#engine.execute(query, **data)
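
The added parentheses matter because SQL binds AND more tightly than OR: without them, the new cmt_ght_author_id IS NULL guard would attach only to the cmt_author_email branch, and commits already linked to an author could still be rewritten through the cmt_author_raw_email match.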
8 changes: 4 additions & 4 deletions augur/tasks/start_tasks.py
@@ -365,15 +365,15 @@ def augur_collection_monitor():
enabled_phase_names = get_enabled_phase_names_from_config(session.logger, session)

if primary_repo_collect_phase.__name__ in enabled_phase_names:
-start_primary_collection(session, max_repo=50, days=30)
+start_primary_collection(session, max_repo=40, days=30)

if secondary_repo_collect_phase.__name__ in enabled_phase_names:
-start_secondary_collection(session, max_repo=30, days=30)
+start_secondary_collection(session, max_repo=10, days=30)

if facade_phase.__name__ in enabled_phase_names:
#Schedule facade collection before clone/updates as that is a higher priority
-start_facade_collection(session, max_repo=30, days=30)
-start_facade_clone_update(session,max_repo=15,days=30)
+start_facade_collection(session, max_repo=15, days=30)
+start_facade_clone_update(session,max_repo=5,days=30)


