diff --git a/backend/btrixcloud/db.py b/backend/btrixcloud/db.py
index d84c0fb3c..e9064a47f 100644
--- a/backend/btrixcloud/db.py
+++ b/backend/btrixcloud/db.py
@@ -17,7 +17,7 @@
 from .migrations import BaseMigration
 
 
-CURR_DB_VERSION = "0026"
+CURR_DB_VERSION = "0025"
 
 
 # ============================================================================
diff --git a/backend/btrixcloud/migrations/migration_0026_crawl_pages.py b/backend/btrixcloud/migrations/migration_0026_crawl_pages.py
deleted file mode 100644
index 18f8874e3..000000000
--- a/backend/btrixcloud/migrations/migration_0026_crawl_pages.py
+++ /dev/null
@@ -1,47 +0,0 @@
-"""
-Migration 0026 -- Crawl Pages
-"""
-
-from btrixcloud.migrations import BaseMigration
-from btrixcloud.utils import gather_tasks_with_concurrency
-
-
-MIGRATION_VERSION = "0026"
-
-
-class Migration(BaseMigration):
-    """Migration class."""
-
-    def __init__(self, mdb, **kwargs):
-        super().__init__(mdb, migration_version=MIGRATION_VERSION)
-        self.page_ops = kwargs["page_ops"]
-
-    async def migrate_up(self):
-        """Perform migration up.
-
-        Add pages to database for each crawl without them, pulling from WACZ files.
-        """
-        # pylint: disable=duplicate-code
-        crawls_mdb = self.mdb["crawls"]
-        pages_mdb = self.mdb["pages"]
-
-        crawl_ids = await crawls_mdb.distinct(
-            "_id", {"type": "crawl", "finished": {"$ne": None}}
-        )
-        crawl_ids_with_pages = await pages_mdb.distinct("crawl_id")
-
-        crawl_ids_no_pages = list(set(crawl_ids) - set(crawl_ids_with_pages))
-        if not crawl_ids_no_pages:
-            return
-
-        all_coroutines = []
-
-        for crawl_id in crawl_ids_no_pages:
-            current_coroutine = self.page_ops.add_crawl_pages_to_db_from_wacz(crawl_id)
-            all_coroutines.append(current_coroutine)
-
-        try:
-            await gather_tasks_with_concurrency(*all_coroutines)
-        # pylint: disable=broad-exception-caught, raise-missing-from
-        except Exception as err:
-            print(f"Error adding pages to db: {err}", flush=True)