diff --git a/backend/btrixcloud/crawls.py b/backend/btrixcloud/crawls.py index 74897c63ab..e45ae1ac27 100644 --- a/backend/btrixcloud/crawls.py +++ b/backend/btrixcloud/crawls.py @@ -441,6 +441,13 @@ async def inc_crawl_exec_time(self, crawl_id, exec_time, last_updated_time): }, ) + async def get_crawl_exec_last_update_time(self, crawl_id): + """get crawl last updated time""" + res = await self.crawls.find_one( + {"_id": crawl_id, "type": "crawl"}, projection=["_lut"] + ) + return res and res.get("_lut") + async def get_crawl_state(self, crawl_id): """return current crawl state of a crawl""" res = await self.crawls.find_one( diff --git a/backend/btrixcloud/operator/crawls.py b/backend/btrixcloud/operator/crawls.py index bab54232bb..e050b09ce4 100644 --- a/backend/btrixcloud/operator/crawls.py +++ b/backend/btrixcloud/operator/crawls.py @@ -232,7 +232,9 @@ async def sync_crawls(self, data: MCSyncData): else: status.scale = crawl.scale - status.lastUpdatedTime = to_k8s_date(dt_now()) + now = dt_now() + await self.crawl_ops.inc_crawl_exec_time(crawl_id, 0, now) + status.lastUpdatedTime = to_k8s_date(now) children = self._load_redis(params, status, data.children) @@ -828,12 +830,15 @@ async def increment_pod_exec_time( """inc exec time tracking""" now = dt_now() - if not status.lastUpdatedTime: + update_start_time = await self.crawl_ops.get_crawl_exec_last_update_time( + crawl_id + ) + + if not update_start_time: + await self.crawl_ops.inc_crawl_exec_time(crawl_id, 0, now) status.lastUpdatedTime = to_k8s_date(now) return - update_start_time = from_k8s_date(status.lastUpdatedTime) - reason = None update_duration = (now - update_start_time).total_seconds() @@ -907,16 +912,6 @@ async def increment_pod_exec_time( max_duration = max(duration, max_duration) if exec_time: - if not await self.crawl_ops.inc_crawl_exec_time( - crawl_id, exec_time, status.lastUpdatedTime - ): - # if lastUpdatedTime is same as previous, something is wrong, don't update! - print( - "Already updated for lastUpdatedTime, skipping execTime update!", - flush=True, - ) - return - await self.org_ops.inc_org_time_stats(oid, exec_time, True) status.crawlExecTime += exec_time status.elapsedCrawlTime += max_duration @@ -926,6 +921,7 @@ async def increment_pod_exec_time( flush=True, ) + await self.crawl_ops.inc_crawl_exec_time(crawl_id, exec_time, now) status.lastUpdatedTime = to_k8s_date(now) def should_mark_waiting(self, state, started):