diff --git a/backend/btrixcloud/crawls.py b/backend/btrixcloud/crawls.py
index 231a5c038..8427f53ca 100644
--- a/backend/btrixcloud/crawls.py
+++ b/backend/btrixcloud/crawls.py
@@ -461,6 +461,13 @@ async def inc_crawl_exec_time(self, crawl_id, exec_time, last_updated_time):
             },
         )
 
+    async def get_crawl_exec_last_update_time(self, crawl_id):
+        """get crawl last updated time"""
+        res = await self.crawls.find_one(
+            {"_id": crawl_id, "type": "crawl"}, projection=["_lut"]
+        )
+        return res and res.get("_lut")
+
     async def get_crawl_state(self, crawl_id):
         """return current crawl state of a crawl"""
         res = await self.crawls.find_one(
diff --git a/backend/btrixcloud/operator.py b/backend/btrixcloud/operator.py
index e9109e489..2bea3f0ee 100644
--- a/backend/btrixcloud/operator.py
+++ b/backend/btrixcloud/operator.py
@@ -515,7 +515,9 @@ async def sync_crawls(self, data: MCSyncData):
         else:
             status.scale = crawl.scale
 
-        status.lastUpdatedTime = to_k8s_date(dt_now())
+        now = dt_now()
+        await self.crawl_ops.inc_crawl_exec_time(crawl_id, 0, now)
+        status.lastUpdatedTime = to_k8s_date(now)
 
         children = self._load_redis(params, status, data.children)
 
@@ -1107,12 +1109,15 @@ async def increment_pod_exec_time(
         """inc exec time tracking"""
         now = dt_now()
 
-        if not status.lastUpdatedTime:
+        update_start_time = await self.crawl_ops.get_crawl_exec_last_update_time(
+            crawl_id
+        )
+
+        if not update_start_time:
+            await self.crawl_ops.inc_crawl_exec_time(crawl_id, 0, now)
             status.lastUpdatedTime = to_k8s_date(now)
             return
 
-        update_start_time = from_k8s_date(status.lastUpdatedTime)
-
         reason = None
         update_duration = (now - update_start_time).total_seconds()
 
@@ -1186,16 +1191,6 @@ async def increment_pod_exec_time(
             max_duration = max(duration, max_duration)
 
         if exec_time:
-            if not await self.crawl_ops.inc_crawl_exec_time(
-                crawl_id, exec_time, status.lastUpdatedTime
-            ):
-                # if lastUpdatedTime is same as previous, something is wrong, don't update!
-                print(
-                    "Already updated for lastUpdatedTime, skipping execTime update!",
-                    flush=True,
-                )
-                return
-
             await self.org_ops.inc_org_time_stats(oid, exec_time, True)
             status.crawlExecTime += exec_time
             status.elapsedCrawlTime += max_duration
@@ -1205,6 +1200,7 @@ async def increment_pod_exec_time(
                 flush=True,
             )
 
+        await self.crawl_ops.inc_crawl_exec_time(crawl_id, exec_time, now)
         status.lastUpdatedTime = to_k8s_date(now)
 
     def should_mark_waiting(self, state, started):
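
Note on the change: the operator now reads its exec-time checkpoint from the database field "_lut" (via get_crawl_exec_last_update_time) rather than from the in-memory status.lastUpdatedTime, and inc_crawl_exec_time is expected to both increment the counter and record the checkpoint it was computed from. The diff does not show that method's body; below is a minimal illustrative sketch only, assuming self.crawls is a Motor/MongoDB collection and that the counter field is named crawlExecSeconds (both assumptions, not taken from this patch):

    # Sketch, not the actual implementation: atomically increment exec time and
    # record the "_lut" checkpoint, guarded so the same checkpoint is never
    # applied twice (which would double-count the interval).
    async def inc_crawl_exec_time(self, crawl_id, exec_time, last_updated_time):
        """increment exec time and store the checkpoint used to compute it"""
        return await self.crawls.find_one_and_update(
            # only match if this checkpoint hasn't already been recorded
            {"_id": crawl_id, "type": "crawl", "_lut": {"$ne": last_updated_time}},
            {
                "$inc": {"crawlExecSeconds": exec_time},  # assumed field name
                "$set": {"_lut": last_updated_time},
            },
        )

With the checkpoint persisted, calling inc_crawl_exec_time(crawl_id, 0, now) in sync_crawls and in the "no checkpoint yet" branch simply seeds "_lut" without adding time, which is why the old "Already updated for lastUpdatedTime" guard and early return could be dropped.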