Skip to content

Commit

Permalink
[native] Improve Zombie Tasks logging.
Browse files Browse the repository at this point in the history
1. Only get task info is really needed.
2. Log also the number of extra references.
3. Log the type of Task for each task.
  • Loading branch information
spershin committed Apr 24, 2024
1 parent 369f9d3 commit 1b42782
Showing 1 changed file with 21 additions and 16 deletions.
37 changes: 21 additions & 16 deletions presto-native-execution/presto_cpp/main/TaskManager.cpp
Expand Up @@ -207,15 +207,16 @@ void checkSplitsForBatchTask(
}

struct ZombieTaskStats {
const std::string taskId;
const std::string taskInfo;

explicit ZombieTaskStats(const std::shared_ptr<exec::Task>& task)
: taskId(task->taskId()), taskInfo(task->toString()) {}

std::string toString() const {
return SystemConfig::instance()->logZombieTaskInfo() ? taskInfo : taskId;
}
const std::string info;
const long numExtraReferences;

ZombieTaskStats(
const std::shared_ptr<exec::Task>& task,
long numExtraReferences_)
: info(
SystemConfig::instance()->logZombieTaskInfo() ? task->toString()
: task->taskId()),
numExtraReferences(numExtraReferences_) {}
};

// Helper structure holding stats for 'zombie' tasks.
Expand All @@ -235,7 +236,9 @@ struct ZombieTaskStatsSet {
tasks.reserve(numSampleTasks);
}

void updateCounts(std::shared_ptr<exec::Task>& task) {
void updateCounts(
std::shared_ptr<exec::Task>& task,
const long numExtraReferences) {
switch (task->state()) {
case exec::TaskState::kRunning:
++numRunning;
Expand All @@ -256,7 +259,7 @@ struct ZombieTaskStatsSet {
break;
}
if (tasks.size() < numSampleTasks) {
tasks.emplace_back(task);
tasks.emplace_back(task, numExtraReferences);
}
}

Expand All @@ -271,8 +274,10 @@ struct ZombieTaskStatsSet {
<< numFailed << "] Sample task IDs (shows only "
<< numSampleTasks << " IDs): " << std::endl;
for (auto i = 0; i < tasks.size(); ++i) {
LOG(ERROR) << "Zombie Task[" << i + 1 << "/" << tasks.size()
<< "]: " << tasks[i].toString() << std::endl;
LOG(ERROR) << "Zombie " << hangingClassName << " [" << i + 1 << "/"
<< tasks.size()
<< "]: Extra Refs: " << tasks[i].numExtraReferences << ", "
<< tasks[i].info << std::endl;
}
}
};
Expand Down Expand Up @@ -615,7 +620,7 @@ std::unique_ptr<TaskInfo> TaskManager::deleteTask(
}

// Do not erase the finished/aborted tasks, because someone might still want
// to get some results from them. Instead we run a periodic task to clean up
// to get some results from them. Instead, we run a periodic task to clean up
// the old finished/aborted tasks.
if (prestoTask->info.taskStatus.state == protocol::TaskState::RUNNING) {
prestoTask->info.taskStatus.state = protocol::TaskState::ABORTED;
Expand Down Expand Up @@ -677,12 +682,12 @@ size_t TaskManager::cleanOldTasks() {
if (prestoTaskRefCount > 2) {
++zombiePrestoTaskCounts.numTotal;
if (task != nullptr) {
zombiePrestoTaskCounts.updateCounts(task);
zombiePrestoTaskCounts.updateCounts(task, prestoTaskRefCount - 2);
}
}
if (taskRefCount > 1) {
++zombieVeloxTaskCounts.numTotal;
zombieVeloxTaskCounts.updateCounts(task);
zombieVeloxTaskCounts.updateCounts(task, taskRefCount - 1);
}
} else {
taskIdsToClean.emplace(id);
Expand Down

0 comments on commit 1b42782

Please sign in to comment.