Skip to content

Commit

Permalink
[native] Improve Zombie Tasks logging.
Browse files Browse the repository at this point in the history
1. Only get task info if really needed.
2. Log also the number of extra references.
3. Log the type of Task for each task.
  • Loading branch information
spershin authored and xiaoxmeng committed Apr 30, 2024
1 parent 6b43924 commit c437ea4
Showing 1 changed file with 21 additions and 16 deletions.
37 changes: 21 additions & 16 deletions presto-native-execution/presto_cpp/main/TaskManager.cpp
Expand Up @@ -207,15 +207,16 @@ void checkSplitsForBatchTask(
}

// Per-task record captured for 'zombie' task logging.
// NOTE(review): this listing appears to be a rendered diff without +/-
// markers, so the members below interleave an older shape of the struct
// (taskId / taskInfo / toString()) with a newer one (info /
// numExtraReferences) — confirm against the actual file before relying on it.
struct ZombieTaskStats {
const std::string taskId;
const std::string taskInfo;

// Eagerly stores both task->taskId() and the full task->toString() text,
// regardless of whether the detailed text ends up being logged.
explicit ZombieTaskStats(const std::shared_ptr<exec::Task>& task)
: taskId(task->taskId()), taskInfo(task->toString()) {}

// Returns the full task text when SystemConfig's logZombieTaskInfo()
// evaluates to true, otherwise only the task id.
std::string toString() const {
return SystemConfig::instance()->logZombieTaskInfo() ? taskInfo : taskId;
}
// Text to log for the task; chosen once, at construction time.
const std::string info;
// Extra-reference count for the task, as supplied by the caller.
const long numExtraReferences;

// Calls task->toString() only when logZombieTaskInfo() evaluates to true;
// otherwise stores just task->taskId(). The reference count is copied as
// given.
ZombieTaskStats(
const std::shared_ptr<exec::Task>& task,
long _numExtraReferences)
: info(
SystemConfig::instance()->logZombieTaskInfo() ? task->toString()
: task->taskId()),
numExtraReferences(_numExtraReferences) {}
};

// Helper structure holding stats for 'zombie' tasks.
Expand All @@ -235,7 +236,9 @@ struct ZombieTaskStatsSet {
tasks.reserve(numSampleTasks);
}

void updateCounts(std::shared_ptr<exec::Task>& task) {
void updateCounts(
std::shared_ptr<exec::Task>& task,
long numExtraReferences) {
switch (task->state()) {
case exec::TaskState::kRunning:
++numRunning;
Expand All @@ -256,7 +259,7 @@ struct ZombieTaskStatsSet {
break;
}
if (tasks.size() < numSampleTasks) {
tasks.emplace_back(task);
tasks.emplace_back(task, numExtraReferences);
}
}

Expand All @@ -271,8 +274,10 @@ struct ZombieTaskStatsSet {
<< numFailed << "] Sample task IDs (shows only "
<< numSampleTasks << " IDs): " << std::endl;
for (auto i = 0; i < tasks.size(); ++i) {
LOG(ERROR) << "Zombie Task[" << i + 1 << "/" << tasks.size()
<< "]: " << tasks[i].toString() << std::endl;
LOG(ERROR) << "Zombie " << hangingClassName << " [" << i + 1 << "/"
<< tasks.size()
<< "]: Extra Refs: " << tasks[i].numExtraReferences << ", "
<< tasks[i].info << std::endl;
}
}
};
Expand Down Expand Up @@ -615,7 +620,7 @@ std::unique_ptr<TaskInfo> TaskManager::deleteTask(
}

// Do not erase the finished/aborted tasks, because someone might still want
// to get some results from them. Instead we run a periodic task to clean up
// to get some results from them. Instead, we run a periodic task to clean up
// the old finished/aborted tasks.
if (prestoTask->info.taskStatus.state == protocol::TaskState::RUNNING) {
prestoTask->info.taskStatus.state = protocol::TaskState::ABORTED;
Expand Down Expand Up @@ -677,12 +682,12 @@ size_t TaskManager::cleanOldTasks() {
if (prestoTaskRefCount > 2) {
++zombiePrestoTaskCounts.numTotal;
if (task != nullptr) {
zombiePrestoTaskCounts.updateCounts(task);
zombiePrestoTaskCounts.updateCounts(task, prestoTaskRefCount - 2);
}
}
if (taskRefCount > 1) {
++zombieVeloxTaskCounts.numTotal;
zombieVeloxTaskCounts.updateCounts(task);
zombieVeloxTaskCounts.updateCounts(task, taskRefCount - 1);
}
} else {
taskIdsToClean.emplace(id);
Expand Down

0 comments on commit c437ea4

Please sign in to comment.