-
Notifications
You must be signed in to change notification settings - Fork 5.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[native] Improve Zombie Tasks logging. #22607
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -207,15 +207,16 @@ void checkSplitsForBatchTask( | |
} | ||
|
||
struct ZombieTaskStats { | ||
const std::string taskId; | ||
const std::string taskInfo; | ||
|
||
explicit ZombieTaskStats(const std::shared_ptr<exec::Task>& task) | ||
: taskId(task->taskId()), taskInfo(task->toString()) {} | ||
|
||
std::string toString() const { | ||
return SystemConfig::instance()->logZombieTaskInfo() ? taskInfo : taskId; | ||
} | ||
const std::string info; | ||
const long numExtraReferences; | ||
|
||
ZombieTaskStats( | ||
const std::shared_ptr<exec::Task>& task, | ||
long _numExtraReferences) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @xiaoxmeng |
||
: info( | ||
SystemConfig::instance()->logZombieTaskInfo() ? task->toString() | ||
: task->taskId()), | ||
numExtraReferences(_numExtraReferences) {} | ||
}; | ||
|
||
// Helper structure holding stats for 'zombie' tasks. | ||
|
@@ -235,7 +236,9 @@ struct ZombieTaskStatsSet { | |
tasks.reserve(numSampleTasks); | ||
} | ||
|
||
void updateCounts(std::shared_ptr<exec::Task>& task) { | ||
void updateCounts( | ||
std::shared_ptr<exec::Task>& task, | ||
long numExtraReferences) { | ||
switch (task->state()) { | ||
case exec::TaskState::kRunning: | ||
++numRunning; | ||
|
@@ -256,7 +259,7 @@ struct ZombieTaskStatsSet { | |
break; | ||
} | ||
if (tasks.size() < numSampleTasks) { | ||
tasks.emplace_back(task); | ||
tasks.emplace_back(task, numExtraReferences); | ||
} | ||
} | ||
|
||
|
@@ -271,8 +274,10 @@ struct ZombieTaskStatsSet { | |
<< numFailed << "] Sample task IDs (shows only " | ||
<< numSampleTasks << " IDs): " << std::endl; | ||
for (auto i = 0; i < tasks.size(); ++i) { | ||
LOG(ERROR) << "Zombie Task[" << i + 1 << "/" << tasks.size() | ||
<< "]: " << tasks[i].toString() << std::endl; | ||
LOG(ERROR) << "Zombie " << hangingClassName << " [" << i + 1 << "/" | ||
<< tasks.size() | ||
<< "]: Extra Refs: " << tasks[i].numExtraReferences << ", " | ||
<< tasks[i].info << std::endl; | ||
} | ||
} | ||
}; | ||
|
@@ -615,7 +620,7 @@ std::unique_ptr<TaskInfo> TaskManager::deleteTask( | |
} | ||
|
||
// Do not erase the finished/aborted tasks, because someone might still want | ||
// to get some results from them. Instead we run a periodic task to clean up | ||
// to get some results from them. Instead, we run a periodic task to clean up | ||
// the old finished/aborted tasks. | ||
if (prestoTask->info.taskStatus.state == protocol::TaskState::RUNNING) { | ||
prestoTask->info.taskStatus.state = protocol::TaskState::ABORTED; | ||
|
@@ -677,12 +682,12 @@ size_t TaskManager::cleanOldTasks() { | |
if (prestoTaskRefCount > 2) { | ||
++zombiePrestoTaskCounts.numTotal; | ||
if (task != nullptr) { | ||
zombiePrestoTaskCounts.updateCounts(task); | ||
zombiePrestoTaskCounts.updateCounts(task, prestoTaskRefCount - 2); | ||
} | ||
} | ||
if (taskRefCount > 1) { | ||
++zombieVeloxTaskCounts.numTotal; | ||
zombieVeloxTaskCounts.updateCounts(task); | ||
zombieVeloxTaskCounts.updateCounts(task, taskRefCount - 1); | ||
} | ||
} else { | ||
taskIdsToClean.emplace(id); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Why was
explicit
removed? This should still be used for the construction during the emplace call like before?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
explicit is was used for single-argument constructors, so there would be no implicit type conversion.