Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an option to resume jobs using a different jobname. #2082

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 22 additions & 0 deletions siliconcompiler/core.py
Expand Up @@ -3754,6 +3754,12 @@ def _reset_flow_nodes(self, flow, nodes_to_execute):
# to set values to None for steps we may re-run so that merging
# manifests from _runtask() actually updates values.
should_resume = self.get("option", 'resume')

# If we are resuming a job under a new jobname, make sure the new job's
# build directory exists; nodes that already succeeded are symlinked into
# it from the old job's build directory below.
if self.get('option', 'resumename') and should_resume:
os.makedirs(self._getworkdir(jobname=self.get('option', 'resumename')), exist_ok=True)

for (step, index) in self._get_flowgraph_nodes(flow):
stepdir = self._getworkdir(step=step, index=index)
cfg = f"{stepdir}/outputs/{self.get('design')}.pkg.json"
Expand All @@ -3773,6 +3779,19 @@ def _reset_flow_nodes(self, flow, nodes_to_execute):
elif os.path.isfile(cfg):
node_status = Schema(manifest=cfg).get('flowgraph', flow, step, index, 'status')
self.set('flowgraph', flow, step, index, 'status', node_status)
if node_status == NodeStatus.SUCCESS and \
self.get('option', 'resumename') and \
should_resume:
os.makedirs(
os.path.join(self._getworkdir(jobname=self.get('option', 'resumename')),
step),
exist_ok=True)
os.symlink(self._getworkdir(jobname=self.get('option', 'jobname'),
step=step,
index=index),
self._getworkdir(jobname=self.get('option', 'resumename'),
step=step,
index=index))
else:
self.set('flowgraph', flow, step, index, 'status', NodeStatus.ERROR)

Expand All @@ -3793,6 +3812,9 @@ def _reset_flow_nodes(self, flow, nodes_to_execute):
for record in self.getkeys('record'):
self._clear_record(step, index, record)

if self.get('option', 'resumename') and should_resume:
self.set('option', 'jobname', self.get('option', 'resumename'))

def clean_build_dir(self):
if not self.get('option', 'resume') and not self.get('arg', 'step') \
and not self.get('option', 'from') and not self.get('record', 'remoteid'):
Expand Down
14 changes: 13 additions & 1 deletion siliconcompiler/schema/schema_cfg.py
Expand Up @@ -11,7 +11,7 @@
except ImportError:
from siliconcompiler.schema.utils import trim

SCHEMA_VERSION = '0.37.1'
SCHEMA_VERSION = '0.38.0'


#############################################################################
Expand Down Expand Up @@ -2434,6 +2434,18 @@ def schema_option(cfg):
enables multiple levels of transparent job, step, and index
introspection.""")

scparam(cfg, ['option', 'resumename'],
sctype='str',
scope='job',
defvalue=None,
shorthelp="Resume job name",
switch="-resumename <str>",
example=[
"cli: -resumename may2",
"api: chip.set('option', 'resumename', 'may2')"],
schelp="""Resume a previous run with a new jobname.
Has no effect if ('option', 'resume') is not set.""")

# TODO: remove?
scparam(cfg, ['option', 'jobinput', 'default', 'default'],
sctype='str',
Expand Down
27 changes: 26 additions & 1 deletion tests/core/data/defaults.json
Expand Up @@ -7140,6 +7140,31 @@
],
"type": "bool"
},
"resumename": {
"example": [
"cli: -resumename may2",
"api: chip.set('option', 'resumename', 'may2')"
],
"help": "Resume a previous run with a new jobname.\nHas no effect if ('option', 'resume') is not set.",
"lock": false,
"node": {
"default": {
"default": {
"signature": null,
"value": null
}
}
},
"notes": null,
"pernode": "never",
"require": null,
"scope": "job",
"shorthelp": "Resume job name",
"switch": [
"-resumename <str>"
],
"type": "str"
},
"scheduler": {
"cores": {
"example": [
Expand Down Expand Up @@ -10325,7 +10350,7 @@
"default": {
"default": {
"signature": null,
"value": "0.37.1"
"value": "0.38.0"
}
}
},
Expand Down
52 changes: 26 additions & 26 deletions tests/core/data/last_minor.json
Expand Up @@ -4808,31 +4808,6 @@
],
"type": "str"
},
"valid": {
"example": [
"cli: -flowgraph_valid 'asicflow cts 0 true'",
"api: chip.set('flowgraph', 'asicflow', 'cts', '0', 'valid', True)"
],
"help": "Flowgraph valid bit specified on a per step and per index basis.\nThe parameter can be used to control flow execution. If the bit\nis cleared (0), then the step/index combination is invalid and\nshould not be run.",
"lock": false,
"node": {
"default": {
"default": {
"signature": null,
"value": false
}
}
},
"notes": null,
"pernode": "never",
"require": "all",
"scope": "job",
"shorthelp": "Flowgraph: task valid bit",
"switch": [
"-flowgraph_valid 'flow step index <str>'"
],
"type": "bool"
},
"weight": {
"default": {
"example": [
Expand Down Expand Up @@ -7165,6 +7140,31 @@
],
"type": "bool"
},
"resumename": {
"example": [
"cli: -resumename may2",
"api: chip.set('option', 'resumename', 'may2')"
],
"help": "Resume a previous run with a new jobname.\nHas no effect if ('option', 'resume') is not set.",
"lock": false,
"node": {
"default": {
"default": {
"signature": null,
"value": null
}
}
},
"notes": null,
"pernode": "never",
"require": null,
"scope": "job",
"shorthelp": "Resume job name",
"switch": [
"-resumename <str>"
],
"type": "str"
},
"scheduler": {
"cores": {
"example": [
Expand Down Expand Up @@ -10350,7 +10350,7 @@
"default": {
"default": {
"signature": null,
"value": "0.37.0"
"value": "0.38.0"
}
}
},
Expand Down
40 changes: 40 additions & 0 deletions tests/flows/test_resume.py
Expand Up @@ -36,3 +36,43 @@ def test_resume(gcd_chip):
# Ensure flow finished successfully
assert gcd_chip.find_result('def', step='place') is not None
assert gcd_chip.find_result('gds', step='export') is not None


@pytest.mark.eda
@pytest.mark.timeout(600)
def test_resume_newjobname(gcd_chip):
    """Resume a failed run under a different jobname.

    The first run is forced to fail at placement.  The second run fixes the
    placement setting, enables ('option', 'resume') and sets
    ('option', 'resumename') to 'job1'.  The test then checks that the
    floorplan result keeps its modification time, that the 'job0' place/0
    directory is not empty, and that place/export results are found under
    'job1' but not under 'job0'.
    """
    density_keypath = ('tool', 'openroad', 'task', 'place', 'var', 'place_density')
    late_results = (('def', 'place'), ('gds', 'export'))

    # A non-numeric density makes the place step break.
    gcd_chip.set(*density_keypath, 'asdf', step='place', index='0')
    with pytest.raises(siliconcompiler.SiliconCompilerError):
        gcd_chip.run()

    # Floorplan must have produced a result; remember when it was written.
    floorplan_def = gcd_chip.find_result('def', step='floorplan')
    assert floorplan_def is not None
    floorplan_mtime = os.path.getmtime(floorplan_def)

    # The flow stopped at placement, so neither place nor export has a result.
    for filetype, step in late_results:
        assert gcd_chip.find_result(filetype, step=step) is None

    # Repair the place setting and resume under a new jobname.
    gcd_chip.set(*density_keypath, '0.40', step='place', index='0')
    gcd_chip.set('option', 'resume', True)
    gcd_chip.set('option', 'resumename', 'job1')
    gcd_chip.run()

    # An unchanged timestamp means floorplan was not executed again.
    floorplan_def = gcd_chip.find_result('def', step='floorplan')
    assert floorplan_def is not None
    assert os.path.getmtime(floorplan_def) == floorplan_mtime

    # The place/0 directory of the original job must not have been cleared out.
    old_place_dir = gcd_chip._getworkdir(step='place', index='0', jobname='job0')
    assert os.listdir(old_place_dir)

    # The resumed steps wrote their results under the new jobname ...
    for filetype, step in late_results:
        assert gcd_chip.find_result(filetype, step=step, jobname='job1') is not None

    # ... and nothing was added to the original job.
    for filetype, step in late_results:
        assert gcd_chip.find_result(filetype, step=step, jobname='job0') is None