# workflow.py
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2022, Johannes Köster"
__email__ = "johannes.koester@uni-due.de"
__license__ = "MIT"
import re
import os
import sys
import signal
import json
import urllib
from collections import OrderedDict, namedtuple
from itertools import filterfalse, chain
from functools import partial
from operator import attrgetter
import copy
import subprocess
from pathlib import Path
from urllib.request import pathname2url, url2pathname
from snakemake.logging import logger, format_resources, format_resource_names
from snakemake.rules import Rule, Ruleorder, RuleProxy
from snakemake.exceptions import (
    CreateCondaEnvironmentException,
    RuleException,
    CreateRuleException,
    UnknownRuleException,
    NoRulesException,
    print_exception,
    WorkflowError,
)
from snakemake.shell import shell
from snakemake.dag import DAG
from snakemake.scheduler import JobScheduler
from snakemake.parser import parse
import snakemake.io
from snakemake.io import (
    protected,
    temp,
    temporary,
    ancient,
    directory,
    expand,
    dynamic,
    glob_wildcards,
    flag,
    not_iterable,
    touch,
    unpack,
    local,
    pipe,
    service,
    repeat,
    report,
    multiext,
    IOFile,
)
from snakemake.persistence import Persistence
from snakemake.utils import update_config
from snakemake.script import script
from snakemake.notebook import notebook
from snakemake.wrapper import wrapper
from snakemake.cwl import cwl
from snakemake.template_rendering import render_template
import snakemake.wrapper
from snakemake.common import (
    Mode,
    bytesto,
    ON_WINDOWS,
    is_local_file,
    parse_uri,
    Rules,
    Scatter,
    Gather,
    smart_join,
    NOTHING_TO_BE_DONE_MSG,
)
from snakemake.utils import simplify_path
from snakemake.checkpoints import Checkpoint, Checkpoints
from snakemake.resources import DefaultResources
from snakemake.caching.local import OutputFileCache as LocalOutputFileCache
from snakemake.caching.remote import OutputFileCache as RemoteOutputFileCache
from snakemake.modules import ModuleInfo, WorkflowModifier, get_name_modifier_func
from snakemake.ruleinfo import RuleInfo
from snakemake.sourcecache import (
    GenericSourceFile,
    LocalSourceFile,
    SourceCache,
    SourceFile,
    infer_source_file,
)
from snakemake.deployment.conda import Conda, is_conda_env_file
from snakemake import sourcecache
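
# The Workflow object is the central representation of a Snakemake workflow:
# it holds the parsed rules, the global resource constraints, the config
# dictionary, and all deployment-related settings (conda, singularity,
# environment modules). One instance is created per main Snakefile.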
class Workflow:
    def __init__(
        self,
        snakefile=None,
        jobscript=None,
        overwrite_shellcmd=None,
        overwrite_config=None,
        overwrite_workdir=None,
        overwrite_configfiles=None,
        overwrite_clusterconfig=None,
        overwrite_threads=None,
        overwrite_scatter=None,
        overwrite_groups=None,
        overwrite_resources=None,
        group_components=None,
        config_args=None,
        debug=False,
        verbose=False,
        use_conda=False,
        conda_frontend=None,
        conda_prefix=None,
        use_singularity=False,
        use_env_modules=False,
        singularity_prefix=None,
        singularity_args="",
        shadow_prefix=None,
        scheduler_type="ilp",
        scheduler_ilp_solver=None,
        mode=Mode.default,
        wrapper_prefix=None,
        printshellcmds=False,
        restart_times=None,
        attempt=1,
        default_remote_provider=None,
        default_remote_prefix="",
        run_local=True,
        assume_shared_fs=True,
        default_resources=None,
        cache=None,
        nodes=1,
        cores=1,
        resources=None,
        conda_cleanup_pkgs=None,
        edit_notebook=False,
        envvars=None,
        max_inventory_wait_time=20,
        conda_not_block_search_path_envvars=False,
        execute_subworkflows=True,
        scheduler_solver_path=None,
        conda_base_path=None,
        check_envvars=True,
        max_threads=None,
        all_temp=False,
        local_groupid="local",
        keep_metadata=True,
        latency_wait=3,
    ):
        """
        Create the controller.
        """
        self.global_resources = dict() if resources is None else resources
        self.global_resources["_cores"] = cores
        self.global_resources["_nodes"] = nodes

        self._rules = OrderedDict()
        self.default_target = None
        self._workdir = None
        self.overwrite_workdir = overwrite_workdir
        self.workdir_init = os.path.abspath(os.curdir)
        self._ruleorder = Ruleorder()
        self._localrules = set()
        self.linemaps = dict()
        self.rule_count = 0
        self.basedir = os.path.dirname(snakefile)
        self.main_snakefile = os.path.abspath(snakefile)
        self.included = []
        self.included_stack = []
        self.jobscript = jobscript
        self.persistence = None
        self._subworkflows = dict()
        self.overwrite_shellcmd = overwrite_shellcmd
        self.overwrite_config = overwrite_config or dict()
        self.overwrite_configfiles = overwrite_configfiles
        self.overwrite_clusterconfig = overwrite_clusterconfig or dict()
        self.overwrite_threads = overwrite_threads or dict()
        self.overwrite_resources = overwrite_resources or dict()
        self.config_args = config_args
        self.immediate_submit = None
        self._onsuccess = lambda log: None
        self._onerror = lambda log: None
        self._onstart = lambda log: None
        self._wildcard_constraints = dict()
        self.debug = debug
        self.verbose = verbose
        self._rulecount = 0
        self.use_conda = use_conda
        self.conda_frontend = conda_frontend
        self.conda_prefix = conda_prefix
        self.use_singularity = use_singularity
        self.use_env_modules = use_env_modules
        self.singularity_prefix = singularity_prefix
        self.singularity_args = singularity_args
        self.shadow_prefix = shadow_prefix
        self.scheduler_type = scheduler_type
        self.scheduler_ilp_solver = scheduler_ilp_solver
        self.global_container_img = None
        self.global_is_containerized = False
        self.mode = mode
        self.wrapper_prefix = wrapper_prefix
        self.printshellcmds = printshellcmds
        self.restart_times = restart_times
        self.attempt = attempt
        self.default_remote_provider = default_remote_provider
        self.default_remote_prefix = default_remote_prefix
        self.configfiles = (
            [] if overwrite_configfiles is None else list(overwrite_configfiles)
        )
        self.run_local = run_local
        self.assume_shared_fs = assume_shared_fs
        self.report_text = None
        self.conda_cleanup_pkgs = conda_cleanup_pkgs
        self.edit_notebook = edit_notebook
        # environment variables to pass to jobs
        # These are defined via the "envvars:" syntax in the Snakefile itself
        self.envvars = set()
        self.overwrite_groups = overwrite_groups or dict()
        self.group_components = group_components or dict()
        self._scatter = dict(overwrite_scatter or dict())
        self.overwrite_scatter = overwrite_scatter or dict()
        self.conda_not_block_search_path_envvars = conda_not_block_search_path_envvars
        self.execute_subworkflows = execute_subworkflows
        self.modules = dict()
        self.sourcecache = SourceCache()
        self.scheduler_solver_path = scheduler_solver_path
        self._conda_base_path = conda_base_path
        self.check_envvars = check_envvars
        self.max_threads = max_threads
        self.all_temp = all_temp
        self.scheduler = None
        self.local_groupid = local_groupid
        self.keep_metadata = keep_metadata
        self.latency_wait = latency_wait

        _globals = globals()
        _globals["workflow"] = self
        _globals["cluster_config"] = copy.deepcopy(self.overwrite_clusterconfig)
        _globals["rules"] = Rules()
        _globals["checkpoints"] = Checkpoints()
        _globals["scatter"] = Scatter()
        _globals["gather"] = Gather()
        _globals["github"] = sourcecache.GithubFile
        _globals["gitlab"] = sourcecache.GitlabFile

        self.vanilla_globals = dict(_globals)
        self.modifier_stack = [WorkflowModifier(self, globals=_globals)]

        self.enable_cache = False
        if cache is not None:
            self.enable_cache = True
            self.cache_rules = set(cache)
            if self.default_remote_provider is not None:
                self.output_file_cache = RemoteOutputFileCache(
                    self.default_remote_provider
                )
            else:
                self.output_file_cache = LocalOutputFileCache()
        else:
            self.output_file_cache = None
            self.cache_rules = set()

        if default_resources is not None:
            self.default_resources = default_resources
        else:
            # only _cores, _nodes, and _tmpdir
            self.default_resources = DefaultResources(mode="bare")

        self.iocache = snakemake.io.IOCache(max_inventory_wait_time)

        self.globals["config"] = copy.deepcopy(self.overwrite_config)

        if envvars is not None:
            self.register_envvars(*envvars)

    @property
    def conda_base_path(self):
        if self._conda_base_path:
            return self._conda_base_path
        if self.use_conda:
            try:
                return Conda().prefix_path
            except CreateCondaEnvironmentException:
                # Return no preset conda base path now and report error later in jobs.
                return None
        else:
            return None
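
    # Workflow modifiers form a stack: the base WorkflowModifier created in
    # __init__ holds the vanilla globals, and the module machinery (see
    # snakemake.modules) pushes additional modifiers on top while a module is
    # being included. The two properties below always expose the innermost one.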
    @property
    def modifier(self):
        return self.modifier_stack[-1]

    @property
    def globals(self):
        return self.modifier.globals

    def lint(self, json=False):
        from snakemake.linting.rules import RuleLinter
        from snakemake.linting.snakefiles import SnakefileLinter

        json_snakefile_lints, snakefile_linted = SnakefileLinter(
            self, self.included
        ).lint(json=json)
        json_rule_lints, rules_linted = RuleLinter(self, self.rules).lint(json=json)
        linted = snakefile_linted or rules_linted

        if json:
            import json

            print(
                json.dumps(
                    {"snakefiles": json_snakefile_lints, "rules": json_rule_lints},
                    indent=2,
                )
            )
        else:
            if not linted:
                logger.info("Congratulations, your workflow is in a good condition!")
        return linted

    def is_cached_rule(self, rule: Rule):
        return rule.name in self.cache_rules
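
    # Collect everything needed to reproduce this workflow on another machine:
    # included snakefiles, rule scripts/notebooks, local conda env files,
    # config files, and (if available) all git-managed files. This is used
    # for remote execution without a shared filesystem.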
    def get_sources(self):
        files = set()

        def local_path(f):
            if not isinstance(f, SourceFile) and is_local_file(f):
                return f
            if isinstance(f, LocalSourceFile):
                return f.get_path_or_uri()

        def norm_rule_relpath(f, rule):
            if not os.path.isabs(f):
                f = os.path.join(rule.basedir, f)
            return os.path.relpath(f)

        # get registered sources
        for f in self.included:
            f = local_path(f)
            if f:
                try:
                    f = os.path.relpath(f)
                except ValueError:
                    if ON_WINDOWS:
                        pass  # relpath doesn't work on win if files are on different drive
                    else:
                        raise
                files.add(f)
        for rule in self.rules:
            script_path = rule.script or rule.notebook
            if script_path:
                script_path = norm_rule_relpath(script_path, rule)
                files.add(script_path)
                script_dir = os.path.dirname(script_path)
                files.update(
                    os.path.join(dirpath, f)
                    for dirpath, _, files in os.walk(script_dir)
                    for f in files
                )
            if rule.conda_env and rule.conda_env.is_file:
                f = local_path(rule.conda_env.file)
                if f:
                    # url points to a local env file
                    env_path = norm_rule_relpath(f, rule)
                    files.add(env_path)

        for f in self.configfiles:
            files.add(f)

        # get git-managed files
        # TODO allow a manifest file as alternative
        try:
            out = subprocess.check_output(
                ["git", "ls-files", "--recurse-submodules", "."], stderr=subprocess.PIPE
            )
            for f in out.decode().split("\n"):
                if f:
                    files.add(os.path.relpath(f))
        except subprocess.CalledProcessError as e:
            if "fatal: not a git repository" in e.stderr.decode().lower():
                logger.warning(
                    "Unable to retrieve additional files from git. "
                    "This is not a git repository."
                )
            else:
                raise WorkflowError(
                    "Error executing git (Snakemake requires git to be installed for "
                    "remote execution without shared filesystem):\n" + e.stderr.decode()
                )

        return files

    def check_source_sizes(self, filename, warning_size_gb=0.2):
        """A helper function to check the file size and return the filename
        to the calling function. Since we encourage source packages to be
        small, a warning is issued above the given size (0.2 GB, i.e. 200 MB,
        by default).
        """
        gb = bytesto(os.stat(filename).st_size, "g")
        if gb > warning_size_gb:
            logger.warning(
                "File {} (size {} GB) is greater than the {} GB suggested size. "
                "Consider uploading larger files to storage first.".format(
                    filename, gb, warning_size_gb
                )
            )
        return filename

    @property
    def subworkflows(self):
        return self._subworkflows.values()

    @property
    def rules(self):
        return self._rules.values()

    @property
    def cores(self):
        if self._cores is None:
            raise WorkflowError(
                "Workflow requires a total number of cores to be defined (e.g. because a "
                "rule defines its number of threads as a fraction of a total number of cores). "
                "Please set it with --cores N, with N being the desired number of cores. "
                "Consider using this in combination with --max-threads to avoid "
                "jobs with too many threads for your setup. Also make sure to perform "
                "a dry-run first."
            )
        return self._cores

    @property
    def _cores(self):
        return self.global_resources["_cores"]

    @property
    def nodes(self):
        return self.global_resources["_nodes"]
    @property
    def concrete_files(self):
        return (
            file
            for rule in self.rules
            for file in chain(rule.input, rule.output)
            if not callable(file) and not file.contains_wildcard()
        )

    def check(self):
        for clause in self._ruleorder:
            for rulename in clause:
                if not self.is_rule(rulename):
                    raise UnknownRuleException(
                        rulename, prefix="Error in ruleorder definition."
                    )

    def add_rule(
        self,
        name=None,
        lineno=None,
        snakefile=None,
        checkpoint=False,
        allow_overwrite=False,
    ):
        """
        Add a rule.
        """
        is_overwrite = self.is_rule(name)
        if not allow_overwrite and is_overwrite:
            raise CreateRuleException(
                "The name {} is already used by another rule".format(name)
            )
        rule = Rule(name, self, lineno=lineno, snakefile=snakefile)
        self._rules[rule.name] = rule
        if not is_overwrite:
            self.rule_count += 1
        if not self.default_target:
            self.default_target = rule.name
        return name

    def is_rule(self, name):
        """
        Return True if name is the name of a rule.

        Arguments
        name -- a name
        """
        return name in self._rules

    def get_rule(self, name):
        """
        Get rule by name.

        Arguments
        name -- the name of the rule
        """
        if not self._rules:
            raise NoRulesException()
        if name not in self._rules:
            raise UnknownRuleException(name)
        return self._rules[name]

    def list_rules(self, only_targets=False):
        rules = self.rules
        if only_targets:
            rules = filterfalse(Rule.has_wildcards, rules)
        for rule in rules:
            logger.rule_info(name=rule.name, docstring=rule.docstring)

    def list_resources(self):
        for resource in set(
            resource for rule in self.rules for resource in rule.resources
        ):
            if resource not in "_cores _nodes".split():
                logger.info(resource)
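
    # A rule is executed locally (i.e. not submitted to a cluster) if it is
    # not part of a group and is either declared in a "localrules" directive,
    # has nothing to run (norun), or is a template-engine rule.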
    def is_local(self, rule):
        return rule.group is None and (
            rule.name in self._localrules or rule.norun or rule.is_template_engine
        )

    def check_localrules(self):
        undefined = self._localrules - set(rule.name for rule in self.rules)
        if undefined:
            logger.warning(
                "localrules directive specifies rules that are not "
                "present in the Snakefile:\n{}\n".format(
                    "\n".join(map("\t{}".format, undefined))
                )
            )

    def inputfile(self, path):
        """Mark file as being an input file of the workflow.

        This also means that any --default-remote-provider/prefix settings
        are applied to this file. The file is returned as an _IOFile object,
        such that it can e.g. be transparently opened with _IOFile.open().
        """
        if isinstance(path, Path):
            path = str(path)
        if self.default_remote_provider is not None:
            path = self.modifier.modify_path(path)
        return IOFile(path)
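
    # Sketch of how a driver might call execute() (illustrative only; the
    # actual entry point is the snakemake() API function, which derives these
    # arguments from the command line and parses the main snakefile via
    # include() before executing):
    #
    #   workflow = Workflow(snakefile="Snakefile", cores=4)
    #   ...  # parse rules from workflow.main_snakefile
    #   success = workflow.execute(
    #       targets=["all"], dryrun=True, updated_files=[]
    #   )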
    def execute(
        self,
        targets=None,
        dryrun=False,
        generate_unit_tests=None,
        touch=False,
        scheduler_type=None,
        scheduler_ilp_solver=None,
        local_cores=1,
        forcetargets=False,
        forceall=False,
        forcerun=None,
        until=None,
        omit_from=None,
        prioritytargets=None,
        quiet=False,
        keepgoing=False,
        printshellcmds=False,
        printreason=False,
        printdag=False,
        cluster=None,
        cluster_sync=None,
        jobname=None,
        immediate_submit=False,
        ignore_ambiguity=False,
        printrulegraph=False,
        printfilegraph=False,
        printd3dag=False,
        drmaa=None,
        drmaa_log_dir=None,
        kubernetes=None,
        tibanna=None,
        tibanna_sfn=None,
        google_lifesciences=None,
        google_lifesciences_regions=None,
        google_lifesciences_location=None,
        google_lifesciences_cache=False,
        tes=None,
        precommand="",
        preemption_default=None,
        preemptible_rules=None,
        tibanna_config=False,
        container_image=None,
        stats=None,
        force_incomplete=False,
        ignore_incomplete=False,
        list_version_changes=False,
        list_code_changes=False,
        list_input_changes=False,
        list_params_changes=False,
        list_untracked=False,
        list_conda_envs=False,
        summary=False,
        archive=None,
        delete_all_output=False,
        delete_temp_output=False,
        detailed_summary=False,
        wait_for_files=None,
        nolock=False,
        unlock=False,
        notemp=False,
        nodeps=False,
        cleanup_metadata=None,
        conda_cleanup_envs=False,
        cleanup_shadow=False,
        cleanup_scripts=True,
        subsnakemake=None,
        updated_files=None,
        keep_target_files=False,
        keep_shadow=False,
        keep_remote_local=False,
        allowed_rules=None,
        max_jobs_per_second=None,
        max_status_checks_per_second=None,
        greediness=1.0,
        no_hooks=False,
        force_use_threads=False,
        conda_create_envs_only=False,
        cluster_status=None,
        cluster_cancel=None,
        cluster_cancel_nargs=None,
        cluster_sidecar=None,
        report=None,
        report_stylesheet=None,
        export_cwl=False,
        batch=None,
        keepincomplete=False,
    ):
        self.check_localrules()
        self.immediate_submit = immediate_submit
        self.cleanup_scripts = cleanup_scripts

        def rules(items):
            return map(self._rules.__getitem__, filter(self.is_rule, items))

        if keep_target_files:

            def files(items):
                return filterfalse(self.is_rule, items)

        else:

            def files(items):
                relpath = (
                    lambda f: f
                    if os.path.isabs(f) or f.startswith("root://")
                    else os.path.relpath(f)
                )
                return map(relpath, filterfalse(self.is_rule, items))

        if not targets:
            targets = (
                [self.default_target] if self.default_target is not None else list()
            )

        if prioritytargets is None:
            prioritytargets = list()
        if forcerun is None:
            forcerun = list()
        if until is None:
            until = list()
        if omit_from is None:
            omit_from = list()

        priorityrules = set(rules(prioritytargets))
        priorityfiles = set(files(prioritytargets))
        forcerules = set(rules(forcerun))
        forcefiles = set(files(forcerun))
        untilrules = set(rules(until))
        untilfiles = set(files(until))
        omitrules = set(rules(omit_from))
        omitfiles = set(files(omit_from))
        targetrules = set(
            chain(
                rules(targets),
                filterfalse(Rule.has_wildcards, priorityrules),
                filterfalse(Rule.has_wildcards, forcerules),
                filterfalse(Rule.has_wildcards, untilrules),
            )
        )
        targetfiles = set(chain(files(targets), priorityfiles, forcefiles, untilfiles))
        if ON_WINDOWS:
            targetfiles = set(tf.replace(os.sep, os.altsep) for tf in targetfiles)

        if forcetargets:
            forcefiles.update(targetfiles)
            forcerules.update(targetrules)

        rules = self.rules
        if allowed_rules:
            allowed_rules = set(allowed_rules)
            rules = [rule for rule in rules if rule.name in allowed_rules]

        if wait_for_files is not None:
            try:
                snakemake.io.wait_for_files(
                    wait_for_files, latency_wait=self.latency_wait
                )
            except IOError as e:
                logger.error(str(e))
                return False

        dag = DAG(
            self,
            rules,
            dryrun=dryrun,
            targetfiles=targetfiles,
            targetrules=targetrules,
            # when cleaning up conda, we should enforce all possible jobs
            # since their envs shall not be deleted
            forceall=forceall or conda_cleanup_envs,
            forcefiles=forcefiles,
            forcerules=forcerules,
            priorityfiles=priorityfiles,
            priorityrules=priorityrules,
            untilfiles=untilfiles,
            untilrules=untilrules,
            omitfiles=omitfiles,
            omitrules=omitrules,
            ignore_ambiguity=ignore_ambiguity,
            force_incomplete=force_incomplete,
            ignore_incomplete=ignore_incomplete
            or printdag
            or printrulegraph
            or printfilegraph,
            notemp=notemp,
            keep_remote_local=keep_remote_local,
            batch=batch,
        )
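
        # The Persistence layer tracks job metadata (recorded code, input and
        # params versions) and the working-directory lock; warn_only relaxes
        # locking for read-only operations such as dry-runs, summaries and
        # graph printing.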
        self.persistence = Persistence(
            nolock=nolock,
            dag=dag,
            conda_prefix=self.conda_prefix,
            singularity_prefix=self.singularity_prefix,
            shadow_prefix=self.shadow_prefix,
            warn_only=dryrun
            or printrulegraph
            or printfilegraph
            or printdag
            or summary
            or archive
            or list_version_changes
            or list_code_changes
            or list_input_changes
            or list_params_changes
            or list_untracked
            or delete_all_output
            or delete_temp_output,
        )

        if self.mode in [Mode.subprocess, Mode.cluster]:
            self.persistence.deactivate_cache()

        if cleanup_metadata:
            for f in cleanup_metadata:
                self.persistence.cleanup_metadata(f)
            return True

        if unlock:
            try:
                self.persistence.cleanup_locks()
                logger.info("Unlocking working directory.")
                return True
            except IOError:
                logger.error(
                    "Error: Unlocking the directory {} failed. Maybe "
                    "you don't have the permissions?".format(os.getcwd())
                )
                return False

        logger.info("Building DAG of jobs...")
        dag.init()
        dag.update_checkpoint_dependencies()
        dag.check_dynamic()

        try:
            self.persistence.lock()
        except IOError:
            logger.error(
                "Error: Directory cannot be locked. Please make "
                "sure that no other Snakemake process is trying to create "
                "the same files in the following directory:\n{}\n"
                "If you are sure that no other "
                "instances of snakemake are running on this directory, "
                "the remaining lock was likely caused by a kill signal or "
                "a power loss. It can be removed with "
                "the --unlock argument.".format(os.getcwd())
            )
            return False

        if cleanup_shadow:
            self.persistence.cleanup_shadow()
            return True
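
        # Subworkflows are executed first, so that the files they provide
        # exist before the main DAG is finalized; files they updated are
        # recorded so that depending jobs are re-run.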
        if (
            self.subworkflows
            and self.execute_subworkflows
            and not printdag
            and not printrulegraph
            and not printfilegraph
        ):
            # backup globals
            globals_backup = dict(self.globals)
            # execute subworkflows
            for subworkflow in self.subworkflows:
                subworkflow_targets = subworkflow.targets(dag)
                logger.debug(
                    "Files requested from subworkflow:\n {}".format(
                        "\n ".join(subworkflow_targets)
                    )
                )
                updated = list()
                if subworkflow_targets:
                    logger.info("Executing subworkflow {}.".format(subworkflow.name))
                    if not subsnakemake(
                        subworkflow.snakefile,
                        workdir=subworkflow.workdir,
                        targets=subworkflow_targets,
                        cores=self._cores,
                        nodes=self.nodes,
                        configfiles=[subworkflow.configfile]
                        if subworkflow.configfile
                        else None,
                        updated_files=updated,
                    ):
                        return False
                    dag.updated_subworkflow_files.update(
                        subworkflow.target(f) for f in updated
                    )
                else:
                    logger.info(
                        "Subworkflow {}: {}".format(
                            subworkflow.name, NOTHING_TO_BE_DONE_MSG
                        )
                    )
            if self.subworkflows:
                logger.info("Executing main workflow.")
                # rescue globals
                self.globals.update(globals_backup)

        dag.postprocess(update_needrun=False)
        if not dryrun:
            # deactivate IOCache such that from now on we always get updated
            # size, existence and mtime information
            # ATTENTION: this may never be removed without really good reason.
            # Otherwise weird things may happen.
            self.iocache.deactivate()
            # clear and deactivate persistence cache, from now on we want to see updates
            self.persistence.deactivate_cache()

        if nodeps:
            missing_input = [
                f
                for job in dag.targetjobs
                for f in job.input
                if dag.needrun(job) and not os.path.exists(f)
            ]
            if missing_input:
                logger.error(
                    "Dependency resolution disabled (--nodeps) "
                    "but missing input "
                    "files detected. If this happens on a cluster, please make sure "
                    "that you handle the dependencies yourself or turn off "
                    "--immediate-submit. Missing input files:\n{}".format(
                        "\n".join(missing_input)
                    )
                )
                return False

        if immediate_submit and any(dag.checkpoint_jobs):
            logger.error(
                "Immediate submit mode (--immediate-submit) may not be used for workflows "
                "with checkpoint jobs, as the dependencies cannot be determined before "
                "execution in such cases."
            )
            return False

        updated_files.extend(f for job in dag.needrun_jobs for f in job.output)
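
        # From here on, handle the special modes that short-circuit normal
        # execution: each branch performs its task (unit test generation,
        # CWL export, report, graph/summary printing, cleanup, listings)
        # and returns without scheduling any jobs.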
        if generate_unit_tests:
            from snakemake import unit_tests

            path = generate_unit_tests
            deploy = []
            if self.use_conda:
                deploy.append("conda")
            if self.use_singularity:
                deploy.append("singularity")
            unit_tests.generate(
                dag, path, deploy, configfiles=self.overwrite_configfiles
            )
            return True
        elif export_cwl:
            from snakemake.cwl import dag_to_cwl
            import json

            with open(export_cwl, "w") as cwl:
                json.dump(dag_to_cwl(dag), cwl, indent=4)
            return True
        elif report:
            from snakemake.report import auto_report

            auto_report(dag, report, stylesheet=report_stylesheet)
            return True
        elif printd3dag:
            dag.d3dag()
            return True
        elif printdag:
            print(dag)
            return True
        elif printrulegraph:
            print(dag.rule_dot())
            return True
        elif printfilegraph:
            print(dag.filegraph_dot())
            return True
        elif summary:
            print("\n".join(dag.summary(detailed=False)))
            return True
        elif detailed_summary:
            print("\n".join(dag.summary(detailed=True)))
            return True
        elif archive:
            dag.archive(archive)
            return True
        elif delete_all_output:
            dag.clean(only_temp=False, dryrun=dryrun)
            return True
        elif delete_temp_output:
            dag.clean(only_temp=True, dryrun=dryrun)
            return True
        elif list_version_changes:
            items = dag.get_outputs_with_changes("version")
            if items:
                print(*items, sep="\n")
            return True
        elif list_code_changes:
            items = dag.get_outputs_with_changes("code")
            if items:
                print(*items, sep="\n")
            return True
        elif list_input_changes:
            items = dag.get_outputs_with_changes("input")
            if items:
                print(*items, sep="\n")
            return True
        elif list_params_changes:
            items = dag.get_outputs_with_changes("params")
            if items:
                print(*items, sep="\n")
            return True
        elif list_untracked:
            dag.list_untracked()
            return True

        if self.use_singularity and self.assume_shared_fs:
            dag.pull_container_imgs(
                dryrun=dryrun or list_conda_envs, quiet=list_conda_envs
            )
        if self.use_conda:
            dag.create_conda_envs(
                dryrun=dryrun or list_conda_envs or conda_cleanup_envs,
                quiet=list_conda_envs,
            )
            if conda_create_envs_only:
                return True

        if list_conda_envs:
            print("environment", "container", "location", sep="\t")
            for env in set(job.conda_env for job in dag.jobs):
                if env and not env.is_named:
                    print(
                        env.file.simplify_path(),
                        env.container_img_url or "",
                        simplify_path(env.address),
                        sep="\t",
                    )
            return True

        if conda_cleanup_envs:
            self.persistence.conda_cleanup_envs()
            return True
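
        # Everything else is a real execution: hand the DAG to the scheduler,
        # which runs jobs (locally or via the configured executor) while
        # respecting the global resource constraints.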
        self.scheduler = JobScheduler(
            self,
            dag,
            local_cores=local_cores,
            dryrun=dryrun,