-
Notifications
You must be signed in to change notification settings - Fork 0
/
stage2.py
536 lines (453 loc) · 25.6 KB
/
stage2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Stage2 code generator: internal API to public API.
stage2 takes in the generated code from stage1, and possible additional
user-defined Fortran interfaces (see ``add_intfs()``). These are treated
on equal footing as stage1 code.
stage2 analyzes the dependencies between the stage1 functions, and writes
wrappers, where all bound symbols (quantities defined by any of the stage1
functions) are automatically computed, by calling other stage1 functions
(recursing where necessary).
Each function generated by stage2 takes in values only for the free symbols
(quantities *not* defined by any of the stage1 functions) encountered anywhere
in its call tree. This makes e.g. ∂²ϕ/∂Bx² "see" the dependencies on e.g.
u0, I4, and εxx.
"Free symbol" is here meant in a mathematical sense; in the programming sense,
these "free symbols" appear in the formal parameter list of the function being
generated, so they are bound names.
Created on Tue Oct 24 14:07:45 2017
@author: Juha Jeronen <juha.jeronen@tut.fi>
"""
import re
import os
from iterutil import uniqify
from util import fold_fortran_code, TextMultiBuffer
import fian # Fortran interface analyzer, for reading stage1 code
# Banner text prepended to the content of every stage2 output file
# (see ``CodeGenerator.run()``). Each line starts with "!" so that the
# banner reads as a comment in the generated Fortran sources.
_outfileheader = \
"""!******************************************************************************
!* Code generated with mgs-galfenol-codegen stage2 *
!* *
!* See https://github.com/TUTElectromechanics/mm-codegen for more information *
!* *
!* This file is part of 'elmer-mgs-galfenol' *
!******************************************************************************
"""
class CodeGenerator:
    """Generate stage2 code (public API) for the stage1 code (internal functions)."""

    # no constructor, this is OOFP with just static and class methods.

    @staticmethod
    def analyze_args(fname, args, lookup, recurse):
        """Split args of stage1 function ``fname`` into bound and free sets.

        Any arg that exists as a key in ``lookup`` is considered to be bound
        to that function.

        All other arguments are considered free.

        Cyclic dependencies in ``lookup`` do not cause infinite recursion:
        an arg that is already being expanded higher up in the current call
        chain is still recorded as bound, but is not expanded again. Use
        ``validate_bound_args()`` on the bound set to reject such input.

        Parameters:
            fname: str
                Name of the stage1 function being analyzed.

            args: tuple of str
                Names of formal parameters of fname. Each arg is, generally:
                  a) the name of another stage1 function, or
                  b) a free argument (anything not defined by a stage1 function).
                See ``fian.analyze_interface()``.

            lookup: dict(str -> list(str))
                Function name (for all functions) to list of its inargs.

            recurse: bool
                If True, recurse into ``args`` with the help of ``lookup``.
                If False, analyze ``args`` themselves only.

        Returns:
            tuple (bound, free), where each item is:
                set of ``(level, arg, fname)`` tuples (argument records, argrecs):
                    where
                        ``level`` (int) is the recursion depth where ``arg`` was
                        seen. A ``level`` of 0 means top level. A level ``i > 0``
                        means "needed as a dependency from level ``i-1``".

                        ``arg`` (str) is the argument name.

                        ``fname`` (str) is the name of the function whose argument
                        ``arg`` is. Can be used to retrieve metadata (such as dtype
                        and dimspec) from the results of the interface analyzer.

                    The same arg may appear at multiple levels; in this case,
                    each level has its own instance in the results. If you need
                    just the names, see ``strip_argrecs()``.
        """
        def analyze(fname, args, level, chain):
            # ``chain``: names of the functions currently being expanded,
            # outermost first. Used only to stop at cyclic dependencies.
            bound = set()
            free = set()
            for arg in args:
                if arg in lookup:  # if we know a stage1 function of this name
                    bound.add((level, arg, fname))
                    # Do not re-expand an arg that is already being expanded
                    # higher up; that would never terminate.
                    if recurse and arg not in chain:
                        b, f = analyze(arg, lookup[arg], level + 1, chain + (arg,))
                        bound.update(b)
                        free.update(f)
                else:
                    free.add((level, arg, fname))
            return (bound, free)
        return analyze(fname, args, level=0, chain=(fname,))

    @staticmethod
    def strip_argrecs(argrecs):
        """Strip all except the argument names from an output set of analyze_args().

        Parameters:
            argrecs: iterable of ``(level, arg, fname)`` tuples

        Returns:
            tuple of str, the ``arg`` items passed through ``uniqify()``.
        """
        return tuple(uniqify([arg for (_, arg, _) in argrecs]))  # level, arg, fname

    @classmethod
    def validate_bound_args(cls, bound, lookup):
        """Validate bound args.

        If validation passes, it proves that the dependencies (as declared
        by the formal parameter names) between the functions in the stage1
        Fortran interface are NOT:

          - recursive (calling itself or any parent on its call stack)
          - mutually recursive (a calling b and b calling a; detected even if
            the calls are in different call chains)

        Parameters:
            bound: set of ``(level, arg, fname)`` (int, str, str) tuples
                as output by ``analyze_args()`` with ``recurse=True``.
                (Note: supply the bound set only!)

            lookup: dict(str -> list(str))
                Function name (for all functions) to list of its inargs.

        Returns:
            None. ``ValueError`` is raised if the validation fails.
        """
        # Check top level; we should be given only bound args.
        fnames = cls.strip_argrecs(bound)  # the **args** are the fnames to validate.
        invalid_args = [fname for fname in fnames if fname not in lookup]
        if invalid_args:
            raise ValueError("Got free top-level fname(s) {invalid}; validator checks only bound args".format(invalid=invalid_args))

        # Sets of callers of each bound var, for mutual recursion detection.
        #
        # The callers of "func" are the content of the call stack just before
        # we push "func" itself onto the stack. This includes "implicit" callers,
        # in the sense that f in f(g(h(x))) implicitly calls h, because g does.
        #
        # (To collect only the explicit callers, we would take only the
        #  current topmost item in the call stack.)
        #
        # The sets of callers are built globally across all call chains;
        # the set of callers of "func" is updated with any new callers
        # of "func" encountered in any call chain.
        #
        callers_of = {}  # str: set

        def update_callers_of(k, more):
            callers_of.setdefault(k, set()).update(more)

        # Validate each chain individually. As a side effect, build callers_of.
        r = []  # for error reporting

        def process(fname, callstack, toplevel_fname):
            # Track *each chain of calls* independently. (E.g. in dwp_dI6 in
            # the 3par model, both I5 and I6, at the same level, depend on exx.)
            if fname not in lookup:  # check bound only; may have free args at deeper levels
                return
            if fname in callstack:  # recursive call, not allowed
                r.append((toplevel_fname, fname, callstack))
                # Stop here: following the cycle any further would recurse
                # forever instead of reporting the error.
                return
            update_callers_of(fname, set(callstack))
            new_callstack = callstack + [fname]
            for dependency in lookup[fname]:
                process(dependency, new_callstack, toplevel_fname)

        for toplevel_fname in fnames:
            process(toplevel_fname, [], toplevel_fname)
        if r:
            raise ValueError("recursion detected; (top-level fname, target, callstack) info follows: {invalid}".format(invalid=", ".join(str(item) for item in r)))

        # Detect mutual recursion between different call chains.
        # a = the thing being called; b = its callers
        mr = [(a, b) for a in callers_of for b in callers_of[a] if a in callers_of[b]]
        if mr:
            raise ValueError("mutual recursion (possibly implicit) detected, function pair(s): {invalid}".format(invalid=mr))

    # When writing stage2 code, arguments of stage1 functions may be:
    #
    #  - Free arguments, for which no stage1 function exists.
    #    - Must be supplied by end user; add to arg list of stage2 wrapper.
    #    - May appear deeper in the call tree; propagate also those to the
    #      top-level wrapper.
    #  - Bound arguments, obtainable as the return value of a stage1 function.
    #    - Call the stage1 function in the wrapper body, then use the result.
    #      Resolve any dependencies recursively.
    #    - Processing in descending order of call tree depth, of the deepest
    #      instance of each bound arg, we always have its dependencies, since:
    #        1) No recursion or mutual recursion in the call tree
    #        2) The leaves of the call tree depend only on free args (at most)
    #    - Optimization: in a single call, we may re-use the stage1 results
    #      as many times as we need, because stage1 consists of pure functions.
    #
    @classmethod
    def write_stage2_object(cls, objtype, stage1_oname, stage1_args, metas, lookup, outbuf):
        """Write stage2 function/subroutine wrapper (public API) for a stage1 function/subroutine.

        Parameters:
            objtype: str, one of:
                "function", "subroutine"

            stage1_oname: str
                Name of the stage1 function/subroutine.

            stage1_args: list(str)
                All arguments of the stage1 function/subroutine, in original order.

            metas: dict(str -> metarec)
                Lookup table of metadata records of all objects.
                key = function name, value = corresponding metarec.

            lookup: dict(str -> list(str))
                Lookup table of intent(in) args of all functions.
                (Functions only, as we do not allow subroutines as dependencies.)

            outbuf: util.TextMultiBuffer
                Where to write the output. Keyed under ".f90" and ".h".

        Returns:
            None. Mutates outbuf instead!

        Raises:
            ValueError: from ``validate_bound_args()``, if the declared
                dependencies are recursive.
            RuntimeError: if, after binding, an argument of a generated call
                is neither a localvar nor a freevar.
        """
        key_impl = ".f90"
        key_intf = ".h"
        key_both = (key_impl, key_intf)

        # Get the dtype of the return value of a stage1 function.
        def rettype_of(fname):
            metarec = metas[fname]  # metadata record for fname
            retval_meta = metarec[fname]  # return value metadata: key in metarec = function name itself
            dtype, _, _ = retval_meta
            return dtype

        # Sort by level (descending), then by name.
        def level_sortkey(argrec):
            level, argname, _ = argrec
            return (-level, argname)

        # Sort by intent, then lexicographically.
        def intent_sortkey(argrec):
            _, argname, fname = argrec
            metarec = metas[fname]  # metadata record for function whose argument this is
            _, intent, _ = metarec[argname]
            return (intent, argname)  # "in" sorts before "inout" and "out" so we're good.

        # Analyze all args, because intent(out) args for subroutines are
        # also free; must be detected as such for the post-binding validation.
        bound_set, free_set = cls.analyze_args(stage1_oname, stage1_args,
                                               lookup, recurse=True)

        # Check that we can handle the declared dependencies between the bound args.
        cls.validate_bound_args(bound_set, lookup)

        # Find the function (in the call chain) in whose arguments each freevar
        # originally appears; needed to access the metadata for the freevar.
        #
        # DANGER: slight oversimplification:
        # We assume all instances of a freevar with the same name mean the same thing!
        #
        arg_to_metasrc = {arg: fname for _, arg, fname in free_set}

        # Order free args by intent ("in" first), then lexicographically.
        # Order bound args by level, descending, for dependency resolution.
        # uniqify(), as the same arg may appear at different levels.
        freevars = cls.strip_argrecs(sorted(free_set, key=intent_sortkey))
        boundvars = cls.strip_argrecs(sorted(bound_set, key=level_sortkey))

        # output: function header
        return_decl = "{rettype} ".format(rettype=rettype_of(stage1_oname)) if objtype == "function" else ""
        stage2_oname = "{name}_public".format(name=stage1_oname)  # name of public API function/subroutine to write
        outbuf.append(key_both, "\n")
        outbuf.append(key_intf, "interface\n")
        outbuf.append(key_both, "{return_decl}{objtype} {name}(".format(return_decl=return_decl,
                                                                        objtype=objtype,
                                                                        name=stage2_oname))
        outbuf.append(key_both, ", ".join(freevars))
        outbuf.append(key_both, ")\n")
        outbuf.append(key_both, "use types\n")

        # output: argument declarations for the public API function (free args only!)
        outbuf.append(key_both, "implicit none\n")
        for fvar in freevars:
            # Get the metadata record for the function whose argument
            # this freevar originally is.
            #
            # DANGER: slight oversimplification:
            # We assume all instances of a freevar with the same name mean the same thing!
            # (So it doesn't matter even if we get the "wrong" metasrc, as long as
            #  it takes this freevar as an argument.)
            #
            metarec = metas[arg_to_metasrc[fvar]]
            dtype, intent, dimspec = metarec[fvar]
            if dimspec is not None:
                outbuf.append(key_both, "{dtype}, intent({intent}), dimension({dimspec}) :: {argname}\n".format(dtype=dtype,
                                                                                                                intent=intent,
                                                                                                                dimspec=dimspec,
                                                                                                                argname=fvar))
            else:
                outbuf.append(key_both, "{dtype}, intent({intent}) :: {argname}\n".format(dtype=dtype,
                                                                                          intent=intent,
                                                                                          argname=fvar))

        # Declare any needed localvars and populate them by calls to
        # the stage1 functions represented by boundvars.
        bound_to_local = {}  # populated later

        def bind_to_locals(names):
            result = [(bound_to_local[name] if name in boundvars else name) for name in names]
            # sanity check: each bound arg in names should now be bound to
            # something, so the result should have only localvars or freevars.
            localvars = bound_to_local.values()
            invalid_args = [name for name in result if name not in localvars and name not in freevars]
            if invalid_args:
                raise RuntimeError("post-binding check: undefined symbol(s) {invalid}, neither in localvars nor in freevars".format(invalid=invalid_args))
            return result

        # We must first process all boundvars to generate all of localvars,
        # but we must output the declarations of all localvars first,
        # before writing the calls to the boundvar functions (that then
        # populate the localvars). Solution: use a temporary buffer.
        lvar_code = ""
        for bvar in boundvars:  # follow the ordering by level, descending (deepest first)
            lvar = "{boundvar}_".format(boundvar=bvar)
            # Write code to call the stage1 function for this boundvar.
            #
            # The descending level ordering makes sure that the arguments of
            # each generated call will contain only vars that already have
            # a localvar, or free vars. In each call, no unbound vars remain.
            #
            # Take the argument list from lookup[], because it preserves
            # the ordering of the args (which are positional in Fortran).
            #
            # TODO later: if no function name matches an input arg,
            # we could check if there is a subroutine that provides
            # it as one of its output args, and call it.
            bvar_args = ", ".join(bind_to_locals(lookup[bvar]))
            lvar_code += "{localvar} = {boundvar}({args})\n".format(localvar=lvar,
                                                                    boundvar=bvar,
                                                                    args=bvar_args)
            bound_to_local[bvar] = lvar  # later calls can bind to this result
        if len(boundvars):
            lvar_code += "\n"

        # output: declare localvars
        for bvar in boundvars:  # use same ordering as boundvars, for readability
            outbuf.append(key_impl, "{rettype} {localvar}\n".format(rettype=rettype_of(bvar),
                                                                    localvar=bound_to_local[bvar]))

        # output: evaluate localvars
        outbuf.append(key_impl, "\n")
        outbuf.append(key_impl, lvar_code)

        # output: call the wrapped stage1 function
        final_args = ", ".join(bind_to_locals(stage1_args))
        if objtype == "function":
            outbuf.append(key_impl, "{retname} = {stage1_name}({args})\n".format(retname=stage2_oname,
                                                                                 stage1_name=stage1_oname,
                                                                                 args=final_args))
        else:  # objtype == "subroutine":
            outbuf.append(key_impl, "call {stage1_name}({args})\n".format(stage1_name=stage1_oname,
                                                                          args=final_args))
        outbuf.append(key_impl, "\n")
        outbuf.append(key_both, "end {objtype}\n".format(objtype=objtype))
        outbuf.append(key_intf, "end interface\n")

    @classmethod
    def run(cls, s1code):
        """Generate the stage2 code (i.e. the public API) based on stage1 code.

        Parameters:
            s1code: [(label, filename, content), ...]
                Stage1 code, in the output format of stage1.CodeGenerator.run().

                If you need additional user-defined interfaces, use add_intfs()
                on s1code before calling run().

        Returns:
            list of tuples, stage2 code. Each item has the format:
                (label, output_filename, content)
        """
        stage1_intf = intfs_only(s1code)
        generated_code_out = []
        for i, (label, input_filename, content) in enumerate(stage1_intf, start=1):
            progress_header_outer = "({iteration:d}/{total:d})".format(iteration=i, total=len(stage1_intf))
            print("stage2: {outer_progress} {label} model: generating public API based on '{file}'".format(outer_progress=progress_header_outer,
                                                                                                          label=label,
                                                                                                          file=input_filename))
            data_funcs, data_subroutines = fian.analyze_interface(content)

            # The bound args lookup table is determined by the functions only,
            # since we do not allow subroutines to appear as a dependency.
            _, lookup, _ = data_funcs

            # Map function/subroutine names to their parameter metadata.
            #
            # Both functions and subroutines need access to the metadata
            # of functions (for processing dependencies).
            def objname_to_meta():
                _, _, fmeta = data_funcs
                _, _, smeta = data_subroutines
                out = fmeta.copy()
                out.update(smeta)
                return out
            metas = objname_to_meta()

            # Text of implementation and interface will be added into named
            # buffers. This is convenient because they are mostly identical.
            outbuf = TextMultiBuffer()

            # Generate public API for functions, then for subroutines.
            for objtype, (objs, _, _) in (("function", data_funcs),
                                          ("subroutine", data_subroutines)):
                for j, (stage1_oname, stage1_args) in enumerate(objs, start=1):
                    progress_header_inner = "({iteration:d}/{total:d})".format(iteration=j, total=len(objs))
                    progress_header = "{outer_progress} {inner_progress}".format(outer_progress=progress_header_outer,
                                                                                 inner_progress=progress_header_inner)
                    print("stage2: {header} {label} model: public API for {objtype} {name}".format(header=progress_header,
                                                                                                  label=label,
                                                                                                  objtype=objtype,
                                                                                                  name=stage1_oname))
                    cls.write_stage2_object(objtype, stage1_oname, stage1_args,
                                            metas, lookup, outbuf)  # mutates outbuf!

            # Generate the final code for the output files.
            outfile_basename = "mgs_{label}".format(label=label)
            for key in sorted(outbuf.keys()):
                outfile_name = "{basename}{file_ext}".format(basename=outfile_basename, file_ext=key)
                final_code = _outfileheader + fold_fortran_code(outbuf[key])
                generated_code_out.append((label, outfile_name, final_code))
        return generated_code_out
##############################################################################
# Main program (stage2)
##############################################################################
def intfs_only(s1code):
    """Filter s1code down to the interface files.

    An item is kept if its filename ends in ".h" (interface); anything else,
    such as the ".f90" implementations, is dropped. Order is preserved.

    Parameters:
        s1code: [(label, filename, content), ...]

    Returns:
        [(label, filename, content), ...], the interface items only.
    """
    interfaces = []
    for label, filename, content in s1code:
        if not filename.endswith(".h"):
            continue
        interfaces.append((label, filename, content))
    return interfaces
def load_stage1_intfs(path):
    """Load interfaces of stage1 generated code.

    Parameters:
        path: str
            Filesystem path to read data from. Relative or absolute.
            No final pathsep. Example: "." for the current directory.

            Will be scanned for filenames of the form "mgs_*_impl.h",
            where the wildcard gives the model label. Only regular files
            directly inside ``path`` whose whole basename has this form
            are loaded.

    Returns:
        [(label,filename,content), ...]
            where
                label: str
                    Label from the model. Deduced from the filename.
                filename: str
                    Basename of the file (no path).
                content: str
                    File content as one string (containing linefeeds).

        The items are ordered by filename.

    Raises:
        ValueError: if no matching file is found in ``path``.
    """
    # Match against the basename only, anchored at both ends (fullmatch).
    # This keeps the path separator out of the regex (a backslash separator
    # would otherwise need escaping), and rejects lookalikes such as
    # "mgs_x_impl.h.bak" or files that merely live in a matching directory.
    pattern = re.compile(r"mgs_(.*)_impl\.h")

    def read(filename):
        with open(filename, "rt", encoding="utf-8") as f:
            return f.read()

    out = []
    for basename in sorted(os.listdir(path)):  # listdir order is arbitrary; sort for reproducibility
        match = pattern.fullmatch(basename)
        if match is None:
            continue
        fullpath = os.path.join(path, basename)
        if not os.path.isfile(fullpath):
            continue
        out.append((match.group(1), basename, read(fullpath)))

    if not out:
        raise ValueError("No stage1 files found; please generate them first by running stage1.py.")
    return out
def add_intfs(s1code, path, basenames):
    """Append user-defined stage1 interfaces to the content of each model.

    Parameters:
        s1code: [(label,filename,content), ...]
            As output by ``load_stage1_intfs()`` or ``stage1.CodeGenerator.run()``.

        path: str
            Filesystem path to read data from. Relative or absolute.
            No final pathsep. Example: "." for the current directory.

        basenames: list(str)
            Basenames of files containing additional user-defined interfaces.

            The text of each file found is concatenated, in the order given,
            to the end of the content of each s1code item, so that it is
            handled on equal footing with the stage1 generated code.

            The tag "{label}" in a basename is replaced by the label of the
            s1code item being processed. A file that does not exist is
            reported and skipped (so a file may exist for only some of
            the models).

    Returns:
        [(label,filename,content), ...]
            where each content has been updated with the additional interfaces.
    """
    augmented = []
    for label, s1_filename, text in s1code:
        for template in basenames:
            extra_name = template.format(label=label)
            extra_path = os.path.join(path, extra_name)
            try:
                with open(extra_path, "rt", encoding="utf-8") as infile:
                    print("stage2: {label} model: reading additional interface '{file}'".format(label=label, file=extra_name))
                    extra = infile.read()
            except FileNotFoundError:
                print("stage2: {label} model: no match for '{file}', ignoring".format(label=label, file=extra_name))
            else:
                text = text + extra
        augmented.append((label, s1_filename, text))
    return augmented