Merge pull request #1032 from spcl/simple-fixes
Assortment of simple fixes
tbennun committed Jun 30, 2022
2 parents 7254f7c + d1a13f1 commit 602e5cf
Showing 26 changed files with 760 additions and 208 deletions.
21 changes: 15 additions & 6 deletions dace/codegen/compiled_sdfg.py
@@ -349,7 +349,7 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]:
argnames = []
sig = []
# Type checking
for a, arg, atype in zip(argnames, arglist, argtypes):
for i, (a, arg, atype) in enumerate(zip(argnames, arglist, argtypes)):
if not dtypes.is_array(arg) and isinstance(atype, dt.Array):
if isinstance(arg, list):
print('WARNING: Casting list argument "%s" to ndarray' % a)
@@ -375,8 +375,8 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]:
elif (isinstance(arg, int) and atype.dtype.type == np.uint32 and arg >= 0 and arg <= (1 << 32) - 1):
pass
else:
print('WARNING: Casting scalar argument "%s" from %s to %s' %
(a, type(arg).__name__, atype.dtype.type))
warnings.warn(f'Casting scalar argument "{a}" from {type(arg).__name__} to {atype.dtype.type}')
arglist[i] = atype.dtype.type(arg)
elif (isinstance(atype, dt.Array) and isinstance(arg, np.ndarray)
and atype.dtype.as_numpy_dtype() != arg.dtype):
# Make exception for vector types
@@ -387,7 +387,7 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]:
(arg.dtype, a, atype.dtype.type.__name__))
elif (isinstance(atype, dt.Array) and isinstance(arg, np.ndarray) and arg.base is not None
and not '__return' in a and not Config.get_bool('compiler', 'allow_view_arguments')):
raise TypeError('Passing a numpy view (e.g., sub-array or "A.T") to DaCe '
raise TypeError(f'Passing a numpy view (e.g., sub-array or "A.T") "{a}" to DaCe '
'programs is not allowed in order to retain analyzability. '
'Please make a copy with "numpy.copy(...)". If you know what '
'you are doing, you can override this error in the '
@@ -434,8 +434,17 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]:
atype) if dtypes.is_array(arg) else (arg, actype, atype)
for arg, actype, atype, _ in callparams)

newargs = tuple(
actype(arg) if (not isinstance(arg, ctypes._SimpleCData)) else arg for arg, actype, atype in newargs)
try:
newargs = tuple(
actype(arg) if (not isinstance(arg, ctypes._SimpleCData)) else arg for arg, actype, atype in newargs)
except TypeError:
# Pinpoint bad argument
for i, (arg, actype, _) in enumerate(newargs):
try:
if not isinstance(arg, ctypes._SimpleCData):
actype(arg)
except TypeError as ex:
raise TypeError(f'Invalid type for scalar argument "{callparams[i][3]}": {ex}')

self._lastargs = newargs, initargs
return self._lastargs
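A standalone sketch of the pinpointing pattern introduced in this hunk: convert all arguments in one pass, and only on failure retry them one by one so the error message can name the offending argument. convert_args and its parameter layout are illustrative stand-ins, not DaCe's actual API:

    import ctypes

    def convert_args(callparams):
        # Fast path: convert every (value, ctype, name) triple at once
        try:
            return tuple(actype(arg) for arg, actype, _ in callparams)
        except TypeError:
            # Slow path: retry individually to pinpoint the bad argument
            for arg, actype, name in callparams:
                try:
                    actype(arg)
                except TypeError as ex:
                    raise TypeError(f'Invalid type for scalar argument "{name}": {ex}')

    # A string where a ctypes double is expected triggers the detailed error:
    convert_args([(5, ctypes.c_int, 'N'), ('x', ctypes.c_double, 'alpha')])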
15 changes: 12 additions & 3 deletions dace/codegen/control_flow.py
@@ -53,6 +53,7 @@
})
"""

import ast
from dataclasses import dataclass
from typing import (Callable, Dict, Iterator, List, Optional, Sequence, Set, Tuple, Union)
import sympy as sp
@@ -125,7 +126,7 @@ def as_cpp(self, codegen, symbols) -> str:
else:
# Dispatch empty state in any case in order to register that the
# state was dispatched
self.dispatch_state(self.state)
expr += self.dispatch_state(self.state)

# If any state has no children, it should jump to the end of the SDFG
if not self.last_state and sdfg.out_degree(self.state) == 0:
@@ -550,12 +551,20 @@ def _cases_from_branches(
m = cond.match(sp.Eq(a, b))
if m:
# Obtain original code for variable
astvar = edges[0].data.condition.code[0].value.left
call_or_compare = edges[0].data.condition.code[0].value
if isinstance(call_or_compare, ast.Call):
astvar = call_or_compare.args[0]
else: # Binary comparison
astvar = call_or_compare.left
else:
# Try integer == symbol
m = cond.match(sp.Eq(b, a))
if m:
astvar = edges[0].data.condition.code[0].value.right
call_or_compare = edges[0].data.condition.code[0].value
if isinstance(call_or_compare, ast.Call):
astvar = call_or_compare.args[1]
else: # Binary comparison
astvar = call_or_compare.right
else:
return None

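For context on this hunk: depending on how an inter-state condition was written, its parsed form is either a binary comparison ("a == 1", an ast.Compare) or a SymPy-style call ("Eq(a, 1)", an ast.Call), and the code previously assumed the former. A minimal sketch of the two-way extraction, with switch_variable as a hypothetical helper:

    import ast

    def switch_variable(expr: str) -> ast.AST:
        # Return the AST node of the switch variable from an equality
        # condition, handling both "a == 1" and "Eq(a, 1)" spellings
        node = ast.parse(expr, mode='eval').body
        if isinstance(node, ast.Call):
            return node.args[0]
        return node.left

    print(ast.dump(switch_variable('a == 1')))    # Name(id='a', ...)
    print(ast.dump(switch_variable('Eq(a, 1)')))  # Name(id='a', ...)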
33 changes: 32 additions & 1 deletion dace/codegen/cppunparse.py
@@ -954,7 +954,38 @@ def _Attribute(self, t):
self.write(".")
self.write(t.attr)

def _Call(self, t):
# Replace boolean ops from SymPy
callcmps = {
"Eq": ast.Eq,
"NotEq": ast.NotEq,
"Ne": ast.NotEq,
"Lt": ast.Lt,
"Le": ast.LtE,
"LtE": ast.LtE,
"Gt": ast.Gt,
"Ge": ast.GtE,
"GtE": ast.GtE,
}
callbools = {
"And": ast.And,
"Or": ast.Or,
}

def _Call(self, t: ast.Call):
# Special cases for sympy functions
if isinstance(t.func, ast.Name):
if t.func.id in self.callcmps:
op = self.callcmps[t.func.id]()
self.dispatch(
ast.Compare(left=t.args[0],
ops=[op for _ in range(1, len(t.args))],
comparators=[t.args[i] for i in range(1, len(t.args))]))
return
elif t.func.id in self.callbools:
op = self.callbools[t.func.id]()
self.dispatch(ast.BoolOp(op=op, values=t.args))
return

self.dispatch(t.func)
self.write("(")
comma = False
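The new _Call special cases rewrite SymPy's functional comparisons (Eq, Ne, Lt, ...) and boolean operators (And, Or) into native ast.Compare / ast.BoolOp nodes, so they unparse to C++ operators rather than calls to nonexistent functions. A self-contained sketch of the comparison rewrite (lower_sympy_call is illustrative; ast.unparse needs Python 3.9+):

    import ast

    CALLCMPS = {'Eq': ast.Eq, 'NotEq': ast.NotEq, 'Ne': ast.NotEq,
                'Lt': ast.Lt, 'Le': ast.LtE, 'Gt': ast.Gt, 'Ge': ast.GtE}

    def lower_sympy_call(call: ast.Call) -> ast.Compare:
        # Eq(a, b, c) becomes the chained comparison a == b == c
        opcls = CALLCMPS[call.func.id]
        return ast.Compare(left=call.args[0],
                           ops=[opcls() for _ in call.args[1:]],
                           comparators=list(call.args[1:]))

    call = ast.parse('Eq(a, b)', mode='eval').body
    print(ast.unparse(lower_sympy_call(call)))  # prints: a == b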
5 changes: 4 additions & 1 deletion dace/codegen/targets/cpu.py
@@ -929,7 +929,10 @@ def process_out_memlets(self,
ptrname = cpp.ptr(memlet.data, desc, sdfg, self._frame)
is_global = desc.lifetime in (dtypes.AllocationLifetime.Global,
dtypes.AllocationLifetime.Persistent)
defined_type, _ = self._dispatcher.defined_vars.get(ptrname, is_global=is_global)
try:
defined_type, _ = self._dispatcher.declared_arrays.get(ptrname, is_global=is_global)
except KeyError:
defined_type, _ = self._dispatcher.defined_vars.get(ptrname, is_global=is_global)

if defined_type == DefinedType.Scalar:
mname = cpp.ptr(memlet.data, desc, sdfg, self._frame)
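The fix above prefers the scope-local declared_arrays table and falls back to the general defined_vars table only when the name is absent. Reduced to plain dictionaries (names illustrative, not the dispatcher's real interface):

    def lookup_defined_type(name, declared_arrays, defined_vars):
        # Prefer the declaration-scoped table; fall back to the global one
        try:
            return declared_arrays[name]
        except KeyError:
            return defined_vars[name]

    print(lookup_defined_type('A', {}, {'A': 'Pointer'}))  # falls back: Pointer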
29 changes: 21 additions & 8 deletions dace/codegen/targets/cuda.py
@@ -612,6 +612,8 @@ def increment(streams):
if isinstance(node, nodes.NestedSDFG):
if node.schedule == dtypes.ScheduleType.GPU_Device:
continue
if node.schedule not in dtypes.GPU_SCHEDULES:
max_streams, max_events = self._compute_cudastreams(node.sdfg, max_streams, max_events + 1)
node._cuda_stream = max_streams
node._cs_childpath = False
max_streams = increment(max_streams)
@@ -1243,8 +1245,6 @@ def generate_scope(self, sdfg, dfg_scope, state_id, function_stream, callsite_st
# TODO move this into _get_const_params(dfg_scope)
const_params |= set((str(e.src)) for e in dace.sdfg.dynamic_map_inputs(state, scope_entry))

kernel_args_typed = [('const ' if k in const_params else '') + v.as_arg(name=k) for k, v in kernel_args.items()]

# Store init/exit code streams
old_entry_stream = self.scope_entry_stream
old_exit_stream = self.scope_exit_stream
@@ -1258,11 +1258,12 @@
outer_stream = CodeIOStream()
instr.on_scope_exit(sdfg, state, scope_exit, outer_stream, self.scope_exit_stream, self._globalcode)

# Redefine constant arguments
# Redefine constant arguments and rename arguments to device counterparts
# TODO: This (const behavior and code below) is all a hack.
# Refactor and fix when nested SDFGs are separate functions.
self._dispatcher.defined_vars.enter_scope(scope_entry)
for aname, arg in kernel_args.items():
prototype_kernel_args = {}
for aname, arg in kernel_args.items():  # Build prototype arguments, renaming to device pointer names below
if aname in const_params:
defined_type, ctype = None, None
if aname in sdfg.arrays:
@@ -1283,7 +1284,7 @@ def generate_scope(self, sdfg, dfg_scope, state_id, function_stream, callsite_st
pass
ptrname = cpp.ptr(aname, data_desc, sdfg, self._frame)
if not defined_type:
defined_type, ctype = self._dispatcher.defined_vars.get(ptrname)
defined_type, ctype = self._dispatcher.defined_vars.get(ptrname, is_global=is_global)

CUDACodeGen._in_device_code = True
inner_ptrname = cpp.ptr(aname, data_desc, sdfg, self._frame)
@@ -1293,16 +1294,29 @@ def generate_scope(self, sdfg, dfg_scope, state_id, function_stream, callsite_st
defined_type,
'const %s' % ctype,
allow_shadowing=True)

# Rename argument in kernel prototype as necessary
aname = inner_ptrname
else:
if aname in sdfg.arrays:
data_desc = sdfg.arrays[aname]
ptrname = cpp.ptr(aname, data_desc, sdfg, self._frame)
defined_type, ctype = self._dispatcher.defined_vars.get(ptrname)
is_global = data_desc.lifetime in (dtypes.AllocationLifetime.Global,
dtypes.AllocationLifetime.Persistent)
defined_type, ctype = self._dispatcher.defined_vars.get(ptrname, is_global=is_global)
CUDACodeGen._in_device_code = True
inner_ptrname = cpp.ptr(aname, data_desc, sdfg, self._frame)
CUDACodeGen._in_device_code = False
self._dispatcher.defined_vars.add(inner_ptrname, defined_type, ctype, allow_shadowing=True)

# Rename argument in kernel prototype as necessary
aname = inner_ptrname

prototype_kernel_args[aname] = arg

kernel_args_typed = [('const ' if k in const_params else '') + v.as_arg(name=k)
for k, v in prototype_kernel_args.items()]

kernel_stream = CodeIOStream()
self.generate_kernel_scope(sdfg, dfg_scope, state_id, scope_entry.map, kernel_name, grid_dims, block_dims,
tbmap, dtbmap, kernel_args_typed, self._globalcode, kernel_stream)
@@ -1398,7 +1412,7 @@ def generate_scope(self, sdfg, dfg_scope, state_id, function_stream, callsite_st
void *{kname}_args[] = {{ {kargs} }};
{backend}LaunchKernel((void*){kname}, dim3({gdims}), dim3({bdims}), {kname}_args, {dynsmem}, {stream});'''.format(
kname=kernel_name,
kargs=', '.join(['(void *)&' + arg for arg in kernel_args] + extra_kernel_args),
kargs=', '.join(['(void *)&' + arg for arg in prototype_kernel_args] + extra_kernel_args),
gdims='dace_number_blocks, 1, 1' if is_persistent else ', '.join(_topy(grid_dims)),
bdims=', '.join(_topy(block_dims)),
dynsmem=_topy(dynsmem_size),
@@ -2144,7 +2158,6 @@ def generate_devicelevel_scope(self, sdfg, dfg_scope, state_id, function_stream,
# Rewrite grid conditions
for cond in self._kernel_grid_conditions:
callsite_stream.write(cond, sdfg, state_id, scope_entry)


def generate_node(self, sdfg, dfg, state_id, node, function_stream, callsite_stream):
if self.node_dispatch_predicate(sdfg, dfg, node):
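The common thread in the cuda.py changes: kernel arguments may be renamed to device-side pointer names, so the kernel prototype and the launch-time argument array must both be derived from the same renamed mapping (prototype_kernel_args) rather than the original kernel_args. A reduced sketch of that invariant; build_prototype_args, renames, and the Desc stand-in are illustrative, while as_arg mirrors the call visible in the diff:

    def build_prototype_args(kernel_args, renames, const_params):
        # Derive the typed prototype and the launch argument list from
        # one renamed dictionary so the two can never diverge
        proto = {}
        typed = []
        for name, arg in kernel_args.items():
            devname = renames.get(name, name)
            proto[devname] = arg
            prefix = 'const ' if name in const_params else ''
            typed.append(prefix + arg.as_arg(name=devname))
        launch = ['(void *)&' + n for n in proto]
        return proto, typed, launch

    class Desc:  # stand-in for a DaCe data descriptor
        def __init__(self, ctype):
            self.ctype = ctype
        def as_arg(self, name):
            return f'{self.ctype} * __restrict__ {name}'

    _, typed, launch = build_prototype_args({'A': Desc('double')},
                                            {'A': 'device_A'}, {'A'})
    print(typed)   # ['const double * __restrict__ device_A']
    print(launch)  # ['(void *)&device_A']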
