Merge pull request #2102 from devitocodes/patch-seq-interp

compiler: Patch loop nests a-la PrecomputedInterpolation
devitocodes · Apr 13, 2023 · 0f9b303 · 0f9b303
2 parents 468e493 + 64dda43
commit 0f9b303
Show file tree

Hide file tree

Showing 6 changed files with 111 additions and 8 deletions.
diff --git a/FAQ.md b/FAQ.md
@@ -19,6 +19,7 @@
 - [What are the keys to fast code](#what-are-the-keys-to-fast-code)
 - [As time increases in the finite difference evolution, are wavefield arrays "swapped" as you might see in c/c++ code](#as-time-increases-in-the-finite-difference-evolution-are-wavefield-arrays-swapped-as-you-might-see-in-cc-code)
 - [What units are typically used in Devito examples](#what-units-are-typically-used-in-devito-examples)
+- [Can I subclass basic types such as TimeFunction](#can-i-subclass-basic-types-such-as-timefunction)
 - [How can I change the compilation flags (for example, I want to change the optimization level from -O3 to -O0)](#how-can-i-change-the-compilation-flags-for-example-i-want-to-change-the-optimization-level-from--o3-to--o0)
 - [Is the jitted code IEEE-compliant](#is-the-jitted-code-ieee-compliant)
 - [Can I control the MPI domain decomposition](#can-i-control-the-mpi-domain-decomposition)
@@ -482,6 +483,49 @@ Instead of swapping arrays, devito uses the modulus of a time index to map incre
 [top](#Frequently-Asked-Questions)
 
 
+## Can I subclass basic types such as TimeFunction
+
+Yes, just like we did for our seismic examples, for example, the [PointSource class](https://github.com/devitocodes/devito/blob/master/examples/seismic/source.py). A few caveats are necessary, though.
+
+First, classes such as `Function` or `SparseTimeFunction` are inherently complex. In fact, `SparseTimeFunction` itself is a subclass of `Function`. The whole class hierarchy is modular and well-structured, but at the same time, it's depth and offers several hooks to allow specialization by subclasses -- for example, all methods starting with `__` such as `__init_finalize__` or `__shape_setup__`. It will take some time to digest it. Luckily, you will only need to learn a little of this, at least for simple subclasses. 
+
+Second, you must know that these objects are subjected to so-called reconstruction during compilation. Objects are immutable inside Devito; therefore, even a straightforward symbolic transformation such as `f[x] -> f[y]` boils down to performing a reconstruction, that is, creating a whole new object. Since `f` carries around several attributes (e.g., shape, grid, dimensions), each time Devito performs a reconstruction, we only want to specify which attributes are changing -- not all of them, as it would make the code ugly and incredibly complicated. The solution to this problem is that all the base symbolic types inherit from a common base class called `Reconstructable`; a `Reconstructable` object has two special class attributes, called `__rargs__` and `__rkwargs__`. If a subclass adds a new positional or keyword argument to its `__init_finalize__`, it must also be added to `__rargs__` or `__rkwargs__`, respectively. This will provide Devito with enough information to perform a reconstruction when it's needed during compilation. The following example should clarify:
+
+```
+class Foo(Reconstructable):
+    __rargs__ = ('a', 'b')
+    __rkwargs__ = ('c',)
+
+    def __init__(self, a, b, c=4):
+           self.a = a
+           self.b = b
+           self.c = c
+
+    def __repr__(self):
+        return "x(%d, %d, %d)" % (self.a, self.b, self.c)
+
+class Bar(Foo):
+    __rkwargs__ = Foo.__rkwargs__ + ('d',)
+
+    def __init__(self, *args, d=6, **kwargs)
+            super().__init__(*args, **kwargs)
+            self.d = d
+
+```
+
+You are unlikely to care about how reconstruction works in practice, but here are a few examples for `a = Foo(3, 5)` to give you more context.
+
+```
+a._rebuild() -> "x(3, 5, 4)" (i.e., copy of `a`).
+a._rebuild(4) -> "x(4, 5, 4)"
+a._rebuild(4, 7) -> "x(4, 7, 4)"
+a._rebuild(c=5) -> "x(3, 5, 5)"
+a._rebuild(1, c=7) -> "x(1, 5, 7)"
+```
+
+[top](#Frequently-Asked-Questions)
+
+
 ## How can I change the compilation flags (for example, I want to change the optimization level from -O3 to -O0)
 
 There is currently no API to achieve this straightforwardly. However, there are three workarounds:

diff --git a/devito/ir/clusters/algorithms.py b/devito/ir/clusters/algorithms.py
@@ -347,6 +347,7 @@ class Communications(Queue):
     """
 
     _q_guards_in_key = True
+    _q_properties_in_key = True
 
     B = Symbol(name='⊥')
 
@@ -369,7 +370,7 @@ def callback(self, clusters, prefix, seen=None):
             halo_scheme = HaloScheme(c.exprs, c.ispace)
 
             if not halo_scheme.is_void and \
-               d in halo_scheme.distributed_aindices:
+               c.properties.is_parallel_relaxed(d):
                 points = set()
                 for f in halo_scheme.fmapper:
                     for a in c.scope.getreads(f):
@@ -389,8 +390,9 @@ def callback(self, clusters, prefix, seen=None):
         processed = []
         if exprs:
             ispace = prefix[:prefix.index(d)]
+            properties = prefix.properties.drop(d)
 
-            processed.append(Cluster(exprs, ispace, c.guards))
+            processed.append(Cluster(exprs, ispace, c.guards, properties))
             seen.update(clusters)
 
         processed.extend(clusters)

diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
@@ -370,7 +370,7 @@ def callback():
             lhs = self.obj.subs(self_subs)
             rhs = prod(coeffs) * _expr.subs(dim_subs)
 
-            return [Eq(lhs, lhs + rhs)]
+            return [Inc(lhs, rhs)]
 
         return Interpolation(expr, offset, increment, self_subs, self, callback)
 
@@ -400,6 +400,6 @@ def callback():
                 coeffs.append(self.obj.interpolation_coeffs[p, i, rd])
             rhs = prod(coeffs) * _expr
             _field = _field.subs(dim_subs)
-            return [Eq(_field, _field + rhs.subs(dim_subs))]
+            return [Inc(_field, rhs.subs(dim_subs))]
 
         return Injection(field, expr, offset, self, callback)
diff --git a/examples/seismic/source.py b/examples/seismic/source.py
@@ -199,7 +199,7 @@ class WaveletSource(PointSource):
         Firing time (defaults to 1 / f0)
     """
 
-    __rkwargs__ = PointSource.__rkwargs__ + ['f0', 'a', 'f0']
+    __rkwargs__ = PointSource.__rkwargs__ + ['f0', 'a', 't0']
 
     @classmethod
     def __args_setup__(cls, *args, **kwargs):

diff --git a/requirements.txt b/requirements.txt
@@ -10,7 +10,6 @@ py-cpuinfo<10
 cgen>=2020.1
 codepy>=2019.1
 click<9.0
-codecov
 multidict
 anytree>=2.4.3,<=2.8
 pyrevolve>=2.1.3

diff --git a/tests/test_dle.py b/tests/test_dle.py
@@ -6,8 +6,9 @@
 
 from conftest import assert_structure, assert_blocking, _R, skipif
 from devito import (Grid, Function, TimeFunction, SparseTimeFunction, SpaceDimension,
-                    CustomDimension, Dimension, SubDimension, Eq, Inc, ReduceMax,
-                    Operator, configuration, dimensions, info, cos)
+                    CustomDimension, Dimension, SubDimension,
+                    PrecomputedSparseTimeFunction, Eq, Inc, ReduceMax, Operator,
+                    configuration, dimensions, info, cos)
 from devito.exceptions import InvalidArgument
 from devito.ir.iet import (Iteration, FindNodes, IsPerfectIteration,
                            retrieve_iteration_tree)
@@ -262,6 +263,31 @@ def test_cache_blocking_structure_optrelax_linalg(opt, expected):
     assert np.linalg.norm(F.data) == 128.0
 
 
+def test_cache_blocking_structure_optrelax_prec_inject():
+    grid = Grid(shape=(10, 10))
+    dt = grid.stepping_dim.spacing
+
+    u = TimeFunction(name="u", grid=grid, time_order=2, space_order=4)
+
+    # The values we put it don't matter, we won't run an Operator
+    points = [(0.05, 0.9), (0.01, 0.8), (0.07, 0.84)]
+    gridpoints = [(5, 90), (1, 80), (7, 84)]
+    interpolation_coeffs = np.ndarray(shape=(3, 2, 2))
+    sf = PrecomputedSparseTimeFunction(
+        name='s', grid=grid, r=2, npoint=len(points), nt=5,
+        gridpoints=gridpoints, interpolation_coeffs=interpolation_coeffs
+    )
+
+    eqns = sf.inject(field=u.forward, expr=sf * dt**2)
+
+    op = Operator(eqns, opt=('advanced', {'blockrelax': 'device-aware',
+                                          'openmp': True,
+                                          'par-collapse-ncores': 1}))
+
+    assert_structure(op, ['t,p_s0_blk0,p_s', 't,p_s0_blk0,p_s,rx,ry'],
+                     't,p_s0_blk0,p_s,rx,ry')
+
+
 class TestBlockingParTile(object):
 
     @pytest.mark.parametrize('par_tile,expected', [
@@ -891,6 +917,38 @@ def test_simd_space_invariant(self):
         op.apply()
         assert np.isclose(np.linalg.norm(f.data), 37.1458, rtol=1e-5)
 
+    def test_parallel_prec_inject(self):
+        grid = Grid(shape=(10, 10))
+        dt = grid.stepping_dim.spacing
+
+        u = TimeFunction(name="u", grid=grid, time_order=2, space_order=4)
+
+        # The values we put it don't matter, we won't run an Operator
+        points = [(0.05, 0.9), (0.01, 0.8), (0.07, 0.84)]
+        gridpoints = [(5, 90), (1, 80), (7, 84)]
+        interpolation_coeffs = np.ndarray(shape=(3, 2, 2))
+        sf = PrecomputedSparseTimeFunction(
+            name='s', grid=grid, r=2, npoint=len(points), nt=5,
+            gridpoints=gridpoints, interpolation_coeffs=interpolation_coeffs
+        )
+
+        eqns = sf.inject(field=u.forward, expr=sf * dt**2)
+
+        op0 = Operator(eqns, opt=('advanced', {'openmp': True,
+                                               'par-collapse-ncores': 1}))
+        iterations = FindNodes(Iteration).visit(op0)
+        assert all(not i.pragmas for i in iterations[:2])
+        assert 'omp for collapse(2) schedule(dynamic,chunk_size)'\
+            in iterations[2].pragmas[0].value
+
+        op1 = Operator(eqns, opt=('advanced', {'openmp': True,
+                                               'par-collapse-ncores': 1,
+                                               'par-collapse-work': 1}))
+        iterations = FindNodes(Iteration).visit(op1)
+        assert not iterations[0].pragmas
+        assert 'omp for collapse(3) schedule(dynamic,chunk_size)'\
+            in iterations[1].pragmas[0].value
+
 
 class TestNestedParallelism(object):