Persistent fusion view fix #1552

Open · wants to merge 4 commits into master
48 changes: 27 additions & 21 deletions dace/transformation/subgraph/gpu_persistent_fusion.py
@@ -78,16 +78,12 @@
return False

# for now exactly one inner and one outer entry state
entry_states_in, entry_states_out = \
GPUPersistentKernel.get_entry_states(sdfg, subgraph)
entry_states_in, entry_states_out = GPUPersistentKernel.get_entry_states(sdfg, subgraph)

if len(entry_states_in) != 1 or len(entry_states_out) > 1:
return False

entry_state_in = entry_states_in.pop()
if len(entry_states_out) == 1 \
and len(sdfg.edges_between(entry_states_out.pop(),
entry_state_in)
) > 1:
if len(entry_states_out) == 1 and len(sdfg.edges_between(entry_states_out.pop(), entry_state_in)) > 1:

return False

# for now only one outside state allowed, multiple inner exit states
@@ -118,10 +114,8 @@
_, exit_states_out = self.get_exit_states(sdfg, subgraph)

entry_state_in = entry_states_in.pop()
entry_state_out = entry_states_out.pop() \
if len(entry_states_out) > 0 else None
exit_state_out = exit_states_out.pop() \
if len(exit_states_out) > 0 else None
entry_state_out = entry_states_out.pop() if len(entry_states_out) > 0 else None
exit_state_out = exit_states_out.pop() if len(exit_states_out) > 0 else None

launch_state = None
entry_guard_state = None
@@ -194,12 +188,10 @@
sdfg.remove_nodes_from(subgraph.nodes())
other_states = sdfg.nodes()

if entry_state_out is not None \
and len(sdfg.edges_between(entry_state_out, launch_state)) == 0:
if entry_state_out is not None and len(sdfg.edges_between(entry_state_out, launch_state)) == 0:

sdfg.add_edge(entry_state_out, launch_state, InterstateEdge())

if exit_state_out is not None \
and len(sdfg.edges_between(launch_state, exit_state_out)) == 0:
if exit_state_out is not None and len(sdfg.edges_between(launch_state, exit_state_out)) == 0:

sdfg.add_edge(launch_state, exit_state_out, InterstateEdge())

# Handle data for kernel
@@ -208,17 +200,32 @@
other_data = set(node.data for state in other_states for node in state.nodes()
if isinstance(node, nodes.AccessNode))

# Add code->code scalar memlets
for state in kernel_sdfg.nodes():
for edge in state.edges():
if (isinstance(edge.src, dace.nodes.CodeNode) and isinstance(edge.dst, dace.nodes.CodeNode)

and not edge.data.is_empty()):
data = edge.data.data
kernel_sdfg.add_datadesc(data, sdfg.arrays[data])
del sdfg.arrays[data]

# move Streams and Register data into the nested SDFG
# normal data will be added as kernel argument
kernel_args = []
for data in kernel_data:
if data not in other_data and (isinstance(sdfg.arrays[data], dace.data.Stream) or
(isinstance(sdfg.arrays[data], dace.data.Array) and sdfg.arrays[data].storage
desc = sdfg.arrays[data]
if isinstance(desc, dace.data.View):
kernel_sdfg.add_datadesc(data, desc)

# Potentially replicate the view
if data not in other_data:
del sdfg.arrays[data]
elif data not in other_data and (isinstance(desc, dace.data.Stream) or

(isinstance(desc, dace.data.Array) and desc.storage
in (StorageType.Register, StorageType.GPU_Shared))):
kernel_sdfg.add_datadesc(data, sdfg.arrays[data])
kernel_sdfg.add_datadesc(data, desc)

del sdfg.arrays[data]
else:
copy_desc = copy.deepcopy(sdfg.arrays[data])
copy_desc = copy.deepcopy(desc)

copy_desc.transient = False
copy_desc.storage = StorageType.Default
kernel_sdfg.add_datadesc(data, copy_desc)
@@ -303,9 +310,8 @@
]

for node in state.data_nodes():
if type(node.desc(sdfg)) in [dace.data.Array,
dace.data.Stream] \
and node.desc(sdfg).storage not in gpu_accessible:
if type(node.desc(sdfg)) in [dace.data.Array, dace.data.Stream

] and node.desc(sdfg).storage not in gpu_accessible:
return False

gpu_fused_schedules = [
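For reference, the descriptor handling added above boils down to the classification rule sketched below. This is an illustrative standalone sketch only: the helper name classify_kernel_data and the string labels are not part of the PR, which additionally deletes or replicates the descriptors on the outer SDFG and adds non-transient copies as kernel arguments, as shown in the diff.

import dace
from dace import data
from dace.dtypes import StorageType


def classify_kernel_data(desc: data.Data, used_outside: bool) -> str:
    """Sketch of how a descriptor accessed inside the fused kernel is treated."""
    if isinstance(desc, data.View):
        # New in this PR: views always become part of the kernel SDFG; the
        # outer copy is only removed when no state outside the kernel uses it.
        return 'replicate view' if used_outside else 'move view'
    if not used_outside and (isinstance(desc, data.Stream) or
                             (isinstance(desc, data.Array)
                              and desc.storage in (StorageType.Register, StorageType.GPU_Shared))):
        # Kernel-local streams and register/shared-memory arrays are moved
        # into the nested kernel SDFG.
        return 'move'
    # Everything else is passed as a kernel argument (non-transient copy,
    # default storage).
    return 'argument'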
@@ -91,13 +91,17 @@ def _make_sdfg():

tasklet = s_init.add_tasklet('set_result', {'root_idx'}, {'result_out'}, 'result_out = 0 if i == root_idx else -1')

s_init.add_memlet_path(root_in, map_entry, tasklet, dst_conn='root_idx', memlet=dace.Memlet.simple(root_in.data, '0'))
s_init.add_memlet_path(root_in,
map_entry,
tasklet,
dst_conn='root_idx',
memlet=dace.Memlet.simple(root_in.data, '0'))

s_init.add_memlet_path(tasklet,
map_exit,
result_out,
src_conn='result_out',
memlet=dace.Memlet.simple(result_out.data, 'i'))
map_exit,
result_out,
src_conn='result_out',
memlet=dace.Memlet.simple(result_out.data, 'i'))

# -------------------------------------------------------------

@@ -146,6 +150,7 @@ def _make_sdfg():
bfs.validate()
return bfs, s_init


# -----------------------------
# Helper functions to init data
# -----------------------------
@@ -268,7 +273,6 @@ def fill_update_state(state, front_in, front_in_count, front_out, front_out_coun
state.add_memlet_path(s_frontier_io, front_out, memlet=dace.Memlet.simple(front_out.data, '0'))



@pytest.mark.gpu
def test_persistent_fusion():
sdfg, s_init = _make_sdfg()
@@ -328,7 +332,6 @@ def test_persistent_fusion():
def test_persistent_fusion_interstate():
N = dace.symbol('N', dtype=dace.int64)


@dace.program(auto_optimize=False, device=dace.DeviceType.GPU)
def func(A: dace.float64[N], B: dace.float64[N]):
a = 10.2
@@ -357,11 +360,48 @@ def func(A: dace.float64[N], B: dace.float64[N]):
func.f(aref, B)

sdfg(A=A, B=B, N=N)

assert np.allclose(A, aref)


# Actual execution
@pytest.mark.gpu
def test_output_view():
N = dace.symbol('N')
M = dace.symbol('M')

@dace.program
def correlation_kernel(float_n: dace.float64, data: dace.float64[N, M]):

mean = np.mean(data, axis=0)
# stddev = np.std(data, axis=0)
stddev = np.sqrt(np.mean(np.subtract(data, mean)**2, axis=0))
stddev[stddev <= 0.1] = 1.0
# data -= mean
np.subtract(data, mean, out=data)
# data /= np.sqrt(float_n) * stddev
np.divide(data, np.sqrt(float_n) * stddev, out=data)
corr = np.eye(M, dtype=data.dtype)
for i in range(M - 1):
# corr[i, i+1:M] = np.transpose(data[:, i+1:M]) @ data[:, i]
corr[i, i + 1:M] = data[:, i] @ data[:, i + 1:M]
corr[i + 1:M, i] = corr[i, i + 1:M]

return corr

sdfg = correlation_kernel.to_sdfg()
sdfg.apply_gpu_transformations()

# Apply persistent fusion on all but the first and last nodes
content_states = set(sdfg.states()) - {sdfg.start_state, sdfg.sink_nodes()[0]}
transform = GPUPersistentKernel()
transform.setup_match(SubgraphView(sdfg, content_states))
transform.kernel_prefix = 'top_kernel'
transform.apply(sdfg)

sdfg.validate()
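The new test stops at sdfg.validate(). On a machine with a GPU, one could additionally execute the transformed SDFG and compare it against a NumPy reference, roughly as sketched below. This is not part of the PR; it assumes the sdfg and correlation_kernel objects from test_output_view above and uses np.corrcoef as the reference, which matches the kernel as long as no column's standard deviation falls below the 0.1 guard.

import numpy as np

# Illustrative follow-up only (requires a GPU): run the fused SDFG and check
# the resulting correlation matrix against NumPy.
n, m = 40, 32
rng = np.random.default_rng(0)
data = rng.random((n, m))
corr_ref = np.corrcoef(data, rowvar=False)

corr = sdfg(float_n=np.float64(n), data=data.copy(), N=n, M=m)
assert np.allclose(corr, corr_ref)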


if __name__ == "__main__":
test_persistent_fusion()
test_persistent_fusion_interstate()
test_output_view()