Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

applet.interface.better_la: a more performant logic analyzer #490

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
191 changes: 191 additions & 0 deletions software/glasgow/applet/interface/better_la/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
from collections import defaultdict
import io
import logging
import argparse
from vcd import VCDWriter
from amaranth import *
from amaranth.lib.cdc import FFSynchronizer

from ....gateware.pads import *
from ....gateware.analyzer import *
from ... import *
from .signal_compressor import SignalCompressor
from .arbiter import LAArbiter

# This LA uses a simple protocol for sending compressed values over the FIFO which is explained
# in the arbiter.py (high level chunks) and signal_compressor.py (low level packets) files.
# The basic architecture is as follows:
#          +------------------+       +--------+
# Pin0 --->| SignalCompressor |------>|  FIFO  |-----+
#          +------------------+       +--------+     |
#                                                    |
#          +------------------+       +--------+     |
# Pin1 --->| SignalCompressor |------>|  FIFO  |-----+     +-----------+      +----------+
#          +------------------+       +--------+     |     |           |      |          |
#                                                    +---->| LAArbiter |----->| USB-FIFO |
#          +------------------+       +--------+     |     |           |      |          |
# Pin2 --->| SignalCompressor |------>|  FIFO  |-----+     +-----------+      +----------+
#          +------------------+       +--------+     |
#                                                    |
#          +------------------+       +--------+     |
# PinN --->|       ...        |------>|  ...   |-----+
#          +------------------+       +--------+

class BetterLASubtarget(Elaboratable):
    """Gateware that feeds an ``LAArbiter`` from the input pads or from a test counter.

    ``pads`` supplies the sampled pins (read through ``pads.i_t.i``), ``in_fifo`` is handed to
    the arbiter as its output FIFO, and ``counter_target`` replaces the real pins with a
    free-running counter.
    """

    def __init__(self, pads, in_fifo, counter_target=False):
        self.counter_target = counter_target
        self.in_fifo = in_fifo
        self.pads = pads

        self.la = LAArbiter(in_fifo)

    def elaborate(self, platform):
        """Build the module: the arbiter plus whichever source drives its ``input``."""
        m = Module()
        m.submodules += self.la

        target_pins = self.pads.i_t.i

        if not self.counter_target:
            print("building bitstream connected to real target")
            # Bring the asynchronous pin values into the sync domain before analysing them.
            pins_i = Signal.like(target_pins)
            m.submodules += FFSynchronizer(target_pins, pins_i)
            m.d.comb += self.la.input.eq(pins_i)
            return m

        print("building bitstream with simulated counter target")
        # The counter is two bits wider than the pin set and its two lowest bits are dropped,
        # so the value presented to the analyzer advances once every four clock cycles.
        counter = Signal(len(target_pins) + 2)
        m.d.sync += counter.eq(counter + 1)
        m.d.comb += self.la.input.eq(counter[2:])
        return m


class BetterLAApplet(GlasgowApplet):
    """Logic analyzer applet: captures one buffer of compressed samples and writes a VCD file.

    The capture is read in a single transfer first and only decoded afterwards. The decoding
    relies on ``LAArbiter.read_chunk`` (chunk framing) and ``SignalCompressor.decode_pkg``
    (per-channel packets).
    """

    logger = logging.getLogger(__name__)
    help = "capture logic waveforms"
    description = """
    A somewhat better logic analyzer applet that allows for the capture of traces as VCD files.
    """

    # The FPGA on revA/revB is too slow for the complicated logic in this Applet
    required_revision = "C0"

    @classmethod
    def add_build_arguments(cls, parser, access):
        """Register the ``i`` pin set (1 to 16 pins) and the ``--counter-target`` switch."""
        super().add_build_arguments(parser, access)

        access.add_pin_set_argument(parser, "i", width=range(1, 17), default=1)
        parser.add_argument(
            "--counter-target", default=False, action="store_true",
            help="simulate a target with a counter signal",
        )

    def build(self, target, args):
        """Claim an interface, attach the subtarget and remember sample rate and pin list."""
        self.mux_interface = iface = target.multiplexer.claim_interface(self, args)
        iface.add_subtarget(BetterLASubtarget(
            pads=iface.get_pads(args, pin_sets=("i",)),
            in_fifo=iface.get_in_fifo(depth=512*16, auto_flush=False),
            counter_target=args.counter_target
        ))

        # One sample is taken per system clock cycle, so cycle counts convert to time with
        # the system clock frequency.
        self._sample_freq = target.sys_clk_freq
        self._pins = args.pin_set_i

    @classmethod
    def add_run_arguments(cls, parser, access):
        """Register the mutually exclusive ``--pull-ups`` / ``--pull-downs`` switches."""
        super().add_run_arguments(parser, access)

        g_pulls = parser.add_mutually_exclusive_group()
        g_pulls.add_argument(
            "--pull-ups", default=False, action="store_true",
            help="enable pull-ups on all pins")
        g_pulls.add_argument(
            "--pull-downs", default=False, action="store_true",
            help="enable pull-downs on all pins")

    async def run(self, device, args):
        """Claim the demultiplexer interface with the requested pulls and return it."""
        pull_low = set()
        pull_high = set()
        if args.pull_ups:
            pull_high = set(args.pin_set_i)
        if args.pull_downs:
            pull_low = set(args.pin_set_i)
        iface = await device.demultiplexer.claim_interface(self, self.mux_interface, args,
            pull_low=pull_low, pull_high=pull_high)
        return iface

    @classmethod
    def add_interact_arguments(cls, parser):
        """Register the output VCD file and the capture size (``--buffer-size``, in MB)."""
        parser.add_argument(
            "file", metavar="VCD-FILE", type=argparse.FileType("w"),
            help="write VCD waveforms to VCD-FILE")
        parser.add_argument("--buffer-size", type=int, default=10,
            help="how much data to capture in MB")

    async def _split_chunks(self, buffer):
        """Split the captured byte buffer into chunks and group their packets by pin.

        Returns ``(channels, chunks)``: a mapping from pin to the list of its packets in
        capture order, and the number of chunks that were parsed. Parsing stops at the first
        truncated chunk or at the first chunk of length 255, which is reported as an overrun.
        """
        ptr = 0

        async def read(size):
            nonlocal ptr
            end = ptr + size
            # Only a read that runs past the end is refused; a read that ends exactly at the
            # end of the buffer is the last complete piece of data and must be returned.
            if end > len(buffer):
                return None
            data = buffer[ptr:end]
            ptr = end
            return data

        channels = defaultdict(list)
        chunks = 0
        # Checking for remaining data here means the header read never comes back empty.
        while ptr < len(buffer):
            read_result = await LAArbiter.read_chunk(read)
            if read_result is None:
                break
            channel, chunk = read_result
            if len(chunk) == 255:
                print(f"channel {channel} overrun")
                break
            channels[self._pins[channel]].extend(chunk)
            chunks += 1
        return channels, chunks

    @staticmethod
    def _decode_events(channels):
        """Turn the per-pin packets into ``(cycle, pin, value)`` events sorted by cycle.

        Returns ``(events, cycles)`` where ``cycles`` is the shortest total duration over all
        pins that produced data, or ``None`` when there is no data at all.
        """
        events = []
        cycles = None
        for p, pkgs in channels.items():
            cycle = 0
            for pkg in pkgs:
                for value, duration in SignalCompressor.decode_pkg(pkg):
                    events.append((cycle, p, value))
                    cycle += duration
            cycles = cycle if cycles is None else min(cycles, cycle)
        events.sort(key=lambda e: e[0])
        return events, cycles

    async def interact(self, device, args, iface):
        """Capture ``args.buffer_size`` MB, decode it and write the waveforms to ``args.file``."""
        # Step 1: record a buffer
        # we do this before to get the full USB performance and not have any lag-spikes in between
        # An interrupted read yields no data, so start from an empty buffer to keep the
        # conversion below well-defined in that case.
        buffer = b""
        try:
            print(f"starting capture of {args.buffer_size} MB")
            buffer = await iface.read(1024*1024 * args.buffer_size)
        except KeyboardInterrupt:
            pass
        finally:
            print("captured buffer, converting...")

        # Step 2: parse the packets from the captured buffer and sort them into channels
        channels, chunks = await self._split_chunks(buffer)

        # Step 3: convert each channels packets into events, attach timestamps and sort them by
        # timestamp
        events, cycles = self._decode_events(channels)

        # Step 3.5: report statistics
        if cycles:
            total_pkgs = sum(len(pkgs) for pkgs in channels.values())
            total_bytes = chunks + total_pkgs * 2
            print(f"captured {cycles} samples ({cycles / self._sample_freq * 1000}ms)")
            print(f"chunking overhead: {chunks / total_bytes * 100}%")
            print(f"compression gain: {100 - (total_bytes * 8 / (cycles * len(self._pins)) * 100)}%")
        else:
            # Nothing usable was decoded; the ratios above would divide by zero.
            print("no samples captured")
            cycles = 0

        # Step 4: write out VCD file
        vcd_writer = VCDWriter(args.file, timescale="1 ns", check_values=False)
        vcd_signals = {
            p: vcd_writer.register_var(scope="", name=f"pin[{p}]", var_type="wire",
                size=1, init=0)
            for p in self._pins
        }
        # Stays at zero when no event is written, so that closing the file is always possible.
        timestamp = 0
        for cycle, p, value in events:
            if cycle > cycles:
                # we dont write any timestamps for which we dont have data on all channels
                break
            timestamp = cycle * 1_000_000_000 // self._sample_freq
            vcd_writer.change(vcd_signals[p], timestamp, value)
        vcd_writer.close(timestamp)
110 changes: 110 additions & 0 deletions software/glasgow/applet/interface/better_la/arbiter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from typing import Callable, List
from amaranth import *
from amaranth.lib.fifo import SyncFIFOBuffered

from .signal_compressor import SignalCompressor
from .step_encoder import StepEncoder
from .argmax import ArgMax

class LAArbiter(Elaboratable):
    """This Logic Analyzer arbiter instantiates n signal compressors and n FIFOs and arbitrates
    the output of the FIFOs based on priority (the fullest FIFO is served first).

    Its output format is one header byte followed by 2*length bytes of compressed channel data.
    The header carries the channel number in its low 4 bits and, in its high 4 bits, the chunk
    length encoded as an index into ``LENGTH_ENCODING``. Every 16-bit packet is sent low byte
    first. ``read_chunk`` is the matching host-side decoder.
    """

    LENGTH_ENCODING = [1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 255]

    def __init__(self, output_fifo: SyncFIFOBuffered, n_channels=16):
        self.output_fifo = output_fifo
        assert output_fifo.width == 8
        # The chunk header only has 4 bits for the channel number.
        assert n_channels <= 16
        self.input = Signal(n_channels)

    def elaborate(self, platform):
        m = Module()

        fifos: List[SyncFIFOBuffered] = []
        encoded_fifo_levels = []
        for i, sig in enumerate(self.input):
            fifo = SyncFIFOBuffered(width=16, depth=256)  # this is exactly one ice40 bram
            m.submodules[f"fifo_{i}"] = fifo
            fifos.append(fifo)

            compressor = SignalCompressor(sig)
            m.submodules[f"compressor_{i}"] = compressor
            m.d.comb += fifo.w_en.eq(compressor.valid)
            m.d.comb += fifo.w_data.eq(compressor.value)

            step_encoder = StepEncoder(fifo.r_level, self.LENGTH_ENCODING)
            m.submodules[f"step_encoder_{i}"] = step_encoder
            encoded_fifo_levels.append(step_encoder.output)

        fifo_r_data = Array(fifo.r_data for fifo in fifos)
        fifo_r_en = Array(fifo.r_en for fifo in fifos)
        fifo_r_rdy = Array(fifo.r_rdy for fifo in fifos)
        length_decoding = Array(self.LENGTH_ENCODING)

        # the argmax introduces 2 cycles of latency with pipelining to meet timing
        # to accommodate for that we get the real level of the selected fifo in a combinatorial path
        # it does not matter if we select a suboptimal fifo but it is bad if we assume a wrong level
        argmax = m.submodules.argmax = ArgMax(encoded_fifo_levels, sync_levels=[1, 3])
        max_fifo_idx = argmax.max_idx
        encoded_fifo_levels_array = Array(encoded_fifo_levels)
        max_fifo_level_encoded = Signal(4)
        m.d.comb += max_fifo_level_encoded.eq(encoded_fifo_levels_array[max_fifo_idx])
        max_fifo_level = Signal(8)
        m.d.comb += max_fifo_level.eq(length_decoding[max_fifo_level_encoded])
        max_fifo_r_rdy = Signal()
        m.d.comb += max_fifo_r_rdy.eq(fifo_r_rdy[max_fifo_idx])

        # The index is padded to the full 4 bit channel field so that the encoded length always
        # lands in the high nibble of the header, also when fewer than 9 channels are used.
        header_channel = Signal(4)
        m.d.comb += header_channel.eq(max_fifo_idx)

        # Must hold every decoded chunk length (up to 255); a narrower counter would truncate
        # long chunks and send fewer packets than the header announced.
        to_transfer = Signal(range(max(self.LENGTH_ENCODING) + 1))
        current_channel = Signal(4)
        with m.FSM():
            with m.State("wait"):
                with m.If(max_fifo_r_rdy):
                    m.next = "announce"

            with m.State("announce"):
                # Latch what the header written in this very cycle announces.
                m.d.sync += to_transfer.eq(max_fifo_level)
                m.d.sync += current_channel.eq(max_fifo_idx)

                m.d.comb += self.output_fifo.w_data.eq(Cat(header_channel, max_fifo_level_encoded))
                m.d.comb += self.output_fifo.w_en.eq(max_fifo_r_rdy)
                with m.If(~max_fifo_r_rdy):
                    m.next = "wait"
                with m.Elif(self.output_fifo.w_rdy):
                    m.next = "send_lower"

            with m.State("send_lower"):
                m.d.comb += self.output_fifo.w_data.eq(fifo_r_data[current_channel][0:8])
                m.d.comb += self.output_fifo.w_en.eq(1)
                with m.If(self.output_fifo.w_rdy):
                    m.next = "send_upper"
            with m.State("send_upper"):
                m.d.comb += self.output_fifo.w_data.eq(fifo_r_data[current_channel][8:16])
                m.d.comb += self.output_fifo.w_en.eq(1)
                with m.If(self.output_fifo.w_rdy):
                    # Both halves are out: pop the packet from the channel FIFO.
                    m.d.comb += fifo_r_en[current_channel].eq(1)
                    with m.If(to_transfer > 1):
                        m.next = "send_lower"
                        m.d.sync += to_transfer.eq(to_transfer - 1)
                    with m.Else():
                        with m.If(max_fifo_r_rdy):
                            m.next = "announce"
                        with m.Else():
                            m.next = "wait"

        return m

    @staticmethod
    async def read_chunk(read: Callable[[int], bytes]):
        """Decode one chunk from the byte stream produced by the arbiter.

        ``read`` is awaited with the number of bytes wanted and must return that many bytes,
        or ``None`` when the stream has ended.

        Returns ``(channel, packets)`` with ``packets`` a list of 16-bit integers, or ``None``
        when the stream ends before a complete chunk could be read.
        """
        header_bytes = await read(1)
        # The end of the stream has to be detected before the byte is extracted.
        if header_bytes is None or len(header_bytes) < 1:
            return None
        header = header_bytes[0]
        channel = header & 0b1111
        length = LAArbiter.LENGTH_ENCODING[header >> 4]
        contents = await read(2 * length)
        if contents is None or len(contents) < 2 * length:
            return None
        payload = contents[:2 * length]
        # Packets are transmitted low byte first.
        return channel, [hi << 8 | lo for lo, hi in zip(payload[0::2], payload[1::2])]
52 changes: 52 additions & 0 deletions software/glasgow/applet/interface/better_la/argmax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from typing import List
from amaranth import *

class ArgMax(Elaboratable):
    """
    Find the maximum value and the index of the maximum value of a list of signals using a
    comparison-tree.

    ``signals`` is the list to search. ``sync_levels`` lists the tree levels (0 is the root)
    whose comparison results are registered in the sync domain instead of being combinatorial.
    When two candidates are equal, the one from the higher-index half is chosen.

    The results are available as ``max_value`` and ``max_idx``.

    NOTE(review): leaves are passed through unregistered at whatever depth they end up, so with
    a signal count that is not a power of two the registered levels may delay the branches by
    different amounts; confirm this is acceptable for such configurations.
    """
    def __init__(self, signals: List[Signal], sync_levels=None):
        self.signals = signals

        # A fresh list per instance; a list literal as default would be shared between them.
        self.sync_levels = [] if sync_levels is None else sync_levels

        self.max_value = Signal.like(signals[0])
        self.max_idx = Signal(range(len(signals)))

    def elaborate(self, platform):
        m = Module()

        def build_tree(signals, offset=0, level=0):
            """Return (value, index) of the maximum of ``signals``, which start at ``offset``."""
            if len(signals) == 1:
                # A leaf is its own maximum; its index is its position in the full list.
                return signals[0], offset

            suffix = f"l{level}_{offset}to{offset+len(signals)}"
            domain = m.d.sync if level in self.sync_levels else m.d.comb

            half = len(signals) // 2
            a, a_idx = build_tree(signals[:half], offset=offset, level=level+1)
            b, b_idx = build_tree(signals[half:], offset=offset + half, level=level+1)
            value = Signal.like(self.signals[0], name=f"max_val_{suffix}")
            index = Signal.like(self.max_idx, name=f"max_idx_{suffix}")
            domain += [
                value.eq(Mux(a > b, a, b)),
                index.eq(Mux(a > b, a_idx, b_idx))
            ]
            return value, index

        val, idx = build_tree(self.signals)
        m.d.comb += self.max_value.eq(val)
        m.d.comb += self.max_idx.eq(idx)

        return m