From f44758ef41e0bdbb6fdc2a70ec288ac04e54acae Mon Sep 17 00:00:00 2001 From: Sarah Griffiths Date: Wed, 9 Aug 2023 18:33:38 +0000 Subject: [PATCH] CW-2260 ezcharts report --- CHANGELOG.md | 9 +- bin/workflow_glue/bokeh_plot.py | 196 +++++++ bin/workflow_glue/report.py | 488 +++++++----------- .../report_utils/report_utils.py | 158 ++++++ bin/workflow_glue/run_plannotate.py | 4 +- main.nf | 9 +- nextflow.config | 6 +- 7 files changed, 565 insertions(+), 305 deletions(-) create mode 100755 bin/workflow_glue/bokeh_plot.py create mode 100755 bin/workflow_glue/report_utils/report_utils.py diff --git a/CHANGELOG.md b/CHANGELOG.md index c676f23..5fe30a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,14 +4,15 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [unreleased] +## [v0.5.0] +### Changed +- The report has been updated and re-ordered to improve usability. +- Default basecaller cfg is now `dna_r10.4.1_e8.2_400bps_sup@v4.2.0`. +- Docker will use an ARM platform image on appropriate devices. ### Fixed - Updated basecaller cfg model options. - Workflow will still output report if there are no assemblies. -### Changed -- Default basecaller cfg is now `dna_r10.4.1_e8.2_400bps_sup@v4.2.0`. - ## [v0.4.0] ### Added - Documentation updated to include workflow steps. diff --git a/bin/workflow_glue/bokeh_plot.py b/bin/workflow_glue/bokeh_plot.py new file mode 100755 index 0000000..3618e13 --- /dev/null +++ b/bin/workflow_glue/bokeh_plot.py @@ -0,0 +1,196 @@ +"""Script from plannotate edited to work with bokeh 3.""" +from math import pi + +from bokeh.models import ColumnDataSource, HoverTool, Range1d, WheelZoomTool +from bokeh.models.annotations import Label +from bokeh.plotting import figure +import numpy as np +import pandas as pd +from plannotate.bokeh_plot import calc_glyphs, calc_level, calc_num_markers +import plannotate.resources as rsc + + +def get_bokeh(df, linear=False): + """Get bokeh from plannotate updated to use Bokeh v3 to match ezcharts.""" + # df = df.fillna("") + x = 0 + y = 0 + baseradius = .18 + tooltips = """ + @Feature — @Type @pi_permatch_int
+ @Description + """ # noqa: E501 + hover = HoverTool(tooltips=tooltips) + plotsize = .35 + plotdimen = 800 + x_range = Range1d(-plotsize, plotsize, bounds=(-.5, .5), min_interval=.1) + y_range = Range1d(-plotsize, plotsize, bounds=(-.5, .5), min_interval=.1) + toolbar = "right" + p = figure( + height=plotdimen, width=plotdimen, title="", + toolbar_location=toolbar, toolbar_sticky=False, + match_aspect=True, + sizing_mode='scale_width', tools=['save', 'pan'], + x_range=x_range, y_range=y_range) + + # x_range=(-plotsize, plotsize), y_range=(-plotsize, plotsize)) + p.toolbar.logo = None + p.add_tools(WheelZoomTool(zoom_on_axis=False)) + p.toolbar.active_scroll = p.select_one(WheelZoomTool) + + # backbone line + p.circle( + x=x, y=y, radius=baseradius, line_color="#000000", + fill_color=None, line_width=2.5) + + df = calc_level(df) + + if linear: + line_length = baseradius / 5 + p.line( + [0, 0], [baseradius - line_length, baseradius + line_length], + line_width=4, level="overlay", line_color="black") + + df['pi_permatch_int'] = df['pi_permatch'].astype('int') + + df['pi_permatch_int'] = df['pi_permatch_int'].astype(str) + "%" + # removes percent from infernal hits + df.loc[df['db'] == "Rfam", 'pi_permatch_int'] = "" + + df['rstart'] = ((df["qstart"]/df["qlen"])*2*pi) + df['rend'] = ((df["qend"]/df["qlen"])*2*pi) + df['rstart'] = np.where( + df['rstart'] < 0, df['rstart'] + (2*pi), df['rstart']) + df['rend'] = np.where(df['rend'] < 0, df['rend'] + (2*pi), df['rend']) + df['rend'] = np.where( + df['rend'] < df['rstart'], df['rend'] + (2*pi), df['rend']) + + df['Type'] = df['Type'].str.replace('rep_origin', 'origin of replication') + + # DDE0BD + # C97064 + # C9E4CA + fullcolordf = pd.read_csv( + rsc.get_resource("data", "colors.csv"), index_col=0) + fragcolordf = fullcolordf.copy() + fragcolordf[['fill_color', 'line_color']] = fragcolordf[ + ['line_color', 'fill_color']] + fragcolordf["fill_color"] = "#ffffff" + + full = df[df["fragment"] == False] # noqa + + full = full.merge( + fullcolordf, + how="left", on=["Type"]) + full['legend'] = full['Type'] + full = full.fillna( + {"color": "grey", + "fill_color": "#808080", + "line_color": "#000000"}) + frag = df[df["fragment"]] + frag = frag.merge(fragcolordf, how="left", on=["Type"]) + frag = frag.fillna( + {"color": "grey", + "fill_color": "#ffffff", + "line_color": "#808080"}) + + df = full.append(frag).reset_index(drop=True) + + # add orientation column + orient = pd.read_csv( + rsc.get_resource("data", "feature_orientation.csv"), + header=None, names=["Type", "has_orientation"]) + orient['Type'] = orient['Type'] + orient['has_orientation'] = orient['has_orientation'].map({"T": True}) + df = df.merge(orient, on="Type", how="left") + df['Type'] = df['Type'].str.replace("_", " ") + df['has_orientation'] = df['has_orientation'].fillna(value=False) + df[[ + 'x', 'y', "Lx1", "Ly1", + "annoLineColor", "lineX", + "lineY", "theta", "text_align"]] = df.apply(calc_glyphs, axis=1) + df['legend'] = df['Type'] + # allowedtypes = ['CDS',"promoter","origin of replication","swissprot"] + allowedtypes = fullcolordf['Type'] + mask = ~df['legend'].isin(allowedtypes) + df.loc[mask, 'legend'] = 'misc feature' + + # plot annotations + source = ColumnDataSource(df) + hover_labels = p.patches( + 'x', 'y', fill_color='fill_color', line_color='line_color', + name="features", line_width=2.5, + source=source, legend_group="legend") + p.multi_line( + xs="lineX", ys="lineY", line_color="annoLineColor", + line_width=3, level="overlay", + line_cap='round', alpha=.5, 
source=source) + + # `text_align` cannot read from `source` -- have to do this workaround + right = ColumnDataSource(df[df['text_align'] == 'right']) + left = ColumnDataSource(df[df['text_align'] == 'left']) + bcenter = ColumnDataSource(df[df['text_align'] == 'b_center']) + tcenter = ColumnDataSource(df[df['text_align'] == 't_center']) + + text_level = 'overlay' + p.text( + x="Lx1", y="Ly1", name="2", x_offset=3, y_offset=8, + text_align="left", + text='Feature', level=text_level, source=right) + p.text( + x="Lx1", y="Ly1", name="2", x_offset=-5, y_offset=8, + text_align="right", text='Feature', level=text_level, source=left) + p.text( + x="Lx1", y="Ly1", name="2", x_offset=0, y_offset=15, + text_align="center", text='Feature', + level=text_level, source=bcenter) + p.text( + x="Lx1", y="Ly1", name="2", x_offset=0, y_offset=0, + text_align="center", + text='Feature', level=text_level, source=tcenter) + + # calculate chunk size(s) for drawing lines + plaslen = df.iloc[0]['qlen'] + ticks = calc_num_markers(plaslen) + ticks_cds = ColumnDataSource(ticks) + p.multi_line( + xs="lineX", ys="lineY", line_color="black", line_width=2, + level="underlay", line_cap='round', + alpha=.5, source=ticks_cds) + + right = ColumnDataSource(ticks[ticks['text_align'] == 'right']) + left = ColumnDataSource(ticks[ticks['text_align'] == 'left']) + bcenter = ColumnDataSource(ticks[ticks['text_align'] == 'b_center']) + tcenter = ColumnDataSource(ticks[ticks['text_align'] == 't_center']) + p.text( + x="Lx1", y="Ly1", name="2", x_offset=3, y_offset=6, + text_align="left", text='bp', alpha=.5, + text_font_size='size', level=text_level, source=right) + p.text( + x="Lx1", y="Ly1", name="2", x_offset=-5, + y_offset=6, text_align="right", + text='bp', alpha=.5, text_font_size='size', + level=text_level, source=left) + p.text( + x="Lx1", y="Ly1", name="2", x_offset=0, y_offset=15, + text_align="center", text='bp', alpha=.5, + text_font_size='size', level=text_level, source=bcenter) + p.text( + x="Lx1", y="Ly1", name="2", x_offset=0, y_offset=-3, + text_align="center", text='bp', alpha=.5, + text_font_size='size', level=text_level, source=tcenter) + p.add_tools(hover) + p.add_layout(Label( + x=0, y=0, name="2", x_offset=0, y_offset=-8, + text_align="center", + text=f"{plaslen} bp", text_color="#7b7b7b", + text_font_size='16px', level=text_level)) + p.hover.renderers = [hover_labels] + p.axis.axis_label = None + p.axis.visible = False + p.grid.grid_line_color = "#EFEFEF" + p.outline_line_color = "#DDDDDD" + p.legend.location = 'bottom_left' + p.legend.border_line_color = "#EFEFEF" + p.legend.visible = True + return p diff --git a/bin/workflow_glue/report.py b/bin/workflow_glue/report.py index 9f87de2..9a9e88d 100755 --- a/bin/workflow_glue/report.py +++ b/bin/workflow_glue/report.py @@ -1,174 +1,111 @@ #!/usr/bin/env python - -"""Report component for displaying information from wf-clone-validation.""" -import argparse +"""Create workflow report.""" +from itertools import count, takewhile import json import os - -from aplanat import bars, report -from aplanat.components import bcfstats -from aplanat.components import fastcat -from aplanat.components import simple as scomponents -import aplanat.graphics -from aplanat.util import Colors -from bokeh.layouts import layout -from bokeh.models import Panel, Tabs +from dominate.tags import p, pre +from dominate.util import raw +from ezcharts.components import bokehchart +from ezcharts.components.fastcat import draw_all_plots +from ezcharts.components.reports import labs +from 
ezcharts.layout.snippets import Stats, Tabs +from ezcharts.layout.snippets.table import DataTable +from ezcharts.plots.__init__ import BokehPlot +from ezcharts.plots.util import read_files +from ezcharts.util import get_named_logger # noqa: ABS101 import pandas as pd -from plannotate.bokeh_plot import get_bokeh -from .util import wf_parser # noqa: ABS101 +from workflow_glue.bokeh_plot import get_bokeh +from workflow_glue.report_utils import report_utils +from .util import wf_parser # noqa: ABS101 -def tidyup_status_file(status_sheet, annotations): - """Tidy up the sample status file.""" - sample_status = pd.read_csv(status_sheet[0], header=None) - unique_samples = sample_status[0].unique() - pass_fail_dic = {} - for sample in unique_samples: - pass_fail_dic[sample] = 'Completed successfully' - filter_pass = sample_status[sample_status[1] != 'Completed successfully'] - failures = dict(zip(filter_pass[0], filter_pass[1])) - completed_annotations = list(annotations) - success = sample_status[sample_status[1] == 'Completed successfully'] - no_annotations = success[~success[0].isin(completed_annotations)] - for sample in list(no_annotations[0]): - failures[sample] = 'Completed but no annotations found in the database' - all_sample_names = unique_samples.tolist() - all_sample_names.sort() - passed_list = unique_samples.tolist() - for k, v in failures.items(): - pass_fail_dic[k] = v - if v != 'Completed but failed to reconcile': - passed_list.remove(k) - passed_list.sort() - status_df = pd.DataFrame( - pass_fail_dic.items(), columns=['Sample', 'pass/failed reason']) - sort_df = status_df['Sample'].astype(str).argsort() - status_df = status_df.iloc[sort_df] - return (status_df, passed_list, all_sample_names, pass_fail_dic) +THEME = 'epi2melabs' -def read_files(summaries, sep='\t'): - """Read a set of files and join to single dataframe.""" - dfs = list() - for fname in sorted(summaries): - dfs.append(pd.read_csv(fname, sep=sep)) - return pd.concat(dfs) +def sliced(seq, n): + """Slice up a string.""" + iterator = takewhile(len, (seq[i: i + n] for i in count(0, n))) + slices = list(iterator) + return slices -def fastcat_report_tab(file_name, tab_name): - """Read fastcat dataframe and create a tab with qual and len plots.""" - df = pd.read_csv(file_name, sep='\t') - depth = len(df.index) - min_length = df["read_length"].min() - max_length = df["read_length"].max() - lengthplot = fastcat.read_length_plot( - df, - min_len=min_length, - max_len=max_length) - qstatplot = fastcat.read_quality_plot(df) - exec_summary = aplanat.graphics.InfoGraphItems() - exec_summary.append( - 'No. 
reads',
-        str(depth),
-        "bars", '')
-    exec_plot = aplanat.graphics.infographic(
-        exec_summary.values(), ncols=1)
-    tab = Panel(child=layout(
-        [[exec_plot], [lengthplot], [qstatplot]],
-        aspect_ratio="auto",
-        sizing_mode='stretch_width'),
-        title=tab_name)
-    return tab
+def format_msa(inserts):
+    """Format the MSA."""
+    collect_sliced = []
+    name_list = []
+    reorg_slices = []
+    for i in list(inserts['msa']):
+        name_list.append(i[0:10])
+        collect_sliced.append(sliced(i[10:], 80))
+    for slice_index in range(len(collect_sliced[0])):
+        for index, value in enumerate(collect_sliced):
+            reorg_slices.append(name_list[index] + value[slice_index])
+        reorg_slices.append('')
+    return reorg_slices
-def create_fastcat_dic(sample_names, raw, hostfilt, downsampled):
-    """Create dictionary using sample names and fastcat files available."""
-    per_sample_dic = {}
-    lists = {'raw': raw, 'hostfilt': hostfilt, 'downsampled': downsampled}
-    for sample in sample_names:
-        new_dic = {}
-        item_search = '/' + sample + '.'
-        for list_name, fc_list in lists.items():
-            # find index of item that contains the sample name as a substring
-            item_index = [i for i, s in enumerate(fc_list) if item_search in s]
-            if item_index:
-                indice = item_index[0]
-                new_dic[list_name] = fc_list[indice]
-            else:
-                pass
-        per_sample_dic[sample] = new_dic
-    return per_sample_dic
+def button_format(reason):
+    """Format button, blue for success, red for failure."""
+    if reason == "Completed successfully":
+        return """<span class="badge bg-primary">""" + reason + """</span>"""
+    else:
+        return """<span class="badge bg-danger">""" + reason + """</span>"""
 def main(args):
-    """Entry point to create a wf-clone-validation report."""
-    report_doc = report.WFReport(
-        "Clone Validation Report",
-        "wf-clone-validation",
-        revision=args.revision,
-        commit=args.commit)
-
-    # summary section
-    section = report_doc.add_section()
-    section.markdown("### Summary")
-    seq_summary = read_files(args.per_barcode_stats)
-    section = report_doc.add_section()
-    barcode_counts = (
-        pd.DataFrame(seq_summary['sample_name'].value_counts())
-        .sort_index()
-        .reset_index()
-        .rename(
-            columns={'index': 'sample', 'sample_name': 'count'})
-    )
-    bc_counts = bars.simple_bar(
-        barcode_counts['sample'].astype(str), barcode_counts['count'],
-        colors=[Colors.cerulean]*len(barcode_counts),
-        title=(
-            'Number of reads per barcode'),
-        plot_width=None
-    )
-    bc_counts.xaxis.major_label_orientation = 3.14/2
-    section.plot(
-        layout(
-            [[bc_counts]],
-            sizing_mode="stretch_width"))
-
-    # We defer this until processing through all the samples in the loop below
-    summary_placeholder = report_doc.add_section(key='stats_table')
+    """Run the entry point."""
+    logger = get_named_logger("Report")
+    report = labs.LabsReport(
+        "Clone validation report", "wf-clone-validation-new",
+        args.params, args.versions)
     plannotate_annotations = json.load(open(args.plannotate_json)).keys()
-    lengths = json.load(open(args.lengths))
-    pass_fail = tidyup_status_file(args.status, plannotate_annotations)
-    # find inserts and put in dir
-    current_directory = os.getcwd()
-    make_directory = os.path.join(current_directory, r'inserts')
-    if not os.path.exists(make_directory):
-        os.makedirs(make_directory)
-    if os.stat(args.inserts_json).st_size != 0:
-        inserts = json.load(open(args.inserts_json))
-        insert_placeholder = report_doc.add_section(key='insert_table')
-        seq_segment = inserts['bed_df']
-        seq_segment = pd.read_json(inserts['bed_df'])
-        insert_placeholder.markdown("""
-### Insert sequences
-This table shows which primers were found in the consensus sequence
-of each sample and where the inserts were found.
-""")
-    section = report_doc.add_section()
-    section.markdown("""
-### Multiple sequence alignment
-This section shows the inserts aligned with each other or a reference
-sequence if provided.
-""")
-    section.markdown("<pre>" + os.linesep.join(inserts['msa']) + "</pre>
") - - # Per sample details - section = report_doc.add_section() - section.markdown(""" -### Assemblies -For each assembly, read length statistics are displayed as well as a -[pLannotate plot](https://github.com/barricklab/pLannotate), -and quality plots. + lengths = json.load(open(args.lengths)) + sample_stats = [] + for item in sample_names: + try: + stats_table = [item] + [int(lengths[item]['reflen'])] + except KeyError: + stats_table = [item] + ['N/A'] + sample_stats.append(stats_table) + stats_df = pd.DataFrame( + sample_stats, columns=["Sample", "Length"]) + status_df = pd.DataFrame( + sample_status_dic.items(), columns=['Sample', 'pass/failed reason']) + sort_df = status_df['Sample'].astype(str).argsort() + status_df = status_df.iloc[sort_df] + merged = pd.merge(status_df, stats_df) + merged.to_csv('sample_status.txt', index=False) + if ('assembly_quality/OPTIONAL_FILE' not in args.assembly_quality): + qc_df = read_files(args.assembly_quality)[ + ['sample_name', 'mean_quality']] + qc_df = qc_df.rename(columns={ + 'sample_name': 'Sample', 'mean_quality': 'Mean Quality'}) + qc_df = qc_df.reset_index(drop=True) + merged = pd.merge(merged, qc_df, how="outer") + merged.fillna('N/A', inplace=True) + merged['pass/failed reason'] = merged.apply( + lambda merged: button_format(merged['pass/failed reason']), + axis=1) + DataTable.from_pandas(merged, use_index=False) + with report.add_section("Plannotate", "Plannotate"): + raw("""The Plasmid annotation plot and feature table are produced using \ +pLannotate""") + p(""" +A pLannotate plot is shown for each assembly. A feature table provides descriptions of the annotated sequence. Unfilled features on the plannotate plots are incomplete features; the sequence @@ -179,170 +116,135 @@ def main(args): small feature fragments may affect plasmid function if they result in cryptic gene expression or are inadvertently combined with other elements during later cloning steps. - -The Plasmid annotation plot and feature table are produced using -[Plannotate](http://plannotate.barricklab.org/). 
""") - passed_samples = pass_fail[1] - host_ref_stats = args.host_filter_stats - downsampled_stats = args.downsampled_stats - initial_stats = args.per_barcode_stats - if ('host_filter_stats/OPTIONAL_FILE' in host_ref_stats): - host_filt = host_ref_stats.remove('host_filter_stats/OPTIONAL_FILE') - if host_filt is None: - host_filt = [] - else: - host_filt = host_ref_stats - if ('host_filter_stats/OPTIONAL_FILE' in downsampled_stats): - summary_stats = downsampled_stats.remove( - 'downsampled_stats/OPTIONAL_FILE') - if summary_stats is None: - summary_stats = [] - else: - summary_stats = downsampled_stats - sample_names = pass_fail[2] - fast_cat_dic = create_fastcat_dic( - sample_names, initial_stats, host_filt, summary_stats) - plannotate = json.load(open(args.plannotate_json)) - sample_stats = [] - - # stats graphs and plannotate where appropriate - for item in sample_names: - section = report_doc.add_section() - section.markdown('### Sample: {}'.format(str(item))) - if item in passed_samples: - section.markdown('*{}*'.format(pass_fail[3][item])) - tup_dic = plannotate[item] - annotations = pd.read_json(tup_dic['annotations']) - plot = get_bokeh(pd.read_json(tup_dic['plot']), False) - fast_cat_tabs = fast_cat_dic[item] - alltabs = [] - for key, value in fast_cat_tabs.items(): - alltabs.append(fastcat_report_tab(value, key)) - cover_panel = Tabs(tabs=alltabs) - plasmidplot = [plot] - stats_table = [item] + [int(lengths[item]['reflen'])] - sample_stats.append(stats_table) - section.plot(layout( - [[cover_panel, plasmidplot]], - sizing_mode='scale_width')) - section.table(annotations.drop( - columns=['Plasmid length']), - index=False, key="table"+str(item)) - else: - section.markdown('*{}*'.format(pass_fail[3][item])) - fast_cat_tabs = fast_cat_dic[item] - alltabs = [] - for key, value in fast_cat_tabs.items(): - alltabs.append(fastcat_report_tab(value, key)) - cover_panel = Tabs(tabs=alltabs) - section.plot(cover_panel) - try: - stats_table = [item] + [int(lengths[item]['reflen'])] - except KeyError: - stats_table = [item] + ['N/A'] - sample_stats.append(stats_table) - - # high level sample status table - summary_placeholder.markdown("### Sample status") - stats_df = pd.DataFrame( - sample_stats, columns=["Sample", "Length"]) - merged_inner = pd.merge(pass_fail[0], stats_df) - summary_placeholder.table(merged_inner, index=False, key='stats_table') - merged_inner.to_csv('sample_status.txt', index=False) - - def insert_len(start, end, length): - if end >= start: - insert_length = end - start - else: - insert_length = (length - start) + end - return insert_length - + # Load and plot per sample plannotate json + plannotate = json.load(open(args.plannotate_json)) + tabs = Tabs() + with tabs.add_dropdown_menu(): + for item in passed_samples: + plan_item = plannotate[item] + annotations = pd.read_json(plan_item['annotations']) + with tabs.add_dropdown_tab(str(item)): + bk_plot = BokehPlot() + bk_plot._fig = get_bokeh(pd.read_json(plan_item['plot'])) + bk_plot._fig.xgrid.grid_line_color = None + bk_plot._fig.ygrid.grid_line_color = None + bokehchart.BokehChart( + bk_plot, 'epi2melabs', + width='100%', height='100%') + DataTable.from_pandas(annotations, use_index=False) + # Per barcode read count plot + report_utils.read_count_barplot(args.per_barcode_stats, report) + with report.add_section("Read stats", "Read stats"): + p(""" +For each assembly, read length statistics and plots of quality \ +before and after downsampling, and after host filtering \ +if a host reference was provided. 
+""") + fastcat_dic = report_utils.create_fastcat_dic( + sample_names, args.per_barcode_stats, + args.host_filter_stats, args.downsampled_stats) + tabs = Tabs() + with tabs.add_dropdown_menu(): + for item in sample_names: + with tabs.add_dropdown_tab(str(item)): + p(sample_status_dic[item]) + internal_tabs = Tabs() + # For each sample, plot fastcat stats + fastcat_tabs = fastcat_dic[item] + for key, value in fastcat_tabs.items(): + with internal_tabs.add_tab(key): + seq_summary = pd.read_csv(value, sep='\t') + depth = len(seq_summary.index) + Stats( + columns=2, + items=[ + (depth, 'Read count'), + ]) + draw_all_plots(seq_summary, THEME) + # Insert info table if os.stat(args.inserts_json).st_size != 0: - test_df = seq_segment.merge(stats_df, how='left') - test_df['Insert length'] = list( - map( - insert_len, - test_df['start'], - test_df['end'], - test_df['Length'])) - test_df = test_df.drop(columns='Length') - insert_placeholder.table(test_df, index=False, key='insert_table') - # Assembly QC section - section = report_doc.add_section() - section.markdown('### Assembly Quality') - if 'assembly_quality/OPTIONAL_FILE' not in args.assembly_quality: - section.markdown( - 'This table gives the mean quality for the whole \ - plasmid assembly. This is provided by Medaka, derived \ - from aligning the reads to the consensus.') - qual_df = read_files( - args.assembly_quality)[['sample_name', 'mean_quality']] - qual_df = qual_df.rename(columns={ - 'sample_name': 'Sample', 'mean_quality': 'Mean Quality'}) - section.table(qual_df, index=False) - if 'qc_inserts/OPTIONAL_FILE' not in args.qc_inserts: - bcf_header = """ -### Insert variant call summaries - -The following tables and figures are derived using `bcftools`, -comparing the consensus insert with the reference -insert if provided. -""" - report_doc.add_section( - section=bcfstats.full_report(args.qc_inserts, header=bcf_header)) - # Versions and params - report_doc.add_section( - section=scomponents.version_table(args.versions)) - report_doc.add_section( - section=scomponents.params_table(args.params)) - report_doc.write(args.report_name) + inserts = json.load(open(args.inserts_json)) + with report.add_section("Insert sequences", "Inserts"): + seq_segment = pd.read_json(inserts['bed_df']) + insert_df = seq_segment.merge(stats_df, how='left') + insert_df['Insert length'] = list( + map( + report_utils.insert_len, + insert_df['start'], + insert_df['end'], + insert_df['Length'])) + insert_df = insert_df.drop(columns='Length') + p(""" +This table shows which primers were found in the consensus sequence +of each sample and where the inserts were found. +""") + DataTable.from_pandas(insert_df, use_index=False) + # MSA section if inserts available + with report.add_section("Multiple Sequence Alignment", "MSA"): + p(""" +This section shows the inserts aligned with each other or a reference +sequence if provided. +""") + formatted_msa = format_msa(inserts) + pre(os.linesep.join(formatted_msa)) + if ('OPTIONAL_FILE' not in os.listdir(args.qc_inserts)): + with report.add_section("Insert variants", "Insert variants"): + p(""" +The following tables and figures are output from bcftools from +finding any variants between +the consensus insert and the provided reference +insert. 
+""") + variants_df = report_utils.variant_counts_table(args.qc_inserts, report) + DataTable.from_pandas(variants_df, use_index=False) + trans_df = report_utils.trans_counts(args.qc_inserts, report) + DataTable.from_pandas(trans_df, use_index=False) + report.write(args.report) + logger.info(f"Report written to {args.report}.") def argparser(): """Argument parser for entrypoint.""" parser = wf_parser("report") - + parser.add_argument("report", help="Report output file") parser.add_argument( - "--downsampled_stats", - nargs='+', - required=True - ) + "--stats", nargs='*', help="Fastcat per-read stats file(s).") + parser.add_argument( + "--versions", required=True, + help="directory containing CSVs containing name,version.") + parser.add_argument( + "--params", default=None, required=True, + help="A JSON file containing the workflow parameter key/values") parser.add_argument( "--revision", default='unknown', - help="revision number") + help="git branch/tag of the executed workflow") parser.add_argument( "--commit", default='unknown', - help="git commit number") - parser.add_argument( - "--status", nargs='+', - help="status") + help="git commit of the executed workflow") parser.add_argument( "--per_barcode_stats", nargs='+', help="fastcat stats file for each sample before filtering") parser.add_argument( - "--host_filter_stats", nargs='+', - help="fastcat stats file after host filtering") - parser.add_argument( - "--params", default=None, - help="A csv containing the parameter key/values") - parser.add_argument( - "--versions", - help="directory contained CSVs containing name,version.") + "--status", nargs='+', + help="status") parser.add_argument( "--plannotate_json", help="Plannotate Json.") parser.add_argument( "--inserts_json", help="inserts Json.") - parser.add_argument( - "--report_name", - help="report name") parser.add_argument( "--lengths", help="report name") parser.add_argument( - "--qc_inserts", nargs='+', + "--downsampled_stats", + nargs='+') + parser.add_argument( + "--host_filter_stats", nargs='+', + help="fastcat stats file after host filtering") + parser.add_argument( + "--qc_inserts", help="insert vcfs") parser.add_argument( "--assembly_quality", nargs='+', @@ -351,5 +253,5 @@ def argparser(): if __name__ == "__main__": - args = argparse().parse_args() - main(args) + parser = argparser() + main(parser.parse_args()) diff --git a/bin/workflow_glue/report_utils/report_utils.py b/bin/workflow_glue/report_utils/report_utils.py new file mode 100755 index 0000000..6fdc5eb --- /dev/null +++ b/bin/workflow_glue/report_utils/report_utils.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python +"""Create tables for the report.""" + +from dominate.tags import p +from ezcharts.components import bcfstats +from ezcharts.components.ezchart import EZChart +from ezcharts.plots.categorical import barplot +from ezcharts.plots.util import read_files +import pandas as pd + +THEME = 'epi2melabs' + + +def tidyup_status_file(status_sheet, annotations): + """Tidy up the sample status file.""" + sample_status = pd.read_csv(status_sheet[0], header=None) + unique_samples = sample_status[0].unique() + sample_status_dic = {} + # Default all to success + for sample in unique_samples: + sample_status_dic[sample] = 'Completed successfully' + filter_pass = sample_status[sample_status[1] != 'Completed successfully'] + # Collect failures + failures = dict(zip(filter_pass[0], filter_pass[1])) + completed_annotations = list(annotations) + # If no failure for a sample status sheet then success + success = 
sample_status[sample_status[1] == 'Completed successfully'] + # Check for corresponding annotations, if none update status + no_annotations = success[~success[0].isin(completed_annotations)] + for sample in list(no_annotations[0]): + failures[sample] = 'Completed but no annotations found in the database' + passed_list = unique_samples.tolist() + # Update sample status dictionary with any failure messages + # Also create a list of passed samples to iterate through later + for k, v in failures.items(): + sample_status_dic[k] = v + if v != 'Completed but failed to reconcile': + passed_list.remove(k) + passed_list.sort() + # Output list of all sample names + all_sample_names = unique_samples.tolist() + all_sample_names.sort() + return (passed_list, all_sample_names, sample_status_dic) + + +def read_count_barplot(per_barcode_stats, report): + """Plot per sample read count bar chart.""" + # Per barcode read count + seq_summary = read_files(per_barcode_stats) + barcode_counts = ( + pd.DataFrame(seq_summary['sample_name'].value_counts()) + .sort_index() + .reset_index() + .rename( + columns={'index': 'Sample', 'sample_name': 'Count'}) + ) + with report.add_section("Read Counts", "Read Counts"): + p( + """Number of reads per sample.""" + ) + order = barcode_counts["Sample"].values.tolist() + plt = barplot( + data=barcode_counts, x='Sample', y='Count', order=order) + plt.xAxis = dict( + name="Sample", + type='category', + axisLabel=dict(interval=0, rotate=45), + axisTick=dict(alignWithLabel=True)) + plt.yAxis = dict(name="Count", type='value') + EZChart( + plt, + theme='epi2melabs') + + +def insert_len(start, end, length): + """Insert length calc.""" + if end >= start: + insert_length = end - start + else: + insert_length = (length - start) + end + return insert_length + + +def create_fastcat_dic(sample_names, raw, host_file, downsampled_file): + """Create dictionary using sample names and fastcat files available.""" + # Collect per sample fastcat stats check no optional files + if ('host_filter_stats/OPTIONAL_FILE' in host_file): + host_filt = host_file.remove('host_filter_stats/OPTIONAL_FILE') + if host_filt is None: + host_filt = [] + else: + host_filt = host_file + if ('downsampled_stats/OPTIONAL_FILE' in downsampled_file): + summary_stats = downsampled_file.remove( + 'downsampled_stats/OPTIONAL_FILE') + if summary_stats is None: + summary_stats = [] + else: + summary_stats = downsampled_file + per_sample_dic = {} + lists = {'Raw': raw, 'Hostfilt': host_filt, 'Downsampled': summary_stats} + for sample in sample_names: + new_dic = {} + item_search = '/' + sample + '.' + for list_name, fc_list in lists.items(): + # find index of item that contains the sample name as a substring + item_index = [i for i, s in enumerate(fc_list) if item_search in s] + if item_index: + indice = item_index[0] + new_dic[list_name] = fc_list[indice] + else: + pass + per_sample_dic[sample] = new_dic + return per_sample_dic + + +def variant_counts_table( + bcf_stats, + header="**Variant counts:**", report=None): + """Create a report section contains variant counts. + + :param bcf_stats: one or more outputs from `bcftools stats`. + :param sample_as_columns: transpose table to put data for each + sample into a column. + :param header: a markdown formatted header. + :param report: an HTMLSection instance. + + :returns: an HTMLSection instance, if `report` was provided the given + instance is modified and returned. + """ + bcf_stats = bcfstats.load_bcfstats(bcf_stats) + p(""" +Variant counts per sample. 
See output bcf
+file for info on individual variants.
+""")
+    df = bcf_stats['SN'].drop(columns='samples')
+    return df
+
+
+def trans_counts(
+        bcf_stats,
+        header="**Transitions and transversions:**", report=None):
+    """Create a report section with transition and transversion counts.
+
+    :param bcf_stats: one or more outputs from `bcftools stats`.
+    :param header: a markdown formatted header.
+    :param report: an HTMLSection instance.
+
+    :returns: a pandas DataFrame of transition and transversion counts
+        per sample.
+    """
+    bcf_stats = bcfstats.load_bcfstats(bcf_stats)
+    p("""
+Transition and transversion counts per sample.
+See output bcf file for info on individual variants.
+""")
+    df = bcf_stats['TSTV']
+    return df
diff --git a/bin/workflow_glue/run_plannotate.py b/bin/workflow_glue/run_plannotate.py
index 2967c4e..6b2ecce 100755
--- a/bin/workflow_glue/run_plannotate.py
+++ b/bin/workflow_glue/run_plannotate.py
@@ -5,12 +5,12 @@
 import json
 import os
-from aplanat import json_item
+from bokeh.embed import json_item
 import numpy as np
 import pandas as pd
 from plannotate.annotate import annotate
-from plannotate.bokeh_plot import get_bokeh
 import pysam
+from workflow_glue.bokeh_plot import get_bokeh
 from .util import wf_parser  # noqa: ABS101
diff --git a/main.nf b/main.nf
index d1a61a2..58673c7 100644
--- a/main.nf
+++ b/main.nf
@@ -302,10 +302,13 @@ process getVersions {
     seqkit version | sed 's/ /,/' >> versions.txt
     trycycler --version | sed 's/ /,/' >> versions.txt
     bedtools --version | sed 's/ /,/' >> versions.txt
-    flye --version | sed 's/ /,/' >> versions.txt
+    flye --version | sed 's/^/flye,/' >> versions.txt
     fastcat --version | sed 's/^/fastcat,/' >> versions.txt
     rasusa --version | sed 's/ /,/' >> versions.txt
     python -c "import spoa; print(spoa.__version__)" | sed 's/^/spoa,/' >> versions.txt
+    python -c "import pandas; print(pandas.__version__)" | sed 's/^/pandas,/' >> versions.txt
+    python -c "import plannotate; print(plannotate.__version__)" | sed 's/^/plannotate,/' >> versions.txt
+    python -c "import bokeh; print(bokeh.__version__)" | sed 's/^/bokeh,/' >> versions.txt
     """
 }
@@ -434,6 +437,7 @@ process report {
     report_name = "wf-clone-validation-report.html"
     """
     workflow-glue report \
+        $report_name \
         --downsampled_stats downsampled_stats/* \
         --revision $workflow.revision \
         --commit $workflow.commitId \
@@ -442,11 +446,10 @@
         --host_filter_stats host_filter_stats/* \
         --params params.json \
         --versions versions \
-        --report_name $report_name \
         --plannotate_json $plannotate_json \
         --lengths $lengths \
         --inserts_json $inserts_json \
-        --qc_inserts qc_inserts/* \
+        --qc_inserts qc_inserts \
        --assembly_quality assembly_quality/*
    """
}
diff --git a/nextflow.config b/nextflow.config
index f9db822..11cd493 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -45,9 +45,9 @@ params {
            "--fastq 'wf-clone-validation-demo/test'",
            "--primers 'wf-clone-validation-demo/primers.tsv'",
        ]
-        container_sha = "sha4b40b56828d0127650b778a795ea4cd6094a5a0c"
+        container_sha = "shab8863b29b5cfa427d7f8e5519bd71b956785c315"
         container_sha_medaka = "sha6a58ce1871560b8ddaada5cc1dfc51e5c03442e8"
-        common_sha = "sha0fa3896acb70eecc0d432c91a1516d596a87741c"
+        common_sha = "sha5fc720674a26bc63a6f31ed186344209175b54b1"
         agent = null
    }
}
@@ -59,7 +59,7 @@ manifest {
     description = 'De-novo reconstruction of synthetic plasmid sequences.'
     mainScript = 'main.nf'
     nextflowVersion = '>=23.04.2'
-    version = 'v0.4.0'
+    version = 'v0.5.0'
 }

executor {
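
A note for reviewers on the Bokeh 3 port in bin/workflow_glue/bokeh_plot.py: the
scroll-zoom behaviour comes from the pair of calls add_tools(WheelZoomTool(
zoom_on_axis=False)) and toolbar.active_scroll used in get_bokeh() above. A
minimal sketch isolating that idiom, assuming only a bokeh 3.x install (the toy
circle figure is illustrative, not part of the patch):

    from bokeh.models import WheelZoomTool
    from bokeh.plotting import figure

    p = figure(height=300, width=300, tools=['save', 'pan'])
    # stand-in for the plasmid backbone circle drawn in get_bokeh()
    p.circle(x=0, y=0, radius=.18, line_color="#000000", fill_color=None)
    p.toolbar.logo = None
    # zoom_on_axis=False zooms both ranges together, preserving the
    # circular map's aspect when the user scrolls
    p.add_tools(WheelZoomTool(zoom_on_axis=False))
    # make the wheel zoom active by default, with no toolbar click needed
    p.toolbar.active_scroll = p.select_one(WheelZoomTool)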
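
The sliced() helper added to report.py underpins format_msa(), which assumes
each entry in inserts['msa'] begins with a 10-character name field and re-wraps
the remaining alignment row into interleaved 80-column blocks for the pre-
formatted MSA section. A small self-contained sketch of the slicing, using a
toy width of 4 in place of 80:

    from itertools import count, takewhile

    def sliced(seq, n):
        """Slice a string into n-character chunks (as in report.py)."""
        return list(takewhile(len, (seq[i: i + n] for i in count(0, n))))

    # takewhile(len, ...) stops at the first empty slice past the end
    assert sliced("ACGTACGTAC", 4) == ["ACGT", "ACGT", "AC"]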
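
The rewritten report.py is composed from a handful of ezcharts primitives used
throughout this patch: labs.LabsReport sections, dropdown Tabs, and
DataTable.from_pandas. A minimal sketch of that skeleton, assuming an ezcharts
install; the file paths, sample names, and DataFrame contents here are
placeholders, not workflow outputs:

    import pandas as pd
    from ezcharts.components.reports import labs
    from ezcharts.layout.snippets import Tabs
    from ezcharts.layout.snippets.table import DataTable

    # LabsReport takes a title, workflow name, params JSON and versions dir
    report = labs.LabsReport(
        "Example report", "wf-clone-validation-new",
        "params.json", "versions")
    with report.add_section("Example section", "Example"):
        tabs = Tabs()
        with tabs.add_dropdown_menu():
            for sample in ("barcode01", "barcode02"):
                # one dropdown entry per sample, as report.py does
                with tabs.add_dropdown_tab(sample):
                    DataTable.from_pandas(
                        pd.DataFrame(
                            {"Sample": [sample], "Length": [4361]}),
                        use_index=False)
    report.write("example_report.html")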
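
report_utils.insert_len() allows for the plasmid being circular: when the
insert's end coordinate is numerically smaller than its start, the insert spans
the origin and its length wraps around. A worked sketch with made-up
coordinates:

    def insert_len(start, end, length):
        """Insert length on a circular plasmid (as in report_utils.py)."""
        if end >= start:
            return end - start
        # wraps the origin: tail of the plasmid plus the leading piece
        return (length - start) + end

    assert insert_len(100, 500, 5000) == 400   # contiguous insert
    assert insert_len(4800, 200, 5000) == 400  # spans origin: 200 + 200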