diff --git a/NEWS b/NEWS.md
similarity index 88%
rename from NEWS
rename to NEWS.md
index 115641e4c..13554c7c6 100644
--- a/NEWS
+++ b/NEWS.md
@@ -1,3 +1,75 @@
+Release 1.19 (12th December 2023)
+---------------------------------
+
+New work and changes:
+
+* Samtools coverage: add a new --plot-depth option to draw depth (of
+  coverage) rather than the percentage of bases covered.
+  (PR #1910.  Thanks to Pierre Lindenbaum)
+
+* Samtools merge / sort: add a lexicographical name-sort option via the -N
+  option.  The "natural" alpha-numeric sort is still available via -n.
+  (PR #1900, fixes #1500.  Reported by Steve Huang)
+
+* Samtools view: add -N ^NAME_FILE and -R ^RG_FILE options.  The standard -N
+  and -R options only output reads matching a specified list of names or
+  read-groups.  With a caret (^) prefix these may be negated to only output
+  read not matching the specified files.
+  (PR #1896, fixes #1895.  Suggested by Feng Tian)
+
+* Cope with Htslib's change to no longer close stdout on hts_close.
+  Htslib companion PR is samtools/htslib#1665.
+  (PR #1909.  Thanks to John Marshall)
+
+* Plot-bamstats: add a new plot of the read lengths ("RL") from samtools
+  stats output.
+  (PR #1922, fixes #1824.  Thanks to @erboone, suggested by Martin Pollard)
+
+* Samtools split: support splitting files based on the contents of
+  auxiliary tags.  Also adds a -M option to limit the number of files split
+  can make, to avoid accidental resource over-allocation, and fixes some
+  issues with --write-index.
+  (PR #1222, PR #1933, fixes #1758.  Thanks to Valeriu Ohan, suggested by
+  Scott Norton)
+
+Bug Fixes:
+
+* Samtools stats: empty barcode tags are now treated as having no barcode.
+  (PR #1929, fixes #1926.  Reported by Jukka Matilainen)
+
+* Samtools cat: add support for non-seekable streams.  The file format
+  detection blocked pipes from working before, but now files may be
+  non-seekable such as stdin or a pipe.
+  (PR #1930, fixes #1731.  Reported by Julian Hess)
+
+* Samtools mpileup -aa (absolutely all positions) now produces an output
+  even when given an empty input file.
+  (PR #1939.  Reported by Chang Y)
+
+* Samtools markdup: speed up optical duplicate tagging on regions with very
+  deep data.
+  (PR #1952)
+
+Documentation:
+
+* Samtools mpileup: add more usage examples to the man page.
+  (PR #1913, fixes #1801)
+
+* Samtools fastq: explicitly document the order that filters apply.
+  (PR #1907)
+
+* Samtools merge: fix example output to use an uppercase RG PL field.
+  (PR #1917.  Thanks to John Marshall.  Reported by Michael Macias)
+
+* Add hclen SAM filter documentation.
+  (PR #1902.  See also samtools/htslib#1660)
+
+* Samtools consensus: remove the -5 option from documentation.  This option
+  was renamed before the consensus subcommand was merged, but accidentally
+  left in the man page.
+  (PR #1924)
+
+
 Release 1.18 (25th July 2023)
 -----------------------------
 
@@ -7,51 +79,51 @@ New work and changes:
   minimiser sort to arrange the minimiser values in the same order as they
   occur in the reference genome. This is acts as an extremely crude and
   simplistic read aligner that can be used to boost read compression.
-  (PR#1818)
+  (PR #1818)
 
 * Add a --duplicate-count option to markdup.  Adds the number of duplicates
   (including itself) to the original read in a 'dc' tag.
-  (PR#1816. Thanks to wulj2)
+  (PR #1816. Thanks to wulj2)
 
 * Make calmd handle unaligned data or empty files without throwing an error.
   This is to make pipelines work more smoothly.  A warning will still be issued.
-  (PR#1841, fixes #1839.  Reported by Filipe G. Vieira)
+  (PR #1841, fixes #1839.  Reported by Filipe G. Vieira)
 
 * Consistent, more comprehensive flag filtering for fasta/fastq.  Added
   --rf/--incl[ude]-flags and long options for -F (--excl[ude]-flags and
   -f (--require-flags).
-  (PR#1842.  Thanks to Devang Thakkar)
+  (PR #1842.  Thanks to Devang Thakkar)
 
 * Apply fastq --input-fmt-option settings.  Previously any options specified
   were not being applied to the input file.
-  (PR#1855.  Thanks to John Marshall)
+  (PR #1855.  Thanks to John Marshall)
 
 * Add fastq -d TAG[:VAL] check.  This mirrors view -d and will only output
   alignments that match TAG (and VAL if specified).
-  (PR#1863, fixes #1854.  Requested by Rasmus Kirkegaard)
+  (PR #1863, fixes #1854.  Requested by Rasmus Kirkegaard)
 
 * Extend import --order TAG to --order TAG:length.  If length is specified, the
   tag format goes from integer to a 0-padded string format.  This is a
   workaround for BAM and CRAM that cannot encode an order tag of over 4 billion
   records.
-  (PR#1850, fixes #1847.  Reported by Feng Tian)
+  (PR #1850, fixes #1847.  Reported by Feng Tian)
 
 * New -aa mode for consensus.  This works like the -aa option in depth and
   mpileup. The single 'a' reports all bases in contigs covered by alignments.
   Double 'aa' (or '-a -a') reports Ns even for the references with no alignments
   against them.
-  (PR#1851, fixes #1849.  Requested by Tim Fennell)
+  (PR #1851, fixes #1849.  Requested by Tim Fennell)
 
 * Add long option support to samtools index.
-  (PR#1872, fixes #1869.  Reported by Jason Bacon)
+  (PR #1872, fixes #1869.  Reported by Jason Bacon)
 
 * Be consistent with rounding of "average length" in samtools stats.
-  (PR#1876, fixes #1867.  Reported by Jelinek-J)
+  (PR #1876, fixes #1867.  Reported by Jelinek-J)
 
 * Add option to ampliconclip that marks reads as unmapped when they do not have
   enough aligned bases left after clipping.  Default is to unmap reads with zero
   aligned bases.
-  (PR#1865, fixes #1856.  Requested by ces)
+  (PR #1865, fixes #1856.  Requested by ces)
 
 Bug Fixes:
 
@@ -62,45 +134,45 @@ Bug Fixes:
   although the change in 1.11 was bug-fixing multi-threaded index queries.
   This bug did not affect index building.  There is no need to reindex your
   CRAM files.
-  (PR#samtools/htslib#1574, PR#samtools/htslib#1640. Fixes
+  (PR #samtools/htslib#1574, PR #samtools/htslib#1640. Fixes
   #samtools/htslib#1569, #samtools/htslib#1639, #1808, #1819.  Reported by
   xuxif, Jens Reeder and Jared Simpson)
 
 * Fix a sort -M bug (regression) when merging sub-blocks.  Data was valid but
   in a poor order for compression.
-  (PR#1812)
+  (PR #1812)
 
 * Fix bug in split output format.  Now SAM and CRAM format can chosen as well
   as BAM.  Also a documentation change, see below.
-  (PR#1821)
+  (PR #1821)
 
 * Add error checking to view -e filter expression code.  Invalid expressions
   were not returning an error code.
-  (PR#1833, fixes #1829.  Reported by Steve Huang)
+  (PR #1833, fixes #1829.  Reported by Steve Huang)
 
 * Fix reheader CRAM output version.  Sets the correct CRAM output version for
   non-3.0 CRAMs.
-  (PR#1868, fixes #1866.  Reported by John Marshall)
+  (PR #1868, fixes #1866.  Reported by John Marshall)
 
 Documentation:
 
 * Expand the default filtering information on the mpileup man page.
-  (PR#1802, fixes #1801.  Reported by gevro)
+  (PR #1802, fixes #1801.  Reported by gevro)
 
 * Add an explanation of the default behaviour of split files on generating
   a file for reads with missing or unrecognised RG tags.  Also a small bug fix,
   see above.
-  (PR#1821, fixes #1817.  Reported by Steve Huang)
+  (PR #1821, fixes #1817.  Reported by Steve Huang)
 
 * In the INSTALL instructions, switched back to openssl for Alpine.  This
   matches the current Alpine Linux practice.
-  (PR#1837, see htslib#1591.  Reported by John Marshall)
+  (PR #1837, see htslib#1591.  Reported by John Marshall)
 
 * Fix various typos caught by lintian parsers.
-  (PR#1877.  Thanks to Étienne Mollier)
+  (PR #1877.  Thanks to Étienne Mollier)
 
 * Document consensus --qual-calibration option.
-  (PR#1880, fixes #1879.  Reported by John Marshall)
+  (PR #1880, fixes #1879.  Reported by John Marshall)
 
 * Updated the page about samtools duplicate marking with more detail at
   www.htslib.org/algorithms/duplicate.html
@@ -108,7 +180,7 @@ Documentation:
 Non user-visible changes and build improvements:
 
 * Removed a redundant line that caused a warning in gcc-13.
-  (PR#1838)
+  (PR #1838)
 
 
 Release 1.17 (21st February 2023)
@@ -121,115 +193,115 @@ New work and changes:
   reverse direction, sequence and its quality values are reversed and
   complemented and the reverse flag is reset.  Supplementary and secondary
   alignment data are discarded.
-  (PR#1767, implements #1682. Requested by dkj)
+  (PR #1767, implements #1682. Requested by dkj)
 
 * New samtools cram-size subcommand.  It writes out metrics about a CRAM file
   reporting aggregate sizes per block "Content ID" fields, the data-series
   contained within them, and the compression methods used.
-  (PR#1777)
+  (PR #1777)
 
 * Added a --sanitize option to fixmate and view.  This performs some sanity
   checks on the state of SAM record fields, fixing up common mistakes made by
   aligners.
-  (PR#1698)
+  (PR #1698)
 
 * Permit 1 thread with samtools view.  All other subcommands already allow this
   and it does provide a modest speed increase.
-  (PR#1755, fixes #1743. Reported by Goran Vinterhalter)
+  (PR #1755, fixes #1743. Reported by Goran Vinterhalter)
 
 * Add CRAM_OPT_REQUIRED_FIELDS option for view -c.  This is a big speed up for
   CRAM (maybe 5-fold), but it depends on which filtering options are being used.
-  (PR#1776, fixes #1775. Reported by Chang Y)
+  (PR #1776, fixes #1775. Reported by Chang Y)
 
 * New filtering options in samtools depth.  The new --excl-flags option is a
   synonym for -G, with --incl-flags and --require-flags added to match view
   logic.
-  (PR#1718, fixes #1702. Reported by Dario Beraldi)
+  (PR #1718, fixes #1702. Reported by Dario Beraldi)
 
 * Speed up calmd's slow handling of non-position-sorted data by adding caching.
   This uses more memory but is only activated when needed.
-  (PR#1723, fixes #1595. Reported by lxwgcool)
+  (PR #1723, fixes #1595. Reported by lxwgcool)
 
 * Improve samtools consensus for platforms with instrument specific profiles,
   considerably helping for data with very different indel error models and
   providing base quality recalibration tables. On PacBio HiFi, ONT and
   Ultima Genomics consensus qualities are also redistributed within homopolymers
   and the likelihood of nearby indel errors is raised.
-  (PR#1721, PR#1733)
+  (PR #1721, PR #1733)
 
 * Consensus --mark-ins option.  This permits he consensus output to include a
   markup indicating the next base is an insertion. This is necessary as we need
   a way of outputting both consensus and also how that consensus marries up with
   the reference coordinates.
-  (PR#1746)
+  (PR #1746)
 
 * Make faidx/fqidx output line length default to the input line length.
-  (PR#1738, fixes #1734. Reported by John Marshall)
+  (PR #1738, fixes #1734. Reported by John Marshall)
 
 * Speed up optical duplicate checking where data has a lot of duplicates
   compared to non-duplicates.
-  (PR#1779, fixes #1771. Reported by Poshi)
+  (PR #1779, fixes #1771. Reported by Poshi)
 
 * For collate use TMPDIR environment variable, when looking for a temporary
   folder.
-  (PR#1782, based on PR#1178 and fixes #1172.  Reported by Martin Pollard)
+  (PR #1782, based on PR #1178 and fixes #1172.  Reported by Martin Pollard)
 
 Bug Fixes:
 
 * Fix stats breakage on long deletions when given a reference.
-  (PR#1712, fixes #1707. Reported by John Didion)
+  (PR #1712, fixes #1707. Reported by John Didion)
 
 * In ampliconclip, stop hard clipping from wrongly removing entire reads.
-  (PR#1722, fixes #1717. Reported by Kevin Xu)
+  (PR #1722, fixes #1717. Reported by Kevin Xu)
 
 * Fix bug in ampliconstats where references mentioned in the input file headers
   but not in the bed file would cause it to complain that the SAM headers were
   inconsistent.
-  (PR#1727, fixes #1650. Reported by jPontix)
+  (PR #1727, fixes #1650. Reported by jPontix)
 
 * Fixed SEGV in samtools collate when no filename given.
-  (PR#1724)
+  (PR #1724)
 
 * Changed the default UMI barcode regex in markdup.  The old regex was too
   restrictive.  This version will at least allow the default read name UMI as
   given in the Illumina example documentation.
-  (PR#1737, fixes #1730. Reported by yloemie)
+  (PR #1737, fixes #1730. Reported by yloemie)
 
 * Fix samtools consensus buffer overrun with MD:Z handling.
-  (PR#1745, fixes #1744. Reported by trilisser)
+  (PR #1745, fixes #1744. Reported by trilisser)
 
 * Fix a buffer read-overflow in mpileup and tview on sequences with seq "*".
-  (PR#1747)
+  (PR #1747)
 
 * Fix view -X command line parsing that was broken in 1.15.
-  (PR#1772, fixes #1720.  Reported by Francisco Rodríguez-Algarra
+  (PR #1772, fixes #1720.  Reported by Francisco Rodríguez-Algarra
   and Miguel Machado)
 
 * Stop samtools view -d from reporting meaningless system errors when
   tag validation fails.
-  (PR#1796)
+  (PR #1796)
 
 Documentation:
 
 * Add a description of the samtools tview display layout to the man page.
   Documents . vs , and upper vs lowercase. Adds a -s sample example, and
   documents the -w option.
-  (PR#1765, fixes #1759. Reported by Lucas Ferreira da Silva)
+  (PR #1765, fixes #1759. Reported by Lucas Ferreira da Silva)
 
 * Clarify intention of samtools fasta/q in man page and soft vs hard
   clipping.
-  (PR#1794, fixes #1792. Reported by Ryan Lorig-Roach)
+  (PR #1794, fixes #1792. Reported by Ryan Lorig-Roach)
 
 * Minor fix to wording of mpileup --rf usage and man page.
-  (PR#1795, fixes #1791. Reported by Luka Pavageau)
+  (PR #1795, fixes #1791. Reported by Luka Pavageau)
 
 Non user-visible changes and build improvements:
 
 * Use POSIX grep in testing as egrep and fgrep are considered obsolete.
-  (PR#1726, thanks to David Seifert)
+  (PR #1726, thanks to David Seifert)
 
 * Switch MacOS CI tests to an ARM-based image.
-  (PR#1770)
+  (PR #1770)
 
 
 Release 1.16.1 (2nd September 2022)
@@ -240,11 +312,11 @@ Bug fixes:
  * Fixed a bug with the template-coordinate sort which caused incorrect
    ordering when using threads, or processing large files that don't
    fit completely in memory.
-   (PR#1703, thanks to Nils Homer)
+   (PR #1703, thanks to Nils Homer)
 
  * Fixed a crash that occurred when trying to use `samtools merge` in
    template-coordinate mode.
-   (PR#1705, thanks to Nils Homer)
+   (PR #1705, thanks to Nils Homer)
 
 Release 1.16 (18th August 2022)
 -------------------------------
@@ -253,90 +325,90 @@ New work and changes:
 
  * samtools reference command added.  This subcommand extracts the embedded
    reference out of a CRAM file.
-   (PR#1649, addresses #723.  Requested by Torsten Seemann)
+   (PR #1649, addresses #723.  Requested by Torsten Seemann)
 
  * samtools import now adds grouped by query-name to the header.
-   (PR#1633, thanks to Nils Homer)
+   (PR #1633, thanks to Nils Homer)
 
  * Made samtools view read error messages more generic.  Former error message
    would claim that there was a "truncated file or corrupt BAM index file" with
    no real justification.  Also reset errno in stream_view which could lead to
    confusing error messages.
-   (PR#1645, addresses some of the issues in #1640.  Reported by Jian-Guo Zhou)
+   (PR #1645, addresses some of the issues in #1640.  Reported by Jian-Guo Zhou)
 
  * Make samtools view -p also clear mqual, tlen and cigar.
-   (PR#1647, fixes #1606.  Reported by eboyden)
+   (PR #1647, fixes #1606.  Reported by eboyden)
 
  * Add bedcov option -c to report read count.
-   (PR#1644, fixes #1629.  Reported by Natchaphon Rajudom)
+   (PR #1644, fixes #1629.  Reported by Natchaphon Rajudom)
 
  * Add UMI/barcode handling to samtools markdup.
-   (PR#1630, fixes #1358 and #1514.  Reported by Gert Hulselmans and Poshi)
+   (PR #1630, fixes #1358 and #1514.  Reported by Gert Hulselmans and Poshi)
 
  * Add a new template coordinate sort order to samtools sort and
    samtools merge.  This is useful when working with unique molecular
    identifiers (UMIs).
-   (PR#1605, fixes #1591.  Thanks to Nils Homer)
+   (PR #1605, fixes #1591.  Thanks to Nils Homer)
 
  * Rename mpileup --ignore-overlaps to --ignore-overlaps-removal
    or --disable-overlap-removal.  The previous name was ambiguous and was often
    read as an option to enable removal of overlapping bases, while in reality
    this is on by default and the option turns off the ability to remove
    overlapping bases.
-   (PR#1666, fixes #1663.  Reported by yangdingyangding)
+   (PR #1666, fixes #1663.  Reported by yangdingyangding)
 
  * The dict command can now read BWA's .alt file and add AH:* tags
    indicating reference sequences that represent alternate loci.
-   (PR#1676.  Thanks to John Marshall)
+   (PR #1676.  Thanks to John Marshall)
 
  * The "samtools index" command can now accept multiple alignment filenames
    with the new -M option, and will index each of them separately. (Specifying
    the output index filename via out.index or the new -o option is currently
    only applicable when there is only one alignment file to be indexed.)
-   (PR#1674.  Reported by Abigail Ramsøe and Nicola Romanò.
+   (PR #1674.  Reported by Abigail Ramsøe and Nicola Romanò.
     Thanks to John Marshall)
 
  * Allow samtools fastq -T "*".
    This allows all tags from SAM records to be written to fastq headers. This is
    a counterpart to samtools import -T "*".
-   (PR#1679.  Thanks to cjw85)
+   (PR #1679.  Thanks to cjw85)
 
 Bug Fixes:
 
  * Re-enable --reference option for samtools depth.  The reference is not used
    but this makes the command line usage compatible with older releases.
-   (PR#1646, fixes #1643.  Reported by Randy Harr)
+   (PR #1646, fixes #1643.  Reported by Randy Harr)
 
  * Fix regex coordinate bug in samtools markdup.
-   (PR#1657, fixes #1642.  Reported by Randy Harr)
+   (PR #1657, fixes #1642.  Reported by Randy Harr)
 
  * Fix divide by zero in plot-bamstats -m, on unmapped data.
-   (PR#1678, fixes #1675.  Thanks to Shane McCarthy)
+   (PR #1678, fixes #1675.  Thanks to Shane McCarthy)
 
  * Fix missing RG headers when using samtools merge -r.
-   (PR#1683, addresses htslib#1479.  Reported by Alex Leonard)
+   (PR #1683, addresses htslib#1479.  Reported by Alex Leonard)
 
  * Fix a possible unaligned access in samtools reference.
-   (PR#1696)
+   (PR #1696)
 
 Documentation:
 
  * Add documentation on CRAM compression profiles and some of the newer options
    that appear in CRAM 3.1 and above.
-   (PR#1659, fixes #1656.  Reported by Matthias De Smet)
+   (PR #1659, fixes #1656.  Reported by Matthias De Smet)
 
  * Add "sclen" filter expression keyword documentation.
-   (PR#1661, see also htslib#1441)
+   (PR #1661, see also htslib#1441)
 
  * Extend FILTER EXPRESSION man page section to match the changes made in
    HTSlib.
-   (PR#1687, samtools/htslib#1478)
+   (PR #1687, samtools/htslib#1478)
 
 Non user-visible changes and build improvements:
 
  * Ensure generated test files are ignored (by git) and cleaned (by make
    testclean)
-   (PR#1692, Thanks to John Marshall)
+   (PR #1692, Thanks to John Marshall)
 
 Release 1.15.1 (7th April 2022)
 -------------------------------
@@ -701,7 +773,7 @@ Release 1.12 (17th March 2021)
 
  * samtools markdup now benefits from an increase in performance in the
    situation when a single read has tens or hundreds of thousands of duplicates.
-   Thanks to `@denriquez` for reporting the issue. (#1345; fixed #1325)
+   Thanks to @denriquez for reporting the issue. (#1345; fixed #1325)
 
  * The documentation for samtools ampliconstats has been added to the samtools
    man page. (#1351)
@@ -727,7 +799,7 @@ Release 1.12 (17th March 2021)
    (#1343)
 
  * The documentation for `samtools depth -s` has been improved.
-   Thanks to `@wulj2`. (#1355)
+   Thanks to @wulj2. (#1355)
 
  * Fixed a `samtools merge` segmentation fault when it failed to merge
    header `@PG` records. Thanks to John Marshall.  (#1394; reported by
@@ -903,10 +975,10 @@ Changes affecting the whole of samtools, or multiple sub-commands:
    for invalid headers, it is possible that some illegal files will now
    be rejected when they would have been allowed by earlier versions. (#998)
 
-   Examples of problems that will now be rejected include @SQ lines with
-   no SN: tag, and @RG or @PG lines with no ID: tag.
+   Examples of problems that will now be rejected include `@SQ` lines with
+   no SN: tag, and `@RG` or `@PG` lines with no ID: tag.
 
- * samtools sub-commands will now add '@PG' header lines to output sam/bam/cram
+ * samtools sub-commands will now add `@PG` header lines to output sam/bam/cram
    files.  To disable this, use the '--no-PG' option. (#1087; #1097)
 
  * samtools now supports alignment records with reference positions greater
@@ -1086,7 +1158,7 @@ Changes affecting specific sub-commands:
 
  * samtools quickcheck:
 
-   - Add unmapped (-u) option, which disables the check for @SQ lines in
+   - Add unmapped (-u) option, which disables the check for `@SQ` lines in
      the header. (#920, thanks to Shane McCarthy)
 
  * samtools reheader:
@@ -1110,7 +1182,7 @@ Changes affecting specific sub-commands:
      header file to be specified in its own option. (#961)
 
    - Fixed bug where samtools split would crash if it read a SAM header that
-     contained an @RG line with no ID tag. (#954, reported by @blue-bird1)
+     contained an `@RG` line with no ID tag. (#954, reported by @blue-bird1)
 
  * samtools stats:
 
@@ -1333,7 +1405,7 @@ Release 1.7 (26th January 2018)
 * samtools bedcov will now ignore BED comment and header lines (#571; thanks
   to Daniel Baker).
 
-* samtools collate now updates the @HD SO: and GO: tags, and sort will
+* samtools collate now updates the `@HD` SO: and GO: tags, and sort will
   remove a GO: tag if present.  (#757; reported by Imran Haque).
 
 * Bug-fixes:
@@ -1400,7 +1472,7 @@ Release 1.5 [Solstice Release] (21st June 2017)
   (#675; thanks to Patrick Marks of 10xgenomics).
 
 Release 1.4.1  (8th May 2017)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-----------------------------
 
 * Added options to fastq to create fastq files from BC (or other)
   tags.
@@ -1425,13 +1497,13 @@ Release 1.4.1  (8th May 2017)
 
 
 Release 1.4  (13 March 2017)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+----------------------------
 
 Noteworthy changes in samtools:
 
 * Fixed Issue #345 - out-by-one error in insert-size in samtools stats
 
-* bam_split now add a @PG header to the bam file
+* bam_split now add a `@PG` header to the bam file
 
 * Added mate cigar tag support to fixmate
 
@@ -1463,7 +1535,7 @@ Noteworthy changes in samtools:
 
 
 Beta Release 1.3.1  (22 April 2016)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-----------------------------------
 
 Noteworthy changes in samtools:
 
@@ -1488,20 +1560,20 @@ Noteworthy changes in samtools:
   field (#517).
 
 * The merge and sort commands now handle (unusual) BAM files that have no
-  textual @SQ headers (#548, #550).
+  textual `@SQ` headers (#548, #550).
 
 * Sorting files containing @CO headers no longer duplicates the comment
   headers, which previously happened on large sorts for which temporary
   files were needed (#563).
 
-* The rmdup and view -l commands no longer crash on @RG headers that do not
+* The rmdup and view -l commands no longer crash on `@RG` headers that do not
   have a LB field (#538).
 
 * Fixed miscellaneous issues #128, #130, #131, #489, and #514.
 
 
 Beta Release 1.3  (15 December 2015)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------------
 
 Noteworthy changes in samtools:
 
@@ -1536,7 +1608,7 @@ Noteworthy changes in samtools:
   Previously it listened to $(LDLIBS) instead; if you were overriding
   that, you should now override LIBS rather than LDLIBS.
 
-* A new addreplacerg command that adds or alters @RG headers and RG:Z
+* A new addreplacerg command that adds or alters `@RG` headers and RG:Z
   record tags has been added.
 
 * The rmdup command no longer immediately aborts (previously it always
@@ -1597,7 +1669,7 @@ Noteworthy changes in samtools:
 
 
 Beta Release 1.2  (2 February 2015)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-----------------------------------
 
 Noteworthy changes in samtools:
 
@@ -1613,7 +1685,7 @@ Noteworthy changes in samtools:
 
 
 Beta Release 1.1 (19 September, 2014)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-------------------------------------
 
 Notable changes in samtools:
 
@@ -1628,7 +1700,7 @@ Notable changes in samtools:
 
 
 Beta Release 1.0 (15 August, 2014)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+----------------------------------
 
 First release of HTSlib-based samtools.
 
@@ -1641,13 +1713,13 @@ superseded by BGZF/bgzip and have been removed from samtools.
 
 
 Beta Release 0.1.20 (15 August, 2014)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-------------------------------------
 
 Final release of standalone samtools.
 
 
 Beta Release 0.1.19 (15 March, 2013)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------------
 
 Notable changes in samtools and bcftools:
 
@@ -1677,7 +1749,7 @@ http://github.com/samtools/samtools/commits/master
 
 
 Beta Release 0.1.18 (2 September, 2011)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------------------
 
 Notable changes in samtools:
 
@@ -1704,14 +1776,14 @@ Notable changes in bcftools:
 
 
 Beta Release 0.1.17 (6 July, 2011)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+----------------------------------
 
-With the maturity of `mpileup' and the lack of update in the `pileup' command,
-the `pileup' command is now formally dropped. Most of the pileup functionality,
+With the maturity of `mpileup` and the lack of update in the `pileup` command,
+the `pileup` command is now formally dropped. Most of the pileup functionality,
 such as outputting mapping quality and read positions, have been added
-`mpileup'.
+`mpileup`.
 
-Since this release, `bcftools view' is able to perform contrast SNP calling
+Since this release, `bcftools view` is able to perform contrast SNP calling
 (option -T) for discovering de novo and/or somatic mutations between a pair of
 samples or in a family trio. Potential mutations are scored by a log likelihood
 ratio, which is very simple in math, but should be comparable to more
@@ -1729,7 +1801,7 @@ Other notable changes in samtools:
  * Fixed an issue where mpileup does not apply BAQ for the first few reads when
    a region is specified.
 
- * Fixed an issue where `faidx' does not work with FASTA files with long lines.
+ * Fixed an issue where `faidx` does not work with FASTA files with long lines.
 
  * Bugfix: wrong SP genotype information in the BCF output.
 
@@ -1748,14 +1820,14 @@ Other notable changes in bcftools:
 
 
 Beta Release 0.1.16 (21 April, 2011)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------------
 
 Notable changes in samtools:
 
- * Support the new SAM/BAM type `B' in the latest SAM spec v1.4.
+ * Support the new SAM/BAM type `B` in the latest SAM spec v1.4.
 
- * When the output file of `samtools merge' exists, do not overwrite it unless
-   a new command-line option `-f' is applied.
+ * When the output file of `samtools merge` exists, do not overwrite it unless
+   a new command-line option `-f` is applied.
 
  * Bugfix: BED support is not working when the input BED is not sorted.
 
@@ -1773,11 +1845,11 @@ Notable changes in bcftools:
  * Use Brent's method to estimate the site allele frequency when EM converges
    slowly. The resulting ML estimate of allele frequnecy is more accurate.
 
- * Added the `ldpair' command, which computes r^2 between SNP pairs given in
+ * Added the `ldpair` command, which computes r^2 between SNP pairs given in
    an input file.
 
-Also, the `pileup' command, which has been deprecated by `mpileup' since
-version 0.1.10, will be dropped in the next release. The old `pileup' command
+Also, the `pileup` command, which has been deprecated by `mpileup` since
+version 0.1.10, will be dropped in the next release. The old `pileup` command
 is substandard and causing a lot of confusion.
 
 (0.1.16: 21 April 2011, r963:234)
@@ -1785,27 +1857,27 @@ is substandard and causing a lot of confusion.
 
 
 Beta Release 0.1.15 (10 April, 2011)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------------
 
 Notable changes:
 
  * Allow to perform variant calling or to extract information in multiple
-   regions specified by a BED file (`samtools mpileup -l', `samtools view -L'
-   and `bcftools view -l').
+   regions specified by a BED file (`samtools mpileup -l`, `samtools view -L`
+   and `bcftools view -l`).
 
- * Added the `depth' command to samtools to compute the per-base depth with a
-   simpler interface. File `bam2depth.c', which implements this command, is the
+ * Added the `depth` command to samtools to compute the per-base depth with a
+   simpler interface. File `bam2depth.c`, which implements this command, is the
    recommended example on how to use the mpileup APIs.
 
  * Estimate genotype frequencies with ML; perform chi^2 based Hardy-Weinberg
    test using this estimate.
 
- * For `samtools view', when `-R' is specified, drop read groups in the header
+ * For `samtools view`, when `-R` is specified, drop read groups in the header
    that are not contained in the specified file.
 
- * For `samtools flagstat', separate QC-pass and QC-fail reads.
+ * For `samtools flagstat`, separate QC-pass and QC-fail reads.
 
- * Improved the command line help of `samtools mpileup' and `bcftools view'.
+ * Improved the command line help of `samtools mpileup` and `bcftools view`.
 
  * Use a global variable to control the verbose level of samtools stderr
    output. Nonetheless, it has not been full utilized.
@@ -1818,7 +1890,7 @@ Notable changes:
 
 
 Beta release 0.1.14 (21 March, 2011)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------------
 
 This release implements a method for testing associations for case-control
 data. The method does not call genotypes but instead sums over all genotype
@@ -1832,48 +1904,48 @@ ploidy of each sample (see also manual bcftools/bcftools.1).
 
 Other notable changes:
 
- * Added `bcftools view -F' to parse BCF files generated by samtools r921 or
+ * Added `bcftools view -F` to parse BCF files generated by samtools r921 or
    older which encodes PL in a different way.
 
- * Changed the behavior of `bcftools view -s'. Now when a list of samples is
+ * Changed the behavior of `bcftools view -s`. Now when a list of samples is
    provided, the samples in the output will be reordered to match the ordering
    in the sample list. This change is mainly designed for association test.
 
- * Sped up `bcftools view -v' for target sequencing given thousands of samples.
-   Also added a new option `view -d' to skip loci where only a few samples are
+ * Sped up `bcftools view -v` for target sequencing given thousands of samples.
+   Also added a new option `view -d` to skip loci where only a few samples are
    covered by reads.
 
  * Dropped HWE test. This feature has never been implemented properly. An EM
    should be much better. To be implemented in future.
 
- * Added the `cat' command to samtools. This command concatenate BAMs with
+ * Added the `cat` command to samtools. This command concatenate BAMs with
    identical sequence dictionaries in an efficient way. Modified from bam_cat.c
    written by Chris Saunders.
 
- * Added `samtools view -1' to write BAMs at a low compression level but twice
-   faster to create. The `sort' command generates temporary files at a low
+ * Added `samtools view -1` to write BAMs at a low compression level but twice
+   faster to create. The `sort` command generates temporary files at a low
    compression level as well.
 
- * Added `samtools mpileup -6' to accept "BAM" with Illumina 1.3+ quality
+ * Added `samtools mpileup -6` to accept "BAM" with Illumina 1.3+ quality
    strings (strictly speaking, such a file is not BAM).
 
- * Added `samtools mpileup -L' to skip INDEL calling in regions with
+ * Added `samtools mpileup -L` to skip INDEL calling in regions with
    excessively high coverage. Such regions dramatically slow down mpileup.
 
- * Updated `misc/export2sam.pl', provided by Chris Saunders from Illumina Inc.
+ * Updated `misc/export2sam.pl`, provided by Chris Saunders from Illumina Inc.
 
 (0.1.14: 21 March 2011, r933:170)
 
 
 
 Beta release 0.1.13 (1 March, 2011)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-----------------------------------
 
 The most important though largely invisible modification is the change of the
 order of genotypes in the PL VCF/BCF tag. This is to conform the upcoming VCF
 spec v4.1. The change means that 0.1.13 is not backward compatible with VCF/BCF
 generated by samtools older than r921 inclusive.  VCF/BCF generated by the new
-samtools will contain a line `##fileformat=VCFv4.1' as well as the samtools
+samtools will contain a line `##fileformat=VCFv4.1` as well as the samtools
 version number.
 
 Single Individual Haplotyping (SIH) is added as an experimental feature. It
@@ -1891,17 +1963,17 @@ Other notable changes in samtools:
  * Improved sorting order checking in indexing. Now indexing is the preferred way
    to check if a BAM is sorted.
 
- * Added a switch `-E' to mpileup and calmd. This option uses an alternative way
+ * Added a switch `-E` to mpileup and calmd. This option uses an alternative way
    to apply BAQ, which increases sensistivity, especially to MNPs, at the cost of
    a little loss in specificity.
 
- * Added `mpileup -A' to allow to use reads in anomalous pairs in SNP calling.
+ * Added `mpileup -A` to allow to use reads in anomalous pairs in SNP calling.
 
- * Added `mpileup -m' to allow fine control of the collection of INDEL candidates.
+ * Added `mpileup -m` to allow fine control of the collection of INDEL candidates.
 
- * Added `mpileup -S' to compute per-sample strand bias P-value.
+ * Added `mpileup -S` to compute per-sample strand bias P-value.
 
- * Added `mpileup -G' to exclude read groups in variant calling.
+ * Added `mpileup -G` to exclude read groups in variant calling.
 
  * Fixed segfault in indel calling related to unmapped and refskip reads.
 
@@ -1912,7 +1984,7 @@ Other notable changes in samtools:
 
  * Fixed a very rare memory issue in bam_md.c
 
- * Fixed an out-of-boundary bug in mpileup when the read base is `N'.
+ * Fixed an out-of-boundary bug in mpileup when the read base is `N`.
 
  * Fixed a compiling error when the knetfile library is not used. Fixed a
    library compiling error due to the lack of bam_nt16_nt4_table[] table.
@@ -1923,9 +1995,9 @@ Other notable changes in bcftools:
 
  * Updated the BCF spec.
 
- * Added the `FQ' VCF INFO field, which gives the phred-scaled probability
+ * Added the `FQ` VCF INFO field, which gives the phred-scaled probability
    of all samples being the same (identical to the reference or all homozygous
-   variants). Option `view -f' has been dropped.
+   variants). Option `view -f` has been dropped.
 
  * Implemented "vcfutils.pl vcf2fq" to generate a consensus sequence
    similar to "samtools.pl pileup2fq".
@@ -1935,7 +2007,7 @@ Other notable changes in bcftools:
 
  * Output bcftools specific INFO and FORMAT in the VCF header.
 
- * Added `view -s' to call variants from a subset of samples.
+ * Added `view -s` to call variants from a subset of samples.
 
  * Properly convert VCF to BCF with a user provided sequence dictionary. Nonetheless,
    custom fields are still unparsed and will be stored as a missing value.
@@ -1948,7 +2020,7 @@ Other notable changes in bcftools:
 
 
 Beta release 0.1.12a (2 December, 2010)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------------------
 
 This is another bug fix release:
 
@@ -1974,7 +2046,7 @@ This is another bug fix release:
 
 
 Beta release 0.1.11 (21 November, 2010)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------------------
 
 This is mainly a bug fix release:
 
@@ -1995,8 +2067,8 @@ One minor feature has been implemented in bcftools:
    result in the new mode, although the reference sequence has to be
    used in realignment when SAMtools computes genotype likelihoods.
 
-In addition, since 0.1.10, the `pileup' command has been deprecated by
-`mpileup' which is more powerful and more accurate. The `pileup' command
+In addition, since 0.1.10, the `pileup` command has been deprecated by
+`mpileup` which is more powerful and more accurate. The `pileup` command
 will not be removed in the next few releases, but new features will not
 be added.
 
@@ -2005,7 +2077,7 @@ be added.
 
 
 Beta Release 0.1.10 (16 November, 2010)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------------------
 
 This release is featured as the first major improvement to the indel
 caller. The method is similar to the old one implemented in the pileup
@@ -2040,7 +2112,7 @@ Other notable changes:
 
 
 Beta Release 0.1.9 (27 October, 2010)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-------------------------------------
 
 This release is featured as the first major improvement to the samtools'
 SNP caller.  It comes with a revised MAQ error model, the support of
@@ -2089,36 +2161,36 @@ In addition to the three major improvements, other notable changes are:
 
 
 Beta Release 0.1.8 (11 July, 2010)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+----------------------------------
 
 Notable functional changes:
 
- * Added the `reheader' command which replaces a BAM header with a new
+ * Added the `reheader` command which replaces a BAM header with a new
    header. This command is much faster than replacing header by
    BAM->SAM->BAM conversions.
 
- * Added the `mpileup' command which computes the pileup of multiple
+ * Added the `mpileup` command which computes the pileup of multiple
    alignments.
 
- * The `index' command now stores the number of mapped and unmapped
+ * The `index` command now stores the number of mapped and unmapped
    reads in the index file. This information can be retrieved quickly by
-   the new `idxstats' command.
+   the new `idxstats` command.
 
  * By default, pileup used the SOAPsnp model for SNP calling. This
    avoids the floating overflow in the MAQ model which leads to spurious
    calls in repetitive regions, although these calls will be immediately
    filtered by varFilter.
 
- * The `tview' command now correctly handles CIGARs like 7I10M and
+ * The `tview` command now correctly handles CIGARs like 7I10M and
    10M1P1I10M which cause assertion failure in earlier versions.
 
- * Tview accepts a region like `=10,000' where `=' stands for the
+ * Tview accepts a region like `=10,000` where `=` stands for the
    current sequence name. This saves typing for long sequence names.
 
- * Added the `-d' option to `pileup' which avoids slow indel calling
+ * Added the `-d` option to `pileup` which avoids slow indel calling
    in ultradeep regions by subsampling reads locally.
 
- * Added the `-R' option to `view' which retrieves alignments in read
+ * Added the `-R` option to `view` which retrieves alignments in read
    groups listed in the specified file.
 
 Performance improvements:
@@ -2141,9 +2213,9 @@ Bug fixes:
  * Fixed another issue in the indel caller which may lead to incorrect
    genotype.
 
- * Fixed a bug in `sort' when option `-o' is applied.
+ * Fixed a bug in `sort` when option `-o` is applied.
 
- * Fixed a bug in `view -r'.
+ * Fixed a bug in `view -r`.
 
 APIs and other changes:
 
@@ -2168,7 +2240,7 @@ Changes in other utilities:
 
 
 Beta Release 0.1.7 (10 November, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--------------------------------------
 
 Notable changes:
 
@@ -2208,7 +2280,7 @@ Changes in other utilities:
 
 
 Beta Release 0.1.6 (2 September, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--------------------------------------
 
 Notable changes:
 
@@ -2228,17 +2300,17 @@ Notable changes:
  * The view command now recognizes and optionally prints FLAG in HEXs or
    strings to make a SAM file more friendly to human eyes. This is a
    samtools-C extension, not implemented in Picard for the time
-   being. Please type `samtools view -?' for more information.
+   being. Please type `samtools view -?` for more information.
 
  * BAM files now have an end-of-file (EOF) marker to facilitate
    truncation detection. A warning will be given if an on-disk BAM file
    does not have this marker. The warning will be seen on BAM files
    generated by an older version of samtools. It does NO harm.
 
- * New key bindings in tview: `r' to show read names and `s' to show
+ * New key bindings in tview: 'r' to show read names and 's' to show
    reference skip (N operation) as deletions.
 
- * Fixed a bug in `samtools merge -n'.
+ * Fixed a bug in `samtools merge -n`.
 
  * Samtools merge now optionally copies the header of a user specified
    SAM file to the resultant BAM output.
@@ -2259,7 +2331,7 @@ Changes in other utilities:
 
 
 Beta Release 0.1.5 (7 July, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------------
 
 Notable changes:
 
@@ -2295,7 +2367,7 @@ Notable changes:
    option is preferred when the output is piped to other commands.
 
  * In view, added '-l' and '-r' to get the alignments for one library or
-   read group. The "@RG" header lines are now partially parsed.
+   read group. The `@RG` header lines are now partially parsed.
 
  * Do not include command line utilities to libbam.a.
 
@@ -2320,7 +2392,7 @@ bugs.
 
 
 Beta Release 0.1.4 (21 May, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------------
 
 Notable changes:
 
@@ -2334,7 +2406,7 @@ Notable changes:
 
  * Unified the interface to BAM and SAM I/O. This is done by
    implementing a wrapper on top of the old APIs and therefore old APIs
-   are still valid. The new I/O APIs also recognize the @SQ header
+   are still valid. The new I/O APIs also recognize the `@SQ` header
    lines.
 
  * Generate the MD tag.
@@ -2346,7 +2418,7 @@ Notable changes:
 
  * Implemented the GNU building system. However, currently the building
    system does not generate libbam.a. We will improve this later. For
-   the time being, `make -f Makefile.generic' is preferred.
+   the time being, `make -f Makefile.generic` is preferred.
 
  * Fixed a minor bug in pileup: the first read in a chromosome may be
    skipped.
@@ -2361,7 +2433,7 @@ Notable changes:
 
 
 Beta Release 0.1.3 (15 April, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-----------------------------------
 
 Notable changes in SAMtools:
 
@@ -2386,7 +2458,7 @@ Notable changes in SAMtools:
  * Fixed a minor bug in indexing: the bin number of an unmapped read is
    wrongly calculated.
 
- * Added `flagstat' command to show statistics on the FLAG field.
+ * Added `flagstat` command to show statistics on the FLAG field.
 
  * Improved indel caller by setting the maximum window size in local
    realignment.
@@ -2408,7 +2480,7 @@ Changes in other utilities:
 
 
 Beta Release 0.1.2 (28 January, 2008)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-------------------------------------
 
 Notable changes in SAMtools:
 
@@ -2477,8 +2549,8 @@ Changes in other utilities:
 
 
 Beta Release 0.1.1 (22 December, 2008)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--------------------------------------
 
 The is the first public release of samtools. For more information,
-please check the manual page `samtools.1' and the samtools website
+please check the manual page `samtools.1` and the samtools website
 http://samtools.sourceforge.net
diff --git a/README b/README
index 8f4f2369a..df040d9d3 100644
--- a/README
+++ b/README
@@ -9,7 +9,7 @@ Building samtools
 The typical simple case of building Samtools using the HTSlib bundled within
 this Samtools release tarball is done as follows:
 
-    cd .../samtools-1.18 # Within the unpacked release directory
+    cd .../samtools-1.19 # Within the unpacked release directory
     ./configure
     make
 
@@ -21,7 +21,7 @@ install samtools etc properly into a directory of your choosing.  Building for
 installation using the HTSlib bundled within this Samtools release tarball,
 and building the various HTSlib utilities such as bgzip is done as follows:
 
-    cd .../samtools-1.18 # Within the unpacked release directory
+    cd .../samtools-1.19 # Within the unpacked release directory
     ./configure --prefix=/path/to/location
     make all all-htslib
     make install install-htslib
@@ -48,7 +48,7 @@ There are two advantages to this:
 To build with plug-ins, you need to use the --enable-plugins configure option
 as follows:
 
-    cd .../samtools-1.18 # Within the unpacked release directory
+    cd .../samtools-1.19 # Within the unpacked release directory
     ./configure --enable-plugins --prefix=/path/to/location
     make all all-htslib
     make install install-htslib
@@ -66,8 +66,8 @@ Setting --with-plugin-path is useful if you want to run directly from
 the source distribution instead of installing the package.  In that case
 you can use:
 
-    cd .../samtools-1.18 # Within the unpacked release directory
-    ./configure --enable-plugins --with-plugin-path=$PWD/htslib-1.18
+    cd .../samtools-1.19 # Within the unpacked release directory
+    ./configure --enable-plugins --with-plugin-path=$PWD/htslib-1.19
     make all all-htslib
 
 It is possible to override the built-in search path using the HTS_PATH
diff --git a/bam_cat.c b/bam_cat.c
index ed8cf58c5..4bc845dca 100644
--- a/bam_cat.c
+++ b/bam_cat.c
@@ -1,6 +1,6 @@
 /*  bam_cat.c -- efficiently concatenates bam files.
 
-    Copyright (C) 2008-2009, 2011-2013, 2015-2017, 2019, 2021 Genome Research Ltd.
+    Copyright (C) 2008-2009, 2011-2013, 2015-2017, 2019, 2021, 2023 Genome Research Ltd.
     Modified SAMtools work copyright (C) 2010 Illumina, Inc.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -49,52 +49,76 @@ Illumina.
 #include "samtools.h"
 #include "sam_opts.h"
 
-/*
- * Check the files are consistent and capable of being concatenated.
- * Also fills out the version numbers and produces a new sam_hdr_t
- * structure with merged RG lines.
- * Note it is only a simple merge.
- *
- * Returns updated header on success;
- *        NULL on failure.
- */
-static sam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const sam_hdr_t *h,
-                                     int *vers_maj_p, int *vers_min_p) {
+/// cat_check_merge_hdr - check compatibility and merge RG hearders merges RGon both CRAM and BAM.
+/** @param firstfile - pointer to the 1sr file opened in caller
+ *  @param nfn - number of files to be processed, including the firstfile
+ *  @param fn - array of file paths to be processed
+ *  @param h - sam header pointer which contains explicitly given header
+ *  @param vers_maj_p - cram major version set and send out for output creation
+ *  @param vers_min_p - cram min version set and send out for output creation
+ *  @param out_h - pointer to sam header pointer, outputs the merged header
+ * returns array of opened samFile pointers on success and NULL on failure
+ * This method has the merged header processing for cram and bam.
+ * RG lines are merged for both cram and bam. For cram, version match for each
+ * file and order match of RG lines are compared as well.
+ * Note: it is a simple merge of RG lines alone.
+*/
+static samFile** cat_check_merge_hdr(samFile * const firstfile, int nfn, char * const *fn, const sam_hdr_t *h,
+                                     int *vers_maj_p, int *vers_min_p, sam_hdr_t **out_h) {
     int i, vers_maj = -1, vers_min = -1;
     sam_hdr_t *new_h = NULL, *old_h = NULL;
     samFile *in = NULL;
     kstring_t ks = KS_INITIALIZE;
-
-    if (h) {
-        new_h = sam_hdr_dup(h);
-        if (!new_h) {
-            fprintf(stderr, "[%s] ERROR: header duplication failed.\n",
-                    __func__);
-            goto fail;
+    samFile **files = calloc(nfn, sizeof(samFile *));
+    if(!files) {
+        fprintf(stderr, "[%s] ERROR: failed to allocate space for file handles.\n", __func__);
+        return NULL;
+    }
+    if (!out_h || !firstfile) {
+        fprintf(stderr, "[%s] ERROR: header check failed.\n", __func__);
+        goto fail;
+    }
+    if (*out_h) {           //use header if one is already present
+        new_h = *out_h;
+    }
+    else {
+        if (h) {            //use the explicit header given
+            new_h = sam_hdr_dup(h);
+            if (!new_h) {
+                fprintf(stderr, "[%s] ERROR: header duplication failed.\n",
+                        __func__);
+                goto fail;
+            }
         }
     }
 
     for (i = 0; i < nfn; ++i) {
-        cram_fd *in_c;
         int ki;
-
-        in = sam_open(fn[i], "rc");
+        //1st file is already open and passed, rest open locally
+        files[i] = in = i ? sam_open(fn[i], "r") : firstfile;
         if (in == 0) {
             print_error_errno("cat", "fail to open file '%s'", fn[i]);
             goto fail;
         }
-        in_c = in->fp.cram;
-
-        int vmaj = cram_major_vers(in_c);
-        int vmin = cram_minor_vers(in_c);
-        if ((vers_maj != -1 && vers_maj != vmaj) ||
-            (vers_min != -1 && vers_min != vmin)) {
-            fprintf(stderr, "[%s] ERROR: input files have differing version numbers.\n",
-                    __func__);
+        if (firstfile->format.format != in->format.format) {
+            print_error("cat", "File %s is of different format!", fn[i]);
             goto fail;
         }
-        vers_maj = vmaj;
-        vers_min = vmin;
+        if (firstfile->format.format == cram) {     //version check for cram
+            cram_fd *in_c;
+            in_c = in->fp.cram;
+
+            int vmaj = cram_major_vers(in_c);
+            int vmin = cram_minor_vers(in_c);
+            if ((vers_maj != -1 && vers_maj != vmaj) ||
+                (vers_min != -1 && vers_min != vmin)) {
+                fprintf(stderr, "[%s] ERROR: input files have differing version numbers.\n",
+                        __func__);
+                goto fail;
+            }
+            vers_maj = vmaj;
+            vers_min = vmin;
+        }
 
         old_h = sam_hdr_read(in);
         if (!old_h) {
@@ -111,10 +135,10 @@ static sam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const sam_hdr_t
                 goto fail;
             }
             sam_hdr_destroy(old_h);
-            sam_close(in);
+            old_h = NULL;
             continue;
         }
-
+        //merge RG lines
         int old_count = sam_hdr_count_lines(old_h, "RG");
         for (ki = 0; ki < old_count; ki++) {
             const char *old_name = sam_hdr_line_name(old_h, "RG", ki);
@@ -136,7 +160,8 @@ static sam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const sam_hdr_t
             }
         }
 
-        if (old_count > 1 && sam_hdr_count_lines(new_h, "RG") == old_count) {
+        if (firstfile->format.format == cram && old_count > 1 && sam_hdr_count_lines(new_h, "RG") == old_count) {
+            //RG order check for cram
             for (ki = 0; ki < old_count; ki++) {
                 const char *old_name = sam_hdr_line_name(old_h, "RG", ki);
                 const char *new_name = sam_hdr_line_name(new_h, "RG", ki);
@@ -148,72 +173,61 @@ static sam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const sam_hdr_t
             }
         }
 
-        sam_hdr_destroy(old_h);
-        sam_close(in);
+        sam_hdr_destroy(old_h); old_h = NULL;
     }
 
     ks_free(&ks);
 
-    *vers_maj_p = vers_maj;
-    *vers_min_p = vers_min;
-
-    return new_h;
+    if (vers_maj_p) {
+        *vers_maj_p = vers_maj;
+    }
+    if (vers_min_p) {
+        *vers_min_p = vers_min;
+    }
+    *out_h = new_h;
+    return files;
 
 fail:
     ks_free(&ks);
     if (old_h) sam_hdr_destroy(old_h);
     if (new_h) sam_hdr_destroy(new_h);
-    if (in) sam_close(in);
+    *out_h = NULL;
+    for (i = 1; i < nfn; ++i) {         //close files other than the firstfile
+        if (files[i]) {
+            sam_close(files[i]);
+        }
+    }
+    free(files);
 
     return NULL;
 }
 
-
-/*
- * CRAM files don't store the RG:Z:ID per read in the aux field.
- * Instead they have a numerical data series (RG) to point each read
- * back to the Nth @RG line in the file.  This means that we may need
- * to edit the RG data series (if the files were produced from
- * "samtools split" for example).
- *
- * The encoding method is stored in the compression header. Typical
- * examples:
- *
- * RG => EXTERNAL {18}           # Block content-id 18 holds RG values
- *                               # as a series of ITF8 encoded values
- *
- * RG => HUFFMAN {1, 255, 255, 255, 255, 255, 1, 0}
- *                               # One RG value #-1.  (No RG)
- *
- * RG => HUFFMAN {1, 0, 1, 0}    # One RG value #0 (always first RG)
- *
- * RG => HUFFMAN {2, 0, 1, 2, 1, 1}
- *                               # Two RG values, #0 and #1, written
- *                               # to the CORE block and possibly
- *                               # mixed with other data series.
- *
- * A single value can (but may not be) implemented as a zero bit
- * huffman code.  In this situation we can change the meta-data in the
- * compression header to renumber an RG value..
- */
-int cram_cat(int nfn, char * const *fn, const sam_hdr_t *h, const char* outcram, sam_global_args *ga, char *arg_list, int no_pg)
+int cram_cat(samFile * const firstfile, int nfn, char * const *fn, const sam_hdr_t *h, const char* outcram, sam_global_args *ga, char *arg_list, int no_pg)
 {
-    samFile *out;
+    samFile *out = NULL;
     cram_fd *out_c;
-    int i, vers_maj, vers_min;
+    int i, vers_maj, vers_min, ret = -1;
     sam_hdr_t *new_h = NULL;
+    samFile **files = NULL;
 
-    /* Check consistent versioning and compatible headers */
-    if (!(new_h = cram_cat_check_hdr(nfn, fn, h, &vers_maj, &vers_min)))
+    /* Check consistent versioning and compatible headers;
+    merges RG lines, opens all files and returns them that multiple non-seekable
+    stream inputs can be handled */
+    if (!(files = cat_check_merge_hdr(firstfile, nfn, fn, h, &vers_maj, &vers_min, &new_h)))
         return -1;
 
+    if (!new_h) {
+        print_error_errno("cat", "failed to make output header");
+        goto closefiles;
+    }
+
     /* Open the file with cram_vers */
     char vers[100];
-    sprintf(vers, "%d.%d", vers_maj, vers_min);
+    snprintf(vers, sizeof(vers), "%d.%d", vers_maj, vers_min);
     out = sam_open_format(outcram, "wc", &ga->out);
     if (out == 0) {
         print_error_errno("cat", "fail to open output file '%s'", outcram);
-        return -1;
+        goto closefiles;
     }
     out_c = out->fp.cram;
     cram_set_option(out_c, CRAM_OPT_VERSION, vers);
@@ -224,12 +238,13 @@ int cram_cat(int nfn, char * const *fn, const sam_hdr_t *h, const char* outcram,
                                  arg_list ? "CL": NULL,
                                  arg_list ? arg_list : NULL,
                                  NULL))
-        return -1;
+        goto closefiles;
 
     if (sam_hdr_write(out, new_h) < 0) {
         print_error_errno("cat", "Couldn't write header");
-        return -1;
+        goto closefiles;
     }
+    out_c = out->fp.cram;
 
     for (i = 0; i < nfn; ++i) {
         samFile *in;
@@ -238,17 +253,17 @@ int cram_cat(int nfn, char * const *fn, const sam_hdr_t *h, const char* outcram,
         sam_hdr_t *old_h;
         int new_rg = -1;
 
-        in = sam_open(fn[i], "rc");
+        in = files[i];
         if (in == 0) {
             print_error_errno("cat", "fail to open file '%s'", fn[i]);
-            return -1;
+            goto closefiles;
         }
         in_c = in->fp.cram;
 
         old_h = sam_hdr_read(in);
         if (!old_h) {
             print_error("cat", "fail to read the header of file '%s'", fn[i]);
-            return -1;
+            goto closefiles;
         }
 
         // Compute RG mapping if suitable for changing.
@@ -258,11 +273,11 @@ int cram_cat(int nfn, char * const *fn, const sam_hdr_t *h, const char* outcram,
                 new_rg = sam_hdr_line_index(new_h, "RG", old_name);
                 if (new_rg < 0) {
                     print_error("cat", "fail to find @RG line '%s' in the new header", old_name);
-                    return -1;
+                    goto closefiles;
                 }
             } else {
                 print_error("cat", "fail to find @RG line in file '%s'", fn[i]);
-                return -1;
+                goto closefiles;
             }
         } else {
             new_rg = 0;
@@ -274,7 +289,7 @@ int cram_cat(int nfn, char * const *fn, const sam_hdr_t *h, const char* outcram,
                 cram_block *blk;
                 // Container compression header
                 if (!(blk = cram_read_block(in_c)))
-                    return -1;
+                    goto closefiles;
                 cram_free_block(blk);
                 cram_free_container(c);
                 continue;
@@ -292,34 +307,42 @@ int cram_cat(int nfn, char * const *fn, const sam_hdr_t *h, const char* outcram,
 
                 // Not switching rg so do the usual read/write loop
                 if (cram_write_container(out_c, c) != 0)
-                    return -1;
+                    goto closefiles;
 
                 // Container compression header
                 if (!(blk = cram_read_block(in_c)))
-                    return -1;
+                    goto closefiles;
                 if (cram_write_block(out_c, blk) != 0) {
                     cram_free_block(blk);
-                    return -1;
+                    goto closefiles;
                 }
                 cram_free_block(blk);
 
-
                 // Container num_blocks can be invalid, due to a bug.
                 // Instead we iterate in slice context instead.
                 (void)cram_container_get_landmarks(c, &num_slices);
                 cram_copy_slice(in_c, out_c, num_slices);
             }
-
             cram_free_container(c);
         }
-
         sam_hdr_destroy(old_h);
-        sam_close(in);
     }
-    sam_close(out);
-    sam_hdr_destroy(new_h);
+    ret = 0;
 
-    return 0;
+closefiles:
+    if (out) {
+        sam_close(out);
+    }
+    if (new_h) {
+        sam_hdr_destroy(new_h);
+    }
+    for (i = 1; i < nfn; ++i) {     //skip firstfile and close rest
+        if (files[i]) {
+            sam_close(files[i]);
+        }
+    }
+    free(files);
+    return ret;
 }
 
 
@@ -330,31 +353,40 @@ int cram_cat(int nfn, char * const *fn, const sam_hdr_t *h, const char* outcram,
 
 #define BGZF_EMPTY_BLOCK_SIZE 28
 
-int bam_cat(int nfn, char * const *fn, sam_hdr_t *h, const char* outbam, char *arg_list, int no_pg)
+int bam_cat(samFile * const firstfile, int nfn, char * const *fn, sam_hdr_t *h, const char* outbam, char *arg_list, int no_pg)
 {
-    BGZF *fp, *in = NULL;
+    BGZF *fp = NULL, *in = NULL;
     uint8_t *buf = NULL;
     uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE];
     const int es=BGZF_EMPTY_BLOCK_SIZE;
     int i;
+    samFile **files = NULL;
+    sam_hdr_t *new_h = NULL;
 
+    /* merges RG lines, opens all files and returns them that multiple non-seekable
+    stream inputs can be handled */
+    if (!(files = cat_check_merge_hdr(firstfile, nfn, fn, h, NULL, NULL, &new_h)))
+        return -1;
+    if (!new_h) {
+        print_error_errno("cat", "failed to make output header");
+        goto fail;
+    }
     fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(stdout), "w");
     if (fp == 0) {
         print_error_errno("cat", "fail to open output file '%s'", outbam);
-        return -1;
+        goto fail;
     }
-    if (h) {
-        if (!no_pg && sam_hdr_add_pg(h, "samtools",
-                                     "VN", samtools_version(),
-                                     arg_list ? "CL": NULL,
-                                     arg_list ? arg_list : NULL,
-                                     NULL))
-            goto fail;
 
-        if (bam_hdr_write(fp, h) < 0) {
-            print_error_errno("cat", "Couldn't write header");
-            goto fail;
-        }
+    if (!no_pg && sam_hdr_add_pg(new_h, "samtools",
+                                    "VN", samtools_version(),
+                                    arg_list ? "CL": NULL,
+                                    arg_list ? arg_list : NULL,
+                                    NULL))
+        goto fail;
+
+    if (bam_hdr_write(fp, new_h) < 0) {
+        print_error_errno("cat", "Couldn't write header");
+        goto fail;
     }
 
     buf = (uint8_t*) malloc(BUF_SIZE);
@@ -363,35 +395,13 @@ int bam_cat(int nfn, char * const *fn, sam_hdr_t *h, const char* outbam, char *a
         goto fail;
     }
     for(i = 0; i < nfn; ++i){
-        sam_hdr_t *old;
         int len,j;
-
-        in = strcmp(fn[i], "-")? bgzf_open(fn[i], "r") : bgzf_fdopen(fileno(stdin), "r");
+        in = files[i]->fp.bgzf;
         if (in == 0) {
             print_error_errno("cat", "fail to open file '%s'", fn[i]);
             goto fail;
         }
-        if (in->is_write) return -1;
-
-        old = bam_hdr_read(in);
-        if (old == NULL) {
-            fprintf(stderr, "[%s] ERROR: couldn't read header for '%s'.\n",
-                    __func__, fn[i]);
-            goto fail;
-        }
-        if (h == 0 && i == 0) {
-            if (!no_pg && sam_hdr_add_pg(old, "samtools",
-                                         "VN", samtools_version(),
-                                         arg_list ? "CL": NULL,
-                                         arg_list ? arg_list : NULL,
-                                         NULL))
-                goto fail;
-
-            if (bam_hdr_write(fp, old) < 0) {
-                print_error_errno("cat", "Couldn't write header");
-                goto fail;
-            }
-        }
+        if (in->is_write) goto fail;
 
         if (in->block_offset < in->block_length) {
             if (bgzf_write(fp, (char *)in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
@@ -432,27 +442,42 @@ int bam_cat(int nfn, char * const *fn, sam_hdr_t *h, const char* outbam, char *a
                 if (bgzf_raw_write(fp, ebuf, es) < 0) goto write_fail;
             }
         }
-        sam_hdr_destroy(old);
-        bgzf_close(in);
         in = NULL;
     }
     free(buf);
     if (bgzf_close(fp) < 0) {
         fprintf(stderr, "[%s] Error on closing '%s'.\n", __func__, outbam);
-        return -1;
+        goto fail;
+    }
+    for (i = 1; i < nfn; ++i) {     //skip firstfile and close rest
+        if (files[i]) {
+            sam_close(files[i]);
+        }
     }
+    free(files);
+    sam_hdr_destroy(new_h);
     return 0;
 
  write_fail:
     fprintf(stderr, "[%s] Error writing to '%s'.\n", __func__, outbam);
  fail:
-    if (in) bgzf_close(in);
+    if (new_h) {
+        sam_hdr_destroy(new_h);
+    }
     if (fp) bgzf_close(fp);
     free(buf);
+
+    if (files) {
+        for(i = 1; i < nfn; ++i) {  //except the firstfile
+            if(files[i]) {
+                sam_close(files[i]);
+            }
+        }
+        free(files);
+    }
     return -1;
 }
 
-
 int main_cat(int argc, char *argv[])
 {
     sam_hdr_t *h = 0;
@@ -472,21 +497,22 @@ int main_cat(int argc, char *argv[])
     char *arg_list = NULL;
 
     sam_global_args_init(&ga);
-
     while ((c = getopt_long(argc, argv, "h:o:b:@:", lopts, NULL)) >= 0) {
         switch (c) {
             case 'h': {
                 samFile *fph = sam_open(optarg, "r");
                 if (fph == 0) {
                     fprintf(stderr, "[%s] ERROR: fail to read the header from '%s'.\n", __func__, optarg);
-                    return 1;
+                    ret = 1;
+                    goto end;
                 }
                 h = sam_hdr_read(fph);
                 if (h == NULL) {
                     fprintf(stderr,
                             "[%s] ERROR: failed to read the header from '%s'.\n",
                             __func__, optarg);
-                    return 1;
+                    ret = 1;
+                    goto end;
                 }
                 sam_close(fph);
                 break;
@@ -521,7 +547,8 @@ int main_cat(int argc, char *argv[])
 
     if (!no_pg && !(arg_list = stringify_argv(argc+1, argv-1))) {
         print_error("cat", "failed to create arg_list");
-        return 1;
+        ret = 1;
+        goto end;
     }
 
     // Append files specified in argv to the list.
@@ -541,34 +568,34 @@ int main_cat(int argc, char *argv[])
         fprintf(stderr, "         -h FILE  copy the header from FILE [default is 1st input file]\n");
         fprintf(stderr, "         -o FILE  output BAM/CRAM\n");
         fprintf(stderr, "         --no-PG  do not add a PG line\n");
-        sam_global_opt_help(stderr, "--..-@-.");
-        return 1;
+        sam_global_opt_help(stderr, "---.-@-.");
+        ret = 1;
+        goto end;
     }
 
     in = sam_open(infns[0], "r");
     if (!in) {
         print_error_errno("cat", "failed to open file '%s'", infns[0]);
-        return 1;
+        ret = 1;
+        goto end;
     }
 
     switch (hts_get_format(in)->format) {
     case bam:
-        sam_close(in);
-        if (bam_cat(infns_size+nargv_fns, infns, h, outfn? outfn : "-", arg_list, no_pg) < 0)
+        if (bam_cat(in, infns_size+nargv_fns, infns, h, outfn? outfn : "-", arg_list, no_pg) < 0)
             ret = 1;
         break;
 
     case cram:
-        sam_close(in);
-        if (cram_cat(infns_size+nargv_fns, infns, h, outfn? outfn : "-", &ga, arg_list, no_pg) < 0)
+        if (cram_cat(in, infns_size+nargv_fns, infns, h, outfn? outfn : "-", &ga, arg_list, no_pg) < 0)
             ret = 1;
         break;
 
     default:
-        sam_close(in);
         fprintf(stderr, "[%s] ERROR: input is not BAM or CRAM\n", __func__);
-        return 1;
+        ret = 1;
     }
+    sam_close(in);
 
  end:
     if (infns_size > 0) {
@@ -582,6 +609,7 @@ int main_cat(int argc, char *argv[])
     free(arg_list);
     if (h)
         sam_hdr_destroy(h);
+    sam_global_args_free(&ga);
 
     return ret;
 }
diff --git a/bam_markdup.c b/bam_markdup.c
index 677a47f82..17fedd58d 100644
--- a/bam_markdup.c
+++ b/bam_markdup.c
@@ -122,6 +122,7 @@ typedef struct {
     int opt;
     int beg;
     int end;
+    int len;
 } check_t;
 
 typedef struct {
@@ -875,7 +876,10 @@ static int is_optical_duplicate(md_param_t *param, bam1_t *ori, bam1_t *dup, lon
         return ret;
     }
 
-    if (strncmp(original + o_beg, duplicate + d_beg, o_end - o_beg) == 0) {
+    int o_len = o_end - o_beg;
+    int d_len = d_end - d_beg;
+
+    if ((o_len == d_len) && memcmp(original + o_beg, duplicate + d_beg, o_len) == 0) {
         long xdiff, ydiff;
 
         if (ox > dx) {
@@ -918,7 +922,10 @@ static int optical_duplicate_partial(md_param_t *param, const char *name, const
         return ret;
     }
 
-    if (strncmp(name + o_beg, duplicate + d_beg, o_end - o_beg) == 0) {
+    int o_len = o_end - o_beg;
+    int d_len = d_end - d_beg;
+
+    if ((o_len == d_len) && memcmp(name + o_beg, duplicate + d_beg, o_len) == 0) {
         // the initial parts match, look at the numbers
         long xdiff, ydiff;
 
@@ -945,6 +952,7 @@ static int optical_duplicate_partial(md_param_t *param, const char *name, const
     c->y = dy;
     c->beg = d_beg;
     c->end = d_end;
+    c->len = d_end - d_beg;
 
     return ret;
 }
@@ -1156,9 +1164,15 @@ static int check_chain_against_original(md_param_t *param, khash_t(duplicates) *
 }
 
 
-static int xcoord_sort(const void *a, const void *b) {
+static int chain_sort(const void *a, const void *b) {
     check_t *ac = (check_t *) a;
     check_t *bc = (check_t *) b;
+    int ret;
+
+    if ((ret = ac->len - bc->len))
+        return ret;
+    else if ((ret = memcmp(bam_get_qname(ac->b) + ac->beg, bam_get_qname(bc->b) + bc->beg, ac->len)))
+        return ret;
 
     return (ac->x - bc->x);
 }
@@ -1170,106 +1184,113 @@ static int check_duplicate_chain(md_param_t *param, khash_t(duplicates) *dup_has
     int ret = 0;
     size_t curr = 0;
 
-    qsort(list->c, list->length, sizeof(list->c[0]), xcoord_sort);
+    qsort(list->c, list->length, sizeof(list->c[0]), chain_sort);
 
     while (curr < list->length - 1) {
-        check_t *current = &list->c[curr];
-        size_t count = curr;
-        char *cur_name = bam_get_qname(current->b);
-        int current_paired = (current->b->core.flag & BAM_FPAIRED) && !(current->b->core.flag & BAM_FMUNMAP);
+        check_t *base = &list->c[curr];
+        char *base_name = bam_get_qname(base->b);
+        int end_name_match = curr;
 
-        while (++count < list->length && (list->c[count].x - current->x <= param->opt_dist)) {
-            // while close enough along the x coordinate
-            check_t *chk = &list->c[count];
+        // find the end of the matching name parts
+        while (++end_name_match < list->length) {
+            check_t *chk = &list->c[end_name_match];
 
-            if (current->opt && chk->opt)
-                continue;
+            if ((base->len == chk->len) && memcmp(base_name + base->beg, bam_get_qname(chk->b) + chk->beg, base->len) != 0)
+                break;
+        }
 
-            // if both are already optical duplicates there is no need to check again, otherwise...
+        while (curr < end_name_match) {
+            size_t count = curr;
+            check_t *current = &list->c[curr];
+            int current_paired = (current->b->core.flag & BAM_FPAIRED) && !(current->b->core.flag & BAM_FMUNMAP);
 
-            long ydiff;
+            while (++count < end_name_match && (list->c[count].x - current->x <= param->opt_dist)) {
+                // while close enough along the x coordinate
+                check_t *chk = &list->c[count];
 
-            if (current->y > chk->y) {
-                ydiff = current->y - chk->y;
-            } else {
-                ydiff = chk->y - current->y;
-            }
+                if (current->opt && chk->opt)
+                    continue;
 
-            if (ydiff > param->opt_dist)
-                continue;
+                long ydiff;
 
-            // the number are right, check the names
-            if (strncmp(cur_name + current->beg, bam_get_qname(chk->b) + chk->beg, current->end - current->beg) != 0)
-                continue;
+                if (current->y > chk->y) {
+                    ydiff = current->y - chk->y;
+                } else {
+                    ydiff = chk->y - current->y;
+                }
 
-            // optical duplicates
-            int chk_dup = 0;
-            int chk_paired = (chk->b->core.flag & BAM_FPAIRED) && !(chk->b->core.flag & BAM_FMUNMAP);
+                if (ydiff > param->opt_dist)
+                    continue;
 
-            if (current_paired != chk_paired) {
-                if (!chk_paired) {
-                    // chk is single vs pair, this is a dup.
-                    chk_dup = 1;
-                }
-            } else {
-                // do it by scores
-                int64_t cur_score, chk_score;
+                // optical duplicates
+                int chk_dup = 0;
+                int chk_paired = (chk->b->core.flag & BAM_FPAIRED) && !(chk->b->core.flag & BAM_FMUNMAP);
 
-                if ((current->b->core.flag & BAM_FQCFAIL) != (chk->b->core.flag & BAM_FQCFAIL)) {
-                    if (current->b->core.flag & BAM_FQCFAIL) {
-                        cur_score = 0;
-                        chk_score = 1;
-                    } else {
-                        cur_score = 1;
-                        chk_score = 0;
+                if (current_paired != chk_paired) {
+                    if (!chk_paired) {
+                        // chk is single vs pair, this is a dup.
+                        chk_dup = 1;
                     }
                 } else {
-                    cur_score = current->score;
-                    chk_score = chk->score;
+                    // do it by scores
+                    int64_t cur_score, chk_score;
 
-                    if (current_paired) {
-                        // they are pairs so add mate scores.
-                        chk_score += chk->mate_score;
-                        cur_score += current->mate_score;
+                    if ((current->b->core.flag & BAM_FQCFAIL) != (chk->b->core.flag & BAM_FQCFAIL)) {
+                        if (current->b->core.flag & BAM_FQCFAIL) {
+                            cur_score = 0;
+                            chk_score = 1;
+                        } else {
+                            cur_score = 1;
+                            chk_score = 0;
+                        }
+                    } else {
+                        cur_score = current->score;
+                        chk_score = chk->score;
+
+                        if (current_paired) {
+                            // they are pairs so add mate scores.
+                            chk_score += chk->mate_score;
+                            cur_score += current->mate_score;
+                        }
                     }
-                }
 
-                if (cur_score == chk_score) {
-                    if (strcmp(bam_get_qname(chk->b), cur_name) < 0) {
-                        chk_score++;
-                    } else {
-                        chk_score--;
+                    if (cur_score == chk_score) {
+                        if (strcmp(bam_get_qname(chk->b), bam_get_qname(current->b)) < 0) {
+                            chk_score++;
+                        } else {
+                            chk_score--;
+                        }
                     }
-                }
 
-                if (cur_score > chk_score) {
-                    chk_dup = 1;
+                    if (cur_score > chk_score) {
+                        chk_dup = 1;
+                    }
                 }
-            }
 
-            if (chk_dup) {
-                // the duplicate is the optical duplicate
-                if (!chk->opt) { // only change if not already an optical duplicate
-                    if (optical_retag(param, dup_hash, chk->b, chk_paired, stats)) {
-                        ret = -1;
-                        goto fail;
-                    }
+                if (chk_dup) {
+                    // the duplicate is the optical duplicate
+                    if (!chk->opt) { // only change if not already an optical duplicate
+                        if (optical_retag(param, dup_hash, chk->b, chk_paired, stats)) {
+                            ret = -1;
+                            goto fail;
+                        }
 
-                    chk->opt = 1;
-                }
-            } else {
-                if (!current->opt) {
-                    if (optical_retag(param, dup_hash, current->b, current_paired, stats)) {
-                        ret = -1;
-                        goto fail;
+                        chk->opt = 1;
                     }
+                } else {
+                    if (!current->opt) {
+                        if (optical_retag(param, dup_hash, current->b, current_paired, stats)) {
+                            ret = -1;
+                            goto fail;
+                        }
 
-                    current->opt = 1;
+                        current->opt = 1;
+                    }
                 }
             }
-        }
 
-        curr++;
+            curr++;
+        }
     }
 
  fail:
diff --git a/bam_plcmd.c b/bam_plcmd.c
index 264a7f5a0..c61d4b681 100644
--- a/bam_plcmd.c
+++ b/bam_plcmd.c
@@ -570,9 +570,11 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
     int ret;
     int last_tid = -1;
     hts_pos_t last_pos = -1;
+    int one_seq = 0;
 
     // begin pileup
     while ( (ret=bam_mplp64_auto(iter, &tid, &pos, n_plp, plp)) > 0) {
+        one_seq = 1; // at least 1 output
         if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested
         mplp_get_ref(data[0], tid, &ref, &ref_len);
         //printf("tid=%d len=%d ref=%p/%s\n", tid, ref_len, ref, ref);
@@ -811,8 +813,11 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
             last_tid = tid0;
             last_pos = beg0-1;
             mplp_get_ref(data[0], tid0, &ref, &ref_len);
+        } else if (last_tid < 0 && !one_seq && conf->all > 1) {
+            last_tid = 0; // --aa on a blank file
         }
-       while (last_tid >= 0 && last_tid < sam_hdr_nref(h)) {
+        while (last_tid >= 0 && last_tid < sam_hdr_nref(h)) {
+            mplp_get_ref(data[0], last_tid, &ref, &ref_len);
             while (++last_pos < sam_hdr_tid2len(h, last_tid)) {
                 if (last_pos >= end0) break;
                 if (conf->bed && bed_overlap(conf->bed, sam_hdr_tid2name(h, last_tid), last_pos, last_pos + 1) == 0)
diff --git a/bam_sort.c b/bam_sort.c
index b44bd665d..d5533f5cf 100644
--- a/bam_sort.c
+++ b/bam_sort.c
@@ -164,11 +164,15 @@ static template_coordinate_key_t* template_coordinate_key(bam1_t *b, template_co
 
 typedef enum {Coordinate, QueryName, TagCoordinate, TagQueryName, MinHash, TemplateCoordinate} SamOrder;
 static SamOrder g_sam_order = Coordinate;
+static int natural_sort = 1; // not ASCII, but alphanumeric: a12b > a7b
 static char g_sort_tag[2] = {0,0};
 
 #define is_digit(c) ((c)<='9' && (c)>='0')
 static int strnum_cmp(const char *_a, const char *_b)
 {
+    if (!natural_sort)
+        return strcmp(_a,_b);
+
     const unsigned char *a = (const unsigned char*)_a, *b = (const unsigned char*)_b;
     const unsigned char *pa = a, *pb = b;
     while (*pa && *pb) {
@@ -1535,7 +1539,8 @@ static void merge_usage(FILE *to)
 "   or: samtools merge [options] <out.bam> <in1.bam> ... <inN.bam>\n"
 "\n"
 "Options:\n"
-"  -n         Input files are sorted by read name\n"
+"  -n         Input files are sorted by read name (natural)\n"
+"  -N         Input files are sorted by read name (ASCII)\n"
 "  -t TAG     Input files are sorted by TAG value\n"
 "  -r         Attach RG tag (inferred from file names)\n"
 "  -u         Uncompressed BAM output\n"
@@ -1581,11 +1586,12 @@ int bam_merge(int argc, char *argv[])
         return 0;
     }
 
-    while ((c = getopt_long(argc, argv, "h:nru1R:o:f@:l:cps:b:O:t:XL:", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "h:nNru1R:o:f@:l:cps:b:O:t:XL:", lopts, NULL)) >= 0) {
         switch (c) {
         case 'r': flag |= MERGE_RG; break;
         case 'f': flag |= MERGE_FORCE; break;
         case 'h': fn_headers = optarg; break;
+        case 'N': natural_sort = 0; // fall through
         case 'n': sam_order = QueryName; break;
         case 'o': fnout = optarg; break;
         case 't': sort_tag = optarg; break;
@@ -3287,6 +3293,9 @@ int bam_sort_core_ext(SamOrder sam_order, char* sort_tag, int minimiser_kmer,
             break;
         case QueryName:
             new_so = "queryname";
+            new_ss = natural_sort
+                ? "queryname:natural"
+                : "queryname:lexicographical";
             break;
         case MinHash:
             new_so = "coordinate";
@@ -3605,7 +3614,8 @@ static void sort_usage(FILE *fp)
 "  -I FILE    Order minimisers by their position in FILE FASTA\n"
 "  -w INT     Window size for minimiser indexing via -I ref.fa [100]\n"
 "  -H         Squash homopolymers when computing minimiser\n"
-"  -n         Sort by read name (not compatible with samtools index command)\n"
+"  -n         Sort by read name (natural): cannot be used with samtools index\n"
+"  -N         Sort by read name (ASCII): cannot be used with samtools index\n"
 "  -t TAG     Sort by value of TAG. Uses position as secondary index (or read name if -n is set)\n"
 "  -o FILE    Write final output to FILE rather than standard output\n"
 "  -T PREFIX  Write temporary files to PREFIX.nnnn.bam\n"
@@ -3658,9 +3668,10 @@ int bam_sort(int argc, char *argv[])
         { NULL, 0, NULL, 0 }
     };
 
-    while ((c = getopt_long(argc, argv, "l:m:no:O:T:@:t:MI:K:uRw:H", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "l:m:nNo:O:T:@:t:MI:K:uRw:H", lopts, NULL)) >= 0) {
         switch (c) {
         case 'o': fnout = optarg; o_seen = 1; break;
+        case 'N': natural_sort = 0; // fall through
         case 'n': sam_order = QueryName; break;
         case 't': by_tag = true; sort_tag = optarg; break;
         case 'm': {
@@ -3732,7 +3743,7 @@ int bam_sort(int argc, char *argv[])
         goto sort_end;
     }
 
-    if (ga.write_index && (sam_order == QueryName || sam_order == TagQueryName || sam_order == TagCoordinate || sam_order == TemplateCoordinate)) {
+    if (ga.write_index && sam_order != Coordinate) {
         fprintf(stderr, "[W::bam_sort] Ignoring --write-index as it only works for position sorted files.\n");
         ga.write_index = 0;
     }
diff --git a/bam_split.c b/bam_split.c
index e9f0fb591..e0f730a33 100644
--- a/bam_split.c
+++ b/bam_split.c
@@ -48,11 +48,15 @@ struct parsed_opts {
     const char *unaccounted_header_name;
     const char *unaccounted_name;
     const char *output_format_string;
+    const char *tag;
+    long max_split;
     bool verbose;
     int no_pg;
     sam_global_args ga;
 };
 
+#define DEFAULT_MAX_SPLIT 100
+
 typedef struct parsed_opts parsed_opts_t;
 
 struct state {
@@ -60,13 +64,14 @@ struct state {
     sam_hdr_t* merged_input_header;
     samFile* unaccounted_file;
     sam_hdr_t* unaccounted_header;
+    char *unaccounted_idx_fn;
     size_t output_count;
-    char** rg_id;
-    char **rg_index_file_name;
-    char **rg_output_file_name;
-    samFile** rg_output_file;
-    sam_hdr_t** rg_output_header;
-    kh_c2i_t* rg_hash;
+    char **tag_vals;
+    char **index_file_name;
+    char **output_file_name;
+    samFile **output_file;
+    sam_hdr_t **output_header;
+    kh_c2i_t* tag_val_hash;
     htsThreadPool p;
     int write_index;
 };
@@ -82,19 +87,22 @@ static void usage(FILE *write_to)
 "Usage: samtools split [-u <unaccounted.bam>] [-h <unaccounted_header.sam>]\n"
 "                      [-f <format_string>] [-v] <merged.bam>\n"
 "Options:\n"
-"  -f STRING       output filename format string [\"%%*_%%#.%%.\"]\n"
-"  -u FILE1        put reads with no RG tag or an unrecognised RG tag in FILE1\n"
-"  -h FILE2        ... and override the header with FILE2 (-u file only)\n"
-"  -v              verbose output\n"
-"  --no-PG         do not add a PG line\n");
+"  -f STRING           output filename format string [\"%%*_%%#.%%.\"]\n"
+"  -u FILE1            put left-over reads in FILE1\n"
+"  -h FILE2            ... and override the header with FILE2 (-u file only)\n"
+"  -d TAG              split by TAG value. TAG value must be a string.\n"
+"  -M,--max-split NUM  limit number of output files from -d to NUM [%d]\n"
+"  -v                  verbose output\n"
+"  --no-PG             do not add a PG line\n",
+            DEFAULT_MAX_SPLIT);
     sam_global_opt_help(write_to, "-....@..");
     fprintf(write_to,
 "\n"
 "Format string expansions:\n"
 "  %%%%     %%\n"
 "  %%*     basename\n"
-"  %%#     @RG index\n"
-"  %%!     @RG ID\n"
+"  %%#     index (of @RG in the header, or count of TAG values seen so far)\n"
+"  %%!     @RG ID or TAG value\n"
 "  %%.     filename extension for output format\n"
       );
 }
@@ -104,17 +112,19 @@ static parsed_opts_t* parse_args(int argc, char** argv)
 {
     if (argc == 1) { usage(stdout); return NULL; }
 
-    const char *optstring = "vf:h:u:@:";
+    const char *optstring = "vf:h:u:d:M:@:";
 
     static const struct option lopts[] = {
         SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0, '@'),
         {"no-PG", no_argument, NULL, 1},
+        {"max-split", required_argument, NULL, 'M'},
         { NULL, 0, NULL, 0 }
     };
 
     parsed_opts_t* retval = calloc(sizeof(parsed_opts_t), 1);
     if (! retval ) { perror("cannot allocate option parsing memory"); return NULL; }
 
+    retval->max_split = DEFAULT_MAX_SPLIT;
     sam_global_args_init(&retval->ga);
 
     int opt;
@@ -132,6 +142,22 @@ static parsed_opts_t* parse_args(int argc, char** argv)
         case 'u':
             retval->unaccounted_name = optarg;
             break;
+        case 'd':
+            retval->tag = optarg;
+            retval->output_format_string = "%*_%!.%.";
+            break;
+        case 'M': {
+            char *end = optarg;
+            retval->max_split = strtol(optarg, &end, 10);
+            if (*optarg == '\0' || *end != '\0' || retval->max_split == 0) {
+                print_error("split", "Invalid -M argument: \"%s\"", optarg);
+                free(retval);
+                return NULL;
+            }
+            if (retval->max_split < 0) // No limit requested
+                retval->max_split = LONG_MAX;
+            break;
+        }
         case 1:
             retval->no_pg = 1;
             break;
@@ -163,7 +189,7 @@ static parsed_opts_t* parse_args(int argc, char** argv)
 }
 
 // Expands a output filename format string
-static char* expand_format_string(const char* format_string, const char* basename, const char* rg_id, const int rg_idx, const htsFormat *format)
+static char* expand_format_string(const char* format_string, const char* basename, const char* tag_val, const int file_idx, const htsFormat *format)
 {
     kstring_t str = { 0, 0, NULL };
     const char* pointer = format_string;
@@ -179,10 +205,10 @@ static char* expand_format_string(const char* format_string, const char* basenam
                 if (kputs(basename, &str) < 0) goto memfail;
                 break;
             case '#':
-                if (kputl(rg_idx, &str) < 0) goto memfail;
+                if (kputl(file_idx, &str) < 0) goto memfail;
                 break;
             case '!':
-                if (kputs(rg_id, &str) < 0) goto memfail;
+                if (kputs(tag_val, &str) < 0) goto memfail;
                 break;
             case '.':
                 // Only really need to cope with sam, bam, cram
@@ -275,6 +301,154 @@ static int header_compatible(sam_hdr_t *hdr1, sam_hdr_t *hdr2)
     return 0;
 }
 
+static int grow_output_lists(state_t *state, size_t count) {
+    char **new_list = realloc(state->tag_vals, count * sizeof(char *));
+    if (!new_list)
+        return -1;
+    state->tag_vals = new_list;
+    new_list = realloc(state->index_file_name, count * sizeof(char *));
+    if (!new_list)
+        return -1;
+    state->index_file_name = new_list;
+    new_list = realloc(state->output_file_name, count * sizeof(char *));
+    if (!new_list)
+        return -1;
+    state->output_file_name = new_list;
+    samFile **new_file = realloc(state->output_file,
+                                 count * sizeof(samFile *));
+    if (!new_file)
+        return -1;
+    state->output_file = new_file;
+    sam_hdr_t **new_hdr = realloc(state->output_header,
+                                  count * sizeof(sam_hdr_t *));
+    if (!new_hdr)
+        return -1;
+    state->output_header = new_hdr;
+    return 0;
+}
+
+static khiter_t prep_sam_file(parsed_opts_t *opts, state_t *state,
+                              const char *tag, const char *arg_list,
+                              int is_rg) {
+    char *input_base_name = NULL, *new_file_name = NULL, *tag_val = NULL;
+    char *new_idx_fn = NULL;
+    sam_hdr_t *new_hdr = NULL;
+    samFile *new_sam_file = NULL;
+
+    khiter_t i = kh_get_c2i(state->tag_val_hash, tag);
+    if (i != kh_end(state->tag_val_hash)) {
+        return i;
+    }
+    // create new file
+    if (grow_output_lists(state, state->output_count + 1) != 0) {
+        print_error_errno("split", "Couldn't grow output lists");
+        return kh_end(state->tag_val_hash);
+    }
+    tag_val = strdup(tag);
+    if (!tag_val) {
+        print_error_errno("split", "Couldn't copy tag value");
+        return kh_end(state->tag_val_hash);
+    }
+    char *dirsep = strrchr(opts->merged_input_name, '/');
+    input_base_name = strdup(dirsep? dirsep+1 : opts->merged_input_name);
+    if (!input_base_name) {
+        print_error_errno("split", "Filename parsing failed");
+        goto fail;
+    }
+
+    char* extension = strrchr(input_base_name, '.');
+    if (extension) *extension = '\0';
+
+    new_file_name = expand_format_string(opts->output_format_string, input_base_name, tag, 0, &opts->ga.out);
+    if (!new_file_name) {
+        print_error_errno("split", "Filename creation failed");
+        goto fail;
+    }
+
+    new_hdr = sam_hdr_dup(state->merged_input_header);
+    if (!new_hdr) {
+        print_error_errno("split", "Duplicating header for file \"%s\" failed", new_file_name);
+        goto fail;
+    }
+    if (!opts->no_pg && sam_hdr_add_pg(new_hdr, "samtools",
+                                       "VN", samtools_version(),
+                                       arg_list ? "CL": NULL,
+                                       arg_list ? arg_list : NULL,
+                                       NULL)) {
+        print_error_errno("split", "Adding PG line to file \"%s\" failed", new_file_name);
+        goto fail;
+    }
+
+    if (is_rg) {
+        // If here, we've found an RG:Z: tag without a corresponding @RG
+        // line in the header.
+        if (sam_hdr_remove_lines(new_hdr, "RG", "ID", NULL) != 0) {
+            print_error_errno("split",
+                              "Failed to remove @RG lines from file \"%s\"",
+                              new_file_name);
+            goto fail;
+        }
+        if (sam_hdr_add_line(new_hdr, "RG", "ID", tag_val, NULL) != 0) {
+            print_error_errno("split",
+                              "Failed to add @RG line to file \"%s\"",
+                              new_file_name);
+            goto fail;
+        }
+    }
+
+    char outmode[4] = "w";
+    sam_open_mode(outmode + 1, new_file_name, NULL);
+    new_sam_file = sam_open_format(new_file_name, outmode, &opts->ga.out);
+    if (!new_sam_file) {
+        print_error_errno("split", "Opening filename for writing \"%s\" failed", new_file_name);
+        goto fail;
+    }
+    if (state->p.pool)
+        hts_set_opt(new_sam_file, HTS_OPT_THREAD_POOL, &state->p);
+
+    if (sam_hdr_write(new_sam_file, new_hdr) != 0) {
+        print_error_errno("split", "Couldn't write header to \"%s\"",
+                          new_file_name);
+        goto fail;
+    }
+
+    if (state->write_index) {
+        new_idx_fn = auto_index(new_sam_file, new_file_name, new_hdr);
+        if (!new_idx_fn) {
+            print_error_errno("split", "Creating index file for file \"%s\" failed", new_file_name);
+            goto fail;
+        }
+    }
+
+    int ret = -1;
+    i = kh_put_c2i(state->tag_val_hash, tag_val, &ret);
+    if (ret < 0) {
+        print_error_errno("split", "Adding file \"%s\" failed", new_file_name);
+        goto fail;
+    }
+
+    kh_val(state->tag_val_hash, i) = state->output_count;
+    state->tag_vals[state->output_count] = tag_val;
+    state->index_file_name[state->output_count] = new_idx_fn;
+    state->output_file_name[state->output_count] = new_file_name;
+    state->output_file[state->output_count] = new_sam_file;
+    state->output_header[state->output_count] = new_hdr;
+    state->output_count++;
+    free(input_base_name);
+    return i;
+
+ fail:
+    free(input_base_name);
+    free(new_file_name);
+    free(tag_val);
+    free(new_idx_fn);
+    if (new_hdr)
+        sam_hdr_destroy(new_hdr);
+    if (new_sam_file)
+        sam_close(new_sam_file);
+    return kh_end(state->tag_val_hash);
+}
+
 // Set the initial state
 static state_t* init(parsed_opts_t* opts, const char *arg_list)
 {
@@ -306,6 +480,7 @@ static state_t* init(parsed_opts_t* opts, const char *arg_list)
         cleanup_state(retval, false);
         return NULL;
     }
+    retval->write_index = opts->ga.write_index;
 
     if (opts->unaccounted_name) {
         if (opts->unaccounted_header_name) {
@@ -331,10 +506,10 @@ static state_t* init(parsed_opts_t* opts, const char *arg_list)
         } else {
             retval->unaccounted_header = sam_hdr_dup(retval->merged_input_header);
             if (!opts->no_pg && sam_hdr_add_pg(retval->unaccounted_header, "samtools",
-                                    "VN", samtools_version(),
-                                    arg_list ? "CL": NULL,
-                                    arg_list ? arg_list : NULL,
-                                    NULL)) {
+                                               "VN", samtools_version(),
+                                               arg_list ? "CL": NULL,
+                                               arg_list ? arg_list : NULL,
+                                               NULL)) {
                 print_error("split", "Could not rewrite header for \"%s\"", opts->unaccounted_name);
                 cleanup_state(retval, false);
                 return NULL;
@@ -354,23 +529,40 @@ static state_t* init(parsed_opts_t* opts, const char *arg_list)
             hts_set_opt(retval->unaccounted_file, HTS_OPT_THREAD_POOL, &retval->p);
     }
 
-    // Open output files for RGs
-    if (!count_RG(retval->merged_input_header, &retval->output_count, &retval->rg_id)) return NULL;
-    if (opts->verbose) fprintf(stderr, "@RG's found %zu\n",retval->output_count);
+    int is_rg = !opts->tag || strcmp(opts->tag, "RG") == 0;
+    if (is_rg) {
+        if (!count_RG(retval->merged_input_header,
+                      &retval->output_count, &retval->tag_vals)) {
+            cleanup_state(retval, false);
+            return NULL;
+        }
+        if (opts->verbose)
+            fprintf(stderr, "@RG's found %zu\n",retval->output_count);
+    } else {
+        retval->output_count = 0;
+    }
+
     // Prevent calloc(0, size);
     size_t num = retval->output_count ? retval->output_count : 1;
-    retval->rg_index_file_name = (char **)calloc(num, sizeof(char *));
-    retval->rg_output_file_name = (char **)calloc(num, sizeof(char *));
-    retval->rg_output_file = (samFile**)calloc(num, sizeof(samFile*));
-    retval->rg_output_header = (sam_hdr_t**)calloc(num, sizeof(sam_hdr_t*));
-    retval->rg_hash = kh_init_c2i();
-    if (!retval->rg_output_file_name || !retval->rg_output_file || !retval->rg_output_header ||
-        !retval->rg_hash || !retval->rg_index_file_name) {
+    retval->index_file_name = (char **)calloc(num, sizeof(char *));
+    retval->output_file_name = (char **)calloc(num, sizeof(char *));
+    retval->output_file = (samFile**)calloc(num, sizeof(samFile*));
+    retval->output_header = (sam_hdr_t**)calloc(num, sizeof(sam_hdr_t*));
+    retval->tag_val_hash = kh_init_c2i();
+    if (!retval->output_file_name || !retval->output_file || !retval->output_header ||
+        !retval->tag_val_hash || !retval->index_file_name) {
         print_error_errno("split", "Could not initialise output file array");
         cleanup_state(retval, false);
         return NULL;
     }
+    if (!is_rg)
+        return retval;  // Done for this case - outputs will be opened later
 
+    // Adjust max_split if too small for the read-groups listed in the header
+    if (opts->max_split < retval->output_count)
+        opts->max_split = retval->output_count;
+
+    // Open output files for RGs
     char* dirsep = strrchr(opts->merged_input_name, '/');
     char* input_base_name = strdup(dirsep? dirsep+1 : opts->merged_input_name);
     if (!input_base_name) {
@@ -388,7 +580,7 @@ static state_t* init(parsed_opts_t* opts, const char *arg_list)
 
         output_filename = expand_format_string(opts->output_format_string,
                                                input_base_name,
-                                               retval->rg_id[i], i,
+                                               retval->tag_vals[i], i,
                                                &opts->ga.out);
 
         if ( output_filename == NULL ) {
@@ -397,40 +589,39 @@ static state_t* init(parsed_opts_t* opts, const char *arg_list)
             return NULL;
         }
 
-        retval->rg_output_file_name[i] = output_filename;
-
+        retval->output_file_name[i] = output_filename;
         sam_open_mode(outmode + 1, output_filename, NULL);
-        retval->rg_output_file[i] = sam_open_format(output_filename, outmode, &opts->ga.out);
+        retval->output_file[i] = sam_open_format(output_filename, outmode, &opts->ga.out);
 
-        if (retval->rg_output_file[i] == NULL) {
+        if (retval->output_file[i] == NULL) {
             print_error_errno("split", "Could not open \"%s\"", output_filename);
             cleanup_state(retval, false);
             free(input_base_name);
             return NULL;
         }
         if (retval->p.pool)
-            hts_set_opt(retval->rg_output_file[i], HTS_OPT_THREAD_POOL, &retval->p);
+            hts_set_opt(retval->output_file[i], HTS_OPT_THREAD_POOL, &retval->p);
 
         // Record index in hash
         int ret;
-        khiter_t iter = kh_put_c2i(retval->rg_hash, retval->rg_id[i], &ret);
+        khiter_t iter = kh_put_c2i(retval->tag_val_hash, retval->tag_vals[i], &ret);
         if (ret < 0) {
             print_error_errno("split", "Couldn't add @RG ID to look-up table");
             cleanup_state(retval, false);
             free(input_base_name);
             return NULL;
         }
-        kh_val(retval->rg_hash,iter) = i;
+        kh_val(retval->tag_val_hash,iter) = i;
 
         // Set and edit header
-        retval->rg_output_header[i] = sam_hdr_dup(retval->merged_input_header);
-        if (sam_hdr_remove_except(retval->rg_output_header[i], "RG", "ID", retval->rg_id[i]) ||
-           (!opts->no_pg &&
-            sam_hdr_add_pg(retval->rg_output_header[i], "samtools",
-                        "VN", samtools_version(),
-                        arg_list ? "CL": NULL,
-                        arg_list ? arg_list : NULL,
-                        NULL))) {
+        retval->output_header[i] = sam_hdr_dup(retval->merged_input_header);
+        if (sam_hdr_remove_except(retval->output_header[i], "RG", "ID", retval->tag_vals[i]) ||
+            (!opts->no_pg &&
+             sam_hdr_add_pg(retval->output_header[i], "samtools",
+                            "VN", samtools_version(),
+                            arg_list ? "CL": NULL,
+                            arg_list ? arg_list : NULL,
+                            NULL))) {
             print_error("split", "Could not rewrite header for \"%s\"", output_filename);
             cleanup_state(retval, false);
             free(input_base_name);
@@ -439,34 +630,30 @@ static state_t* init(parsed_opts_t* opts, const char *arg_list)
     }
 
     free(input_base_name);
-    retval->write_index = opts->ga.write_index;
 
     return retval;
 }
 
-static bool split(state_t* state)
+static bool split(state_t* state, parsed_opts_t *opts, char *arg_list)
 {
-    if (state->unaccounted_file && sam_hdr_write(state->unaccounted_file, state->unaccounted_header) != 0) {
-        print_error_errno("split", "Could not write output file header");
-        return false;
-    }
-    size_t i;
-    for (i = 0; i < state->output_count; i++) {
-        if (sam_hdr_write(state->rg_output_file[i], state->rg_output_header[i]) != 0) {
-            print_error_errno("split", "Could not write file header to \"%s\"", state->rg_output_file_name[i]);
+    int is_rg = !opts->tag || strcmp(opts->tag, "RG") == 0;
+    if (state->unaccounted_file) {
+        if (sam_hdr_write(state->unaccounted_file, state->unaccounted_header) != 0) {
+            print_error_errno("split", "Could not write output file header");
             return false;
         }
-        if (state->write_index) {
-            state->rg_index_file_name[i] = auto_index(state->rg_output_file[i],
-                                                      state->rg_output_file_name[i],
-                                                      state->rg_output_header[i]);
-            if (!state->rg_index_file_name[i]) {
-                print_error_errno("split", "Could not create index for file \"%s\"", state->rg_output_file_name[i]);
+        if (opts->ga.write_index) {
+            state->unaccounted_idx_fn = auto_index(state->unaccounted_file,
+                                                   opts->unaccounted_name,
+                                                   state->unaccounted_header);
+            if (!state->unaccounted_idx_fn) {
+                print_error_errno("split",
+                                  "Creating index file for file \"%s\" failed",
+                                  opts->unaccounted_name);
                 return false;
             }
         }
     }
-
     bam1_t* file_read = bam_init1();
     // Read the first record
     int r;
@@ -480,25 +667,59 @@ static bool split(state_t* state)
         }
     }
 
+    if (is_rg) {
+        size_t i;
+        for (i = 0; i < state->output_count; i++) {
+            if (sam_hdr_write(state->output_file[i], state->output_header[i]) != 0) {
+                print_error_errno("split", "Could not write file header to \"%s\"", state->output_file_name[i]);
+                goto error;
+            }
+            if (state->write_index) {
+                state->index_file_name[i] = auto_index(state->output_file[i],
+                        state->output_file_name[i],
+                        state->output_header[i]);
+                if (!state->index_file_name[i]) {
+                    print_error_errno("split", "Could not create index for file \"%s\"", state->output_file_name[i]);
+                    goto error;
+                }
+            }
+        }
+    }
     while (file_read != NULL) {
         // Get RG tag from read and look it up in hash to find file to output it to
-        uint8_t* tag = bam_aux_get(file_read, "RG");
+        uint8_t* tag = bam_aux_get(file_read, is_rg ? "RG" : opts->tag);
+        char *val = tag ? bam_aux2Z(tag) : NULL;
         khiter_t iter;
-        if ( tag != NULL ) {
-            char* rg = bam_aux2Z(tag);
-            iter = kh_get_c2i(state->rg_hash, rg);
+        if ( val != NULL ) {
+            iter = kh_get_c2i(state->tag_val_hash, val);
         } else {
-            iter = kh_end(state->rg_hash);
+            iter = kh_end(state->tag_val_hash);
+        }
+
+        // Check for opts->tag here instead of !is_rg so we open new
+        // files if the user specified '-d RG' and we find a RG:Z: value
+        // that wasn't listed in the header.  If the '-d' option is
+        // not used, we don't open a file to preserve existing behaviour.
+        if (opts->tag && val && iter == kh_end(state->tag_val_hash)
+            && state->output_count < opts->max_split) {
+            // Need to open a new output file
+            iter = prep_sam_file(opts, state, val, arg_list, is_rg);
+            if (iter == kh_end(state->tag_val_hash)) { // Open failed
+                print_error("split",
+                            "Could not create output file for tag \"%s:%s\"",
+                            opts->tag, bam_aux2Z(tag));
+                goto error;
+
+            }
         }
 
         // Write the read out to correct file
-        if (iter != kh_end(state->rg_hash)) {
+        if (iter != kh_end(state->tag_val_hash)) {
             // if found write to the appropriate untangled bam
-            int i = kh_val(state->rg_hash,iter);
-            if (sam_write1(state->rg_output_file[i], state->rg_output_header[i], file_read) < 0) {
-                print_error_errno("split", "Could not write to \"%s\"", state->rg_output_file_name[i]);
-                bam_destroy1(file_read);
-                return false;
+            int i = kh_val(state->tag_val_hash,iter);
+            if (sam_write1(state->output_file[i], state->output_header[i], file_read) < 0) {
+                print_error_errno("split", "Could not write to \"%s\"", state->output_file_name[i]);
+                goto error;
             }
         } else {
             // otherwise write to the unaccounted bam if there is one or fail
@@ -506,15 +727,14 @@ static bool split(state_t* state)
                 if (tag) {
                     fprintf(stderr, "Read \"%s\" with unaccounted for tag \"%s\".\n", bam_get_qname(file_read), bam_aux2Z(tag));
                 } else {
-                    fprintf(stderr, "Read \"%s\" has no RG tag.\n", bam_get_qname(file_read));
+                    fprintf(stderr, "Read \"%s\" has no %s tag.\n",
+                            bam_get_qname(file_read), is_rg ? "RG" : opts->tag);
                 }
-                bam_destroy1(file_read);
-                return false;
+                goto error;
             } else {
                 if (sam_write1(state->unaccounted_file, state->unaccounted_header, file_read) < 0) {
                     print_error_errno("split", "Could not write to unaccounted output file");
-                    bam_destroy1(file_read);
-                    return false;
+                    goto error;
                 }
             }
         }
@@ -532,16 +752,27 @@ static bool split(state_t* state)
     }
 
     if (state->write_index) {
+        size_t i;
         for (i = 0; i < state->output_count; i++) {
-            if (sam_idx_save(state->rg_output_file[i]) < 0) {
-                print_error_errno("split", "writing index failed");
+            if (sam_idx_save(state->output_file[i]) < 0) {
+                print_error_errno("split", "writing index \"%s\" failed",
+                                  state->index_file_name[i]);
+                return false;
+            }
+        }
+        if (state->unaccounted_file) {
+            if (sam_idx_save(state->unaccounted_file) < 0) {
+                print_error_errno("split", "writing index \"%s\" failed",
+                                  state->unaccounted_idx_fn);
                 return false;
             }
-            free(state->rg_index_file_name[i]);
         }
     }
 
     return true;
+error:
+    bam_destroy1(file_read);
+    return false;
 }
 
 static int cleanup_state(state_t* status, bool check_close)
@@ -559,25 +790,28 @@ static int cleanup_state(state_t* status, bool check_close)
     sam_close(status->merged_input_file);
     size_t i;
     for (i = 0; i < status->output_count; i++) {
-        if (status->rg_output_header && status->rg_output_header[i])
-            sam_hdr_destroy(status->rg_output_header[i]);
-        if (status->rg_output_file && status->rg_output_file[i]) {
-            if (sam_close(status->rg_output_file[i]) < 0 && check_close) {
-                print_error("split", "Error on closing output file \"%s\"", status->rg_output_file_name[i]);
+        if (status->output_header && status->output_header[i])
+            sam_hdr_destroy(status->output_header[i]);
+        if (status->output_file && status->output_file[i]) {
+            if (sam_close(status->output_file[i]) < 0 && check_close) {
+                print_error("split", "Error on closing output file \"%s\"", status->output_file_name[i]);
                 ret = -1;
             }
         }
-        if (status->rg_id) free(status->rg_id[i]);
-        if (status->rg_output_file_name) free(status->rg_output_file_name[i]);
+        if (status->tag_vals) free(status->tag_vals[i]);
+        if (status->output_file_name) free(status->output_file_name[i]);
+        if (status->index_file_name[i]) free(status->index_file_name[i]);
     }
     if (status->merged_input_header)
         sam_hdr_destroy(status->merged_input_header);
-    free(status->rg_output_header);
-    free(status->rg_output_file);
-    free(status->rg_output_file_name);
-    free(status->rg_index_file_name);
-    kh_destroy_c2i(status->rg_hash);
-    free(status->rg_id);
+    free(status->output_header);
+    free(status->output_file);
+    free(status->output_file_name);
+    free(status->index_file_name);
+    free(status->unaccounted_idx_fn);
+    kh_destroy_c2i(status->tag_val_hash);
+
+    free(status->tag_vals);
     if (status->p.pool)
         hts_tpool_destroy(status->p.pool);
     free(status);
@@ -603,7 +837,7 @@ int main_split(int argc, char** argv)
     state_t* status = init(opts, arg_list);
     if (!status) goto cleanup_opts;
 
-    if (!split(status)) {
+    if (!split(status, opts, arg_list)) {
         cleanup_state(status, false);
         goto cleanup_opts;
     }
diff --git a/bamtk.c b/bamtk.c
index e05ea1816..221787561 100644
--- a/bamtk.c
+++ b/bamtk.c
@@ -303,5 +303,14 @@ int main(int argc, char *argv[])
         fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]);
         return 1;
     }
+
+    // For subcommands that may have produced substantial output on stdout,
+    // make a final check for delayed I/O errors. Ignore EBADF as other code
+    // may have already closed stdout.
+    if (fclose(stdout) != 0 && errno != EBADF) {
+        print_error_errno(argv[1], "closing standard output failed");
+        return 1;
+    }
+
     return ret;
 }
diff --git a/coverage.c b/coverage.c
index dedaa8e99..68dd6be5e 100644
--- a/coverage.c
+++ b/coverage.c
@@ -120,6 +120,7 @@ static int usage() {
             "                          effectively removing any depth limit.\n"
             "Output options:\n"
             "  -m, --histogram         show histogram instead of tabular output\n"
+            "  -D, --plot-depth        plot depth instead of tabular output\n"
             "  -A, --ascii             show only ASCII characters in histogram\n"
             "  -o, --output FILE       write output to FILE [stdout]\n"
             "  -H, --no-header         don't print a header in tabular mode\n"
@@ -209,7 +210,7 @@ void print_tabular_line(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *s
 }
 
 void print_hist(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats, int tid, const uint32_t *hist,
-        const int hist_size, const bool full_utf) {
+        const int hist_size, const bool full_utf, const bool plot_coverage) {
     int i, col;
     bool show_percentiles = false;
     const int n_rows = 10;
@@ -221,7 +222,7 @@ void print_hist(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats, in
     double hist_data[hist_size];
     double max_val = 0.0;
     for (i = 0; i < hist_size; ++i) {
-        hist_data[i] = 100 * hist[i] / (double) stats[tid].bin_width;
+        hist_data[i] = (plot_coverage?1:100) * hist[i] / (double) stats[tid].bin_width;
         if (hist_data[i] > max_val) max_val = hist_data[i];
     }
 
@@ -231,7 +232,9 @@ void print_hist(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats, in
     double row_bin_size = max_val / (double) n_rows;
     for (i = n_rows-1; i >= 0; --i) {
         double current_bin = row_bin_size * i;
-        if (show_percentiles) {
+        if (plot_coverage) {
+            fprintf(file_out, ">%8.1f ",i*row_bin_size);
+        } else if (show_percentiles) {
             fprintf(file_out, ">%3i%% ", i*10);
         } else {
             fprintf(file_out, ">%7.2f%% ", current_bin);
@@ -265,7 +268,13 @@ void print_hist(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats, in
                             stats[tid].summed_mapQ/(double) stats[tid].n_selected_reads); break;
             case 1: fprintf(file_out, "Histo bin width: %sbp",
                             readable_bps(stats[tid].bin_width, buf)); break;
-            case 0: fprintf(file_out, "Histo max bin:   %.5g%%", max_val); break;
+            case 0: if (plot_coverage) {
+                        fprintf(file_out, "Histo max cov:   %.5g",  max_val);
+                    } else {
+                        fprintf(file_out, "Histo max bin:   %.5g%%", max_val);
+                    }
+                    break;
+
         };
         fputc('\n', file_out);
     }
@@ -313,6 +322,7 @@ int main_coverage(int argc, char *argv[]) {
     bool opt_print_header = true;
     bool opt_print_tabular = true;
     bool opt_print_histogram = false;
+    bool opt_plot_coverage = false;
     bool opt_full_utf = true;
 
     FILE *file_out = stdout;
@@ -332,6 +342,7 @@ int main_coverage(int argc, char *argv[]) {
         {"min-bq", required_argument, NULL, 'Q'},
         {"histogram", no_argument, NULL, 'm'},
         {"ascii", no_argument, NULL, 'A'},
+        {"plot-depth", no_argument, NULL, 'D'},
         {"output", required_argument, NULL, 'o'},
         {"no-header", no_argument, NULL, 'H'},
         {"n-bins", required_argument, NULL, 'w'},
@@ -344,7 +355,7 @@ int main_coverage(int argc, char *argv[]) {
     // parse the command line
     int c;
     opterr = 0;
-    while ((c = getopt_long(argc, argv, "Ao:l:q:Q:hHw:r:b:md:", lopts, NULL)) != -1) {
+    while ((c = getopt_long(argc, argv, "Ao:l:q:Q:hHw:r:b:md:D", lopts, NULL)) != -1) {
         switch (c) {
             case 1:
                 if ((required_flags = bam_str2flag(optarg)) < 0) {
@@ -368,6 +379,10 @@ int main_coverage(int argc, char *argv[]) {
             case 'A': opt_full_utf = false;
                       opt_print_histogram = true; opt_print_tabular = false;
                       break;
+            case 'D': opt_print_histogram = true;
+                     opt_print_tabular = false;
+                     opt_plot_coverage = true;
+                     break;
             case 'H': opt_print_header = false; break;
             case 'h': return usage();
             default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
@@ -560,7 +575,7 @@ int main_coverage(int argc, char *argv[]) {
         if (tid != old_tid) { // Next target sequence
             if (old_tid >= 0) {
                 if (opt_print_histogram) {
-                    print_hist(file_out, h, stats, old_tid, hist, n_bins, opt_full_utf);
+                    print_hist(file_out, h, stats, old_tid, hist, n_bins, opt_full_utf, opt_plot_coverage);
                     fputc('\n', file_out);
                 } else if (opt_print_tabular) {
                     print_tabular_line(file_out, h, stats, old_tid);
@@ -604,12 +619,14 @@ int main_coverage(int argc, char *argv[]) {
                 count_base = true;
                 stats[tid].summed_coverage += depth_at_pos;
             }
-            // hist[current_bin] += depth_at_pos;  // Add counts to the histogram here to have one based on coverage
-            //fprintf(file_out, "\t%d", n_plp[i] - m); // this the depth to output
+
+            if(current_bin < n_bins && opt_plot_coverage) {
+                hist[current_bin] += depth_at_pos;
+                }
         }
         if (count_base) {
             stats[tid].n_covered_bases++;
-            if (opt_print_histogram && current_bin < n_bins)
+            if (opt_print_histogram && current_bin < n_bins && !opt_plot_coverage)
                 ++(hist[current_bin]); // Histogram based on breadth of coverage
         }
     }
@@ -620,7 +637,7 @@ int main_coverage(int argc, char *argv[]) {
 
     if (tid < n_targets && tid >=0) {
         if (opt_print_histogram) {
-            print_hist(file_out, h, stats, tid, hist, n_bins, opt_full_utf);
+            print_hist(file_out, h, stats, tid, hist, n_bins, opt_full_utf, opt_plot_coverage);
         } else if (opt_print_tabular) {
             print_tabular_line(file_out, h, stats, tid);
         }
diff --git a/doc/samtools-addreplacerg.1 b/doc/samtools-addreplacerg.1
index f794ca547..5a60626f9 100644
--- a/doc/samtools-addreplacerg.1
+++ b/doc/samtools-addreplacerg.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-addreplacerg 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-addreplacerg 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools addreplacerg \- adds or replaces read group tags
 .\"
diff --git a/doc/samtools-ampliconclip.1 b/doc/samtools-ampliconclip.1
index 149589f2a..8c625f6b3 100644
--- a/doc/samtools-ampliconclip.1
+++ b/doc/samtools-ampliconclip.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-ampliconclip 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-ampliconclip 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools ampliconclip \- clip reads using a BED file
 .\"
diff --git a/doc/samtools-ampliconstats.1 b/doc/samtools-ampliconstats.1
index 295db5254..ffb102341 100644
--- a/doc/samtools-ampliconstats.1
+++ b/doc/samtools-ampliconstats.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-ampliconstats 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-ampliconstats 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools ampliconstats \- produces statistics from amplicon sequencing alignment file
 .\"
diff --git a/doc/samtools-bedcov.1 b/doc/samtools-bedcov.1
index 1e0371e5b..b9be90df4 100644
--- a/doc/samtools-bedcov.1
+++ b/doc/samtools-bedcov.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-bedcov 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-bedcov 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools bedcov \- reports coverage over regions in a supplied BED file
 .\"
diff --git a/doc/samtools-calmd.1 b/doc/samtools-calmd.1
index e20514883..27afec5ef 100644
--- a/doc/samtools-calmd.1
+++ b/doc/samtools-calmd.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-calmd 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-calmd 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools calmd \- calculates MD and NM tags
 .\"
diff --git a/doc/samtools-cat.1 b/doc/samtools-cat.1
index ae53981ff..6b80ed98c 100644
--- a/doc/samtools-cat.1
+++ b/doc/samtools-cat.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-cat 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-cat 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools cat \- concatenate files together
 .\"
diff --git a/doc/samtools-collate.1 b/doc/samtools-collate.1
index fe77f2ba4..64f09dd46 100644
--- a/doc/samtools-collate.1
+++ b/doc/samtools-collate.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-collate 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-collate 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools collate \- shuffles and groups reads together by their names
 .\"
diff --git a/doc/samtools-consensus.1 b/doc/samtools-consensus.1
index 683b9b80e..af8f395e1 100644
--- a/doc/samtools-consensus.1
+++ b/doc/samtools-consensus.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-consensus 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-consensus 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools consensus \- produces a consensus FASTA/FASTQ/PILEUP
 .\"
@@ -264,13 +264,9 @@ output "N".
 
 .TP 0
 The following options apply only to Bayesian consensus mode enabled
-with the \fB-5\fR option.
+(default on).
 
 .TP 10
-.B -5
-Enable Bayesian consensus algorithm.
-
-.TP
 .BI "-C " C ", --cutoff " C
 Only used for the Gap5 consensus mode, which produces a Phred style
 score for the final consensus quality.  If this is below
diff --git a/doc/samtools-coverage.1 b/doc/samtools-coverage.1
index cf71d50be..3cdf19500 100644
--- a/doc/samtools-coverage.1
+++ b/doc/samtools-coverage.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-coverage 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-coverage 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools coverage \- produces a histogram or table of coverage per chromosome
 .\"
@@ -103,6 +103,10 @@ Output options:
 .BI -m,\ --histogram
 Show histogram instead of tabular output.
 .TP
+.BI -D,\ --plot-depth
+As above but displays the depth of coverage instead of the percent of coverage.
+This option can be used to visualize copy number variations in the terminal.
+.TP
 .BI -A,\ --ascii
 Show only ASCII characters in histogram using colon and fullstop for
 full and half height characters.
@@ -154,6 +158,24 @@ chr1 (249.25Mbp)
         1.00M     4.44M     7.87M       12.00M 
 .EE
 
+.EX 2
+samtools coverage  -m -r 'chr1:24500000-25600000' --plot-depth -w 32 -A input.bam
+
+chr1 (249.25Mbp)
+>    38.8 |            .:::::::            | Number of reads: 283218
+>    34.5 |            ::::::::            |     (3327 filtered)
+>    30.2 |           :::::::::.           | Covered bases:   1.10Mbp
+>    25.9 |.:::::.:.::::::::::::::::::::::.| Percent covered: 99.83%
+>    21.6 |::::::::::::::::::::::::::::::::| Mean coverage:   33.2x
+>    17.2 |::::::::::::::::::::::::::::::::| Mean baseQ:      37.2
+>    12.9 |::::::::::::::::::::::::::::::::| Mean mapQ:       59.3
+>     8.6 |::::::::::::::::::::::::::::::::|
+>     4.3 |::::::::::::::::::::::::::::::::| Histo bin width: 34.5Kbp
+>     0.0 |::::::::::::::::::::::::::::::::| Histo max cov:   43.117
+        24.50M    24.84M    25.19M      25.60M
+
+.EE
+
 
 .SH AUTHOR
 .PP
diff --git a/doc/samtools-cram-size.1 b/doc/samtools-cram-size.1
index 2453ea902..b0689b755 100644
--- a/doc/samtools-cram-size.1
+++ b/doc/samtools-cram-size.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-cram-size 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-cram-size 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools cram-size \- list a break down of data types in a CRAM file
 .\"
diff --git a/doc/samtools-depad.1 b/doc/samtools-depad.1
index b6bb3fd1d..6464ba833 100644
--- a/doc/samtools-depad.1
+++ b/doc/samtools-depad.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-depad 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-depad 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools depad \- convert padded BAM to unpadded BAM
 .\"
diff --git a/doc/samtools-depth.1 b/doc/samtools-depth.1
index c7ca100cd..97ac383d4 100644
--- a/doc/samtools-depth.1
+++ b/doc/samtools-depth.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-depth 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-depth 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools depth \- computes the read depth at each position or region
 .\"
diff --git a/doc/samtools-dict.1 b/doc/samtools-dict.1
index 9d8471cea..d6d983cfa 100644
--- a/doc/samtools-dict.1
+++ b/doc/samtools-dict.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-dict 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-dict 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools dict \- create a sequence dictionary file from a fasta file
 .\"
diff --git a/doc/samtools-faidx.1 b/doc/samtools-faidx.1
index d9c040239..0624b7661 100644
--- a/doc/samtools-faidx.1
+++ b/doc/samtools-faidx.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-faidx 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-faidx 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools faidx \- indexes or queries regions from a fasta file
 .\"
diff --git a/doc/samtools-fasta.1 b/doc/samtools-fasta.1
index 7eb47e84a..edc9d8f49 100644
--- a/doc/samtools-fasta.1
+++ b/doc/samtools-fasta.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-fasta 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-fasta 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools-fasta, samtools-fastq \- converts a SAM/BAM/CRAM file to FASTA or FASTQ
 .\"
@@ -128,6 +128,8 @@ soft-clipped CIGAR records then the soft-clipped data will be in the
 output FASTA/FASTQ.  Hard-clipped data is, by definition, absent from
 the SAM record and hence will be absent in any FASTA/FASTQ produced.
 
+The filter options order of precedence is -d, -f, -F, --rf and -G.
+
 .SH OPTIONS
 .TP 8
 .B -n
diff --git a/doc/samtools-fixmate.1 b/doc/samtools-fixmate.1
index 63f171048..31a2c06fe 100644
--- a/doc/samtools-fixmate.1
+++ b/doc/samtools-fixmate.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-fixmate 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-fixmate 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools fixmate \- fills in mate coordinates and insert size fields.
 .\"
diff --git a/doc/samtools-flags.1 b/doc/samtools-flags.1
index 543d7bc0a..1a4c811af 100644
--- a/doc/samtools-flags.1
+++ b/doc/samtools-flags.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-flags 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-flags 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools flags \- convert between textual and numeric flag representation.
 .\"
diff --git a/doc/samtools-flagstat.1 b/doc/samtools-flagstat.1
index 8ecf89f0b..39a58f23d 100644
--- a/doc/samtools-flagstat.1
+++ b/doc/samtools-flagstat.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-flagstat 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-flagstat 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools flagstat \- counts the number of alignments for each FLAG type
 .\"
diff --git a/doc/samtools-fqidx.1 b/doc/samtools-fqidx.1
index 1676c7bb4..c955fd028 100644
--- a/doc/samtools-fqidx.1
+++ b/doc/samtools-fqidx.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-fqidx 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-fqidx 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools fqidx \- Indexes or queries regions from a fastq file
 .\"
diff --git a/doc/samtools-head.1 b/doc/samtools-head.1
index 347bc3bd8..e7d8252a8 100644
--- a/doc/samtools-head.1
+++ b/doc/samtools-head.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-head 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-head 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools head \- view SAM/BAM/CRAM file headers
 .\"
diff --git a/doc/samtools-idxstats.1 b/doc/samtools-idxstats.1
index 7ae25c341..799877dea 100644
--- a/doc/samtools-idxstats.1
+++ b/doc/samtools-idxstats.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-idxstats 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-idxstats 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools idxstats \- reports alignment summary statistics
 .\"
diff --git a/doc/samtools-import.1 b/doc/samtools-import.1
index a30095b87..2e782a72f 100644
--- a/doc/samtools-import.1
+++ b/doc/samtools-import.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-import 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-import 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools import \- converts FASTQ files to unmapped SAM/BAM/CRAM
 .\"
diff --git a/doc/samtools-index.1 b/doc/samtools-index.1
index 61a297371..1bb607730 100644
--- a/doc/samtools-index.1
+++ b/doc/samtools-index.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-index 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-index 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools index \- indexes SAM/BAM/CRAM files
 .\"
diff --git a/doc/samtools-markdup.1 b/doc/samtools-markdup.1
index 4feeeac85..fcf5e8053 100644
--- a/doc/samtools-markdup.1
+++ b/doc/samtools-markdup.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-markdup 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-markdup 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools markdup \- mark duplicate alignments in a coordinate sorted file
 .\"
diff --git a/doc/samtools-merge.1 b/doc/samtools-merge.1
index 342313010..db81041bf 100644
--- a/doc/samtools-merge.1
+++ b/doc/samtools-merge.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-merge 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-merge 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools merge \- merges multiple sorted files into a single file
 .\"
@@ -83,8 +83,10 @@ of existing IDs in the output header will have a suffix appended to them to diff
 records from other files and the read records will be updated to reflect this.
 
 The ordering of the records in the input files must match the usage of the
-\fB-n\fP and \fB-t\fP command-line options.  If they do not, the output
-order will be undefined.  See
+\fB-n\fP, \fB-N\fP and \fB-t\fP command-line options.  If they do not,
+the output order will be undefined.  Note this also extends to
+disallowing mixing of "queryname" files with a combination of natural
+and lexicographical sort orders.  See
 .B sort
 for information about record ordering.
 
@@ -122,8 +124,20 @@ is actually in SAM format, though any alignment records it may contain
 are ignored.)
 .TP
 .B -n
-The input alignments are sorted by read names rather than by chromosomal
-coordinates
+The input alignments are sorted by read names using an alpha-numeric
+ordering, rather than by chromosomal coordinates.
+The alpha-numeric or \*(lqnatural\*(rq sort order detects runs of digits in the
+strings and sorts these numerically.  Hence "a7b" appears before "a12b".
+Note this is not suitable where hexadecimal values are in use.
+.TP
+.B -N
+The input alignments are sorted by read names using a lexicographical
+ordering, rather than by chromosomal coordinates.  Unlike \fB-n\fR no
+detection of numeric components is used, instead relying purely on the
+ASCII value of each character.  Hence "x12" comes before "x7" as "1"
+is before "7" in ASCII.  This is a more appropriate name sort order
+where all digits in names are already zero-padded and/or hexadecimal
+values are being used.
 .TP
 .BI -o \ FILE
 Write merged output to
@@ -187,7 +201,7 @@ Attach the
 .B RG
 tag while merging sorted alignments:
 .EX 2
-perl -e 'print "@RG\\tID:ga\\tSM:hs\\tLB:ga\\tPL:Illumina\\n@RG\\tID:454\\tSM:hs\\tLB:454\\tPL:454\\n"' > rg.txt
+printf '@RG\\tID:ga\\tSM:hs\\tLB:ga\\tPL:ILLUMINA\\n@RG\\tID:454\\tSM:hs\\tLB:454\\tPL:LS454\\n' > rg.txt
 samtools merge -rh rg.txt merged.bam ga.bam 454.bam
 .EE
 The value in a
diff --git a/doc/samtools-mpileup.1 b/doc/samtools-mpileup.1
index 35b14d4d4..9403c93ed 100644
--- a/doc/samtools-mpileup.1
+++ b/doc/samtools-mpileup.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-mpileup 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-mpileup 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools mpileup \- produces "pileup" textual format from an alignment
 .\"
@@ -76,6 +76,7 @@ could be specified using \fB-r 20 -l chr20.bed\fR, meaning that the
 index is used to find chromosome 20 and then it is filtered for the
 regions listed in the bed file.
 
+Unmapped reads are not considered and are always discarded.
 By default secondary alignments, QC failures and duplicate reads will
 be omitted, along with low quality bases and some reads in high depth
 regions.  See the \fB--ff\fR, \fB-Q\fR and \fB-d\fR options for
@@ -283,8 +284,8 @@ none of the mask bits set.  Hence this does not override the
 .TP
 .BI --ff,\ --excl-flags \ STR|INT
 Filter flags: skip reads with any of the mask bits set.  This defaults
-to UNMAP,SECONDARY,QCFAIL,DUP.  The option is not accumulative, so
-specifying e.g. \fB--ff UNMAP,QCFAIL\fR will reenable output of
+to SECONDARY,QCFAIL,DUP.  The option is not accumulative, so
+specifying e.g. \fB--ff QCFAIL\fR will reenable output of
 secondary and duplicate alignments.  Note this does not override the
 \fB--incl-flags\fR option.
 .TP
@@ -482,7 +483,42 @@ The
 .B -E
 option can be used to make it ignore the contents of the BQ:Z tag and
 force it to recalculate the BAQ scores by making a new alignment.
-
+.PP
+.SH EXAMPLES
+Using range:
+With implicit index files in1.bam.<ext> and in2.sam.gz.<ext>,
+.EX 2
+samtools mpileup in1.bam in2.sam.gz -r chr10:100000-200000
+.EE
+With explicit index files,
+.EX 2
+samtools mpileup in1.bam in2.sam.gz idx/in1.csi idx/in2.csi -X -r chr10:100000-200000
+.EE
+With fofn being a file of input file names, and implicit index files present with inputs,
+.EX 2
+samtools mpileup -b fofn -r chr10:100000-200000
+.EE
+Using flags:
+To get reads with flags READ2 or REVERSE and not having any of SECONDARY,QCFAIL,DUP,
+.EX 2
+samtools mpileup --rf READ2,REVERSE in.sam
+.EE
+or
+.EX 2
+samtools mpileup --rf 144 in.sam
+.EE
+To get reads with flag SECONDARY,
+.EX 2
+samtools mpileup --rf SECONDARY --ff QCFAIL,DUP in.sam
+.EE
+Using all possible alignmentes:
+To show all possible alignments, either of below two equivalent commands may be used,
+.EX 2
+samtools mpileup --count-orphans --no-BAQ --max-depth 0 --fasta-ref ref_file.fa \\
+--min-BQ 0 --excl-flags 0 --disable-overlap-removal in.sam
+
+samtools mpileup -A -B -d 0 -f ref_file.fa -Q 0 --ff 0 -x in.sam
+.EE
 .SH AUTHOR
 .PP
 Written by Heng Li from the Sanger Institute.
diff --git a/doc/samtools-phase.1 b/doc/samtools-phase.1
index a6d1e1df1..ba231e577 100644
--- a/doc/samtools-phase.1
+++ b/doc/samtools-phase.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-phase 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-phase 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools phase \- call and phase heterozygous SNPs
 .\"
diff --git a/doc/samtools-quickcheck.1 b/doc/samtools-quickcheck.1
index e726630e0..8373f5571 100644
--- a/doc/samtools-quickcheck.1
+++ b/doc/samtools-quickcheck.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-quickcheck 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-quickcheck 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools quickcheck \- a rapid sanity check on input files
 .\"
diff --git a/doc/samtools-reference.1 b/doc/samtools-reference.1
index 4daf65df7..6c33c8969 100644
--- a/doc/samtools-reference.1
+++ b/doc/samtools-reference.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-reference 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-reference 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools reference \- extracts an embedded reference from a CRAM file
 .\"
diff --git a/doc/samtools-reheader.1 b/doc/samtools-reheader.1
index be3dd3dc6..14bccf092 100644
--- a/doc/samtools-reheader.1
+++ b/doc/samtools-reheader.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-reheader 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-reheader 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools reheader \- replaces the header in the input file
 .\"
diff --git a/doc/samtools-reset.1 b/doc/samtools-reset.1
index c2b7288c8..7f64367a5 100644
--- a/doc/samtools-reset.1
+++ b/doc/samtools-reset.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-reset 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-reset 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools reset \- removes the alignment information added by aligners and updates flags accordingly
 .\"
diff --git a/doc/samtools-rmdup.1 b/doc/samtools-rmdup.1
index 64cf1679d..0d3902543 100644
--- a/doc/samtools-rmdup.1
+++ b/doc/samtools-rmdup.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-rmdup 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-rmdup 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools rmdup \- removes duplicate reads (obsolete)
 .\"
diff --git a/doc/samtools-samples.1 b/doc/samtools-samples.1
index 39bbda2bf..26afe25bb 100644
--- a/doc/samtools-samples.1
+++ b/doc/samtools-samples.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-samples 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-samples 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools samples \- prints the samples from an alignment file
 .\"
diff --git a/doc/samtools-sort.1 b/doc/samtools-sort.1
index 4e9be1976..26a1a51b7 100644
--- a/doc/samtools-sort.1
+++ b/doc/samtools-sort.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-sort 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-sort 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools sort \- sorts SAM/BAM/CRAM files
 .\"
@@ -48,11 +48,12 @@ samtools sort
 
 .SH DESCRIPTION
 .PP
-Sort alignments by leftmost coordinates, by read name when \fB-n\fR
-is used, by tag contents with \fB-t\fR, or a minimiser-based collation
-order with \fB-M\fR.  An appropriate
-.B @HD-SO
-sort order header tag will be added or an existing one updated if necessary.
+Sort alignments by leftmost coordinates, by read name when \fB-n\fR or
+\fB-N\fR are used, by tag contents with \fB-t\fR, or a minimiser-based
+collation order with \fB-M\fR.  An appropriate \fB@HD SO\fR
+sort order header tag will be added or an existing one updated if
+necessary, along with the \fB@HD SS\fR sub-sort header tag where
+appropriate.
 
 The sorted output is written to standard output by default, or to the
 specified file
@@ -74,7 +75,8 @@ instead if you need name collated data without a full lexicographical sort.
 Note that if the sorted output file is to be indexed with
 .BR "samtools index" ,
 the default coordinate sort must be used.
-Thus the \fB-n\fR, \fB-t\fR and \fB-M\fR options are incompatible with
+Thus the \fB-n\fR, \fB-N\fR, \fB-t\fR and \fB-M\fR options are
+incompatible with
 .BR "samtools index" .
 
 When sorting by minimisier (\fB-M\fR), the sort order is defined by
@@ -115,11 +117,26 @@ minimum value of 1M for this setting.
 .B -n
 Sort by read names (i.e., the
 .B QNAME
-field) rather than by chromosomal coordinates.
+field) using an alpha-numeric ordering, rather than by chromosomal coordinates.
+The alpha-numeric or \*(lqnatural\*(rq sort order detects runs of digits in the
+strings and sorts these numerically.  Hence "a7b" appears before "a12b".
+Note this is not suitable where hexadecimal values are in use.
+Sets the header sub-sort (\fB@HD SS\fR) tag to \fBqueryname:natural\fR.
+.TP
+.B -N
+Sort by read names (i.e., the
+.B QNAME
+field) using the lexicographical ordering, rather than by chromosomal
+coordinates.  Unlike \fB-n\fR no detection of numeric components is
+used, instead relying purely on the ASCII value of each character.
+Hence "x12" comes before "x7" as "1" is before "7" in ASCII.  This is
+a more appropriate name sort order where all digits in names are
+already zero-padded and/or hexadecimal values are being used.
+Sets the header sub-sort (\fB@HD SS\fR) tag to \fBqueryname:lexicographical\fR.
 .TP
 .BI "-t " TAG
 Sort first by the value in the alignment tag TAG, then by position or name (if
-also using \fB-n\fP).
+also using \fB-n\fP or \fB-N\fR).
 .TP
 .B "-M "
 Sort unmapped reads (those in chromosome "*") by their sequence
@@ -212,7 +229,8 @@ and the sub-sort (SS) is
 The following rules are used for ordering records.
 
 If option \fB-t\fP is in use, records are first sorted by the value of
-the given alignment tag, and then by position or name (if using \fB-n\fP).
+the given alignment tag, and then by position or name (if using \fB-n\fP
+or \fB-N\fP).
 For example, \*(lq-t RG\*(rq will make read group the primary sort key.  The
 rules for ordering by tag are:
 
@@ -236,11 +254,17 @@ Character tags (type A) are compared by binary character value.
 No attempt is made to compare tags of other types \(em notably type B
 array values will not be compared.
 .PP
-When the \fB-n\fP option is present, records are sorted by name.  Names are
-compared so as to give a \*(lqnatural\*(rq ordering \(em i.e. sections
-consisting of digits are compared numerically while all other sections are
-compared based on their binary representation.  This means \*(lqa1\*(rq will
-come before \*(lqb1\*(rq and \*(lqa9\*(rq will come before \*(lqa10\*(rq.
+When the \fB-n\fP or \fB-N\fP option is present, records are sorted by
+name.  Historically samtools has used a \*(lqnatural\*(rq ordering
+\(em i.e. sections consisting of digits are compared numerically while
+all other sections are compared based on their binary representation.
+This means \*(lqa1\*(rq will come before \*(lqb1\*(rq and \*(lqa9\*(rq
+will come before \*(lqa10\*(rq.  However this alpha-numeric sort can
+be confused by runs of hexadecimal digits.  The newer \fB-N\fP
+option adds a simpler lexicographical based name collation which does not
+attempt any numeric comparisons and may be more appropriate for some
+data sets.  Note care must be taken when using \fBsamtools merge\fP to
+ensure all files are using the same collation order.
 Records with the same name will be ordered according to the values of
 the READ1 and READ2 flags (see
 .BR flags ).
diff --git a/doc/samtools-split.1 b/doc/samtools-split.1
index 9ae71ce30..b442e09a0 100644
--- a/doc/samtools-split.1
+++ b/doc/samtools-split.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-split 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-split 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools split \- splits a file by read group.
 .\"
@@ -49,18 +49,41 @@ samtools split
 
 .SH DESCRIPTION
 .PP
-Splits a file by read group, producing one or more output files
-matching a common prefix (by default based on the input filename)
-each containing one read-group.
+Splits a file by read group, or a specified tag,
+producing one or more output files
+matching a common prefix (by default based on the input filename).
 
+Unless the \fB-d\fR option is used, the file will be split according to the
+.B @RG
+tags listed in the header.
 Records without an RG tag or with an RG tag undefined in the header will cause
 the program to exit with an error unless the \fB-u\fR option is used.
 
 RG values defined in the header but with no records will produce an output file
 only containing a header.
 
+If the
+.BI "-d " TAG
+option is used, the file will be split on the value in the given aux tag.
+Note that only string tags (type \fBZ\fR) are currently supported.
+Unless the \fB-u\fR option is used, the program will exit with an error if
+it finds a record without the given tag.
+
+Note that attempting to split on a tag with high cardinality may result
+in the creation of a large number of output files.
+To prevent this, the \fB-M\fR option can be used to set a limit on the
+number of splits made.
+
+Using
+.B -d RG
+behaves in a similar way to the default (without \fB-d\fR),
+opening an output file for each \fB@RG\fR line in the header.
+However, unlike the default,
+new output files will be opened for any RG tags found in the alignment records
+irrespective of if they have a matching header \fB@RG\fR line.
+
 The \fB-u\fR option may be used to specify the output filename for any
-records with a missing or unrecognised RG tag.  This option will always write
+records with a missing or unrecognised tag.  This option will always write
 out a file even if there are no records.
 
 Output format defaults to BAM.  For SAM or CRAM then either set the format with
@@ -70,7 +93,7 @@ Output format defaults to BAM.  For SAM or CRAM then either set the format with
 .SH OPTIONS
 .TP 14
 .BI "-u " FILE1
-.RI "Put reads with no RG tag or an unrecognised RG tag into " FILE1
+.RI "Put reads with no tag or an unrecognised tag into " FILE1
 .TP
 .BI "-h " FILE2
 .RI "Use the header from " FILE2 " when writing the file given in the " -u
@@ -84,6 +107,28 @@ references must be in the same order and have the same lengths.
 Output filename format string (see below)
 ["%*_%#.%."]
 .TP
+.BI "-d " TAG
+Split reads by TAG value into distinct files. Only the TAG key must be 
+supplied with the option. The value of the TAG has to be a string (i.e.
+.B key:Z:value
+)
+.TP
+.BI "-M,--max-split " NUM
+Limit the number of files created by the \fB-d\fR option to \fINUM\fR (default
+100).
+This prevents accidents where trying to split on a tag with high cardinality
+could result in the creation of a very large number of output files.
+Once the file limit is reached,
+any tag values not already seen will be treated as unmatched and the program
+will exit with an error unless the \fB-u\fR option is in use.
+
+If desired, the limit can be removed using \fB-M -1\fR,
+although in practice the number of outputs will still be restricted by
+system limits on the number of files that can be open at once.
+
+If splitting by read group, and the read group count in the header
+is higher than the requested limit then the limit will be raised to match.
+.TP
 .B -v
 Verbose output
 .TP
@@ -96,8 +141,8 @@ center;
 lb l .
 %%	%
 %*	basename
-%#	@RG index
-%!	@RG ID
+%#	index (of @RG in the header, or count of TAG values seen so far)
+%!	@RG ID or TAG value
 %.	output format filename extension
 .TE
 .TP
diff --git a/doc/samtools-stats.1 b/doc/samtools-stats.1
index 557ddccf9..5f4dd0ac7 100644
--- a/doc/samtools-stats.1
+++ b/doc/samtools-stats.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-stats 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-stats 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools stats \- produces comprehensive statistics from alignment file
 .\"
diff --git a/doc/samtools-targetcut.1 b/doc/samtools-targetcut.1
index c7f2ab226..be9a5a96e 100644
--- a/doc/samtools-targetcut.1
+++ b/doc/samtools-targetcut.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-targetcut 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-targetcut 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools targetcut \- cut fosmid regions (for fosmid pool only)
 .\"
diff --git a/doc/samtools-tview.1 b/doc/samtools-tview.1
index e9f976c75..c9f702874 100644
--- a/doc/samtools-tview.1
+++ b/doc/samtools-tview.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-tview 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-tview 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools tview \- display alignments in a curses-based interactive viewer.
 .\"
diff --git a/doc/samtools-view.1 b/doc/samtools-view.1
index f38d29d23..6f5eff91d 100644
--- a/doc/samtools-view.1
+++ b/doc/samtools-view.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools-view 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools-view 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools view \- views and converts SAM/BAM/CRAM files
 .\"
@@ -292,6 +292,10 @@ or
 .BI "-N " FILE ", --qname-file " FILE
 Output only alignments with read names listed in
 .IR FILE .
+If \fIFILE\fR starts with \fB^\fR then the operation is negated and
+only outputs alignment with read groups not listed in \fIFILE\fR.
+It is not permissible to mix both the filter-in and filter-out style
+syntax in the same command.
 .TP
 .BI "-r " STR ", --read-group " STR
 Output alignments in read group
@@ -306,6 +310,10 @@ This behaviour may change in a future release.
 Output alignments in read groups listed in
 .I FILE
 [null].
+If \fIFILE\fR starts with \fB^\fR then the operation is negated and
+only outputs alignment with read names not listed in \fIFILE\fR.
+It is not permissible to mix both the filter-in and filter-out style
+syntax in the same command.
 Note that records with no
 .B RG
 tag will also be output when using this option.
diff --git a/doc/samtools.1 b/doc/samtools.1
index be5b4aa7c..b25388fd7 100644
--- a/doc/samtools.1
+++ b/doc/samtools.1
@@ -1,5 +1,5 @@
 '\" t
-.TH samtools 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH samtools 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 samtools \- Utilities for the Sequence Alignment/Map (SAM) format
 .\"
@@ -1165,6 +1165,7 @@ flag.secondary	int	Single bit, 0 or 256
 flag.qcfail	int	Single bit, 0 or 512
 flag.dup	int	Single bit, 0 or 1024
 flag.supplementary	int	Single bit, 0 or 2048
+hclen	int	Number of hard-clipped bases
 library	string	Library (LB header via RG)
 mapq	int	Mapping quality
 mpos	int	Synonym for pnext
@@ -1193,10 +1194,12 @@ equivalent to \fBflag & 1024\fR.
 "qlen" and "rlen" are measured using the CIGAR string to count the
 number of query (sequence) and reference bases consumed.  Note "qlen"
 may not exactly match the length of the "seq" field if the sequence is
-"*".  "sclen" is the number of soft-clipped bases.  When combined in
-"qlen-sclen" it can give the number of sequence bases used in the
-alignment, distinguishing between global alignment and local alignment
-length.
+"*".
+
+"sclen" and "hclen" are the number of soft and hard-clipped
+bases respectively.  The formula "qlen-sclen" gives the
+number of sequence bases used in the alignment, distinguishing between
+global alignment and local alignment length.
 
 "endpos" is the (1-based inclusive) position of the rightmost mapped base
 of the read, as measured using the CIGAR string, and for mapped reads
diff --git a/misc/plot-bamstats b/misc/plot-bamstats
index d07c0b1e7..3231ff125 100755
--- a/misc/plot-bamstats
+++ b/misc/plot-bamstats
@@ -25,6 +25,8 @@
 use strict;
 use warnings;
 use Carp;
+use POSIX qw/floor/;
+use List::Util qw(max);
 
 my $opts = parse_params();
 parse_bamcheck($opts);
@@ -39,6 +41,7 @@ plot_coverage($opts);
 plot_mismatches_per_cycle($opts);
 plot_indel_dist($opts);
 plot_indel_cycles($opts);
+plot_read_len_hist($opts);
 create_html($opts);
 
 exit;
@@ -1204,6 +1207,136 @@ sub plot_indel_cycles
     plot($$args{gp});
 }
 
+sub plot_read_len_hist
+{
+    my ($opts) = @_;
+
+    my @rl = get_values($opts, 'RL');
+    if ( !@rl ) { return; }
+
+    # TODO: Add modified version for paired end reads.
+    # my $is_paired = get_value($opts,'SN','reads paired:');
+
+    my $args = get_defaults($opts,"$$opts{prefix}read_length.png");
+    my $fh = $$args{fh};
+
+    # anonymous subroutines for code readibility
+    my $log_bin = sub {
+        my $x = $_[0];
+        return floor((log($x) / log(10)) * 10);
+    };
+    my $inv_log_bin = sub {
+        my $x = $_[0];
+        return floor(10**($x/10));
+    };
+
+    # Open files to store data: this is more elegant than copying four sets of
+    # data into a single GNUplot file.
+    open(my $rd_fh, '>', "$$opts{prefix}read_len_rawdata.txt");
+    open(my $hd_fh, '>', "$$opts{prefix}read_len_histdata.txt");
+
+    # Create gnuplot command file options
+    my $min_x = $rl[0][0];
+    my $max_x = $rl[-1][0];
+
+    # Gnuplot commands for normalized binned data
+    # The normalized plot bins reads by read length with bin widths on a
+    # logarithmic scale (i.e. bins 10-11 bp, 12-14 bp, 15-18 bp, etc.), then
+    # bin height is normalized by the width of the bins.
+    # The end goal is to smooth the distribution of reads compared to plotting
+    # each length without binning.  Visualization of unbinned data is included
+    # in order to make single value spikes more visible, for example in the
+    # case that spike ins are used as a control.
+    print $fh qq[
+        set ytics nomirror
+        set y2tic nomirror
+        set terminal png size 800,1000 truecolor
+        set output "$$args{img}"
+        $$args{grid}
+        set grid noy2tics
+        set multiplot layout 2,1 columnsfirst scale 1,0.9
+        set autoscale y
+        set ylabel "Read Count"
+        set autoscale x
+        set logscale x
+        set xrange[$min_x: $max_x]
+        set xtics rotate by -45 out scale 1 nomirror
+        set xlabel "Read Length"
+        set boxwidth .8 relative
+        set title "$$args{title}" noenhanced
+        set title offset -15,-.5
+        set key at graph 1, 1.15
+        plot "$$opts{prefix}read_len_rawdata.txt" using 1:2 lt rgb "orange" with boxes fs solid .7 title 'unbinned', "$$opts{prefix}read_len_histdata.txt" using 4:3 lt rgb "blue" with boxes title 'normalized bins(count/bin width)' axis x1y2
+    ];
+    print $fh qq[];
+
+    # Gnuplot commands for non-normalized binned data
+    # The un-normalized plot has reads assigned to bins identically to the
+    # normalized plot, however, the bin height is not divided by bin width. The
+    # goal of this visualization is to more accurately reflect the distribution
+    # of reads by volume, and will generally collect around the mean read
+    # length.
+    # Inclusion of unbinned data is to give point of visual comparison to
+    # normalized histogram.
+    print $fh qq[
+        set boxwidth 1 relative
+        plot "$$opts{prefix}read_len_rawdata.txt" using 1:2 lt rgb "orange" with boxes fs solid .7 title 'unbinned', "$$opts{prefix}read_len_histdata.txt" using 4:2 lt rgb "blue" with boxes title 'unnormalized bins' axis x1y2
+    ];
+
+    my @logscale_data = (0)x&$log_bin($max_x);
+    my $log_x = 0;
+    my $start_i = 1;
+    for my $line (@rl)
+    {
+        my @deref_line = @{$line};
+        my $curr_x = $deref_line[0];
+        my $curr_y = $deref_line[1];
+
+        print $rd_fh "$curr_x $curr_y\n";
+        $log_x = max(&$log_bin($curr_x), 5);
+        @logscale_data[floor($log_x)] += $curr_y;
+        $start_i = $curr_x + 1;
+    }
+
+    # Copy binned data into its own file
+
+    # This is a special case to deal with the problem that reads of len < 5 do
+    # not cooperate with the log10(x)*10 transformation and cause small
+    # fragments to graph badly.
+    my $start = 1;
+    my $end = 3;
+    my $div_binw_val = $logscale_data[5]/($end - $start);
+    my $start_idx = -1;
+    for my $i (0..@logscale_data-1) {
+        if($logscale_data[$i] != 0)
+        {
+            $start_idx = $i;
+            if ($i == 5)
+            {
+                print $hd_fh "5 $logscale_data[5] $div_binw_val $start $end\n";
+                $start_idx += 1;
+            }
+            last;
+        }
+    }
+
+
+    for my $i ($start_idx..@logscale_data-1) {
+        $start = &$inv_log_bin($i);
+        $end = &$inv_log_bin($i+1);
+        $div_binw_val = $logscale_data[$i]/max(($end - $start), 1);
+        print $hd_fh "$i $logscale_data[$i] $div_binw_val $start $end\n";
+    }
+
+    # Close file buffers and clean up workspace
+    close($fh);
+    close($hd_fh);
+    close($rd_fh);
+    plot($$args{gp});
+    unlink("$$opts{prefix}read_len_rawdata.txt");
+    unlink("$$opts{prefix}read_len_histdata.txt");
+}
+
 sub merge_bamcheck
 {
     my ($opts) = @_;
diff --git a/misc/wgsim.1 b/misc/wgsim.1
index 0be5c8a2f..cfa46ae78 100644
--- a/misc/wgsim.1
+++ b/misc/wgsim.1
@@ -1,4 +1,4 @@
-.TH wgsim 1 "25 July 2023" "samtools-1.18" "Bioinformatics tools"
+.TH wgsim 1 "12 December 2023" "samtools-1.19" "Bioinformatics tools"
 .SH NAME
 wgsim \- Whole-genome sequencing read simulator
 .SH SYNOPSIS
diff --git a/sam_view.c b/sam_view.c
index aa5b92310..877f84a13 100644
--- a/sam_view.c
+++ b/sam_view.c
@@ -54,6 +54,8 @@ typedef struct samview_settings {
     strhash_t rnhash;
     strhash_t tvhash;
     int min_mapQ;
+    int rghash_discard; // 0 keep, 1 discard
+    int rnhash_discard; // 0 keep, 1 discard
 
     // Described here in the same terms as the usage statement.
     // The code however always negates to "reject if"         keep if:
@@ -176,7 +178,8 @@ static int process_aln(const sam_hdr_t *h, bam1_t *b, samview_settings_t* settin
         uint8_t *s = bam_aux_get(b, "RG");
         if (s) {
             khint_t k = kh_get(str, settings->rghash, (char*)(s + 1));
-            if (k == kh_end(settings->rghash)) return 1;
+            if ((k == kh_end(settings->rghash)) != settings->rghash_discard)
+                return 1;
         }
     }
     if (settings->tag) {
@@ -204,9 +207,11 @@ static int process_aln(const sam_hdr_t *h, bam1_t *b, samview_settings_t* settin
     }
     if (settings->rnhash) {
         const char* rn = bam_get_qname(b);
-        if (!rn || kh_get(str, settings->rnhash, rn) == kh_end(settings->rnhash)) {
+        strhash_t h = settings->rnhash;
+        if (!rn && !settings->rnhash_discard)
+            return 1;
+        if ((kh_get(str, h, rn) == kh_end(h)) != settings->rnhash_discard)
             return 1;
-        }
     }
     if (settings->library) {
         const char *p = bam_get_library((sam_hdr_t*)h, b);
@@ -305,7 +310,12 @@ static int add_read_group_single(const char *subcmd, samview_settings_t *setting
     if (settings->rghash == NULL) {
         settings->rghash = kh_init(str);
         if (settings->rghash == NULL) goto err;
+    } else if (settings->rghash_discard == 1) {
+        print_error("view", "cannot mix include and exclude read-group files in the same command line");
+        free(d);
+        return -1;
     }
+    settings->rghash_discard = 0;
 
     kh_put(str, settings->rghash, d, &ret);
     if (ret == -1) goto err;
@@ -326,8 +336,14 @@ static int add_read_names_file(const char *subcmd, samview_settings_t *settings,
             perror(NULL);
             return -1;
         }
+    } else if ((settings->rnhash_discard == 0 && *fn == '^') ||
+        (settings->rnhash_discard == 1 && *fn != '^')) {
+        print_error("view", "cannot mix include and exclude read-name files in the same command line");
+        return -1;
     }
-    return populate_lookup_from_file(subcmd, settings->rnhash, fn);
+    settings->rnhash_discard = (*fn == '^');
+    return populate_lookup_from_file(subcmd, settings->rnhash,
+                                     fn + (*fn == '^'));
 }
 
 static int add_read_groups_file(const char *subcmd, samview_settings_t *settings, char *fn)
@@ -338,8 +354,14 @@ static int add_read_groups_file(const char *subcmd, samview_settings_t *settings
             perror(NULL);
             return -1;
         }
+    } else if ((settings->rghash_discard == 0 && *fn == '^') ||
+        (settings->rghash_discard == 1 && *fn != '^')) {
+        print_error("view", "cannot mix include and exclude read-group files in the same command line");
+        return -1;
     }
-    return populate_lookup_from_file(subcmd, settings->rghash, fn);
+    settings->rghash_discard = (*fn == '^');
+    return populate_lookup_from_file(subcmd, settings->rghash,
+                                     fn + (*fn == '^'));
 }
 
 static int add_tag_value_single(const char *subcmd, samview_settings_t *settings, char *name)
@@ -755,13 +777,13 @@ static int multi_region_view(samview_settings_t *conf, hts_itr_multi_t *iter)
     while ((result = sam_itr_multi_next(conf->in, iter, b)) >= 0) {
         if (process_one_record(conf, b, &write_error) < 0) break;
     }
-    hts_itr_multi_destroy(iter);
     bam_destroy1(b);
 
     if (result < -1) {
         print_error("view", "retrieval of region #%d failed", iter->curr_tid);
-        return 1;
+        write_error = 1;
     }
+    hts_itr_multi_destroy(iter);
     return write_error;
 }
 
@@ -1354,8 +1376,6 @@ int main_samview(int argc, char *argv[])
         }
     }
 
-    if ( settings.hts_idx ) hts_idx_destroy(settings.hts_idx);
-
     if (ga.write_index) {
         if (sam_idx_save(settings.out) < 0) {
             print_error_errno("view", "writing index failed");
@@ -1368,6 +1388,8 @@ int main_samview(int argc, char *argv[])
     }
 
 view_end:
+    if ( settings.hts_idx ) hts_idx_destroy(settings.hts_idx);
+
     if (settings.is_count && ret == 0) {
         if (fprintf(settings.fn_out? fp_out : stdout, "%" PRId64 "\n", settings.count) < 0) {
             if (settings.fn_out) print_error_errno("view", "writing to \"%s\" failed", settings.fn_out);
@@ -1458,9 +1480,10 @@ static int usage(FILE *fp, int exit_status, int is_long_help)
 "\n"
 "Filtering options (Only include in output reads that...):\n"
 "  -L, --target[s]-file FILE  ...overlap (BED) regions in FILE\n"
+"  -N, --qname-file [^]FILE   ...whose read name is listed in FILE (\"^\" negates)\n"
 "  -r, --read-group STR       ...are in read group STR\n"
-"  -R, --read-group-file FILE ...are in a read group listed in FILE\n"
-"  -N, --qname-file FILE      ...whose read name is listed in FILE\n"
+"  -R, --read-group-file [^]FILE\n"
+"                             ...are in a read group listed in FILE\n"
 "  -d, --tag STR1[:STR2]      ...have a tag STR1 (with associated value STR2)\n"
 "  -D, --tag-file STR:FILE    ...have a tag STR whose value is listed in FILE\n"
 "  -q, --min-MQ INT           ...have mapping quality >= INT\n"
diff --git a/stats.c b/stats.c
index 44783a974..1574b3203 100644
--- a/stats.c
+++ b/stats.c
@@ -766,6 +766,9 @@ static void collect_barcode_stats(bam1_t* bam_line, stats_t* stats) {
             continue;
 
         uint32_t barcode_len = strlen(barcode);
+        if (!barcode_len) {
+            continue;        //consider 0 size barcode same as no barcode - avoids issues with realloc below
+        }
         if (!stats->tags_barcode[tag].nbases) { // tag seen for the first time
             uint32_t offset = 0;
             for (i = 0; i < stats->ntags; i++)
diff --git a/test/sort/name.sort.expected.sam b/test/sort/name.sort.expected.sam
index 6e10914af..22fca6238 100644
--- a/test/sort/name.sort.expected.sam
+++ b/test/sort/name.sort.expected.sam
@@ -1,4 +1,4 @@
-@HD	VN:1.4	SO:queryname
+@HD	VN:1.4	SO:queryname	SS:queryname:natural
 @SQ	SN:insert	LN:599
 @SQ	SN:ref1	LN:45
 @SQ	SN:ref2	LN:40
diff --git a/test/sort/name2.sort.expected.sam b/test/sort/name2.sort.expected.sam
new file mode 100644
index 000000000..340d10821
--- /dev/null
+++ b/test/sort/name2.sort.expected.sam
@@ -0,0 +1,24 @@
+@HD	VN:1.4	SO:queryname	SS:queryname:lexicographical
+@SQ	SN:insert	LN:599
+@SQ	SN:ref1	LN:45
+@SQ	SN:ref2	LN:40
+@SQ	SN:ref3	LN:4
+@PG	ID:llama
+@RG	ID:fish	PG:llama
+@RG	ID:cow	PU:13_&^&&*(:332	PG:donkey
+@RG	PU:*9u8jkjjkjd:	ID:colt
+@PG	ID:bull	PP:donkey
+@PG	ID:donkey
+@CO	Do you know?
+r005	83	ref1	37	30	9M	=	7	-39	CAGCGCCAT	*	RG:Z:colt	PG:Z:donkey
+r005	163	ref1	7	30	8M4I4M1D3M	=	37	39	TTAGATAAAGAGGATACTG	*	XX:B:S,12561,2,20,112	YY:i:100	RG:Z:colt	PG:Z:donkey
+r006	0	ref1	9	30	1S2I6M1P1I1P1I4M2I	*	0	0	AAAAGATAAGGGATAAA	*	XA:Z:abc	XB:i:-10	RG:Z:colt	PG:Z:donkey
+r006	16	ref1	29	30	6H5M	*	0	0	TAGGC	*	RG:Z:colt	PG:Z:donkey
+r007	0	ref1	9	30	5H6M	*	0	0	AGCTAA	*	RG:Z:colt	PG:Z:donkey
+r007	0	ref1	16	30	6M14N1I5M	*	0	0	ATAGCTCTCAGC	*	RG:Z:colt	PG:Z:donkey
+x10	0	ref2	10	30	25M	*	0	0	CAAATAATTAAGTCTACAGAGCAAC	?????????????????????????	RG:Z:cow	PG:Z:bull
+x11	0	ref2	12	30	24M	*	0	0	AATAATTAAGTCTACAGAGCAACT	????????????????????????	RG:Z:cow	PG:Z:bull
+x12	0	ref2	14	30	23M	*	0	0	TAATTAAGTCTACAGAGCAACTA	???????????????????????	RG:Z:cow	PG:Z:bull
+x7	0	ref2	1	30	20M	*	0	0	AGGTTTTATAAAACAAATAA	*	RG:Z:cow	PG:Z:bull
+x8	0	ref2	2	30	21M	*	0	0	GGTTTTATAAAACAAATAATT	?????????????????????	RG:Z:cow	PG:Z:bull
+x9	0	ref2	6	30	9M4I13M	*	0	0	TTATAAAACAAATAATTAAGTCTACA	??????????????????????????	RG:Z:cow	PG:Z:bull
diff --git a/test/split/split.expected.grp1.sam b/test/split/split.expected.grp1.sam
index 23e15367d..4fbd1775d 100644
--- a/test/split/split.expected.grp1.sam
+++ b/test/split/split.expected.grp1.sam
@@ -24,17 +24,17 @@
 @SQ	SN:ref2	LN:60	M5:7c35feac7036c1cdef3bee0cc4b21437
 @SQ	SN:ref3_unused	LN:70	M5:5fdd18c2c6ecac4838996d029bf395b5
 @RG	ID:grp1	DS:Group 1	LB:Library 1	SM:Sample
-ref1_grp1_p001	99	ref1	1	0	10M	=	25	34	CGAGCTCGGT	!!!!!!!!!!	RG:Z:grp1	BC:Z:ACGT	H0:i:1	aa:A:!	ab:A:~	fa:f:3.14159	za:Z:Hello world!	ha:H:DEADBEEF	ba:B:c,-128,0,127	bb:B:C,0,127,255	bc:B:s,-32768,0,32767	bd:B:S,0,32768,65535	be:B:i,-2147483648,0,2147483647	bf:B:I,0,2147483648,4294967295	bg:B:f,2.71828,6.626e-34,2.9979e+09	NM:i:0	MD:Z:10
-ref1_grp1_p002	99	ref1	5	2	10M	=	29	34	CTCGGTACCC	##########	RG:Z:grp1	BC:Z:AATTCCGG	H0:i:1	aa:A:a	ab:A:z	fa:f:4.3597e-18	za:Z:Another string	ha:H:2000AD	NM:i:0	MD:Z:10
-ref1_grp1_p003	99	ref1	9	4	10M	=	33	34	GTACCCGGGG	%%%%%%%%%%	fa:f:1.66e-27	RG:Z:grp1	NM:i:0	MD:Z:10
-ref1_grp1_p004	99	ref1	13	6	10M	=	37	34	CCGGGGATCC	''''''''''	fa:f:1.38e-23	za:Z:xRG:Z:grp2	RG:Z:grp1	NM:i:0	MD:Z:10
-ref1_grp1_p005	99	ref1	17	8	10M	=	41	34	GGATCCTCTA	))))))))))	RG:Z:grp1	ia:i:40000	NM:i:0	MD:Z:10
+ref1_grp1_p001	99	ref1	1	0	10M	=	25	34	CGAGCTCGGT	!!!!!!!!!!	RG:Z:grp1	BC:Z:ACGT	H0:i:1	aa:A:!	ab:A:~	fa:f:3.14159	za:Z:Hello world!	ha:H:DEADBEEF	ba:B:c,-128,0,127	bb:B:C,0,127,255	bc:B:s,-32768,0,32767	bd:B:S,0,32768,65535	be:B:i,-2147483648,0,2147483647	bf:B:I,0,2147483648,4294967295	bg:B:f,2.71828,6.626e-34,2.9979e+09	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp1_p002	99	ref1	5	2	10M	=	29	34	CTCGGTACCC	##########	RG:Z:grp1	BC:Z:AATTCCGG	H0:i:1	aa:A:a	ab:A:z	fa:f:4.3597e-18	za:Z:Another string	ha:H:2000AD	NM:i:0	MD:Z:10	an:Z:cat
+ref1_grp1_p003	99	ref1	9	4	10M	=	33	34	GTACCCGGGG	%%%%%%%%%%	fa:f:1.66e-27	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:cat
+ref1_grp1_p004	99	ref1	13	6	10M	=	37	34	CCGGGGATCC	''''''''''	fa:f:1.38e-23	za:Z:xRG:Z:grp2	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp1_p005	99	ref1	17	8	10M	=	41	34	GGATCCTCTA	))))))))))	RG:Z:grp1	ia:i:40000	NM:i:0	MD:Z:10	an:Z:badger
 ref1_grp1_p006	99	ref1	21	10	10M	=	45	34	CCTCTAGAGT	++++++++++	RG:Z:grp1	ia:i:255	NM:i:0	MD:Z:10
-ref1_grp1_p001	147	ref1	25	12	10M	=	1	-34	TAGAGTCGAC	----------	RG:Z:grp1	NM:i:0	MD:Z:10
-ref1_grp1_p002	147	ref1	29	14	10M	=	5	-34	GTCGACCTGC	//////////	RG:Z:grp1	NM:i:0	MD:Z:10
-ref1_grp1_p003	147	ref1	33	16	10M	=	9	-34	ACCTGCAGGC	1111111111	RG:Z:grp1	NM:i:0	MD:Z:10
-ref12_grp1_p001	97	ref1	36	50	10M	ref2	2	0	TGCAGGCATG	AAAAAAAAAA	RG:Z:grp1	NM:i:0	MD:Z:10
-ref1_grp1_p004	147	ref1	37	18	10M	=	13	-34	GCAGGCATGC	3333333333	RG:Z:grp1	NM:i:0	MD:Z:10
-ref1_grp1_p005	147	ref1	41	20	10M	=	17	-34	GCATGCAAGC	5555555555	RG:Z:grp1	NM:i:0	MD:Z:10
+ref1_grp1_p001	147	ref1	25	12	10M	=	1	-34	TAGAGTCGAC	----------	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp1_p002	147	ref1	29	14	10M	=	5	-34	GTCGACCTGC	//////////	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:cat
+ref1_grp1_p003	147	ref1	33	16	10M	=	9	-34	ACCTGCAGGC	1111111111	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:cat
+ref12_grp1_p001	97	ref1	36	50	10M	ref2	2	0	TGCAGGCATG	AAAAAAAAAA	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:cat
+ref1_grp1_p004	147	ref1	37	18	10M	=	13	-34	GCAGGCATGC	3333333333	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp1_p005	147	ref1	41	20	10M	=	17	-34	GCATGCAAGC	5555555555	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:badger
 ref1_grp1_p006	147	ref1	45	22	10M	=	21	-34	GCAAGCTTGA	7777777777	RG:Z:grp1	NM:i:0	MD:Z:10
-ref12_grp1_p001	145	ref2	2	50	10M	ref1	36	0	TTCTATAGTG	BBBBBBBBBB	RG:Z:grp1	NM:i:0	MD:Z:10
+ref12_grp1_p001	145	ref2	2	50	10M	ref1	36	0	TTCTATAGTG	BBBBBBBBBB	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:cat
diff --git a/test/split/split.expected.grp2.sam b/test/split/split.expected.grp2.sam
index 07798dd8c..6a3854c40 100644
--- a/test/split/split.expected.grp2.sam
+++ b/test/split/split.expected.grp2.sam
@@ -24,17 +24,17 @@
 @SQ	SN:ref2	LN:60	M5:7c35feac7036c1cdef3bee0cc4b21437
 @SQ	SN:ref3_unused	LN:70	M5:5fdd18c2c6ecac4838996d029bf395b5
 @RG	ID:grp2	DS:Group 2	LB:Library 2	SM:Sample
-ref1_grp2_p001	99	ref1	3	1	10M	=	27	34	AGCTCGGTAC	""""""""""	RG:Z:grp2	BC:Z:TGCA	H0:i:1	aa:A:A	ab:A:Z	fa:f:6.67e-11	za:Z:!"$%^&*()	ha:H:CAFE	NM:i:0	MD:Z:10
-ref1_grp2_p002	99	ref1	7	3	10M	=	31	34	CGGTACCCGG	$$$$$$$$$$	fa:f:6.022e+23	RG:Z:grp2	NM:i:0	MD:Z:10
-ref1_grp2_p003	99	ref1	11	5	10M	=	35	34	ACCCGGGGAT	&&&&&&&&&&	RG:Z:grp2	ia:i:4294967295	NM:i:0	MD:Z:10
-ref1_grp2_p004	99	ref1	15	7	10M	=	39	34	GGGGATCCTC	((((((((((	RG:Z:grp2	ia:i:-2147483648	NM:i:0	MD:Z:10
-ref1_grp2_p005	99	ref1	19	9	10M	=	43	34	ATCCTCTAGA	**********	RG:Z:grp2	ia:i:-1000	NM:i:0	MD:Z:10
+ref1_grp2_p001	99	ref1	3	1	10M	=	27	34	AGCTCGGTAC	""""""""""	RG:Z:grp2	BC:Z:TGCA	H0:i:1	aa:A:A	ab:A:Z	fa:f:6.67e-11	za:Z:!"$%^&*()	ha:H:CAFE	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp2_p002	99	ref1	7	3	10M	=	31	34	CGGTACCCGG	$$$$$$$$$$	fa:f:6.022e+23	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp2_p003	99	ref1	11	5	10M	=	35	34	ACCCGGGGAT	&&&&&&&&&&	RG:Z:grp2	ia:i:4294967295	NM:i:0	MD:Z:10	an:Z:dog
+ref1_grp2_p004	99	ref1	15	7	10M	=	39	34	GGGGATCCTC	((((((((((	RG:Z:grp2	ia:i:-2147483648	NM:i:0	MD:Z:10	an:Z:aardvark
+ref1_grp2_p005	99	ref1	19	9	10M	=	43	34	ATCCTCTAGA	**********	RG:Z:grp2	ia:i:-1000	NM:i:0	MD:Z:10	an:Z:aardvark
 ref1_grp2_p006	99	ref1	23	11	10M	=	47	34	TCTAGAGTCG	,,,,,,,,,,	RG:Z:grp2	ia:i:-1	NM:i:0	MD:Z:10
-ref1_grp2_p001	147	ref1	27	13	10M	=	3	-34	GAGTCGACCT	..........	RG:Z:grp2	NM:i:0	MD:Z:10
-ref1_grp2_p002	147	ref1	31	15	10M	=	7	-34	CGACCTGCAG	0000000000	RG:Z:grp2	NM:i:0	MD:Z:10
-ref1_grp2_p003	147	ref1	35	17	10M	=	11	-34	CTGCAGGCAT	2222222222	RG:Z:grp2	NM:i:0	MD:Z:10
-ref1_grp2_p004	147	ref1	39	19	10M	=	15	-34	AGGCATGCAA	4444444444	RG:Z:grp2	NM:i:0	MD:Z:10
-ref1_grp2_p005	147	ref1	43	21	10M	=	19	-34	ATGCAAGCTT	6666666666	RG:Z:grp2	NM:i:0	MD:Z:10
-ref12_grp2_p001	97	ref1	46	50	10M	ref2	12	0	CAAGCTTGAG	AAAAAAAAAA	RG:Z:grp2	NM:i:0	MD:Z:10
+ref1_grp2_p001	147	ref1	27	13	10M	=	3	-34	GAGTCGACCT	..........	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp2_p002	147	ref1	31	15	10M	=	7	-34	CGACCTGCAG	0000000000	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp2_p003	147	ref1	35	17	10M	=	11	-34	CTGCAGGCAT	2222222222	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:dog
+ref1_grp2_p004	147	ref1	39	19	10M	=	15	-34	AGGCATGCAA	4444444444	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:aardvark
+ref1_grp2_p005	147	ref1	43	21	10M	=	19	-34	ATGCAAGCTT	6666666666	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:aardvark
+ref12_grp2_p001	97	ref1	46	50	10M	ref2	12	0	CAAGCTTGAG	AAAAAAAAAA	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:cat
 ref1_grp2_p006	147	ref1	47	23	10M	=	23	-34	AAGCTTGAGT	8888888888	RG:Z:grp2	NM:i:0	MD:Z:10
-ref12_grp2_p001	145	ref2	12	50	10M	ref1	46	0	TCACCTAAAT	BBBBBBBBBB	RG:Z:grp2	NM:i:0	MD:Z:10
+ref12_grp2_p001	145	ref2	12	50	10M	ref1	46	0	TCACCTAAAT	BBBBBBBBBB	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:cat
diff --git a/test/split/split.expected.unk.sam b/test/split/split.expected.unk.sam
index 59f6ff569..8417d13e5 100644
--- a/test/split/split.expected.unk.sam
+++ b/test/split/split.expected.unk.sam
@@ -26,9 +26,9 @@
 @SQ	SN:ref1	LN:56	M5:08c04d512d4797d9ba2a156c1daba468
 @SQ	SN:ref2	LN:60	M5:7c35feac7036c1cdef3bee0cc4b21437
 @SQ	SN:ref3_unused	LN:70	M5:5fdd18c2c6ecac4838996d029bf395b5
-ref2_grp3_p001	83	ref2	1	99	15M	=	31	45	ATTCTATAGTGTCAC	~~~~~~~~~~~~~~~	NM:i:0	MD:Z:15
-ref2_grp3_p002	147	ref2	16	99	15M	=	46	45	CTAAATAGCTTGGCG	}}}}}}}}}}}}}}}	NM:i:0	MD:Z:15
-ref2_grp3_p001	163	ref2	31	99	15M	=	1	-45	CTGTTTCCTGTGTGA	|||||||||||||||	NM:i:13	MD:Z:0T0A0A1C0A0T0G0G0T0C0A1A0G0
-ref2_grp3_p002	99	ref2	46	99	15M	=	16	-45	CTGTTTCCTGTGTGA	{{{{{{{{{{{{{{{	NM:i:0	MD:Z:15
-unaligned_grp3_p001	77	*	0	0	*	*	0	0	CACTCGTTCATGACG	0123456789abcde
-unaligned_grp3_p001	141	*	0	0	*	*	0	0	GAAAGTGAGGAGGTG	edcba9876543210
+ref2_grp3_p001	83	ref2	1	99	15M	=	31	45	ATTCTATAGTGTCAC	~~~~~~~~~~~~~~~	NM:i:0	MD:Z:15	an:Z:cat
+ref2_grp3_p002	147	ref2	16	99	15M	=	46	45	CTAAATAGCTTGGCG	}}}}}}}}}}}}}}}	NM:i:0	MD:Z:15	RG:Z:grp3	an:Z:dog
+ref2_grp3_p001	163	ref2	31	99	15M	=	1	-45	CTGTTTCCTGTGTGA	|||||||||||||||	NM:i:13	MD:Z:0T0A0A1C0A0T0G0G0T0C0A1A0G0	an:Z:cat
+ref2_grp3_p002	99	ref2	46	99	15M	=	16	-45	CTGTTTCCTGTGTGA	{{{{{{{{{{{{{{{	NM:i:0	MD:Z:15	RG:Z:grp3	an:Z:dog
+unaligned_grp3_p001	77	*	0	0	*	*	0	0	CACTCGTTCATGACG	0123456789abcde	an:Z:dog
+unaligned_grp3_p001	141	*	0	0	*	*	0	0	GAAAGTGAGGAGGTG	edcba9876543210	an:Z:dog
diff --git a/test/split/split.expected_d_RG.grp3.sam b/test/split/split.expected_d_RG.grp3.sam
new file mode 100644
index 000000000..14e7997da
--- /dev/null
+++ b/test/split/split.expected_d_RG.grp3.sam
@@ -0,0 +1,29 @@
+@HD	VN:1.4	SO:coordinate
+@RG	ID:grp3
+@PG	ID:prog1	PN:emacs	CL:emacs	VN:23.1.1
+@CO	The MIT License
+@CO	
+@CO	Copyright (c) 2014 Genome Research Ltd.
+@CO	
+@CO	Permission is hereby granted, free of charge, to any person obtaining a copy
+@CO	of this software and associated documentation files (the "Software"), to deal
+@CO	in the Software without restriction, including without limitation the rights
+@CO	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+@CO	copies of the Software, and to permit persons to whom the Software is
+@CO	furnished to do so, subject to the following conditions:
+@CO	
+@CO	The above copyright notice and this permission notice shall be included in
+@CO	all copies or substantial portions of the Software.
+@CO	
+@CO	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+@CO	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+@CO	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+@CO	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+@CO	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+@CO	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+@CO	THE SOFTWARE.
+@SQ	SN:ref1	LN:56	M5:08c04d512d4797d9ba2a156c1daba468
+@SQ	SN:ref2	LN:60	M5:7c35feac7036c1cdef3bee0cc4b21437
+@SQ	SN:ref3_unused	LN:70	M5:5fdd18c2c6ecac4838996d029bf395b5
+ref2_grp3_p002	147	ref2	16	99	15M	=	46	45	CTAAATAGCTTGGCG	}}}}}}}}}}}}}}}	NM:i:0	MD:Z:15	RG:Z:grp3	an:Z:dog
+ref2_grp3_p002	99	ref2	46	99	15M	=	16	-45	CTGTTTCCTGTGTGA	{{{{{{{{{{{{{{{	NM:i:0	MD:Z:15	RG:Z:grp3	an:Z:dog
diff --git a/test/split/split.expected_d_RG.unk.sam b/test/split/split.expected_d_RG.unk.sam
new file mode 100644
index 000000000..2676ba6d3
--- /dev/null
+++ b/test/split/split.expected_d_RG.unk.sam
@@ -0,0 +1,32 @@
+@HD	VN:1.4	SO:coordinate
+@RG	ID:grp1	DS:Group 1	LB:Library 1	SM:Sample
+@RG	ID:grp2	DS:Group 2	LB:Library 2	SM:Sample
+@PG	ID:prog1	PN:emacs	CL:emacs	VN:23.1.1
+@CO	The MIT License
+@CO	
+@CO	Copyright (c) 2014 Genome Research Ltd.
+@CO	
+@CO	Permission is hereby granted, free of charge, to any person obtaining a copy
+@CO	of this software and associated documentation files (the "Software"), to deal
+@CO	in the Software without restriction, including without limitation the rights
+@CO	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+@CO	copies of the Software, and to permit persons to whom the Software is
+@CO	furnished to do so, subject to the following conditions:
+@CO	
+@CO	The above copyright notice and this permission notice shall be included in
+@CO	all copies or substantial portions of the Software.
+@CO	
+@CO	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+@CO	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+@CO	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+@CO	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+@CO	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+@CO	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+@CO	THE SOFTWARE.
+@SQ	SN:ref1	LN:56	M5:08c04d512d4797d9ba2a156c1daba468
+@SQ	SN:ref2	LN:60	M5:7c35feac7036c1cdef3bee0cc4b21437
+@SQ	SN:ref3_unused	LN:70	M5:5fdd18c2c6ecac4838996d029bf395b5
+ref2_grp3_p001	83	ref2	1	99	15M	=	31	45	ATTCTATAGTGTCAC	~~~~~~~~~~~~~~~	NM:i:0	MD:Z:15	an:Z:cat
+ref2_grp3_p001	163	ref2	31	99	15M	=	1	-45	CTGTTTCCTGTGTGA	|||||||||||||||	NM:i:13	MD:Z:0T0A0A1C0A0T0G0G0T0C0A1A0G0	an:Z:cat
+unaligned_grp3_p001	77	*	0	0	*	*	0	0	CACTCGTTCATGACG	0123456789abcde	an:Z:dog
+unaligned_grp3_p001	141	*	0	0	*	*	0	0	GAAAGTGAGGAGGTG	edcba9876543210	an:Z:dog
diff --git a/test/split/split.expected_d_an.aardvark.sam b/test/split/split.expected_d_an.aardvark.sam
new file mode 100644
index 000000000..b5ce3fb0b
--- /dev/null
+++ b/test/split/split.expected_d_an.aardvark.sam
@@ -0,0 +1,32 @@
+@HD	VN:1.4	SO:coordinate
+@RG	ID:grp1	DS:Group 1	LB:Library 1	SM:Sample
+@RG	ID:grp2	DS:Group 2	LB:Library 2	SM:Sample
+@PG	ID:prog1	PN:emacs	CL:emacs	VN:23.1.1
+@CO	The MIT License
+@CO	
+@CO	Copyright (c) 2014 Genome Research Ltd.
+@CO	
+@CO	Permission is hereby granted, free of charge, to any person obtaining a copy
+@CO	of this software and associated documentation files (the "Software"), to deal
+@CO	in the Software without restriction, including without limitation the rights
+@CO	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+@CO	copies of the Software, and to permit persons to whom the Software is
+@CO	furnished to do so, subject to the following conditions:
+@CO	
+@CO	The above copyright notice and this permission notice shall be included in
+@CO	all copies or substantial portions of the Software.
+@CO	
+@CO	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+@CO	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+@CO	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+@CO	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+@CO	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+@CO	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+@CO	THE SOFTWARE.
+@SQ	SN:ref1	LN:56	M5:08c04d512d4797d9ba2a156c1daba468
+@SQ	SN:ref2	LN:60	M5:7c35feac7036c1cdef3bee0cc4b21437
+@SQ	SN:ref3_unused	LN:70	M5:5fdd18c2c6ecac4838996d029bf395b5
+ref1_grp2_p004	99	ref1	15	7	10M	=	39	34	GGGGATCCTC	((((((((((	RG:Z:grp2	ia:i:-2147483648	NM:i:0	MD:Z:10	an:Z:aardvark
+ref1_grp2_p005	99	ref1	19	9	10M	=	43	34	ATCCTCTAGA	**********	RG:Z:grp2	ia:i:-1000	NM:i:0	MD:Z:10	an:Z:aardvark
+ref1_grp2_p004	147	ref1	39	19	10M	=	15	-34	AGGCATGCAA	4444444444	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:aardvark
+ref1_grp2_p005	147	ref1	43	21	10M	=	19	-34	ATGCAAGCTT	6666666666	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:aardvark
diff --git a/test/split/split.expected_d_an.badger.sam b/test/split/split.expected_d_an.badger.sam
new file mode 100644
index 000000000..a78886b5a
--- /dev/null
+++ b/test/split/split.expected_d_an.badger.sam
@@ -0,0 +1,38 @@
+@HD	VN:1.4	SO:coordinate
+@RG	ID:grp1	DS:Group 1	LB:Library 1	SM:Sample
+@RG	ID:grp2	DS:Group 2	LB:Library 2	SM:Sample
+@PG	ID:prog1	PN:emacs	CL:emacs	VN:23.1.1
+@CO	The MIT License
+@CO	
+@CO	Copyright (c) 2014 Genome Research Ltd.
+@CO	
+@CO	Permission is hereby granted, free of charge, to any person obtaining a copy
+@CO	of this software and associated documentation files (the "Software"), to deal
+@CO	in the Software without restriction, including without limitation the rights
+@CO	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+@CO	copies of the Software, and to permit persons to whom the Software is
+@CO	furnished to do so, subject to the following conditions:
+@CO	
+@CO	The above copyright notice and this permission notice shall be included in
+@CO	all copies or substantial portions of the Software.
+@CO	
+@CO	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+@CO	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+@CO	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+@CO	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+@CO	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+@CO	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+@CO	THE SOFTWARE.
+@SQ	SN:ref1	LN:56	M5:08c04d512d4797d9ba2a156c1daba468
+@SQ	SN:ref2	LN:60	M5:7c35feac7036c1cdef3bee0cc4b21437
+@SQ	SN:ref3_unused	LN:70	M5:5fdd18c2c6ecac4838996d029bf395b5
+ref1_grp1_p001	99	ref1	1	0	10M	=	25	34	CGAGCTCGGT	!!!!!!!!!!	RG:Z:grp1	BC:Z:ACGT	H0:i:1	aa:A:!	ab:A:~	fa:f:3.14159	za:Z:Hello world!	ha:H:DEADBEEF	ba:B:c,-128,0,127	bb:B:C,0,127,255	bc:B:s,-32768,0,32767	bd:B:S,0,32768,65535	be:B:i,-2147483648,0,2147483647	bf:B:I,0,2147483648,4294967295	bg:B:f,2.71828,6.626e-34,2.9979e+09	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp2_p001	99	ref1	3	1	10M	=	27	34	AGCTCGGTAC	""""""""""	RG:Z:grp2	BC:Z:TGCA	H0:i:1	aa:A:A	ab:A:Z	fa:f:6.67e-11	za:Z:!"$%^&*()	ha:H:CAFE	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp2_p002	99	ref1	7	3	10M	=	31	34	CGGTACCCGG	$$$$$$$$$$	fa:f:6.022e+23	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp1_p004	99	ref1	13	6	10M	=	37	34	CCGGGGATCC	''''''''''	fa:f:1.38e-23	za:Z:xRG:Z:grp2	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp1_p005	99	ref1	17	8	10M	=	41	34	GGATCCTCTA	))))))))))	RG:Z:grp1	ia:i:40000	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp1_p001	147	ref1	25	12	10M	=	1	-34	TAGAGTCGAC	----------	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp2_p001	147	ref1	27	13	10M	=	3	-34	GAGTCGACCT	..........	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp2_p002	147	ref1	31	15	10M	=	7	-34	CGACCTGCAG	0000000000	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp1_p004	147	ref1	37	18	10M	=	13	-34	GCAGGCATGC	3333333333	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp1_p005	147	ref1	41	20	10M	=	17	-34	GCATGCAAGC	5555555555	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:badger
diff --git a/test/split/split.expected_d_an.cat.sam b/test/split/split.expected_d_an.cat.sam
new file mode 100644
index 000000000..c2bdd3af6
--- /dev/null
+++ b/test/split/split.expected_d_an.cat.sam
@@ -0,0 +1,38 @@
+@HD	VN:1.4	SO:coordinate
+@RG	ID:grp1	DS:Group 1	LB:Library 1	SM:Sample
+@RG	ID:grp2	DS:Group 2	LB:Library 2	SM:Sample
+@PG	ID:prog1	PN:emacs	CL:emacs	VN:23.1.1
+@CO	The MIT License
+@CO	
+@CO	Copyright (c) 2014 Genome Research Ltd.
+@CO	
+@CO	Permission is hereby granted, free of charge, to any person obtaining a copy
+@CO	of this software and associated documentation files (the "Software"), to deal
+@CO	in the Software without restriction, including without limitation the rights
+@CO	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+@CO	copies of the Software, and to permit persons to whom the Software is
+@CO	furnished to do so, subject to the following conditions:
+@CO	
+@CO	The above copyright notice and this permission notice shall be included in
+@CO	all copies or substantial portions of the Software.
+@CO	
+@CO	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+@CO	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+@CO	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+@CO	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+@CO	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+@CO	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+@CO	THE SOFTWARE.
+@SQ	SN:ref1	LN:56	M5:08c04d512d4797d9ba2a156c1daba468
+@SQ	SN:ref2	LN:60	M5:7c35feac7036c1cdef3bee0cc4b21437
+@SQ	SN:ref3_unused	LN:70	M5:5fdd18c2c6ecac4838996d029bf395b5
+ref1_grp1_p002	99	ref1	5	2	10M	=	29	34	CTCGGTACCC	##########	RG:Z:grp1	BC:Z:AATTCCGG	H0:i:1	aa:A:a	ab:A:z	fa:f:4.3597e-18	za:Z:Another string	ha:H:2000AD	NM:i:0	MD:Z:10	an:Z:cat
+ref1_grp1_p003	99	ref1	9	4	10M	=	33	34	GTACCCGGGG	%%%%%%%%%%	fa:f:1.66e-27	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:cat
+ref1_grp1_p002	147	ref1	29	14	10M	=	5	-34	GTCGACCTGC	//////////	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:cat
+ref1_grp1_p003	147	ref1	33	16	10M	=	9	-34	ACCTGCAGGC	1111111111	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:cat
+ref12_grp1_p001	97	ref1	36	50	10M	ref2	2	0	TGCAGGCATG	AAAAAAAAAA	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:cat
+ref12_grp2_p001	97	ref1	46	50	10M	ref2	12	0	CAAGCTTGAG	AAAAAAAAAA	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:cat
+ref2_grp3_p001	83	ref2	1	99	15M	=	31	45	ATTCTATAGTGTCAC	~~~~~~~~~~~~~~~	NM:i:0	MD:Z:15	an:Z:cat
+ref12_grp1_p001	145	ref2	2	50	10M	ref1	36	0	TTCTATAGTG	BBBBBBBBBB	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:cat
+ref12_grp2_p001	145	ref2	12	50	10M	ref1	46	0	TCACCTAAAT	BBBBBBBBBB	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:cat
+ref2_grp3_p001	163	ref2	31	99	15M	=	1	-45	CTGTTTCCTGTGTGA	|||||||||||||||	NM:i:13	MD:Z:0T0A0A1C0A0T0G0G0T0C0A1A0G0	an:Z:cat
diff --git a/test/split/split.expected_d_an.dog.sam b/test/split/split.expected_d_an.dog.sam
new file mode 100644
index 000000000..d7d699302
--- /dev/null
+++ b/test/split/split.expected_d_an.dog.sam
@@ -0,0 +1,34 @@
+@HD	VN:1.4	SO:coordinate
+@RG	ID:grp1	DS:Group 1	LB:Library 1	SM:Sample
+@RG	ID:grp2	DS:Group 2	LB:Library 2	SM:Sample
+@PG	ID:prog1	PN:emacs	CL:emacs	VN:23.1.1
+@CO	The MIT License
+@CO	
+@CO	Copyright (c) 2014 Genome Research Ltd.
+@CO	
+@CO	Permission is hereby granted, free of charge, to any person obtaining a copy
+@CO	of this software and associated documentation files (the "Software"), to deal
+@CO	in the Software without restriction, including without limitation the rights
+@CO	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+@CO	copies of the Software, and to permit persons to whom the Software is
+@CO	furnished to do so, subject to the following conditions:
+@CO	
+@CO	The above copyright notice and this permission notice shall be included in
+@CO	all copies or substantial portions of the Software.
+@CO	
+@CO	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+@CO	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+@CO	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+@CO	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+@CO	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+@CO	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+@CO	THE SOFTWARE.
+@SQ	SN:ref1	LN:56	M5:08c04d512d4797d9ba2a156c1daba468
+@SQ	SN:ref2	LN:60	M5:7c35feac7036c1cdef3bee0cc4b21437
+@SQ	SN:ref3_unused	LN:70	M5:5fdd18c2c6ecac4838996d029bf395b5
+ref1_grp2_p003	99	ref1	11	5	10M	=	35	34	ACCCGGGGAT	&&&&&&&&&&	RG:Z:grp2	ia:i:4294967295	NM:i:0	MD:Z:10	an:Z:dog
+ref1_grp2_p003	147	ref1	35	17	10M	=	11	-34	CTGCAGGCAT	2222222222	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:dog
+ref2_grp3_p002	147	ref2	16	99	15M	=	46	45	CTAAATAGCTTGGCG	}}}}}}}}}}}}}}}	NM:i:0	MD:Z:15	RG:Z:grp3	an:Z:dog
+ref2_grp3_p002	99	ref2	46	99	15M	=	16	-45	CTGTTTCCTGTGTGA	{{{{{{{{{{{{{{{	NM:i:0	MD:Z:15	RG:Z:grp3	an:Z:dog
+unaligned_grp3_p001	77	*	0	0	*	*	0	0	CACTCGTTCATGACG	0123456789abcde	an:Z:dog
+unaligned_grp3_p001	141	*	0	0	*	*	0	0	GAAAGTGAGGAGGTG	edcba9876543210	an:Z:dog
diff --git a/test/split/split.expected_d_an.unk.sam b/test/split/split.expected_d_an.unk.sam
new file mode 100644
index 000000000..5092c89a6
--- /dev/null
+++ b/test/split/split.expected_d_an.unk.sam
@@ -0,0 +1,32 @@
+@HD	VN:1.4	SO:coordinate
+@RG	ID:grp1	DS:Group 1	LB:Library 1	SM:Sample
+@RG	ID:grp2	DS:Group 2	LB:Library 2	SM:Sample
+@PG	ID:prog1	PN:emacs	CL:emacs	VN:23.1.1
+@CO	The MIT License
+@CO	
+@CO	Copyright (c) 2014 Genome Research Ltd.
+@CO	
+@CO	Permission is hereby granted, free of charge, to any person obtaining a copy
+@CO	of this software and associated documentation files (the "Software"), to deal
+@CO	in the Software without restriction, including without limitation the rights
+@CO	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+@CO	copies of the Software, and to permit persons to whom the Software is
+@CO	furnished to do so, subject to the following conditions:
+@CO	
+@CO	The above copyright notice and this permission notice shall be included in
+@CO	all copies or substantial portions of the Software.
+@CO	
+@CO	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+@CO	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+@CO	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+@CO	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+@CO	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+@CO	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+@CO	THE SOFTWARE.
+@SQ	SN:ref1	LN:56	M5:08c04d512d4797d9ba2a156c1daba468
+@SQ	SN:ref2	LN:60	M5:7c35feac7036c1cdef3bee0cc4b21437
+@SQ	SN:ref3_unused	LN:70	M5:5fdd18c2c6ecac4838996d029bf395b5
+ref1_grp1_p006	99	ref1	21	10	10M	=	45	34	CCTCTAGAGT	++++++++++	RG:Z:grp1	ia:i:255	NM:i:0	MD:Z:10
+ref1_grp2_p006	99	ref1	23	11	10M	=	47	34	TCTAGAGTCG	,,,,,,,,,,	RG:Z:grp2	ia:i:-1	NM:i:0	MD:Z:10
+ref1_grp1_p006	147	ref1	45	22	10M	=	21	-34	GCAAGCTTGA	7777777777	RG:Z:grp1	NM:i:0	MD:Z:10
+ref1_grp2_p006	147	ref1	47	23	10M	=	23	-34	AAGCTTGAGT	8888888888	RG:Z:grp2	NM:i:0	MD:Z:10
diff --git a/test/split/split.expected_d_an_M_3.unk.sam b/test/split/split.expected_d_an_M_3.unk.sam
new file mode 100644
index 000000000..dbe9c6936
--- /dev/null
+++ b/test/split/split.expected_d_an_M_3.unk.sam
@@ -0,0 +1,36 @@
+@HD	VN:1.4	SO:coordinate
+@RG	ID:grp1	DS:Group 1	LB:Library 1	SM:Sample
+@RG	ID:grp2	DS:Group 2	LB:Library 2	SM:Sample
+@PG	ID:prog1	PN:emacs	CL:emacs	VN:23.1.1
+@CO	The MIT License
+@CO	
+@CO	Copyright (c) 2014 Genome Research Ltd.
+@CO	
+@CO	Permission is hereby granted, free of charge, to any person obtaining a copy
+@CO	of this software and associated documentation files (the "Software"), to deal
+@CO	in the Software without restriction, including without limitation the rights
+@CO	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+@CO	copies of the Software, and to permit persons to whom the Software is
+@CO	furnished to do so, subject to the following conditions:
+@CO	
+@CO	The above copyright notice and this permission notice shall be included in
+@CO	all copies or substantial portions of the Software.
+@CO	
+@CO	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+@CO	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+@CO	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+@CO	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+@CO	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+@CO	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+@CO	THE SOFTWARE.
+@SQ	SN:ref1	LN:56	M5:08c04d512d4797d9ba2a156c1daba468
+@SQ	SN:ref2	LN:60	M5:7c35feac7036c1cdef3bee0cc4b21437
+@SQ	SN:ref3_unused	LN:70	M5:5fdd18c2c6ecac4838996d029bf395b5
+ref1_grp2_p004	99	ref1	15	7	10M	=	39	34	GGGGATCCTC	((((((((((	RG:Z:grp2	ia:i:-2147483648	NM:i:0	MD:Z:10	an:Z:aardvark
+ref1_grp2_p005	99	ref1	19	9	10M	=	43	34	ATCCTCTAGA	**********	RG:Z:grp2	ia:i:-1000	NM:i:0	MD:Z:10	an:Z:aardvark
+ref1_grp1_p006	99	ref1	21	10	10M	=	45	34	CCTCTAGAGT	++++++++++	RG:Z:grp1	ia:i:255	NM:i:0	MD:Z:10
+ref1_grp2_p006	99	ref1	23	11	10M	=	47	34	TCTAGAGTCG	,,,,,,,,,,	RG:Z:grp2	ia:i:-1	NM:i:0	MD:Z:10
+ref1_grp2_p004	147	ref1	39	19	10M	=	15	-34	AGGCATGCAA	4444444444	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:aardvark
+ref1_grp2_p005	147	ref1	43	21	10M	=	19	-34	ATGCAAGCTT	6666666666	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:aardvark
+ref1_grp1_p006	147	ref1	45	22	10M	=	21	-34	GCAAGCTTGA	7777777777	RG:Z:grp1	NM:i:0	MD:Z:10
+ref1_grp2_p006	147	ref1	47	23	10M	=	23	-34	AAGCTTGAGT	8888888888	RG:Z:grp2	NM:i:0	MD:Z:10
diff --git a/test/split/split.sam b/test/split/split.sam
index c874469d1..c07002ee2 100644
--- a/test/split/split.sam
+++ b/test/split/split.sam
@@ -26,37 +26,37 @@
 @SQ	SN:ref1	LN:56	M5:08c04d512d4797d9ba2a156c1daba468
 @SQ	SN:ref2	LN:60	M5:7c35feac7036c1cdef3bee0cc4b21437
 @SQ	SN:ref3_unused	LN:70	M5:5fdd18c2c6ecac4838996d029bf395b5
-ref1_grp1_p001	99	ref1	1	0	10M	=	25	34	CGAGCTCGGT	!!!!!!!!!!	RG:Z:grp1	BC:Z:ACGT	H0:i:1	aa:A:!	ab:A:~	fa:f:3.14159	za:Z:Hello world!	ha:H:DEADBEEF	ba:B:c,-128,0,127	bb:B:C,0,127,255	bc:B:s,-32768,0,32767	bd:B:S,0,32768,65535	be:B:i,-2147483648,0,2147483647	bf:B:I,0,2147483648,4294967295	bg:B:f,2.71828,6.626e-34,2.9979e+09	NM:i:0	MD:Z:10
-ref1_grp2_p001	99	ref1	3	1	10M	=	27	34	AGCTCGGTAC	""""""""""	RG:Z:grp2	BC:Z:TGCA	H0:i:1	aa:A:A	ab:A:Z	fa:f:6.67e-11	za:Z:!"$%^&*()	ha:H:CAFE	NM:i:0	MD:Z:10
-ref1_grp1_p002	99	ref1	5	2	10M	=	29	34	CTCGGTACCC	##########	RG:Z:grp1	BC:Z:AATTCCGG	H0:i:1	aa:A:a	ab:A:z	fa:f:4.3597e-18	za:Z:Another string	ha:H:2000AD	NM:i:0	MD:Z:10
-ref1_grp2_p002	99	ref1	7	3	10M	=	31	34	CGGTACCCGG	$$$$$$$$$$	fa:f:6.022e+23	RG:Z:grp2	NM:i:0	MD:Z:10
-ref1_grp1_p003	99	ref1	9	4	10M	=	33	34	GTACCCGGGG	%%%%%%%%%%	fa:f:1.66e-27	RG:Z:grp1	NM:i:0	MD:Z:10
-ref1_grp2_p003	99	ref1	11	5	10M	=	35	34	ACCCGGGGAT	&&&&&&&&&&	RG:Z:grp2	ia:i:4294967295	NM:i:0	MD:Z:10
-ref1_grp1_p004	99	ref1	13	6	10M	=	37	34	CCGGGGATCC	''''''''''	fa:f:1.38e-23	za:Z:xRG:Z:grp2	RG:Z:grp1	NM:i:0	MD:Z:10
-ref1_grp2_p004	99	ref1	15	7	10M	=	39	34	GGGGATCCTC	((((((((((	RG:Z:grp2	ia:i:-2147483648	NM:i:0	MD:Z:10
-ref1_grp1_p005	99	ref1	17	8	10M	=	41	34	GGATCCTCTA	))))))))))	RG:Z:grp1	ia:i:40000	NM:i:0	MD:Z:10
-ref1_grp2_p005	99	ref1	19	9	10M	=	43	34	ATCCTCTAGA	**********	RG:Z:grp2	ia:i:-1000	NM:i:0	MD:Z:10
+ref1_grp1_p001	99	ref1	1	0	10M	=	25	34	CGAGCTCGGT	!!!!!!!!!!	RG:Z:grp1	BC:Z:ACGT	H0:i:1	aa:A:!	ab:A:~	fa:f:3.14159	za:Z:Hello world!	ha:H:DEADBEEF	ba:B:c,-128,0,127	bb:B:C,0,127,255	bc:B:s,-32768,0,32767	bd:B:S,0,32768,65535	be:B:i,-2147483648,0,2147483647	bf:B:I,0,2147483648,4294967295	bg:B:f,2.71828,6.626e-34,2.9979e+09	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp2_p001	99	ref1	3	1	10M	=	27	34	AGCTCGGTAC	""""""""""	RG:Z:grp2	BC:Z:TGCA	H0:i:1	aa:A:A	ab:A:Z	fa:f:6.67e-11	za:Z:!"$%^&*()	ha:H:CAFE	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp1_p002	99	ref1	5	2	10M	=	29	34	CTCGGTACCC	##########	RG:Z:grp1	BC:Z:AATTCCGG	H0:i:1	aa:A:a	ab:A:z	fa:f:4.3597e-18	za:Z:Another string	ha:H:2000AD	NM:i:0	MD:Z:10	an:Z:cat
+ref1_grp2_p002	99	ref1	7	3	10M	=	31	34	CGGTACCCGG	$$$$$$$$$$	fa:f:6.022e+23	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp1_p003	99	ref1	9	4	10M	=	33	34	GTACCCGGGG	%%%%%%%%%%	fa:f:1.66e-27	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:cat
+ref1_grp2_p003	99	ref1	11	5	10M	=	35	34	ACCCGGGGAT	&&&&&&&&&&	RG:Z:grp2	ia:i:4294967295	NM:i:0	MD:Z:10	an:Z:dog
+ref1_grp1_p004	99	ref1	13	6	10M	=	37	34	CCGGGGATCC	''''''''''	fa:f:1.38e-23	za:Z:xRG:Z:grp2	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp2_p004	99	ref1	15	7	10M	=	39	34	GGGGATCCTC	((((((((((	RG:Z:grp2	ia:i:-2147483648	NM:i:0	MD:Z:10	an:Z:aardvark
+ref1_grp1_p005	99	ref1	17	8	10M	=	41	34	GGATCCTCTA	))))))))))	RG:Z:grp1	ia:i:40000	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp2_p005	99	ref1	19	9	10M	=	43	34	ATCCTCTAGA	**********	RG:Z:grp2	ia:i:-1000	NM:i:0	MD:Z:10	an:Z:aardvark
 ref1_grp1_p006	99	ref1	21	10	10M	=	45	34	CCTCTAGAGT	++++++++++	RG:Z:grp1	ia:i:255	NM:i:0	MD:Z:10
 ref1_grp2_p006	99	ref1	23	11	10M	=	47	34	TCTAGAGTCG	,,,,,,,,,,	RG:Z:grp2	ia:i:-1	NM:i:0	MD:Z:10
-ref1_grp1_p001	147	ref1	25	12	10M	=	1	-34	TAGAGTCGAC	----------	RG:Z:grp1	NM:i:0	MD:Z:10
-ref1_grp2_p001	147	ref1	27	13	10M	=	3	-34	GAGTCGACCT	..........	RG:Z:grp2	NM:i:0	MD:Z:10
-ref1_grp1_p002	147	ref1	29	14	10M	=	5	-34	GTCGACCTGC	//////////	RG:Z:grp1	NM:i:0	MD:Z:10
-ref1_grp2_p002	147	ref1	31	15	10M	=	7	-34	CGACCTGCAG	0000000000	RG:Z:grp2	NM:i:0	MD:Z:10
-ref1_grp1_p003	147	ref1	33	16	10M	=	9	-34	ACCTGCAGGC	1111111111	RG:Z:grp1	NM:i:0	MD:Z:10
-ref1_grp2_p003	147	ref1	35	17	10M	=	11	-34	CTGCAGGCAT	2222222222	RG:Z:grp2	NM:i:0	MD:Z:10
-ref12_grp1_p001	97	ref1	36	50	10M	ref2	2	0	TGCAGGCATG	AAAAAAAAAA	RG:Z:grp1	NM:i:0	MD:Z:10
-ref1_grp1_p004	147	ref1	37	18	10M	=	13	-34	GCAGGCATGC	3333333333	RG:Z:grp1	NM:i:0	MD:Z:10
-ref1_grp2_p004	147	ref1	39	19	10M	=	15	-34	AGGCATGCAA	4444444444	RG:Z:grp2	NM:i:0	MD:Z:10
-ref1_grp1_p005	147	ref1	41	20	10M	=	17	-34	GCATGCAAGC	5555555555	RG:Z:grp1	NM:i:0	MD:Z:10
-ref1_grp2_p005	147	ref1	43	21	10M	=	19	-34	ATGCAAGCTT	6666666666	RG:Z:grp2	NM:i:0	MD:Z:10
+ref1_grp1_p001	147	ref1	25	12	10M	=	1	-34	TAGAGTCGAC	----------	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp2_p001	147	ref1	27	13	10M	=	3	-34	GAGTCGACCT	..........	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp1_p002	147	ref1	29	14	10M	=	5	-34	GTCGACCTGC	//////////	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:cat
+ref1_grp2_p002	147	ref1	31	15	10M	=	7	-34	CGACCTGCAG	0000000000	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp1_p003	147	ref1	33	16	10M	=	9	-34	ACCTGCAGGC	1111111111	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:cat
+ref1_grp2_p003	147	ref1	35	17	10M	=	11	-34	CTGCAGGCAT	2222222222	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:dog
+ref12_grp1_p001	97	ref1	36	50	10M	ref2	2	0	TGCAGGCATG	AAAAAAAAAA	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:cat
+ref1_grp1_p004	147	ref1	37	18	10M	=	13	-34	GCAGGCATGC	3333333333	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp2_p004	147	ref1	39	19	10M	=	15	-34	AGGCATGCAA	4444444444	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:aardvark
+ref1_grp1_p005	147	ref1	41	20	10M	=	17	-34	GCATGCAAGC	5555555555	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:badger
+ref1_grp2_p005	147	ref1	43	21	10M	=	19	-34	ATGCAAGCTT	6666666666	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:aardvark
 ref1_grp1_p006	147	ref1	45	22	10M	=	21	-34	GCAAGCTTGA	7777777777	RG:Z:grp1	NM:i:0	MD:Z:10
-ref12_grp2_p001	97	ref1	46	50	10M	ref2	12	0	CAAGCTTGAG	AAAAAAAAAA	RG:Z:grp2	NM:i:0	MD:Z:10
+ref12_grp2_p001	97	ref1	46	50	10M	ref2	12	0	CAAGCTTGAG	AAAAAAAAAA	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:cat
 ref1_grp2_p006	147	ref1	47	23	10M	=	23	-34	AAGCTTGAGT	8888888888	RG:Z:grp2	NM:i:0	MD:Z:10
-ref2_grp3_p001	83	ref2	1	99	15M	=	31	45	ATTCTATAGTGTCAC	~~~~~~~~~~~~~~~	NM:i:0	MD:Z:15
-ref12_grp1_p001	145	ref2	2	50	10M	ref1	36	0	TTCTATAGTG	BBBBBBBBBB	RG:Z:grp1	NM:i:0	MD:Z:10
-ref12_grp2_p001	145	ref2	12	50	10M	ref1	46	0	TCACCTAAAT	BBBBBBBBBB	RG:Z:grp2	NM:i:0	MD:Z:10
-ref2_grp3_p002	147	ref2	16	99	15M	=	46	45	CTAAATAGCTTGGCG	}}}}}}}}}}}}}}}	NM:i:0	MD:Z:15
-ref2_grp3_p001	163	ref2	31	99	15M	=	1	-45	CTGTTTCCTGTGTGA	|||||||||||||||	NM:i:13	MD:Z:0T0A0A1C0A0T0G0G0T0C0A1A0G0
-ref2_grp3_p002	99	ref2	46	99	15M	=	16	-45	CTGTTTCCTGTGTGA	{{{{{{{{{{{{{{{	NM:i:0	MD:Z:15
-unaligned_grp3_p001	77	*	0	0	*	*	0	0	CACTCGTTCATGACG	0123456789abcde
-unaligned_grp3_p001	141	*	0	0	*	*	0	0	GAAAGTGAGGAGGTG	edcba9876543210
+ref2_grp3_p001	83	ref2	1	99	15M	=	31	45	ATTCTATAGTGTCAC	~~~~~~~~~~~~~~~	NM:i:0	MD:Z:15	an:Z:cat
+ref12_grp1_p001	145	ref2	2	50	10M	ref1	36	0	TTCTATAGTG	BBBBBBBBBB	RG:Z:grp1	NM:i:0	MD:Z:10	an:Z:cat
+ref12_grp2_p001	145	ref2	12	50	10M	ref1	46	0	TCACCTAAAT	BBBBBBBBBB	RG:Z:grp2	NM:i:0	MD:Z:10	an:Z:cat
+ref2_grp3_p002	147	ref2	16	99	15M	=	46	45	CTAAATAGCTTGGCG	}}}}}}}}}}}}}}}	NM:i:0	MD:Z:15	RG:Z:grp3	an:Z:dog
+ref2_grp3_p001	163	ref2	31	99	15M	=	1	-45	CTGTTTCCTGTGTGA	|||||||||||||||	NM:i:13	MD:Z:0T0A0A1C0A0T0G0G0T0C0A1A0G0	an:Z:cat
+ref2_grp3_p002	99	ref2	46	99	15M	=	16	-45	CTGTTTCCTGTGTGA	{{{{{{{{{{{{{{{	NM:i:0	MD:Z:15	RG:Z:grp3	an:Z:dog
+unaligned_grp3_p001	77	*	0	0	*	*	0	0	CACTCGTTCATGACG	0123456789abcde	an:Z:dog
+unaligned_grp3_p001	141	*	0	0	*	*	0	0	GAAAGTGAGGAGGTG	edcba9876543210	an:Z:dog
diff --git a/test/test.pl b/test/test.pl
index 3702994d8..68b765015 100755
--- a/test/test.pl
+++ b/test/test.pl
@@ -3213,6 +3213,7 @@ sub test_sort
 
     # Name sort
     test_cmd($opts, out=>"sort/name.sort.expected.sam", ignore_pg_header => 1, cmd=>"$$opts{bin}/samtools sort${threads} -n -m 10M $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");
+    test_cmd($opts, out=>"sort/name2.sort.expected.sam", ignore_pg_header => 1, cmd=>"$$opts{bin}/samtools sort${threads} -N -m 10M $$opts{path}/dat/test_input_1_b.bam -O SAM -o -");
 
     # Tag sort (RG)
     test_cmd($opts, out=>"sort/tag.rg.sort.expected.sam", ignore_pg_header => 1, cmd=>"$$opts{bin}/samtools sort${threads} -t RG -m 10M $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");
@@ -3509,6 +3510,43 @@ sub test_split
              ignore_pg_header => 1,
              reorder_header => 1,
              cmd => "$$opts{bin}/samtools split $threads --output-fmt sam -u $$opts{path}/split/split.tmp.unk.sam -f $$opts{path}/split/split.tmp.\%!.\%. $$opts{path}/split/split.sam");
+
+    test_cmd($opts,
+             out=>"dat/empty.expected",
+             out_map => {
+                 'split/split.tmp.grp1.sam' => 'split/split.expected.grp1.sam',
+                 'split/split.tmp.grp2.sam' => 'split/split.expected.grp2.sam',
+                 'split/split.tmp.grp3.sam' => 'split/split.expected_d_RG.grp3.sam',
+                 'split/split.tmp.unk.sam' => 'split/split.expected_d_RG.unk.sam',
+             },
+             ignore_pg_header => 1,
+             reorder_header => 1,
+             cmd => "$$opts{bin}/samtools split $threads --output-fmt sam -d RG -u $$opts{path}/split/split.tmp.unk.sam -f $$opts{path}/split/split.tmp.\%!.\%. $$opts{path}/split/split.sam");
+
+    test_cmd($opts,
+             out=>"dat/empty.expected",
+             out_map => {
+                 'split/split.tmp.aardvark.sam' => 'split/split.expected_d_an.aardvark.sam',
+                 'split/split.tmp.badger.sam' => 'split/split.expected_d_an.badger.sam',
+                 'split/split.tmp.cat.sam' => 'split/split.expected_d_an.cat.sam',
+                 'split/split.tmp.dog.sam' => 'split/split.expected_d_an.dog.sam',
+                 'split/split.tmp.unk.sam' => 'split/split.expected_d_an.unk.sam',
+             },
+             ignore_pg_header => 1,
+             reorder_header => 1,
+             cmd => "$$opts{bin}/samtools split $threads --output-fmt sam -d an -u $$opts{path}/split/split.tmp.unk.sam -f $$opts{path}/split/split.tmp.\%!.\%. $$opts{path}/split/split.sam");
+
+    test_cmd($opts,
+             out=>"dat/empty.expected",
+             out_map => {
+                 'split/split.tmp.badger.sam' => 'split/split.expected_d_an.badger.sam',
+                 'split/split.tmp.cat.sam' => 'split/split.expected_d_an.cat.sam',
+                 'split/split.tmp.dog.sam' => 'split/split.expected_d_an.dog.sam',
+                 'split/split.tmp.unk.sam' => 'split/split.expected_d_an_M_3.unk.sam',
+             },
+             ignore_pg_header => 1,
+             reorder_header => 1,
+             cmd => "$$opts{bin}/samtools split $threads --output-fmt sam -d an -M 3 -u $$opts{path}/split/split.tmp.unk.sam -f $$opts{path}/split/split.tmp.\%!.\%. $$opts{path}/split/split.sam");
 }
 
 sub test_ampliconclip
diff --git a/version.sh b/version.sh
index 7d17aee18..e381bcf60 100755
--- a/version.sh
+++ b/version.sh
@@ -24,7 +24,7 @@
 # DEALINGS IN THE SOFTWARE.
 
 # Master version, for use in tarballs or non-git source copies
-VERSION=1.18
+VERSION=1.19
 
 # If we have a git clone, then check against the current tag
 if [ -e .git ]