diff --git a/README b/README
new file mode 100644
index 0000000..f617145
--- /dev/null
+++ b/README
@@ -0,0 +1,93 @@
+////////////////////////////////////////////////////////////////////////////
+//                        **** AUDIO-STRETCH ****                         //
+//                      Time Domain Harmonic Scaler                       //
+//                    Copyright (c) 2019 David Bryant                     //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+From Wikipedia, the free encyclopedia:
+
+    Time-domain harmonic scaling (TDHS) is a method for time-scale
+    modification of speech (or other audio signals), allowing the apparent
+    rate of speech articulation to be changed without affecting the
+    pitch-contour and the time-evolution of the formant structure. TDHS
+    differs from other time-scale modification algorithms in that
+    time-scaling operations are performed in the time domain (not the
+    frequency domain).
+
+This project is an implementation of a TDHS library and a command-line demo
+program to utilize it with standard WAV files.
+
+The vast majority of the time required for TDHS is in the pitch detection,
+and so this library implements two versions. The first is the standard
+one that includes every sample and pitch period, and the second is an
+optimized one that uses pairs of samples and only even pitch periods.
+This second version is about 4X faster than the standard version, but
+provides virtually the same quality. It is used by default for files with
+sample rates of 32 kHz or higher, but its use can be forced on or off
+from the command-line (see options below).
+
+There are two effects possible with TDHS and the audio-stretch demo. The
+first is the more obvious mentioned above of changing the duration (or
+speed) of a speech (or other audio) sample without modifying its pitch.
+The other effect is similar, but after applying the duration change we
+change the samping rate in a complimentary manner to restore the original
+duration and timing, which then results in the pitch being altered.
+
+So when a ratio is supplied to the audio-stretch program, the default
+operation is for the total duration of the audio file to be scaled by
+exactly that ratio (0.5X to 2.0X), with the pitches remaining constant.
+If the option to scale the sample-rate proportionally is specified (-s)
+then the total duration and timing of the audio file will be preserved,
+but the pitches will be scaled by the specified ratio instead. This is
+useful for creating a "helium voice" effect and lots of other fun stuff.
+
+Note that unless ratios of exactly 0.5 or 2.0 are used with the -s option,
+non-standard sampling rates will probably result. Many programs will still
+properly play these files, and audio editing programs will likely import
+them correctly (by resampling), but it is possible that some applications
+will barf on them.
+
+To build the demo app:
+
+    $ gcc -O2 *.c -o audio-stretch
+
+The "help" display from the demo app:
+
+ AUDIO-STRETCH  Time Domain Harmonic Scaling Demo  Version 0.1
+ Copyright (c) 2019 David Bryant. All Rights Reserved.
+
+ Usage:     AUDIO-STRETCH [-options] infile.wav outfile.wav
+
+ Options:  -r<n.n> = stretch ratio (0.5 to 2.0, default = 1.0)
+           -u<n>   = upper freq period limit (default = 333 Hz)
+           -l<n>   = lower freq period limit (default = 55 Hz)
+           -s      = scale rate to preserve duration (not pitch)
+           -f      = fast pitch detection (default >= 32 kHz)
+           -n      = normal pitch detection (default < 32 kHz)
+           -q      = quiet mode (display errors only)
+           -v      = verbose (display lots of info)
+           -y      = overwrite outfile if it exists
+
+ Web:      Visit www.github.com/dbry/audio-stretch for latest version
+
+Notes:
+
+1. The program will handle only mono or stereo files in the WAV format. The
+   audio must be 16-bit PCM and the acceptable sampling rates are from 8,000
+   to 48,000 Hz. Any additional RIFF info in the WAV file will be discarded.
+
+2. For stereo files, the pitch detection is done on a mono conversion of the
+   audio, but the scaling transformation is done on the independent channels.
+   If it is desired to have completely independent processing this can only
+   be done with two mono files. Note that this is not a limitation of the
+   library but of the demo utility (the library has no problem with multiple
+   contexts).
+
+3. This technique (TDHS) is ideal for speech signals, but can also be used
+   for homophonic musical instruments. As the sound becomes increasingly
+   polyphonic, however, the quality and effectiveness will decrease. Also,
+   the period frequency limits provided by default are optimized for speech;
+   adjusting these may be required for best quality with non-speech audio.
+
diff --git a/license.txt b/license.txt
new file mode 100644
index 0000000..65d4a2e
--- /dev/null
+++ b/license.txt
@@ -0,0 +1,25 @@
+                       Copyright (c) David Bryant
+                          All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Conifer Software nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..0f21b4b
--- /dev/null
+++ b/main.c
@@ -0,0 +1,405 @@
+////////////////////////////////////////////////////////////////////////////
+//                        **** AUDIO-STRETCH ****                         //
+//                      Time Domain Harmonic Scaler                       //
+//                    Copyright (c) 2019 David Bryant                     //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// main.c
+
+// This module provides a demo for the TDHS library using WAV files.
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "stretch.h"
+
+static const char *sign_on = "\n"
+" AUDIO-STRETCH  Time Domain Harmonic Scaling Demo  Version 0.1\n"
+" Copyright (c) 2019 David Bryant. All Rights Reserved.\n\n";
+
+static const char *usage =
+" Usage:     AUDIO-STRETCH [-options] infile.wav outfile.wav\n\n"
+" Options:  -r<n.n> = stretch ratio (0.5 to 2.0, default = 1.0)\n"
+"           -u<n>   = upper freq period limit (default = 333 Hz)\n"
+"           -l<n>   = lower freq period limit (default = 55 Hz)\n"
+"           -s      = scale rate to preserve duration (not pitch)\n"
+"           -f      = fast pitch detection (default >= 32 kHz)\n"
+"           -n      = normal pitch detection (default < 32 kHz)\n"
+"           -q      = quiet mode (display errors only)\n"
+"           -v      = verbose (display lots of info)\n"
+"           -y      = overwrite outfile if it exists\n\n"
+" Web:      Visit www.github.com/dbry/audio-stretch for latest version\n\n";
+
+typedef struct {
+    char ckID [4];
+    uint32_t ckSize;
+    char formType [4];
+} RiffChunkHeader;
+
+typedef struct {
+    char ckID [4];
+    uint32_t ckSize;
+} ChunkHeader;
+
+typedef struct {
+    uint16_t FormatTag, NumChannels;
+    uint32_t SampleRate, BytesPerSecond;
+    uint16_t BlockAlign, BitsPerSample;
+    uint16_t cbSize;
+    union {
+        uint16_t ValidBitsPerSample;
+        uint16_t SamplesPerBlock;
+        uint16_t Reserved;
+    } Samples;
+    int32_t ChannelMask;
+    uint16_t SubFormat;
+    char GUID [14];
+} WaveHeader;
+
+#define WAVE_FORMAT_PCM         0x1
+#define WAVE_FORMAT_EXTENSIBLE  0xfffe
+
+static int write_pcm_wav_header (FILE *outfile, size_t num_samples, int num_channels, int bytes_per_sample, int sample_rate);
+
+#define BUFFER_SAMPLES 1024
+
+static int verbose_mode, quiet_mode;
+
+int main (argc, argv) int argc; char **argv;
+{
+    int asked_help = 0, overwrite = 0, scale_rate = 0, force_fast = 0, force_normal = 0, fast_mode, scaled_rate;
+    int upper_frequency = 333, lower_frequency = 55, min_period, max_period;
+    int samples_to_process, insamples = 0, outsamples = 0;
+    char *infilename = NULL, *outfilename = NULL;
+    RiffChunkHeader riff_chunk_header;
+    WaveHeader WaveHeader = { 0 };
+    ChunkHeader chunk_header;
+    StretchHandle stretcher;
+    FILE *infile, *outfile;
+    float ratio = 1.0;
+
+    // loop through command-line arguments
+
+    while (--argc) {
+#ifdef _WIN32
+        if ((**++argv == '-' || **argv == '/') && (*argv)[1])
+#else
+        if ((**++argv == '-') && (*argv)[1])
+#endif
+            while (*++*argv)
+                switch (**argv) {
+
+                    case 'U': case 'u':
+                        upper_frequency = strtol (++*argv, argv, 10);
+
+                        if (upper_frequency <= 40) {
+                            fprintf (stderr, "\nupper frequency must be at least 40 Hz!\n");
+                            return -1;
+                        }
+
+                        --*argv;
+                        break;
+
+                    case 'L': case 'l':
+                        lower_frequency = strtol (++*argv, argv, 10);
+
+                        if (lower_frequency < 20) {
+                            fprintf (stderr, "\nlower frequency must be at least 20 Hz!\n");
+                            return -1;
+                        }
+
+                        --*argv;
+                        break;
+
+                    case 'R': case 'r':
+                        ratio = strtod (++*argv, argv);
+
+                        if (ratio < 0.5 || ratio > 2.0) {
+                            fprintf (stderr, "\nratio must be from 0.5 to 2.0!\n");
+                            return -1;
+                        }
+
+                        --*argv;
+                        break;
+
+                    case 'S': case 's':
+                        scale_rate = 1;
+                        break;
+
+                    case 'F': case 'f':
+                        force_fast = 1;
+                        break;
+
+                    case 'N': case 'n':
+                        force_normal = 1;
+                        break;
+
+                    case 'H': case 'h':
+                        asked_help = 1;
+                        break;
+
+                    case 'V': case 'v':
+                        verbose_mode = 1;
+                        break;
+
+                    case 'Q': case 'q':
+                        quiet_mode = 1;
+                        break;
+
+                    case 'Y': case 'y':
+                        overwrite = 1;
+                        break;
+
+                    default:
+                        fprintf (stderr, "\nillegal option: %c !\n", **argv);
+                        return -1;
+                }
+        else if (!infilename)
+            infilename = *argv;
+        else if (!outfilename)
+            outfilename = *argv;
+        else {
+            fprintf (stderr, "\nextra unknown argument: %s !\n", *argv);
+            return -1;
+        }
+    }
+
+    if (!quiet_mode)
+        fprintf (stderr, "%s", sign_on);
+
+    if (!outfilename || asked_help) {
+        printf ("%s", usage);
+        return 0;
+    }
+
+    if (!strcmp (infilename, outfilename)) {
+        fprintf (stderr, "can't overwrite input file (specify different/new output file name)\n");
+        return -1;
+    }
+
+    if (!overwrite && (outfile = fopen (outfilename, "r"))) {
+        fclose (outfile);
+        fprintf (stderr, "output file \"%s\" exists (use -y to overwrite)\n", outfilename);
+        return -1;
+    }
+
+    if (!(infile = fopen (infilename, "rb"))) {
+        fprintf (stderr, "can't open file \"%s\" for reading!\n", infilename);
+        return 1;
+    }
+
+    // read initial RIFF form header
+
+    if (!fread (&riff_chunk_header, sizeof (RiffChunkHeader), 1, infile) ||
+        strncmp (riff_chunk_header.ckID, "RIFF", 4) ||
+        strncmp (riff_chunk_header.formType, "WAVE", 4)) {
+            fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename);
+            return 1;
+    }
+
+    // loop through all elements of the RIFF wav header (until the data chuck)
+
+    while (1) {
+        if (!fread (&chunk_header, sizeof (ChunkHeader), 1, infile)) {
+            fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename);
+            return 1;
+        }
+
+        // if it's the format chunk, we want to get some info out of there and
+        // make sure it's a .wav file we can handle
+
+        if (!strncmp (chunk_header.ckID, "fmt ", 4)) {
+            int format, bits_per_sample;
+
+            if (chunk_header.ckSize < 16 || chunk_header.ckSize > sizeof (WaveHeader) ||
+                !fread (&WaveHeader, chunk_header.ckSize, 1, infile)) {
+                    fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename);
+                    return 1;
+            }
+
+            format = (WaveHeader.FormatTag == WAVE_FORMAT_EXTENSIBLE && chunk_header.ckSize == 40) ?
+                WaveHeader.SubFormat : WaveHeader.FormatTag;
+
+            bits_per_sample = (chunk_header.ckSize == 40 && WaveHeader.Samples.ValidBitsPerSample) ?
+                WaveHeader.Samples.ValidBitsPerSample : WaveHeader.BitsPerSample;
+
+            if (bits_per_sample != 16) {
+                fprintf (stderr, "\"%s\" is not a 16-bit .WAV file!\n", infilename);
+                return 1;
+            }
+
+            if (WaveHeader.NumChannels < 1 || WaveHeader.NumChannels > 2) {
+                fprintf (stderr, "\"%s\" is not a mono or stereo .WAV file!\n", infilename);
+                return 1;
+            }
+
+            if (WaveHeader.BlockAlign != WaveHeader.NumChannels * 2) {
+                fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename);
+                return 1;
+            }
+
+            if (format == WAVE_FORMAT_PCM) {
+                if (WaveHeader.SampleRate < 8000 || WaveHeader.SampleRate > 48000) {
+                    fprintf (stderr, "\"%s\" sample rate is %d, must be 8000 to 48000!\n", infilename, WaveHeader.SampleRate);
+                    return 1;
+                }
+            }
+            else {
+                fprintf (stderr, "\"%s\" is not a PCM .WAV file!\n", infilename);
+                return 1;
+            }
+        }
+        else if (!strncmp (chunk_header.ckID, "data", 4)) {
+
+            // on the data chunk, get size and exit parsing loop
+
+            if (!WaveHeader.SampleRate) {      // make sure we saw a "fmt" chunk...
+                fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename);
+                return 1;
+            }
+
+            if (!chunk_header.ckSize) {
+                fprintf (stderr, "this .WAV file has no audio samples, probably is corrupt!\n");
+                return 1;
+            }
+
+            if (chunk_header.ckSize % WaveHeader.BlockAlign) {
+                fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename);
+                return 1;
+            }
+
+            samples_to_process = chunk_header.ckSize / WaveHeader.BlockAlign;
+
+            if (!samples_to_process) {
+                fprintf (stderr, "this .WAV file has no audio samples, probably is corrupt!\n");
+                return 1;
+            }
+
+            break;
+        }
+        else {          // just ignore unknown chunks
+            int bytes_to_eat = (chunk_header.ckSize + 1) & ~1L;
+            char dummy;
+
+            while (bytes_to_eat--)
+                if (!fread (&dummy, 1, 1, infile)) {
+                    fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename);
+                    return 1;
+                }
+        }
+    }
+
+    if (upper_frequency < lower_frequency * 2 || upper_frequency >= WaveHeader.SampleRate / 2) {
+        fprintf (stderr, "invalid frequencies specified!\n");
+        fclose (infile);
+        return 1;
+    }
+
+    min_period = WaveHeader.SampleRate / upper_frequency;
+    max_period = WaveHeader.SampleRate / lower_frequency;
+    fast_mode = (force_fast || WaveHeader.SampleRate >= 32000) && !force_normal;
+
+    if (verbose_mode)
+        fprintf (stderr, "initializing stretch library with period range = %d to %d, %d channels, %s\n",
+            min_period, max_period, WaveHeader.NumChannels, fast_mode ? "fast mode" : "normal mode");
+
+    if (!quiet_mode && ratio == 1.0)
+        fprintf (stderr, "warning: a ratio of 1.0 will do nothing but copy the WAV file!\n");
+
+    stretcher = stretch_init (min_period, max_period, WaveHeader.NumChannels, fast_mode);
+
+    if (!stretcher) {
+        fprintf (stderr, "can't initialize stretcher\n");
+        fclose (infile);
+        return 1;
+    }
+
+    if (!(outfile = fopen (outfilename, "wb"))) {
+        fprintf (stderr, "can't open file \"%s\" for writing!\n", outfilename);
+        fclose (infile);
+        return 1;
+    }
+
+    scaled_rate = scale_rate ? (int)(WaveHeader.SampleRate * ratio + 0.5) : WaveHeader.SampleRate;
+    write_pcm_wav_header (outfile, 0, WaveHeader.NumChannels, 2, scaled_rate);
+
+    short *inbuffer = malloc (BUFFER_SAMPLES * WaveHeader.BlockAlign);
+    short *outbuffer = malloc ((BUFFER_SAMPLES * 2 + 2048) * WaveHeader.BlockAlign);
+
+    while (1) {
+        int samples_read = fread (inbuffer, WaveHeader.BlockAlign,
+            samples_to_process >= BUFFER_SAMPLES ? BUFFER_SAMPLES : samples_to_process, infile);
+        int samples_generated;
+
+        insamples += samples_read;
+        samples_to_process -= samples_read;
+
+        if (samples_read)
+            samples_generated = stretch_samples (stretcher, inbuffer, samples_read, outbuffer, ratio);
+        else
+            samples_generated = stretch_flush (stretcher, outbuffer);
+
+        if (samples_generated) {
+            fwrite (outbuffer, WaveHeader.BlockAlign, samples_generated, outfile);
+            outsamples += samples_generated;
+        }
+
+        if (!samples_read && !samples_generated)
+            break;
+    }
+
+    free (inbuffer);
+    free (outbuffer);
+    stretch_deinit (stretcher);
+
+    fclose (infile);
+
+    rewind (outfile);
+    write_pcm_wav_header (outfile, outsamples, WaveHeader.NumChannels, 2, scaled_rate);
+    fclose (outfile);
+
+    if (insamples && verbose_mode) {
+        fprintf (stderr, "done, %d samples --> %d samples (ratio = %.3f)\n", insamples, outsamples, (float) outsamples / insamples);
+        if (scale_rate)
+            fprintf (stderr, "sample rate changed from %d Hz to %d Hz\n", WaveHeader.SampleRate, scaled_rate);
+    }
+
+    return 0;
+}
+
+static int write_pcm_wav_header (FILE *outfile, size_t num_samples, int num_channels, int bytes_per_sample, int sample_rate)
+{
+    RiffChunkHeader riffhdr;
+    ChunkHeader datahdr, fmthdr;
+    WaveHeader wavhdr;
+
+    int wavhdrsize = 16;
+    size_t total_data_bytes = num_samples * bytes_per_sample * num_channels;
+
+    memset (&wavhdr, 0, sizeof (wavhdr));
+
+    wavhdr.FormatTag = WAVE_FORMAT_PCM;
+    wavhdr.NumChannels = num_channels;
+    wavhdr.SampleRate = sample_rate;
+    wavhdr.BytesPerSecond = sample_rate * num_channels * bytes_per_sample;
+    wavhdr.BlockAlign = bytes_per_sample * num_channels;
+    wavhdr.BitsPerSample = bytes_per_sample * 8;
+
+    strncpy (riffhdr.ckID, "RIFF", sizeof (riffhdr.ckID));
+    strncpy (riffhdr.formType, "WAVE", sizeof (riffhdr.formType));
+    riffhdr.ckSize = sizeof (riffhdr) + wavhdrsize + sizeof (datahdr) + total_data_bytes;
+    strncpy (fmthdr.ckID, "fmt ", sizeof (fmthdr.ckID));
+    fmthdr.ckSize = wavhdrsize;
+
+    strncpy (datahdr.ckID, "data", sizeof (datahdr.ckID));
+    datahdr.ckSize = total_data_bytes;
+
+    return fwrite (&riffhdr, sizeof (riffhdr), 1, outfile) &&
+        fwrite (&fmthdr, sizeof (fmthdr), 1, outfile) &&
+        fwrite (&wavhdr, wavhdrsize, 1, outfile) &&
+        fwrite (&datahdr, sizeof (datahdr), 1, outfile);
+}
diff --git a/stretch.c b/stretch.c
new file mode 100644
index 0000000..5de6bfc
--- /dev/null
+++ b/stretch.c
@@ -0,0 +1,401 @@
+////////////////////////////////////////////////////////////////////////////
+//                        **** AUDIO-STRETCH ****                         //
+//                      Time Domain Harmonic Scaler                       //
+//                    Copyright (c) 2019 David Bryant                     //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// stretch.c
+
+// Time Domain Harmonic Compression and Expansion
+//
+// This library performs time domain harmonic scaling with pitch detection
+// to stretch the timing of a 16-bit PCM signal (either mono or stereo) from
+// 1/2 to 2 times its original length. This is done without altering any of
+// the tonal characteristics.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+
+#include "stretch.h"
+
+#define MIN_PERIOD  24          /* minimum allowable pitch period */
+#define MAX_PERIOD  1024        /* maximum allowable pitch period */
+
+struct stretch_cnxt {
+    int num_chans, inbuff_samples, shortest, longest, tail, head, verbatim, fast_mode;
+    short *inbuff, *calcbuff;
+    unsigned int *results;
+};
+
+static void merge_blocks (short *output, short *input1, short *input2, int samples);
+static int find_period_fast (struct stretch_cnxt *cnxt, short *samples);
+static int find_period (struct stretch_cnxt *cnxt, short *samples);
+
+/*
+ * Initialize a context of the time stretching code. The shortest and longest periods
+ * are specified here. The longest period determines the lowest fundamental frequency
+ * that can be handled correctly. Note that higher frequencies can be handled than the
+ * shortest period would suggest because multiple periods can be combined, and the
+ * worst-case performance will suffer if too short a period is selected.
+ */
+
+StretchHandle stretch_init (int shortest_period, int longest_period, int num_channels, int fast_mode)
+{
+    struct stretch_cnxt *cnxt;
+
+    if (fast_mode) {
+        longest_period = (longest_period + 1) & ~1;
+        shortest_period &= ~1;
+    }
+
+    if (longest_period <= shortest_period || shortest_period < MIN_PERIOD || longest_period > MAX_PERIOD) {
+        printf ("stretch_init(): invalid periods!\n");
+        return NULL;
+    }
+
+    cnxt = (struct stretch_cnxt *) calloc (1, sizeof (struct stretch_cnxt));
+
+    if (cnxt) {
+        cnxt->inbuff_samples = longest_period * num_channels * 8;
+        cnxt->inbuff = calloc (cnxt->inbuff_samples, sizeof (*cnxt->inbuff));
+        cnxt->calcbuff = calloc (longest_period * num_channels, sizeof (*cnxt->calcbuff));
+        cnxt->results = calloc (longest_period, sizeof (*cnxt->results));
+    }
+
+    if (!cnxt || !cnxt->inbuff || !cnxt->calcbuff || !cnxt->results) {
+        fprintf (stderr, "stretch_init(): out of memory!\n");
+        return NULL;
+    }
+
+    cnxt->head = cnxt->tail = cnxt->longest = longest_period * num_channels;
+    cnxt->shortest = shortest_period * num_channels;
+    cnxt->num_chans = num_channels;
+    cnxt->fast_mode = fast_mode;
+
+    return (StretchHandle) cnxt;
+}
+
+/*
+ * Process the specified samples with the given ratio (which is clipped to the
+ * range 0.5 to 2.0). Note that the number of samples refers to total samples for
+ * both channels in stereo and can be as large as desired (samples are buffered
+ * here). The exact number of samples output is not possible to determine in
+ * advance, but it will be close to the number of input samples times the ratio
+ * plus or minus a couple of the longest periods.
+ */
+
+int stretch_samples (StretchHandle handle, short *samples, int num_samples, short *output, float ratio)
+{
+    struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
+    int out_samples = 0;
+
+    num_samples *= cnxt->num_chans;
+
+    if (ratio < 0.5)
+        ratio = 0.5;
+    else if (ratio > 2.0)
+        ratio = 2.0;
+
+    /* while we have samples to do... */
+
+    do {
+        /* if there are more samples and room for them, copy in */
+
+        if (num_samples && cnxt->head < cnxt->inbuff_samples) {
+            int samples_to_copy = num_samples;
+
+            if (samples_to_copy > cnxt->inbuff_samples - cnxt->head)
+                samples_to_copy = cnxt->inbuff_samples - cnxt->head; 
+
+            memcpy (cnxt->inbuff + cnxt->head, samples, samples_to_copy * sizeof (cnxt->inbuff [0]));
+            num_samples -= samples_to_copy;
+            samples += samples_to_copy;
+            cnxt->head += samples_to_copy;
+        }
+
+        /* while there are enough samples to process, do so */
+
+        while (1) {
+            if (cnxt->verbatim && cnxt->head > cnxt->tail) {
+                int samples_to_copy = cnxt->verbatim;
+                if (samples_to_copy > cnxt->head - cnxt->tail)
+                    samples_to_copy = cnxt->head - cnxt->tail;
+
+                memcpy (output + out_samples, cnxt->inbuff + cnxt->tail, samples_to_copy * sizeof (cnxt->inbuff [0]));
+                out_samples += samples_to_copy;
+                cnxt->verbatim -= samples_to_copy;
+                cnxt->tail += samples_to_copy;
+            }
+            else if (cnxt->tail >= cnxt->longest && cnxt->head - cnxt->tail >= cnxt->longest * 2) {
+                if (ratio == 1.0) {
+                    memcpy (output + out_samples, cnxt->inbuff + cnxt->tail,
+                        cnxt->longest * 2 * sizeof (cnxt->inbuff [0]));
+
+                    out_samples += cnxt->longest * 2;
+                    cnxt->tail += cnxt->longest * 2;
+                }
+                else {
+                    int period = cnxt->fast_mode ? find_period_fast (cnxt, cnxt->inbuff + cnxt->tail) : 
+                        find_period (cnxt, cnxt->inbuff + cnxt->tail), incnt, outcnt;
+
+                    if (ratio < 1.0) {
+                        merge_blocks (output + out_samples, cnxt->inbuff + cnxt->tail,
+                            cnxt->inbuff + cnxt->tail + period, period); 
+
+                        out_samples += period;
+                        cnxt->tail += period * 2;
+                        incnt = (outcnt = period) * 2;
+                    }
+                    else {
+                        merge_blocks (output + out_samples, cnxt->inbuff + cnxt->tail,
+                            cnxt->inbuff + cnxt->tail - period, period * 2);
+
+                        out_samples += period * 2;
+                        cnxt->tail += period;
+                        outcnt = (incnt = period) * 2;
+                    }
+
+                    cnxt->verbatim = (int) floor (((ratio * incnt) - outcnt) / (1.0 - ratio) / cnxt->num_chans + 0.5) * cnxt->num_chans;
+
+                    if (cnxt->verbatim < 0)     // this should not happen...
+                        cnxt->verbatim = 0;
+                }
+            }
+            else
+                break;
+        }
+
+        /* if we're almost done with buffer, copy the rest back to beginning */
+
+        if (cnxt->head == cnxt->inbuff_samples) {
+            int samples_to_move = cnxt->inbuff_samples - cnxt->tail + cnxt->longest;
+
+            memmove (cnxt->inbuff, cnxt->inbuff + cnxt->tail - cnxt->longest,
+                samples_to_move * sizeof (cnxt->inbuff [0]));
+
+            cnxt->head -= cnxt->tail - cnxt->longest;
+            cnxt->tail = cnxt->longest;
+        }
+
+    } while (num_samples);
+
+    return out_samples / cnxt->num_chans;
+}  
+
+/* flush any leftover samples out at normal speed */
+
+int stretch_flush (StretchHandle handle, short *output)
+{
+    struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
+    int samples_to_copy = (cnxt->head - cnxt->tail) / cnxt->num_chans;
+
+    memcpy (output, cnxt->inbuff + cnxt->tail, samples_to_copy * cnxt->num_chans * sizeof (*output));
+    cnxt->tail = cnxt->head;
+
+    return samples_to_copy;
+}
+
+/* free handle */
+
+void stretch_deinit (StretchHandle handle)
+{
+    struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
+
+    free (cnxt->calcbuff);
+    free (cnxt->results);
+    free (cnxt->inbuff);
+    free (cnxt);
+}
+
+/*
+ * The pitch detection is done by finding the period that produces the
+ * maximum value for the following correlation formula applied to two
+ * consecutive blocks of the given period length:
+ *
+ *         sum of the absolute values of each sample in both blocks
+ *   ---------------------------------------------------------------------
+ *   sum of the absolute differences of each corresponding pair of samples
+ *
+ * This formula was chosen for two reasons.  First, it produces output values
+ * that can directly compared regardless of the pitch period.  Second, the
+ * numerator can be accumulated for successive periods, and only the
+ * denominator need be completely recalculated.
+ */
+
+static int find_period (struct stretch_cnxt *cnxt, short *samples)
+{
+    unsigned int sum, diff, factor, best_factor = 0;
+    short *calcbuff = samples;
+    int period, best_period;
+    int i, j;
+
+    period = best_period = cnxt->shortest / cnxt->num_chans;
+
+    if (cnxt->num_chans == 2) {
+        calcbuff = cnxt->calcbuff;
+
+        for (i = j = 0; i < cnxt->longest * 2; i += 2)
+            calcbuff [j++] = (samples [i] + samples [i+1]) >> 1;
+    }
+
+    /* accumulate sum for shortest period size */
+
+    for (sum = i = 0; i < period; ++i)
+        sum += abs (calcbuff [i]) + abs (calcbuff [i+period]);
+
+    /* this loop actually cycles through all period lengths */
+
+    while (1) {
+        short *comp = calcbuff + period * 2;
+        short *ref = calcbuff + period;
+
+        /* compute sum of absolute differences */
+
+        diff = 0;
+
+        while (ref != calcbuff)
+            diff += abs (*--ref - *--comp);
+
+        /*
+         * Here we calculate and store the resulting correlation
+         * factor.  Note that we must watch for a difference of
+         * zero, meaning a perfect match.  Also, for increased
+         * precision using integer math, we scale the sum.  Care
+         * must be taken here to avoid possibility of overflow.
+         */
+
+        factor = diff ? (sum * 128) / diff : UINT32_MAX;
+
+        if (factor >= best_factor) {
+            best_factor = factor;
+            best_period = period;
+        }
+
+        /* see if we're done */
+
+        if (period * cnxt->num_chans == cnxt->longest)
+            break;
+
+        /* update accumulating sum and current period */
+
+        sum += abs (calcbuff [period * 2]);
+        sum += abs (calcbuff [period++ * 2 + 1]);
+    }
+
+    return best_period * cnxt->num_chans;
+}
+
+/*
+ * This pitch detection function is similar to find_period() above, except that it
+ * is optimized for speed. The audio data corresponding to two maximum periods is
+ * averaged 2:1 into the calculation buffer, and then the calulations are done
+ * for every other period length. Because the time is essentially proportional to
+ * both the number of samples and the number of period lengths to try, this scheme
+ * can reduce the time by a factor approaching 4x. The correlation results on either
+ * side of the peak are compared to calculate a more accurate center of the period.
+ */
+
+static int find_period_fast (struct stretch_cnxt *cnxt, short *samples)
+{
+    unsigned int sum, diff, best_factor = 0;
+    int period, best_period;
+    int i, j;
+
+    best_period = period = cnxt->shortest / (cnxt->num_chans * 2);
+
+    /* first step is compressing data 2:1 into calcbuff */
+
+    if (cnxt->num_chans == 2)
+        for (i = j = 0; i < cnxt->longest * 2; i += 4)
+            cnxt->calcbuff [j++] = (samples [i] + samples [i+1] + samples [i+2] + samples [i+3]) >> 2;
+    else
+        for (i = j = 0; i < cnxt->longest * 2; i += 2)
+            cnxt->calcbuff [j++] = (samples [i] + samples [i+1]) >> 1;
+
+    /* accumulate sum for shortest period */
+
+    for (sum = i = 0; i < period; ++i)
+        sum += abs (cnxt->calcbuff [i]) + abs (cnxt->calcbuff [i+period]);
+
+    /* this loop actually cycles through all period lengths */
+
+    while (1) {
+        short *comp = cnxt->calcbuff + period * 2;
+        short *ref = cnxt->calcbuff + period;
+
+        /* compute sum of absolute differences */
+
+        diff = 0;
+
+        while (ref != cnxt->calcbuff)
+            diff += abs (*--ref - *--comp);
+
+        /*
+         * Here we calculate and store the resulting correlation
+         * factor.  Note that we must watch for a difference of
+         * zero, meaning a perfect match.  Also, for increased
+         * precision using integer math, we scale the sum.  Care
+         * must be taken here to avoid possibility of overflow.
+         */
+
+        cnxt->results [period] = diff ? (sum * 128) / diff : UINT32_MAX;
+
+        if (cnxt->results [period] >= best_factor) {    /* check if best yet */
+            best_factor = cnxt->results [period];
+            best_period = period;
+        }
+
+        /* see if we're done */
+
+        if (period * cnxt->num_chans * 2 == cnxt->longest)
+            break;
+
+        /* update accumulating sum and current period */
+
+        sum += abs (cnxt->calcbuff [period * 2]);
+        sum += abs (cnxt->calcbuff [period++ * 2 + 1]);
+    }
+
+    if (best_period * cnxt->num_chans * 2 != cnxt->shortest && best_period * cnxt->num_chans * 2 != cnxt->longest) {
+        int high_side_diff = cnxt->results [best_period] - cnxt->results [best_period+1];
+        int low_side_diff = cnxt->results [best_period] - cnxt->results [best_period-1];
+
+        if (low_side_diff > high_side_diff * 2)
+            best_period = best_period * 2 + 1;
+        else if (high_side_diff > low_side_diff * 2)
+            best_period = best_period * 2 - 1;
+        else
+            best_period *= 2;
+    }
+    else
+        best_period *= 2;           /* shortest or longest use as is */
+
+    return best_period * cnxt->num_chans;
+}
+
+/*
+ * To combine the two periods into one, each corresponding pair of samples
+ * are averaged with a linearly sliding scale.  At the beginning of the period
+ * the first sample dominates, and at the end the second sample dominates.  In
+ * this way the resulting block blends with the previous and next blocks.
+ *
+ * The signed values are offset to unsigned for the calculation and then offset
+ * back to signed.  This is done to avoid the compression around zero that occurs
+ * with calculations of this type on C implementations that round division toward
+ * zero.
+ */
+
+static void merge_blocks (short *output, short *input1, short *input2, int samples)
+{
+    int i;
+
+    for (i = 0; i < samples; ++i)
+        output [i] = (((input1 [i] + 32768) * (samples - i) +
+            (input2 [i] + 32768) * i) / samples) - 32768;
+}
+
diff --git a/stretch.h b/stretch.h
new file mode 100644
index 0000000..ddbf081
--- /dev/null
+++ b/stretch.h
@@ -0,0 +1,37 @@
+////////////////////////////////////////////////////////////////////////////
+//                        **** AUDIO-STRETCH ****                         //
+//                      Time Domain Harmonic Scaler                       //
+//                    Copyright (c) 2019 David Bryant                     //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// stretch.h
+
+// Time Domain Harmonic Compression and Expansion
+//
+// This library performs time domain harmonic scaling with pitch detection
+// to stretch the timing of a 16-bit PCM signal (either mono or stereo) from
+// 1/2 to 2 times its original length. This is done without altering any of
+// its tonal characteristics.
+
+#ifndef STRETCH_H
+#define STRETCH_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void *StretchHandle;
+
+StretchHandle stretch_init (int shortest_period, int longest_period, int num_chans, int fast_mode);
+int stretch_samples (StretchHandle handle, short *samples, int num_samples, short *output, float ratio);
+int stretch_flush (StretchHandle handle, short *output);
+void stretch_deinit (StretchHandle handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+