Skip to content

Commit

Permalink
fix: Handle first chunk shorter than 7 bytes correctly when next chunks are larger (#7)
Browse files Browse the repository at this point in the history
  • Loading branch information
sttk committed Mar 29, 2022
1 parent bc47af6 commit 564f87b
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 7 deletions.
12 changes: 7 additions & 5 deletions index.js
Expand Up @@ -4,28 +4,30 @@ var through = require('through2');
var removeBom = require('remove-bom-buffer');

function removeBomStream() {
var completed = false;
var state = 0; // 0:Not removed, -1:In removing, 1:Already removed
var buffer = Buffer.alloc(0);

return through(onChunk, onFlush);

// Finalizes BOM handling for the stream: flags the work as done, drops the
// reference to the accumulation buffer, and returns `data` after passing it
// through `removeBom` (the `remove-bom-buffer` module required above).
// NOTE(review): this listing looks like a rendered diff without +/- markers,
// so the `completed` and `state` assignments below are presumably the old and
// new versions of the same line - confirm against the actual file.
function removeAndCleanup(data) {
completed = true;
state = 1; // Already removed

// Release the buffered bytes; `onFlush` checks `!buffer` to detect this.
buffer = null;

return removeBom(data);
}

function onChunk(data, enc, cb) {
if (completed) {
if (state === 1) {
return cb(null, data);
}

if (data.length >= 7) {
if (state === 0 /* Not removed */ && data.length >= 7) {
return cb(null, removeAndCleanup(data));
}

state = -1; // In removing

var bufferLength = buffer.length;
var chunkLength = data.length;
var totalLength = bufferLength + chunkLength;
Expand All @@ -39,7 +41,7 @@ function removeBomStream() {
}

function onFlush(cb) {
if (completed || !buffer) {
if (state === 1 /* Already removed */ || !buffer) {
return cb();
}

Expand Down
36 changes: 34 additions & 2 deletions test/index.js
Expand Up @@ -29,7 +29,23 @@ describe('removeBomStream', function () {
);
});

it('removes the BOM from a UTF8 buffer', function (done) {
it('ignores UTF8 buffer without a BOM even if first chunk is shorter than 7 chars but second and subsequent are larger', function(done) {
  // Feed a BOM-less fixture to the stream in two writes - a 5-character head
  // followed by the remainder - and check the text comes out unchanged.
  var fixturePath = path.join(__dirname, './fixtures/test.txt');
  var text = fs.readFileSync(fixturePath, 'utf-8');
  var head = Buffer.from(text.slice(0, 5));
  var tail = Buffer.from(text.slice(5));

  var collected = '';
  var stream = removeBomStream();
  stream.on('data', function(chunk) {
    collected += chunk.toString();
  });

  stream.write(head);
  stream.write(tail);

  // The assertion runs synchronously, right after the two writes.
  expect(collected).toEqual(text);
  done();
});

it('removes the BOM from a UTF8 buffer', function(done) {
var filepath = path.join(__dirname, './fixtures/bom-utf8.txt');

var expected = fs.readFileSync(filepath).slice(3);
Expand Down Expand Up @@ -81,7 +97,23 @@ describe('removeBomStream', function () {
);
});

it('does not remove the BOM from a UTF16BE buffer', function (done) {
it('remove the BOM from a UTF8 buffer even if first chunk is shorter than 7 chars but second and subsequent are larger', function(done) {
  var filepath = path.join(__dirname, './fixtures/bom-utf8.txt');

  // Work on raw bytes rather than the decoded string. The stream compares
  // `data.length` of each Buffer against 7, i.e. it counts bytes, and the
  // UTF-8 BOM alone occupies 3 bytes. Slicing the decoded text at 5
  // characters (BOM + 4 more) therefore produced a first chunk of at least
  // 7 bytes, which is not "shorter than 7" and skipped the buffering path
  // this test is meant to cover.
  var fileContent = fs.readFileSync(filepath);
  var expected = fileContent.slice(3).toString();

  var rmBom = removeBomStream();
  var output = '';
  rmBom.on('data', function(d) {
    output += d.toString();
  });

  // First write is exactly 5 bytes (< 7); the second carries the rest.
  // Assumes the fixture is long enough for the second chunk to be the larger
  // one, as the test title requires - TODO confirm against the fixture.
  rmBom.write(fileContent.slice(0, 5));
  rmBom.write(fileContent.slice(5));

  expect(output).toEqual(expected);
  done();
});

it('does not remove the BOM from a UTF16BE buffer', function(done) {
var filepath = path.join(__dirname, './fixtures/bom-utf16be.txt');

var expected = fs.readFileSync(filepath);
Expand Down

0 comments on commit 564f87b

Please sign in to comment.