Skip to content

Commit

Permalink
fix: 708 captions multi-byte char fix (#439)
Browse files Browse the repository at this point in the history
  • Loading branch information
wseymour15 committed Oct 12, 2023
1 parent 21e55aa commit ec31749
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 7 deletions.
28 changes: 21 additions & 7 deletions lib/m2ts/caption-stream.js
Expand Up @@ -688,18 +688,32 @@ Cea708Stream.prototype.handleText = function(i, service, options) {
var char;
var charCodeArray;

/**
 * Converts a sequence of bytes into a lowercase hex string, two hex digits
 * per byte (for example, [0xB4, 0x4F] becomes 'b44f').
 *
 * @param {Array<number>|Uint8Array} byteArray
 *        The bytes to encode. Only the low 8 bits of each value are used.
 * @return {string} The concatenated hex digits of every byte, in order.
 */
function toHexString(byteArray) {
  // Borrow Array.prototype.map rather than calling byteArray.map directly:
  // a typed array's own map() stores results back into a typed array, which
  // coerces the hex strings to numbers and corrupts the output.
  return Array.prototype.map.call(byteArray, (byte) => {
    // Left-pad with a zero, then keep the last two digits.
    return ('0' + (byte & 0xFF).toString(16)).slice(-2);
  }).join('');
}

if (isMultiByte) {
charCodeArray = [currentByte, nextByte];
i++;
} else {
charCodeArray = [currentByte];
}

// Use the TextDecoder if one was created for this service
if (service.textDecoder_ && !isExtended) {
char = service.textDecoder_.decode(new Uint8Array(charCodeArray));
} else {
// We assume any multi-byte char without a decoder is unicode.
if (isMultiByte) {
charCodeArray = [currentByte, nextByte];
i++;
const unicode = toHexString(charCodeArray);
// Takes a unicode hex string and creates a single character.
char = String.fromCharCode(parseInt(unicode, 16));
} else {
charCodeArray = [currentByte];
char = get708CharFromCode(extended | currentByte);
}

char = service.textDecoder_.decode(new Uint8Array(charCodeArray));
} else {
char = get708CharFromCode(extended | currentByte);
}

if (win.pendingNewLine && !win.isEmpty()) {
Expand Down
27 changes: 27 additions & 0 deletions test/caption-stream.test.js
Expand Up @@ -3051,6 +3051,33 @@ QUnit.test('Decodes multibyte characters if valid encoding option is provided an
}
});

QUnit.test('Decodes multi-byte characters as unicode if no valid encoding option is provided', function(assert) {
  // Text the single emitted caption is expected to carry.
  var expectedText = '듏낡 ';
  var received = [];
  // Records every caption the stream emits on its 'data' event.
  var collectCaption = function(caption) {
    received.push(caption);
  };

  // SERVICE1 is configured with an empty options object, i.e. no encoding.
  cea708Stream = new m2ts.Cea708Stream({
    captionServices: {
      SERVICE1: {}
    }
  });
  cea708Stream.on('data', collectCaption);

  // Feed the Korean fixture through the stream, then flush service 1.
  cc708Korean.forEach(cea708Stream.push, cea708Stream);
  cea708Stream.flushDisplayed(4721138662, cea708Stream.services[1]);

  assert.equal(received.length, 1, 'parsed single caption correctly');

  // Without an encoding option the service must not have a TextDecoder.
  assert.notOk(cea708Stream.services[1].textDecoder_, 'TextDecoder was not created');
  assert.equal(received[0].text, expectedText, 'parsed multibyte characters correctly');
});

QUnit.test('Creates TextDecoder only if valid encoding value is provided', function(assert) {
var secondCea708Stream;

Expand Down

0 comments on commit ec31749

Please sign in to comment.