Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: 708 captions multi-byte char fix #439

Merged
merged 3 commits into from
Oct 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 21 additions & 7 deletions lib/m2ts/caption-stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -688,18 +688,32 @@ Cea708Stream.prototype.handleText = function(i, service, options) {
var char;
var charCodeArray;

// Builds a hex string from an array of byte values. Each entry is masked
// to its low 8 bits and written as exactly two lowercase hex digits, and
// the pairs are concatenated in array order.
function toHexString(byteArray) {
  var hexPairs = byteArray.map(function(value) {
    var digits = (value & 0xFF).toString(16);

    // Left-pad single-digit values so every byte contributes two characters.
    return digits.length < 2 ? '0' + digits : digits;
  });

  return hexPairs.join('');
}

if (isMultiByte) {
charCodeArray = [currentByte, nextByte];
i++;
} else {
charCodeArray = [currentByte];
}

// Use the TextDecoder if one was created for this service
if (service.textDecoder_ && !isExtended) {
wseymour15 marked this conversation as resolved.
Show resolved Hide resolved
char = service.textDecoder_.decode(new Uint8Array(charCodeArray));
} else {
// We assume any multi-byte char without a decoder is unicode.
if (isMultiByte) {
charCodeArray = [currentByte, nextByte];
i++;
const unicode = toHexString(charCodeArray);
// Takes a unicode hex string and creates a single character.
char = String.fromCharCode(parseInt(unicode, 16));
} else {
charCodeArray = [currentByte];
char = get708CharFromCode(extended | currentByte);
}

char = service.textDecoder_.decode(new Uint8Array(charCodeArray));
} else {
char = get708CharFromCode(extended | currentByte);
}

if (win.pendingNewLine && !win.isEmpty()) {
Expand Down
27 changes: 27 additions & 0 deletions test/caption-stream.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -3051,6 +3051,33 @@ QUnit.test('Decodes multibyte characters if valid encoding option is provided an
}
});

QUnit.test('Decodes multi-byte characters as unicode if no valid encoding option is provided', function(assert) {
  // Every caption emitted through the stream's 'data' event ends up here.
  var received = [];

  // SERVICE1 is configured with an empty options object, i.e. no encoding.
  cea708Stream = new m2ts.Cea708Stream({
    captionServices: {
      SERVICE1: {}
    }
  });

  cea708Stream.on('data', function(caption) {
    received.push(caption);
  });

  // Push each entry of cc708Korean through the stream, then flush service 1.
  cc708Korean.forEach(cea708Stream.push, cea708Stream);
  cea708Stream.flushDisplayed(4721138662, cea708Stream.services[1]);

  assert.equal(received.length, 1, 'parsed single caption correctly');
  assert.notOk(cea708Stream.services[1].textDecoder_, 'TextDecoder was not created');
  assert.equal(received[0].text, '듏낡 ', 'parsed multibyte characters correctly');
});

QUnit.test('Creates TextDecoder only if valid encoding value is provided', function(assert) {
var secondCea708Stream;

Expand Down