From ec31749b8aec22d52bba762176a5b9b1fcaa82ee Mon Sep 17 00:00:00 2001
From: Walter Seymour
Date: Thu, 12 Oct 2023 11:53:22 -0500
Subject: [PATCH] fix: 708 captions multi-byte char fix (#439)

---
Review notes (free text in this section is ignored by `git am`):

* charCodeArray is now built, and `i` is advanced past the second byte of a
  multi-byte pair, before the TextDecoder check, so both branches below see
  the same bytes.
* When the TextDecoder branch is not taken (no textDecoder_ on the service,
  or isExtended is set), a multi-byte pair is hex-encoded in byte order and
  turned into a single character with String.fromCharCode; single-byte codes
  still go through get708CharFromCode.
* Line breaks, indentation and blank context lines were restored from a
  whitespace-collapsed copy of this patch. Leading whitespace is inferred
  (2-space indent); if context fails to match, retry with
  `git apply --ignore-whitespace`.

 lib/m2ts/caption-stream.js  | 28 +++++++++++++++++++++-------
 test/caption-stream.test.js | 27 +++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/lib/m2ts/caption-stream.js b/lib/m2ts/caption-stream.js
index ef633349..286c8ad1 100644
--- a/lib/m2ts/caption-stream.js
+++ b/lib/m2ts/caption-stream.js
@@ -688,18 +688,32 @@ Cea708Stream.prototype.handleText = function(i, service, options) {
   var char;
   var charCodeArray;
 
+  // Converts an array of bytes to a unicode hex string.
+  function toHexString(byteArray) {
+    return byteArray.map((byte) => {
+      return ('0' + (byte & 0xFF).toString(16)).slice(-2);
+    }).join('');
+  };
+
+  if (isMultiByte) {
+    charCodeArray = [currentByte, nextByte];
+    i++;
+  } else {
+    charCodeArray = [currentByte];
+  }
+
   // Use the TextDecoder if one was created for this service
   if (service.textDecoder_ && !isExtended) {
+    char = service.textDecoder_.decode(new Uint8Array(charCodeArray));
+  } else {
+    // We assume any multi-byte char without a decoder is unicode.
     if (isMultiByte) {
-      charCodeArray = [currentByte, nextByte];
-      i++;
+      const unicode = toHexString(charCodeArray);
+      // Takes a unicode hex string and creates a single character.
+      char = String.fromCharCode(parseInt(unicode, 16));
     } else {
-      charCodeArray = [currentByte];
+      char = get708CharFromCode(extended | currentByte);
     }
-
-    char = service.textDecoder_.decode(new Uint8Array(charCodeArray));
-  } else {
-    char = get708CharFromCode(extended | currentByte);
   }
 
   if (win.pendingNewLine && !win.isEmpty()) {
diff --git a/test/caption-stream.test.js b/test/caption-stream.test.js
index b60041a8..f0821a82 100644
--- a/test/caption-stream.test.js
+++ b/test/caption-stream.test.js
@@ -3051,6 +3051,33 @@ QUnit.test('Decodes multibyte characters if valid encoding option is provided an
   }
 });
 
+QUnit.test('Decodes multi-byte characters as unicode if no valid encoding option is provided', function(assert) {
+  var captions = [];
+
+  cea708Stream = new m2ts.Cea708Stream({
+    captionServices: {
+      SERVICE1: {}
+    }
+  });
+
+  cea708Stream.on('data', function(caption) {
+    captions.push(caption);
+  });
+
+  cc708Korean.forEach(cea708Stream.push, cea708Stream);
+
+  cea708Stream.flushDisplayed(4721138662, cea708Stream.services[1]);
+
+  assert.equal(captions.length, 1, 'parsed single caption correctly');
+
+  assert.notOk(cea708Stream.services[1].textDecoder_, 'TextDecoder was not created');
+  assert.equal(
+    captions[0].text,
+    '듏낡 ',
+    'parsed multibyte characters correctly'
+  );
+});
+
 QUnit.test('Creates TextDecoder only if valid encoding value is provided', function(assert) {
   var secondCea708Stream;
 