Skip to content

Commit

Permalink
Merge pull request #100 from frzsombor/token-patch-2
Browse files Browse the repository at this point in the history
Fix problems with special Unicode characters
  • Loading branch information
Stichoza committed Jun 26, 2018
2 parents fc5d3c7 + 90aacd8 commit 2ad0de0
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 16 deletions.
36 changes: 20 additions & 16 deletions src/Stichoza/GoogleTranslate/Tokens/GoogleTokenGenerator.php
Expand Up @@ -36,16 +36,16 @@ private function TL($a)
$tkk = $this->TKK();
$b = $tkk[0];

for ($d = [], $e = 0, $f = 0; $f < mb_strlen($a, 'UTF-8'); $f++) {
$g = $this->charCodeAt($a, $f);
for ($d = [], $e = 0, $f = 0; $f < $this->JS_length($a); $f++) {
$g = $this->JS_charCodeAt($a, $f);
if (128 > $g) {
$d[$e++] = $g;
} else {
if (2048 > $g) {
$d[$e++] = $g >> 6 | 192;
} else {
if (55296 == ($g & 64512) && $f + 1 < mb_strlen($a, 'UTF-8') && 56320 == ($this->charCodeAt($a, $f + 1) & 64512)) {
$g = 65536 + (($g & 1023) << 10) + ($this->charCodeAt($a, ++$f) & 1023);
if (55296 == ($g & 64512) && $f + 1 < $this->JS_length($a) && 56320 == ($this->JS_charCodeAt($a, $f + 1) & 64512)) {
$g = 65536 + (($g & 1023) << 10) + ($this->JS_charCodeAt($a, ++$f) & 1023);
$d[$e++] = $g >> 18 | 240;
$d[$e++] = $g >> 12 & 63 | 128;
} else {
Expand Down Expand Up @@ -138,23 +138,27 @@ private function unsignedRightShift($a, $b)
}

/**
* Get the Unicode of the character at the specified index in a string.
* Get JS charCodeAt equivalent result with UTF-16 encoding
*
* @param string $str
* @param int $index
*
* @return null|number
* @return number
*/
private function charCodeAt($str, $index)
{
$char = mb_substr($str, $index, 1, 'UTF-8');
if (mb_check_encoding($char, 'UTF-8')) {
$ret = mb_convert_encoding($char, 'UTF-32BE', 'UTF-8');
$result = hexdec(bin2hex($ret));

return $result;
}
private function JS_charCodeAt($str, $index)
{
    // Re-encode the UTF-8 input as UTF-16LE so every UTF-16 code unit
    // occupies exactly two consecutive bytes of the resulting byte string.
    $codeUnits = mb_convert_encoding($str, 'UTF-16LE', 'UTF-8');
    $offset = $index * 2;

    // Little-endian layout: the low byte comes first, the high byte second.
    $lowByte = ord($codeUnits[$offset]);
    $highByte = ord($codeUnits[$offset + 1]);

    return $lowByte + ($highByte << 8);
}

return;
/**
 * Count the UTF-16 code units of a UTF-8 string, mirroring what the
 * JavaScript `length` property reports for the same text.
 *
 * Characters outside the Basic Multilingual Plane are encoded as surrogate
 * pairs in UTF-16 and therefore contribute two units each.
 *
 * @param string $str UTF-8 encoded input
 *
 * @return number Number of 16-bit code units in the UTF-16 form of $str
 */
private function JS_length($str)
{
    $codeUnits = mb_convert_encoding($str, 'UTF-16LE', 'UTF-8');
    $byteCount = strlen($codeUnits);

    // Each UTF-16 code unit is two bytes wide.
    return $byteCount / 2;
}
}
8 changes: 8 additions & 0 deletions tests/TranslationTest.php
Expand Up @@ -19,6 +19,14 @@ public function testTranslationEquality()
$this->assertEquals($resultOne, $resultTwo, 'გამარჯობა');
}

public function testUTF16Translation()
{
    // Input contains an emoji with a skin-tone modifier, i.e. characters
    // outside the Basic Multilingual Plane (surrogate pairs in UTF-16).
    $text = 'yes 👍🏽';

    $viaStaticClient = TranslateClient::translate('en', 'de', $text);
    $viaInstance = $this->tr->setSource('en')->setTarget('de')->translate($text);

    // The static and the fluent instance API must agree with each other.
    // NOTE(review): the third argument is the assertion failure message,
    // not an expected value that gets compared.
    $this->assertEquals($viaStaticClient, $viaInstance, 'ja 👍🏽');
}

public function testArrayTranslation()
{
$this->tr->setSource('en')->setTarget('ka');
Expand Down

0 comments on commit 2ad0de0

Please sign in to comment.