From 37ccdcc151db693e58b4c411b64bc09a25edf406 Mon Sep 17 00:00:00 2001 From: Kyle Milloy Date: Sat, 16 Dec 2023 16:09:18 -0700 Subject: [PATCH 1/4] add ability to extract keys --- .phpunit.result.cache | 1 + src/GoogleTranslate.php | 105 ++++++++++++++++++++++++++++++++++---- tests/TranslationTest.php | 49 ++++++++++++++++++ 3 files changed, 144 insertions(+), 11 deletions(-) create mode 100644 .phpunit.result.cache diff --git a/.phpunit.result.cache b/.phpunit.result.cache new file mode 100644 index 0000000..f556d59 --- /dev/null +++ b/.phpunit.result.cache @@ -0,0 +1 @@ +{"version":1,"defects":{"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testExtraction":3,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testExtract":3,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testTranslationKeyExtraction":3},"times":{"Stichoza\\GoogleTranslate\\Tests\\ExceptionTest::testRateLimitException":0.457,"Stichoza\\GoogleTranslate\\Tests\\ExceptionTest::testRateLimitCaptchaException":0.33,"Stichoza\\GoogleTranslate\\Tests\\ExceptionTest::testLargeTextException":0.312,"Stichoza\\GoogleTranslate\\Tests\\ExceptionTest::testTranslationRequestException":0.328,"Stichoza\\GoogleTranslate\\Tests\\ExceptionTest::testTranslationDecodingException":0.33,"Stichoza\\GoogleTranslate\\Tests\\ExceptionTest::testInheritanceForUnexpectedValueException":0.301,"Stichoza\\GoogleTranslate\\Tests\\ExceptionTest::testInheritanceForErrorException":0.322,"Stichoza\\GoogleTranslate\\Tests\\LanguageDetectionTest::testSingleWord":0.24,"Stichoza\\GoogleTranslate\\Tests\\LanguageDetectionTest::testSingleSentence":0.208,"Stichoza\\GoogleTranslate\\Tests\\LanguageDetectionTest::testMultipleSentence":0.283,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testTranslation":0.161,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testTranslationEquality":0.304,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testNewerLanguageTranslation":0.147,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testUTF16Translation":0.158,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testLargeTextTranslation":0.174,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testRawResponse":0.145,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testExtraction":0.005,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testReplacement":0,"Stichoza\\GoogleTranslate\\Tests\\UtilityTest::testIsValidLocale":0,"Stichoza\\GoogleTranslate\\Tests\\UtilityTest::testSetOptions":0.3,"Stichoza\\GoogleTranslate\\Tests\\UtilityTest::testSetUrl":0.202,"Stichoza\\GoogleTranslate\\Tests\\UtilityTest::testSetClient":0.001,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testGetReplacements":0,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testExtract":0,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testInject":0,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testTranslationKeyExtraction":0.155,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testGetEmptyReplacements":0,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testEmptyExtract":0,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testEmptyInject":0}} \ No newline at end of file diff --git a/src/GoogleTranslate.php b/src/GoogleTranslate.php index f45fd67..ee65d6b 100644 --- a/src/GoogleTranslate.php +++ b/src/GoogleTranslate.php @@ -37,6 +37,11 @@ class GoogleTranslate */ protected ?string $target; + /* + * @var string|null Regex pattern to match replaceable parts in a string, defualts to "words" + */ + protected string $pattern = '/:(\w+)/'; + /** * @var string|null Last detected source language. */ @@ -112,7 +117,7 @@ class GoogleTranslate public function __construct(string $target = 'en', string $source = null, array $options = [], TokenProviderInterface $tokenProvider = null) { $this->client = new Client(); - $this->setTokenProvider($tokenProvider ?? new GoogleTokenGenerator) + $this->setTokenProvider($tokenProvider ?? new GoogleTokenGenerator()) ->setOptions($options) // Options are already set in client constructor tho. ->setSource($source) ->setTarget($target); @@ -190,6 +195,18 @@ public function setTokenProvider(TokenProviderInterface $tokenProvider): self return $this; } + /** + * Set the regex pattern to match replaceable parts in a string + * + * @param string $pattern + * @return self + */ + public function setPattern(string $pattern): self + { + $this->pattern = $pattern; + return $this; + } + /** * Get last detected source language * @@ -216,8 +233,8 @@ public function getLastDetectedSource(): ?string */ public static function trans(string $string, string $target = 'en', string $source = null, array $options = [], TokenProviderInterface $tokenProvider = null): ?string { - return (new self) - ->setTokenProvider($tokenProvider ?? new GoogleTokenGenerator) + return (new self()) + ->setTokenProvider($tokenProvider ?? new GoogleTokenGenerator()) ->setOptions($options) // Options are already set in client constructor tho. ->setSource($source) ->setTarget($target) @@ -244,7 +261,11 @@ public function translate(string $string): ?string return $string; } - $responseArray = $this->getResponse($string); + // Extract replaceable keywords from string and transform to array for use later + $replacements = $this->getReplacements($string); + + // Reaplce replaceable keywords with ${\d} for replacement later + $responseArray = $this->getResponse($this->extract($string)); // Check if translation exists if (empty($responseArray[0])) { @@ -278,18 +299,80 @@ public function translate(string $string): ?string } // The response sometime can be a translated string. + $output = ''; if (is_string($responseArray)) { - return $responseArray; - } - - if (is_array($responseArray[0])) { - return (string) array_reduce($responseArray[0], static function ($carry, $item) { + $output = $this->inject($responseArray, $replacements); + } elseif (is_array($responseArray[0])) { + $output = (string) $this->inject(array_reduce($responseArray[0], static function ($carry, $item) { $carry .= $item[0]; return $carry; - }); + }), $replacements); + } else { + $output = (string) $this->inject($responseArray[0], $replacements); } - return (string) $responseArray[0]; + return $this->inject($this->sanitize($output), $replacements); + } + + /** + * Extract replaceable keywords from string using the supplied pattern + * + * @param string $string + * @return string + */ + public function extract(string $string): string + { + return preg_replace_callback( + $this->pattern, + function ($matches) { + static $index = -1; + + $index++; + + return '${' . $index . '}'; + }, + $string + ); + } + + /** + * Inject the replacements back into the translated string + * + * @param string $string + * @param array $replacements + * @return string + */ + public function inject(string $string, array $replacements): string + { + return preg_replace_callback( + '/\${(\d+)}/', + fn($matches) => ':' . $replacements[$matches[1]], + $string + ); + } + + /** + * Extract an array of replaceable parts to be injected into the translated string + * at a later time + * + * @return array + */ + public function getReplacements(string $string): array + { + $matches = []; + preg_match_all($this->pattern, $string, $matches); + return $matches[1]; + } + + /** + * Cleans up weird spaces returned from Google Translate. + * + * @param string $string + * @return string + */ + protected function sanitize(string $string): string + { + return preg_replace('/\xc2\xa0/', ' ', $string); } /** diff --git a/tests/TranslationTest.php b/tests/TranslationTest.php index 7b9ef31..06194d6 100644 --- a/tests/TranslationTest.php +++ b/tests/TranslationTest.php @@ -29,6 +29,13 @@ public function testTranslationEquality(): void $this->assertEqualsIgnoringCase($resultOne, $resultTwo, 'Static and instance methods should return same result.'); } + public function testTranslationKeyExtraction(): void + { + $result = $this->tr->setSource('en')->setTarget('fr')->translate('Hello :name'); + + $this->assertEquals('Bonjour :name', $result, 'Translation should be correct with proper key extraction.'); + } + public function testNewerLanguageTranslation(): void { $result = $this->tr->setSource('en')->setTarget('tk')->translate('Hello'); @@ -60,4 +67,46 @@ public function testRawResponse(): void $this->assertIsArray($rawResult, 'Method getResponse() should return an array'); } + + public function testGetReplacements(): void + { + $replacements = $this->tr->getReplacements('Hello :name are you :some_greeting?'); + + $this->assertEquals(['name', 'some_greeting'], $replacements, 'Replacements should be extracted from string'); + } + + public function testGetEmptyReplacements(): void + { + $replacements = $this->tr->getReplacements('Hello'); + + $this->assertEquals([], $replacements, 'Replacements should be empty'); + } + + public function testExtract(): void + { + $extracted = $this->tr->extract('Hello :name are you :some_greeting?'); + + $this->assertEquals('Hello ${0} are you ${1}?', $extracted, 'Extraction should change strings to placeholder tokens'); + } + + public function testEmptyExtract(): void + { + $extracted = $this->tr->extract('Hello'); + + $this->assertEquals('Hello', $extracted, 'Extraction should not change strings'); + } + + public function testInject(): void + { + $replaced = $this->tr->inject('Hello ${0} are you ${1}?', ['name', 'some_greeting']); + + $this->assertEquals('Hello :name are you :some_greeting?', $replaced, 'Replacement should change placeholder tokens to strings'); + } + + public function testEmptyInject(): void + { + $replaced = $this->tr->inject('Hello', []); + + $this->assertEquals('Hello', $replaced, 'Replacement should not change strings'); + } } From f897ac039c7dd7bc3672b737d8b48c8c5d070b66 Mon Sep 17 00:00:00 2001 From: Kyle Milloy Date: Sat, 16 Dec 2023 16:10:00 -0700 Subject: [PATCH 2/4] remove cache --- .phpunit.result.cache | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .phpunit.result.cache diff --git a/.phpunit.result.cache b/.phpunit.result.cache deleted file mode 100644 index f556d59..0000000 --- a/.phpunit.result.cache +++ /dev/null @@ -1 +0,0 @@ -{"version":1,"defects":{"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testExtraction":3,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testExtract":3,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testTranslationKeyExtraction":3},"times":{"Stichoza\\GoogleTranslate\\Tests\\ExceptionTest::testRateLimitException":0.457,"Stichoza\\GoogleTranslate\\Tests\\ExceptionTest::testRateLimitCaptchaException":0.33,"Stichoza\\GoogleTranslate\\Tests\\ExceptionTest::testLargeTextException":0.312,"Stichoza\\GoogleTranslate\\Tests\\ExceptionTest::testTranslationRequestException":0.328,"Stichoza\\GoogleTranslate\\Tests\\ExceptionTest::testTranslationDecodingException":0.33,"Stichoza\\GoogleTranslate\\Tests\\ExceptionTest::testInheritanceForUnexpectedValueException":0.301,"Stichoza\\GoogleTranslate\\Tests\\ExceptionTest::testInheritanceForErrorException":0.322,"Stichoza\\GoogleTranslate\\Tests\\LanguageDetectionTest::testSingleWord":0.24,"Stichoza\\GoogleTranslate\\Tests\\LanguageDetectionTest::testSingleSentence":0.208,"Stichoza\\GoogleTranslate\\Tests\\LanguageDetectionTest::testMultipleSentence":0.283,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testTranslation":0.161,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testTranslationEquality":0.304,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testNewerLanguageTranslation":0.147,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testUTF16Translation":0.158,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testLargeTextTranslation":0.174,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testRawResponse":0.145,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testExtraction":0.005,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testReplacement":0,"Stichoza\\GoogleTranslate\\Tests\\UtilityTest::testIsValidLocale":0,"Stichoza\\GoogleTranslate\\Tests\\UtilityTest::testSetOptions":0.3,"Stichoza\\GoogleTranslate\\Tests\\UtilityTest::testSetUrl":0.202,"Stichoza\\GoogleTranslate\\Tests\\UtilityTest::testSetClient":0.001,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testGetReplacements":0,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testExtract":0,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testInject":0,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testTranslationKeyExtraction":0.155,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testGetEmptyReplacements":0,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testEmptyExtract":0,"Stichoza\\GoogleTranslate\\Tests\\TranslationTest::testEmptyInject":0}} \ No newline at end of file From 2559e93bc3ce52c51b7b4122c3b1a3563466d2e3 Mon Sep 17 00:00:00 2001 From: Kyle Milloy Date: Sun, 17 Dec 2023 13:43:24 -0700 Subject: [PATCH 3/4] respond to feedback and update tests accordingly --- src/GoogleTranslate.php | 79 +++++++++++++++++++++++++-------------- tests/TranslationTest.php | 60 ++++++++--------------------- 2 files changed, 66 insertions(+), 73 deletions(-) diff --git a/src/GoogleTranslate.php b/src/GoogleTranslate.php index ee65d6b..00937b9 100644 --- a/src/GoogleTranslate.php +++ b/src/GoogleTranslate.php @@ -40,7 +40,7 @@ class GoogleTranslate /* * @var string|null Regex pattern to match replaceable parts in a string, defualts to "words" */ - protected string $pattern = '/:(\w+)/'; + protected ?string $pattern; /** * @var string|null Last detected source language. @@ -113,14 +113,16 @@ class GoogleTranslate * @param string|null $source Source language code (null for automatic language detection) * @param array $options HTTP client configuration options * @param TokenProviderInterface|null $tokenProvider + * @param string|null $pattern Regex pattern to match replaceable parts in a string */ - public function __construct(string $target = 'en', string $source = null, array $options = [], TokenProviderInterface $tokenProvider = null) + public function __construct(string $target = 'en', string $source = null, array $options = [], TokenProviderInterface $tokenProvider = null, ?string $pattern = null) { $this->client = new Client(); $this->setTokenProvider($tokenProvider ?? new GoogleTokenGenerator()) ->setOptions($options) // Options are already set in client constructor tho. ->setSource($source) - ->setTarget($target); + ->setTarget($target) + ->preserveParameters($pattern); } /** @@ -195,18 +197,6 @@ public function setTokenProvider(TokenProviderInterface $tokenProvider): self return $this; } - /** - * Set the regex pattern to match replaceable parts in a string - * - * @param string $pattern - * @return self - */ - public function setPattern(string $pattern): self - { - $this->pattern = $pattern; - return $this; - } - /** * Get last detected source language * @@ -225,19 +215,21 @@ public function getLastDetectedSource(): ?string * @param string|null $source Source language code (null for automatic language detection) * @param array $options HTTP client configuration options * @param TokenProviderInterface|null $tokenProvider Custom token provider + * @param string|null $pattern Regex pattern to match replaceable parts in a string * @return null|string * @throws LargeTextException If translation text is too large * @throws RateLimitException If Google has blocked you for excessive requests * @throws TranslationRequestException If any other HTTP related error occurs * @throws TranslationDecodingException If response JSON cannot be decoded */ - public static function trans(string $string, string $target = 'en', string $source = null, array $options = [], TokenProviderInterface $tokenProvider = null): ?string + public static function trans(string $string, string $target = 'en', string $source = null, array $options = [], TokenProviderInterface $tokenProvider = null, ?string $pattern = null): ?string { return (new self()) ->setTokenProvider($tokenProvider ?? new GoogleTokenGenerator()) ->setOptions($options) // Options are already set in client constructor tho. ->setSource($source) ->setTarget($target) + ->preserveParameters($pattern) ->translate($string); } @@ -262,10 +254,10 @@ public function translate(string $string): ?string } // Extract replaceable keywords from string and transform to array for use later - $replacements = $this->getReplacements($string); + $replacements = $this->getParameters($string); - // Reaplce replaceable keywords with ${\d} for replacement later - $responseArray = $this->getResponse($this->extract($string)); + // Replace replaceable keywords with ${\d} for replacement later + $responseArray = $this->getResponse($this->extractParameters($string)); // Check if translation exists if (empty($responseArray[0])) { @@ -301,17 +293,32 @@ public function translate(string $string): ?string // The response sometime can be a translated string. $output = ''; if (is_string($responseArray)) { - $output = $this->inject($responseArray, $replacements); + $output = $responseArray; } elseif (is_array($responseArray[0])) { - $output = (string) $this->inject(array_reduce($responseArray[0], static function ($carry, $item) { + $output = (string) array_reduce($responseArray[0], static function ($carry, $item) { $carry .= $item[0]; return $carry; - }), $replacements); + }); } else { - $output = (string) $this->inject($responseArray[0], $replacements); + $output = (string) $responseArray[0]; } - return $this->inject($this->sanitize($output), $replacements); + return $this->injectParameters($this->sanitize($output), $replacements); + } + + /** + * Set a custom pattern for extracting replaceable keywords from the string, + * default to extracting words prefixed with a colon + * + * @example (e.g. "Hello :name" will extract "name") + * + * @param string|null $pattern + * @return self + */ + public function preserveParameters(?string $pattern = '/:(\w+)/'): self + { + $this->pattern = $pattern; + return $this; } /** @@ -320,8 +327,14 @@ public function translate(string $string): ?string * @param string $string * @return string */ - public function extract(string $string): string + protected function extractParameters(string $string): string { + // If no pattern, return string as is + if (!$this->pattern) { + return $string; + } + + // Replace all matches of our pattern with ${\d} for replacement later return preg_replace_callback( $this->pattern, function ($matches) { @@ -342,11 +355,11 @@ function ($matches) { * @param array $replacements * @return string */ - public function inject(string $string, array $replacements): string + protected function injectParameters(string $string, array $replacements): string { return preg_replace_callback( '/\${(\d+)}/', - fn($matches) => ':' . $replacements[$matches[1]], + fn($matches) => $replacements[$matches[1]], $string ); } @@ -357,11 +370,19 @@ public function inject(string $string, array $replacements): string * * @return array */ - public function getReplacements(string $string): array + protected function getParameters(string $string): array { $matches = []; + + // If no pattern is set, return empty array + if (!$this->pattern) { + return $matches; + } + + // Find all matches for the pattern in our string preg_match_all($this->pattern, $string, $matches); - return $matches[1]; + + return $matches[0]; } /** diff --git a/tests/TranslationTest.php b/tests/TranslationTest.php index 06194d6..2a22a22 100644 --- a/tests/TranslationTest.php +++ b/tests/TranslationTest.php @@ -31,9 +31,23 @@ public function testTranslationEquality(): void public function testTranslationKeyExtraction(): void { - $result = $this->tr->setSource('en')->setTarget('fr')->translate('Hello :name'); + $result = $this->tr->setSource('en')->setTarget('fr')->preserveParameters()->translate('Hello :name, how are :type_of_greeting?'); - $this->assertEquals('Bonjour :name', $result, 'Translation should be correct with proper key extraction.'); + $this->assertEquals('Bonjour :name, comment vont :type_of_greeting ?', $result, 'Translation should be correct with proper key extraction.'); + } + + public function testCanIgnoreTranslationKeyExtraction() + { + $result = $this->tr->setSource('en')->setTarget('fr')->translate('Hello :name how are :greeting?'); + + $this->assertEquals('Bonjour :nom, comment allez-vous :salut ?', $result, 'Translation should be correct and ignores key extraction if not set.'); + } + + public function testCanCustomizeExtractionPattern() + { + $result = $this->tr->setSource('en')->setTarget('fr')->preserveParameters('/\{\{([^}]+)\}\}/')->translate('Hello {{name}}, how are {{type_of_greeting}}?'); + + $this->assertEquals('Bonjour {{name}}, comment vont {{type_of_greeting}} ?', $result, 'Translation should be correct and ignores key extraction if not set.'); } public function testNewerLanguageTranslation(): void @@ -67,46 +81,4 @@ public function testRawResponse(): void $this->assertIsArray($rawResult, 'Method getResponse() should return an array'); } - - public function testGetReplacements(): void - { - $replacements = $this->tr->getReplacements('Hello :name are you :some_greeting?'); - - $this->assertEquals(['name', 'some_greeting'], $replacements, 'Replacements should be extracted from string'); - } - - public function testGetEmptyReplacements(): void - { - $replacements = $this->tr->getReplacements('Hello'); - - $this->assertEquals([], $replacements, 'Replacements should be empty'); - } - - public function testExtract(): void - { - $extracted = $this->tr->extract('Hello :name are you :some_greeting?'); - - $this->assertEquals('Hello ${0} are you ${1}?', $extracted, 'Extraction should change strings to placeholder tokens'); - } - - public function testEmptyExtract(): void - { - $extracted = $this->tr->extract('Hello'); - - $this->assertEquals('Hello', $extracted, 'Extraction should not change strings'); - } - - public function testInject(): void - { - $replaced = $this->tr->inject('Hello ${0} are you ${1}?', ['name', 'some_greeting']); - - $this->assertEquals('Hello :name are you :some_greeting?', $replaced, 'Replacement should change placeholder tokens to strings'); - } - - public function testEmptyInject(): void - { - $replaced = $this->tr->inject('Hello', []); - - $this->assertEquals('Hello', $replaced, 'Replacement should not change strings'); - } } From 0d2addc3a13d4f0a0d87c99f65962f7086a51057 Mon Sep 17 00:00:00 2001 From: Kyle Milloy Date: Sun, 17 Dec 2023 13:47:03 -0700 Subject: [PATCH 4/4] delint --- src/GoogleTranslate.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/GoogleTranslate.php b/src/GoogleTranslate.php index 00937b9..85ea5b2 100644 --- a/src/GoogleTranslate.php +++ b/src/GoogleTranslate.php @@ -118,7 +118,7 @@ class GoogleTranslate public function __construct(string $target = 'en', string $source = null, array $options = [], TokenProviderInterface $tokenProvider = null, ?string $pattern = null) { $this->client = new Client(); - $this->setTokenProvider($tokenProvider ?? new GoogleTokenGenerator()) + $this->setTokenProvider($tokenProvider ?? new GoogleTokenGenerator) ->setOptions($options) // Options are already set in client constructor tho. ->setSource($source) ->setTarget($target) @@ -224,8 +224,8 @@ public function getLastDetectedSource(): ?string */ public static function trans(string $string, string $target = 'en', string $source = null, array $options = [], TokenProviderInterface $tokenProvider = null, ?string $pattern = null): ?string { - return (new self()) - ->setTokenProvider($tokenProvider ?? new GoogleTokenGenerator()) + return (new self) + ->setTokenProvider($tokenProvider ?? new GoogleTokenGenerator) ->setOptions($options) // Options are already set in client constructor tho. ->setSource($source) ->setTarget($target)