Skip to content

Commit

Permalink
minor #454 Conform to IDNA version 15.1.0 revision 31 (TRowbotham)
Browse files Browse the repository at this point in the history
This PR was merged into the 1.x branch.

Discussion
----------

Conform to IDNA version 15.1.0 revision 31

This adds the necessary changes to conform to [IDNA version 15.1.0 revision 31](https://www.unicode.org/reports/tr46/tr46-31.html#Modifications).

Notable Changes

* Transitional processing (the default in PHP) is now deprecated. No deprecation notices were added as PHP does not yet report a deprecation notice in this case.
* An error is no longer recorded for characters with a status of disallowed.
* When performing code point mapping with transitional processing enabled, the code point U+1E9E capital sharp s (ẞ) is replaced by the string “ss”.
* A new internal option "IgnoreInvalidPunycode" was added, which is supposed to allow for an all-ASCII fast-path; however, the official tests do not test this configuration option.

Commits
-------

385d1d5 Conform to IDNA version 15.1.0 revision 31
  • Loading branch information
nicolas-grekas committed Apr 12, 2024
2 parents 6d5a7aa + 385d1d5 commit 1100c07
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 5 deletions.
20 changes: 15 additions & 5 deletions src/Intl/Idn/Idn.php
Expand Up @@ -280,10 +280,6 @@ private static function mapCodePoints($input, array $options, Info $info)

switch ($data['status']) {
case 'disallowed':
$info->errors |= self::ERROR_DISALLOWED;

// no break.

case 'valid':
$str .= mb_chr($codePoint, 'utf-8');

Expand All @@ -294,7 +290,7 @@ private static function mapCodePoints($input, array $options, Info $info)
break;

case 'mapped':
$str .= $data['mapping'];
$str .= $transitional && 0x1E9E === $codePoint ? 'ss' : $data['mapping'];

break;

Expand Down Expand Up @@ -346,6 +342,18 @@ private static function process($domain, array $options, Info $info)
$validationOptions = $options;

if ('xn--' === substr($label, 0, 4)) {
// Step 4.1. If the label contains any non-ASCII code point (i.e., a code point greater than U+007F),
// record that there was an error, and continue with the next label.
if (preg_match('/[^\x00-\x7F]/', $label)) {
$info->errors |= self::ERROR_PUNYCODE;

continue;
}

// Step 4.2. Attempt to convert the rest of the label to Unicode according to Punycode [RFC3492]. If
// that conversion fails, record that there was an error, and continue
// with the next label. Otherwise replace the original label in the string by the results of the
// conversion.
try {
$label = self::punycodeDecode(substr($label, 4));
} catch (\Exception $e) {
Expand Down Expand Up @@ -516,6 +524,8 @@ private static function validateLabel($label, Info $info, array $options, $canBe
if ('-' === substr($label, -1, 1)) {
$info->errors |= self::ERROR_TRAILING_HYPHEN;
}
} elseif ('xn--' === substr($label, 0, 4)) {
$info->errors |= self::ERROR_PUNYCODE;
}

// Step 4. The label must not contain a U+002E (.) FULL STOP.
Expand Down
49 changes: 49 additions & 0 deletions tests/Intl/Idn/IdnTest.php
Expand Up @@ -284,6 +284,55 @@ public function testEncodePhp53($decoded, $encoded)
$this->assertSame($encoded, $result);
}

/**
 * IDNA 15.1.0 revision 31
 *
 * Covers the change in "Section 4 Processing step 1. Map", which conditionally maps U+1E9E capital sharp s to
 * "ss" when Transitional_Processing is used.
 *
 * @dataProvider captialSharpSProvider
 */
public function testCapitalSharpSProcessing($input, $expected, $flags)
{
    // The conversion outcome is read from the by-reference info array rather than from the return value.
    idn_to_utf8($input, $flags, \INTL_IDNA_VARIANT_UTS46, $idnInfo);

    $actual = $idnInfo['result'];
    $this->assertSame($expected, $actual);
}

/**
 * IDNA 15.1.0 revision 31
 *
 * Covers the additional validity check in "Section 4.1 Validity Criteria Processing step 4", which is used to
 * disallow labels that do not round trip.
 */
public function testLabelsThatDoNotRoundTripAreDisallowed()
{
    // Decode first, then feed the decoded result back through the to-ASCII conversion.
    idn_to_utf8('xn--xn---epa.', \IDNA_DEFAULT, \INTL_IDNA_VARIANT_UTS46, $toUnicodeInfo);
    idn_to_ascii($toUnicodeInfo['result'], \IDNA_DEFAULT, \INTL_IDNA_VARIANT_UTS46, $toAsciiInfo);

    // Only the punycode error bit matters here; any other recorded error bits are masked out.
    $punycodeBit = \IDNA_ERROR_PUNYCODE & $toAsciiInfo['errors'];
    $this->assertSame(\IDNA_ERROR_PUNYCODE, $punycodeBit);
}

/**
 * IDNA 15.1.0 revision 31
 *
 * Covers the additional condition in "Section 4 Processing step 4.1": a label that starts with "xn--" and
 * contains a non-ASCII code point records an error, and processing continues with the next label.
 */
public function testLabelStartingWithPunycodePrefixWithNonAsciiCharacterRecordsErrorAndIsSkipped()
{
    \idn_to_utf8('xn--🌈.xn--fa-hia.de', \IDNA_DEFAULT, \INTL_IDNA_VARIANT_UTS46, $idnInfo);

    // Only the punycode error bit matters here; any other recorded error bits are masked out.
    $punycodeBit = \IDNA_ERROR_PUNYCODE & $idnInfo['errors'];
    $this->assertSame(\IDNA_ERROR_PUNYCODE, $punycodeBit);

    // The offending first label is expected unchanged, while the following label is still decoded.
    $this->assertSame('xn--🌈.faß.de', $idnInfo['result']);
}

/**
 * Data sets for testCapitalSharpSProcessing(): [input, expected result, flags].
 *
 * The inputs deliberately contain U+1E9E LATIN CAPITAL LETTER SHARP S (ẞ) and not the lowercase U+00DF (ß).
 * Only the capital form exercises the conditional "ss" mapping for transitional processing; the lowercase
 * form would produce the same expected results without touching that code path.
 *
 * @return array<int, array{0: string, 1: string, 2: int}>
 */
public static function captialSharpSProvider()
{
    return [
        // Transitional processing (the default): U+1E9E is replaced by "ss".
        ['Faẞ.de', 'fass.de', \IDNA_DEFAULT],
        // Nontransitional processing: U+1E9E is expected to map to lowercase sharp s (U+00DF).
        ['Faẞ.de', 'faß.de', \IDNA_NONTRANSITIONAL_TO_UNICODE],
    ];
}


public static function domainNamesProvider()
{
return [
Expand Down

0 comments on commit 1100c07

Please sign in to comment.