Skip to content

Commit

Permalink
59: add support for \uXXXX escapes within string literals
Browse files Browse the repository at this point in the history
This is in response to edn-format/edn#65 .

This is an extension: string literals, as currently documented,
do not specify support for \uXXXX escapes.

  https://github.com/edn-format/edn/tree/a51127aecd318096667ae0dafa25353ecb07c9c3

Notes:

- A Unicode escape must begin with "\u". This is case sensitive: "\U" will
  be rejected.
- "\u" must be followed by exactly four hex digits taken from this set:
  0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F
- The digits are not case sensitive.
- Each such Unicode escape encodes a single 16-bit Java char. Since Java
  uses UTF-16 internally (for historical reasons), code points beyond
  the basic multilingual plane must be written as a pair of Unicode escapes.
  (See also "surrogate pairs".)
  • Loading branch information
bpsm committed Apr 25, 2020
1 parent 89a7e56 commit 2a06a89
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 0 deletions.
17 changes: 17 additions & 0 deletions src/main/java/us/bpsm/edn/parser/ScannerImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,23 @@ private String readStringLiteral(Parseable pbr) throws IOException {
case '\\':
b.append('\\');
break;
case 'u':
// Support for reading unicode escapes within string
// literals is an extension to EDN. It is not currently
// part of the spec described here:
// https://github.com/edn-format/edn
int v = 0;
for (int i = 0; i < 4; i++) {
curr = pbr.read();
int d = Character.digit(curr, 16);
if (d == -1) {
throw new EdnSyntaxException(
"Invalid \\u Unicode escape in string.");
}
v = v * 16 + d;
}
b.append((char)v);
break;
default:
throw new EdnSyntaxException("Unsupported '"+ ((char)curr)
+"' escape in string");
Expand Down
30 changes: 30 additions & 0 deletions src/test/java/us/bpsm/edn/parser/ScannerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.junit.Test;

import us.bpsm.edn.EdnException;
import us.bpsm.edn.EdnSyntaxException;
import us.bpsm.edn.Keyword;
import us.bpsm.edn.Symbol;
import us.bpsm.edn.parser.Parseable;
Expand Down Expand Up @@ -366,6 +367,35 @@ public void unicodeEscapeCharacterLiterals() {
assertEquals(c, s.nextToken(pbr));
}

@Test
public void unicodeEscapesInStringLiterals() {
    // The scanner input is a quoted string literal holding three escapes.
    // Each escape is spelled as a lone backslash followed by 'u' and four
    // hex digits, so the text handed to the scanner contains the escape
    // sequences themselves rather than the characters they denote.
    final String quote = "\"";
    final String backslash = "\\";
    String input = quote
            + backslash + "u0000"
            + backslash + "u1234"
            + backslash + "u0Ff0"
            + quote;

    // The token is expected to hold exactly one char per escape; the third
    // escape uses mixed-case hex digits.
    String decoded = "\u0000\u1234\u0Ff0";
    assertEquals(3, decoded.length());

    Parseable source = Parsers.newParseable(input);
    Scanner lexer = scanner();
    assertEquals(decoded, lexer.nextToken(source));
}

@Test(expected = EdnSyntaxException.class)
public void truncatedUnicodeEscapeInStringLiteral() {
    // Only three hex digits follow the backslash and 'u' before the closing
    // quote, so scanning must fail with a syntax error.
    String input = "\"\\" + "u123\"";
    Scanner lexer = scanner();
    lexer.nextToken(Parsers.newParseable(input));
}

@Test(expected = EdnSyntaxException.class)
public void truncatedInputInUnicodeEscapeInStringLiteral() {
    // The input ends after three hex digits: there is neither a fourth
    // digit nor a closing quote, so scanning must fail with a syntax error.
    String input = "\"\\" + "u123";
    Scanner lexer = scanner();
    lexer.nextToken(Parsers.newParseable(input));
}

@Test(expected = EdnSyntaxException.class)
public void nonDigitInUnicodeEscapeInStringLiteral() {
    // The fourth position of the escape holds '?', which is not a hex
    // digit, so scanning must fail with a syntax error.
    String input = "\"\\" + "u123?\"";
    Scanner lexer = scanner();
    lexer.nextToken(Parsers.newParseable(input));
}

@Test
public void simpleStringWithLinebreak() {
assertEquals("\n", scan("\"\n\""));
Expand Down

0 comments on commit 2a06a89

Please sign in to comment.